1use crate::arch::asm;
6use crate::core_arch::{simd::*, x86::*};
7use crate::intrinsics::simd::*;
8
9#[cfg(test)]
10use stdarch_test::assert_instr;
11
12#[allow(improper_ctypes)]
13extern "C" {
14 #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.128"]
15 fn cvtne2ps2bf16(a: f32x4, b: f32x4) -> i16x8;
16 #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.256"]
17 fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16;
18 #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"]
19 fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32;
20 #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"]
21 fn cvtneps2bf16_256(a: f32x8) -> i16x8;
22 #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
23 fn cvtneps2bf16_512(a: f32x16) -> i16x16;
24 #[link_name = "llvm.x86.avx512bf16.dpbf16ps.128"]
25 fn dpbf16ps(a: f32x4, b: i32x4, c: i32x4) -> f32x4;
26 #[link_name = "llvm.x86.avx512bf16.dpbf16ps.256"]
27 fn dpbf16ps_256(a: f32x8, b: i32x8, c: i32x8) -> f32x8;
28 #[link_name = "llvm.x86.avx512bf16.dpbf16ps.512"]
29 fn dpbf16ps_512(a: f32x16, b: i32x16, c: i32x16) -> f32x16;
30}
31
32#[inline]
37#[target_feature(enable = "avx512bf16,avx512vl")]
38#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
40pub unsafe fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
41 transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4()))
42}
43
44#[inline]
50#[target_feature(enable = "avx512bf16,avx512vl")]
51#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
52#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
53pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh {
54 let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
55 transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
56}
57
58#[inline]
64#[target_feature(enable = "avx512bf16,avx512vl")]
65#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
66#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
67pub unsafe fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh {
68 let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
69 transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
70}
71
72#[inline]
77#[target_feature(enable = "avx512bf16,avx512vl")]
78#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
79#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
80pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
81 transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8()))
82}
83
84#[inline]
89#[target_feature(enable = "avx512bf16,avx512vl")]
90#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
91#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
92pub unsafe fn _mm256_mask_cvtne2ps_pbh(
93 src: __m256bh,
94 k: __mmask16,
95 a: __m256,
96 b: __m256,
97) -> __m256bh {
98 let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
99 transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
100}
101
102#[inline]
107#[target_feature(enable = "avx512bf16,avx512vl")]
108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
109#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
110pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh {
111 let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
112 transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
113}
114
115#[inline]
120#[target_feature(enable = "avx512bf16,avx512f")]
121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
122#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
123pub unsafe fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
124 transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16()))
125}
126
127#[inline]
133#[target_feature(enable = "avx512bf16,avx512f")]
134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
135#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
136pub unsafe fn _mm512_mask_cvtne2ps_pbh(
137 src: __m512bh,
138 k: __mmask32,
139 a: __m512,
140 b: __m512,
141) -> __m512bh {
142 let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
143 transmute(simd_select_bitmask(k, cvt, src.as_u16x32()))
144}
145
146#[inline]
152#[target_feature(enable = "avx512bf16,avx512f")]
153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
154#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
155pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh {
156 let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
157 transmute(simd_select_bitmask(k, cvt, u16x32::ZERO))
158}
159
160#[inline]
164#[target_feature(enable = "avx512bf16,avx512vl")]
165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
166#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
167pub unsafe fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
168 transmute(cvtneps2bf16_256(a.as_f32x8()))
169}
170
171#[inline]
176#[target_feature(enable = "avx512bf16,avx512vl")]
177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
178#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
179pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh {
180 let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
181 transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
182}
183
184#[inline]
189#[target_feature(enable = "avx512bf16,avx512vl")]
190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
191#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
192pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
193 let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
194 transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
195}
196
197#[inline]
201#[target_feature(enable = "avx512bf16,avx512f")]
202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
203#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
204pub unsafe fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
205 transmute(cvtneps2bf16_512(a.as_f32x16()))
206}
207
208#[inline]
213#[target_feature(enable = "avx512bf16,avx512f")]
214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
215#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
216pub unsafe fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh {
217 let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
218 transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
219}
220
221#[inline]
226#[target_feature(enable = "avx512bf16,avx512f")]
227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
228#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
229pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
230 let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
231 transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
232}
233
234#[inline]
239#[target_feature(enable = "avx512bf16,avx512vl")]
240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
241#[cfg_attr(test, assert_instr("vdpbf16ps"))]
242pub unsafe fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
243 transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4()))
244}
245
246#[inline]
252#[target_feature(enable = "avx512bf16,avx512vl")]
253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
254#[cfg_attr(test, assert_instr("vdpbf16ps"))]
255pub unsafe fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 {
256 let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
257 transmute(simd_select_bitmask(k, rst, src.as_f32x4()))
258}
259
260#[inline]
266#[target_feature(enable = "avx512bf16,avx512vl")]
267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
268#[cfg_attr(test, assert_instr("vdpbf16ps"))]
269pub unsafe fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
270 let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
271 let zero = _mm_set1_ps(0.0_f32).as_f32x4();
272 transmute(simd_select_bitmask(k, rst, zero))
273}
274
275#[inline]
280#[target_feature(enable = "avx512bf16,avx512vl")]
281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
282#[cfg_attr(test, assert_instr("vdpbf16ps"))]
283pub unsafe fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
284 transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8()))
285}
286
287#[inline]
293#[target_feature(enable = "avx512bf16,avx512vl")]
294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
295#[cfg_attr(test, assert_instr("vdpbf16ps"))]
296pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 {
297 let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
298 transmute(simd_select_bitmask(k, rst, src.as_f32x8()))
299}
300
301#[inline]
307#[target_feature(enable = "avx512bf16,avx512vl")]
308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
309#[cfg_attr(test, assert_instr("vdpbf16ps"))]
310pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
311 let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
312 transmute(simd_select_bitmask(k, rst, f32x8::ZERO))
313}
314
315#[inline]
322#[target_feature(enable = "avx512bf16,avx512f")]
323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
324#[cfg_attr(test, assert_instr("vdpbf16ps"))]
325pub unsafe fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
326 transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16()))
327}
328
329#[inline]
335#[target_feature(enable = "avx512bf16,avx512f")]
336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
337#[cfg_attr(test, assert_instr("vdpbf16ps"))]
338pub unsafe fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 {
339 let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
340 transmute(simd_select_bitmask(k, rst, src.as_f32x16()))
341}
342
343#[inline]
349#[target_feature(enable = "avx512bf16,avx512f")]
350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
351#[cfg_attr(test, assert_instr("vdpbf16ps"))]
352pub unsafe fn _mm512_maskz_dpbf16_ps(
353 k: __mmask16,
354 src: __m512,
355 a: __m512bh,
356 b: __m512bh,
357) -> __m512 {
358 let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
359 transmute(simd_select_bitmask(k, rst, f32x16::ZERO))
360}
361
362#[inline]
367#[target_feature(enable = "avx512bf16,avx512f")]
368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
369pub unsafe fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
370 _mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a))))
371}
372
373#[inline]
379#[target_feature(enable = "avx512bf16,avx512f")]
380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
381pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 {
382 let cvt = _mm512_cvtpbh_ps(a);
383 transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16()))
384}
385
386#[inline]
392#[target_feature(enable = "avx512bf16,avx512f")]
393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
394pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
395 let cvt = _mm512_cvtpbh_ps(a);
396 transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO))
397}
398
399#[inline]
404#[target_feature(enable = "avx512bf16,avx512vl")]
405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
406pub unsafe fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
407 _mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a))))
408}
409
410#[inline]
416#[target_feature(enable = "avx512bf16,avx512vl")]
417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
418pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 {
419 let cvt = _mm256_cvtpbh_ps(a);
420 transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8()))
421}
422
423#[inline]
429#[target_feature(enable = "avx512bf16,avx512vl")]
430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
431pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
432 let cvt = _mm256_cvtpbh_ps(a);
433 transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO))
434}
435
436#[inline]
441#[target_feature(enable = "avx512bf16,avx512vl")]
442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
443pub unsafe fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
444 _mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a))))
445}
446
447#[inline]
453#[target_feature(enable = "avx512bf16,avx512vl")]
454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
455pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 {
456 let cvt = _mm_cvtpbh_ps(a);
457 transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4()))
458}
459
460#[inline]
466#[target_feature(enable = "avx512bf16,avx512vl")]
467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
468pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
469 let cvt = _mm_cvtpbh_ps(a);
470 transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO))
471}
472
473#[inline]
478#[target_feature(enable = "avx512bf16,avx512f")]
479#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
480pub unsafe fn _mm_cvtsbh_ss(a: bf16) -> f32 {
481 f32::from_bits((a.to_bits() as u32) << 16)
482}
483
484#[inline]
489#[target_feature(enable = "avx512bf16,avx512vl")]
490#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
492pub unsafe fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
493 let mut dst: __m128bh;
494 asm!(
495 "vcvtneps2bf16 {dst}, {src}",
496 dst = lateout(xmm_reg) dst,
497 src = in(xmm_reg) a,
498 options(pure, nomem, nostack, preserves_flags)
499 );
500 dst
501}
502
503#[inline]
509#[target_feature(enable = "avx512bf16,avx512vl")]
510#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
512pub unsafe fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
513 let mut dst = src;
514 asm!(
515 "vcvtneps2bf16 {dst}{{{k}}},{src}",
516 dst = inlateout(xmm_reg) dst,
517 src = in(xmm_reg) a,
518 k = in(kreg) k,
519 options(pure, nomem, nostack, preserves_flags)
520 );
521 dst
522}
523
524#[inline]
530#[target_feature(enable = "avx512bf16,avx512vl")]
531#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
533pub unsafe fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
534 let mut dst: __m128bh;
535 asm!(
536 "vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
537 dst = lateout(xmm_reg) dst,
538 src = in(xmm_reg) a,
539 k = in(kreg) k,
540 options(pure, nomem, nostack, preserves_flags)
541 );
542 dst
543}
544
545#[inline]
550#[target_feature(enable = "avx512bf16,avx512vl")]
551#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
552pub unsafe fn _mm_cvtness_sbh(a: f32) -> bf16 {
553 let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0);
554 bf16::from_bits(value)
555}
556
557#[cfg(test)]
558mod tests {
559 use crate::core_arch::simd::u16x4;
560 use crate::{
561 core_arch::x86::*,
562 mem::{transmute, transmute_copy},
563 };
564 use stdarch_test::simd_test;
565
566 #[simd_test(enable = "avx512bf16,avx512vl")]
567 unsafe fn test_mm_cvtne2ps_pbh() {
568 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
569 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
570 let a: __m128 = transmute(a_array);
571 let b: __m128 = transmute(b_array);
572 let c: __m128bh = _mm_cvtne2ps_pbh(a, b);
573 let result: [u16; 8] = transmute(c.as_u16x8());
574 #[rustfmt::skip]
575 let expected_result: [u16; 8] = [
576 0b1_10000110_0110010,
577 0b1_10000010_0101000,
578 0b1_10000000_1110000,
579 0b1_10000100_1001001,
580 0b0_10000110_0110010,
581 0b0_10000010_0101000,
582 0b0_10000000_1110000,
583 0b0_10000100_1001001,
584 ];
585 assert_eq!(result, expected_result);
586 }
587
588 #[simd_test(enable = "avx512bf16,avx512vl")]
589 unsafe fn test_mm_mask_cvtne2ps_pbh() {
590 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
591 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
592 #[rustfmt::skip]
593 let src_array: [u16; 8] = [
594 0b0_10000110_0110010,
595 0b0_10000010_0101000,
596 0b0_10000000_1110000,
597 0b0_10000100_1001001,
598 0b0_10000110_0110010,
599 0b0_10000010_0101000,
600 0b0_10000000_1110000,
601 0b0_10000100_1001001,
602 ];
603 let src: __m128bh = transmute(src_array);
604 let a: __m128 = transmute(a_array);
605 let b: __m128 = transmute(b_array);
606 let k: __mmask8 = 0b1111_1111;
607 let c: __m128bh = _mm_mask_cvtne2ps_pbh(src, k, a, b);
608 let result: [u16; 8] = transmute(c.as_u16x8());
609 #[rustfmt::skip]
610 let expected_result: [u16; 8] = [
611 0b1_10000110_0110010,
612 0b1_10000010_0101000,
613 0b1_10000000_1110000,
614 0b1_10000100_1001001,
615 0b0_10000110_0110010,
616 0b0_10000010_0101000,
617 0b0_10000000_1110000,
618 0b0_10000100_1001001,
619 ];
620 assert_eq!(result, expected_result);
621 let k = 0b0000_0000;
622 let c = _mm_mask_cvtne2ps_pbh(src, k, a, b);
623 let result: [u16; 8] = transmute(c.as_u16x8());
624 let expected_result = src_array;
625 assert_eq!(result, expected_result);
626 }
627
628 #[simd_test(enable = "avx512bf16,avx512vl")]
629 unsafe fn test_mm_maskz_cvtne2ps_pbh() {
630 let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
631 let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
632 let a: __m128 = transmute(a_array);
633 let b: __m128 = transmute(b_array);
634 let k: __mmask8 = 0b1111_1111;
635 let c: __m128bh = _mm_maskz_cvtne2ps_pbh(k, a, b);
636 let result: [u16; 8] = transmute(c.as_u16x8());
637 #[rustfmt::skip]
638 let expected_result: [u16; 8] = [
639 0b1_10000110_0110010,
640 0b1_10000010_0101000,
641 0b1_10000000_1110000,
642 0b1_10000100_1001001,
643 0b0_10000110_0110010,
644 0b0_10000010_0101000,
645 0b0_10000000_1110000,
646 0b0_10000100_1001001,
647 ];
648 assert_eq!(result, expected_result);
649 let k = 0b0011_1100;
650 let c = _mm_maskz_cvtne2ps_pbh(k, a, b);
651 let result: [u16; 8] = transmute(c.as_u16x8());
652 #[rustfmt::skip]
653 let expected_result: [u16; 8] = [
654 0,
655 0,
656 0b1_10000000_1110000,
657 0b1_10000100_1001001,
658 0b0_10000110_0110010,
659 0b0_10000010_0101000,
660 0,
661 0,
662 ];
663 assert_eq!(result, expected_result);
664 }
665
666 #[simd_test(enable = "avx512bf16,avx512vl")]
667 unsafe fn test_mm256_cvtne2ps_pbh() {
668 #[rustfmt::skip]
669 let a_array = [
670 178.125_f32,
671 10.5_f32,
672 3.75_f32,
673 50.25_f32,
674 16.5_f32,
675 255.11_f32,
676 1000.158_f32,
677 575.575_f32,
678 ];
679 let b_array = [
680 -178.125_f32,
681 -10.5_f32,
682 -3.75_f32,
683 -50.25_f32,
684 -16.5_f32,
685 -255.11_f32,
686 -1000.158_f32,
687 -575.575_f32,
688 ];
689 let a: __m256 = transmute(a_array);
690 let b: __m256 = transmute(b_array);
691 let c: __m256bh = _mm256_cvtne2ps_pbh(a, b);
692 let result: [u16; 16] = transmute(c.as_u16x16());
693 #[rustfmt::skip]
694 let expected_result: [u16; 16] = [
695 0b1_10000110_0110010,
696 0b1_10000010_0101000,
697 0b1_10000000_1110000,
698 0b1_10000100_1001001,
699 0b1_10000011_0000100,
700 0b1_10000110_1111111,
701 0b1_10001000_1111010,
702 0b1_10001000_0010000,
703 0b0_10000110_0110010,
704 0b0_10000010_0101000,
705 0b0_10000000_1110000,
706 0b0_10000100_1001001,
707 0b0_10000011_0000100,
708 0b0_10000110_1111111,
709 0b0_10001000_1111010,
710 0b0_10001000_0010000,
711 ];
712 assert_eq!(result, expected_result);
713 }
714
715 #[simd_test(enable = "avx512bf16,avx512vl")]
716 unsafe fn test_mm256_mask_cvtne2ps_pbh() {
717 #[rustfmt::skip]
718 let a_array = [
719 178.125_f32,
720 10.5_f32,
721 3.75_f32,
722 50.25_f32,
723 16.5_f32,
724 255.11_f32,
725 1000.158_f32,
726 575.575_f32,
727 ];
728 let b_array = [
729 -178.125_f32,
730 -10.5_f32,
731 -3.75_f32,
732 -50.25_f32,
733 -16.5_f32,
734 -255.11_f32,
735 -1000.158_f32,
736 -575.575_f32,
737 ];
738 let src_array: [u16; 16] = [
739 0b0_10000110_0110010,
740 0b0_10000010_0101000,
741 0b0_10000000_1110000,
742 0b0_10000100_1001001,
743 0b0_10000110_0110010,
744 0b0_10000010_0101000,
745 0b0_10000000_1110000,
746 0b0_10000100_1001001,
747 0b0_10000110_0110010,
748 0b0_10000010_0101000,
749 0b0_10000000_1110000,
750 0b0_10000100_1001001,
751 0b0_10000110_0110010,
752 0b0_10000010_0101000,
753 0b0_10000000_1110000,
754 0b0_10000100_1001001,
755 ];
756 let src: __m256bh = transmute(src_array);
757 let a: __m256 = transmute(a_array);
758 let b: __m256 = transmute(b_array);
759 let k: __mmask16 = 0xffff;
760 let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
761 let result: [u16; 16] = transmute(c.as_u16x16());
762 #[rustfmt::skip]
763 let expected_result: [u16; 16] = [
764 0b1_10000110_0110010,
765 0b1_10000010_0101000,
766 0b1_10000000_1110000,
767 0b1_10000100_1001001,
768 0b1_10000011_0000100,
769 0b1_10000110_1111111,
770 0b1_10001000_1111010,
771 0b1_10001000_0010000,
772 0b0_10000110_0110010,
773 0b0_10000010_0101000,
774 0b0_10000000_1110000,
775 0b0_10000100_1001001,
776 0b0_10000011_0000100,
777 0b0_10000110_1111111,
778 0b0_10001000_1111010,
779 0b0_10001000_0010000,
780 ];
781 assert_eq!(result, expected_result);
782 let k: __mmask16 = 0;
783 let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
784 let result: [u16; 16] = transmute(c.as_u16x16());
785 let expected_result = src_array;
786 assert_eq!(result, expected_result);
787 }
788
789 #[simd_test(enable = "avx512bf16,avx512vl")]
790 unsafe fn test_mm256_maskz_cvtne2ps_pbh() {
791 #[rustfmt::skip]
792 let a_array = [
793 178.125_f32,
794 10.5_f32,
795 3.75_f32,
796 50.25_f32,
797 16.5_f32,
798 255.11_f32,
799 1000.158_f32,
800 575.575_f32,
801 ];
802 let b_array = [
803 -178.125_f32,
804 -10.5_f32,
805 -3.75_f32,
806 -50.25_f32,
807 -16.5_f32,
808 -255.11_f32,
809 -1000.158_f32,
810 -575.575_f32,
811 ];
812 let a: __m256 = transmute(a_array);
813 let b: __m256 = transmute(b_array);
814 let k: __mmask16 = 0xffff;
815 let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
816 let result: [u16; 16] = transmute(c.as_u16x16());
817 #[rustfmt::skip]
818 let expected_result: [u16; 16] = [
819 0b1_10000110_0110010,
820 0b1_10000010_0101000,
821 0b1_10000000_1110000,
822 0b1_10000100_1001001,
823 0b1_10000011_0000100,
824 0b1_10000110_1111111,
825 0b1_10001000_1111010,
826 0b1_10001000_0010000,
827 0b0_10000110_0110010,
828 0b0_10000010_0101000,
829 0b0_10000000_1110000,
830 0b0_10000100_1001001,
831 0b0_10000011_0000100,
832 0b0_10000110_1111111,
833 0b0_10001000_1111010,
834 0b0_10001000_0010000,
835 ];
836 assert_eq!(result, expected_result);
837 let k: __mmask16 = 0b0110_1100_0011_0110;
838 let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
839 let result: [u16; 16] = transmute(c.as_u16x16());
840 #[rustfmt::skip]
841 let expected_result: [u16; 16] = [
842 0,
843 0b1_10000010_0101000,
844 0b1_10000000_1110000,
845 0,
846 0b1_10000011_0000100,
847 0b1_10000110_1111111,
848 0,
849 0,
850 0,
851 0,
852 0b0_10000000_1110000,
853 0b0_10000100_1001001,
854 0,
855 0b0_10000110_1111111,
856 0b0_10001000_1111010,
857 0,
858 ];
859 assert_eq!(result, expected_result);
860 }
861
862 #[simd_test(enable = "avx512bf16,avx512f")]
863 unsafe fn test_mm512_cvtne2ps_pbh() {
864 #[rustfmt::skip]
865 let a_array = [
866 178.125_f32,
867 10.5_f32,
868 3.75_f32,
869 50.25_f32,
870 16.5_f32,
871 255.11_f32,
872 1000.158_f32,
873 575.575_f32,
874 178.125_f32,
875 10.5_f32,
876 3.75_f32,
877 50.25_f32,
878 16.5_f32,
879 255.11_f32,
880 1000.158_f32,
881 575.575_f32,
882 ];
883 let b_array = [
884 -178.125_f32,
885 -10.5_f32,
886 -3.75_f32,
887 -50.25_f32,
888 -16.5_f32,
889 -255.11_f32,
890 -1000.158_f32,
891 -575.575_f32,
892 -178.125_f32,
893 -10.5_f32,
894 -3.75_f32,
895 -50.25_f32,
896 -16.5_f32,
897 -255.11_f32,
898 -1000.158_f32,
899 -575.575_f32,
900 ];
901 let a: __m512 = transmute(a_array);
902 let b: __m512 = transmute(b_array);
903 let c: __m512bh = _mm512_cvtne2ps_pbh(a, b);
904 let result: [u16; 32] = transmute(c.as_u16x32());
905 #[rustfmt::skip]
906 let expected_result: [u16; 32] = [
907 0b1_10000110_0110010,
908 0b1_10000010_0101000,
909 0b1_10000000_1110000,
910 0b1_10000100_1001001,
911 0b1_10000011_0000100,
912 0b1_10000110_1111111,
913 0b1_10001000_1111010,
914 0b1_10001000_0010000,
915 0b1_10000110_0110010,
916 0b1_10000010_0101000,
917 0b1_10000000_1110000,
918 0b1_10000100_1001001,
919 0b1_10000011_0000100,
920 0b1_10000110_1111111,
921 0b1_10001000_1111010,
922 0b1_10001000_0010000,
923 0b0_10000110_0110010,
924 0b0_10000010_0101000,
925 0b0_10000000_1110000,
926 0b0_10000100_1001001,
927 0b0_10000011_0000100,
928 0b0_10000110_1111111,
929 0b0_10001000_1111010,
930 0b0_10001000_0010000,
931 0b0_10000110_0110010,
932 0b0_10000010_0101000,
933 0b0_10000000_1110000,
934 0b0_10000100_1001001,
935 0b0_10000011_0000100,
936 0b0_10000110_1111111,
937 0b0_10001000_1111010,
938 0b0_10001000_0010000,
939 ];
940 assert_eq!(result, expected_result);
941 }
942
943 #[simd_test(enable = "avx512bf16,avx512f")]
944 unsafe fn test_mm512_mask_cvtne2ps_pbh() {
945 #[rustfmt::skip]
946 let a_array = [
947 178.125_f32,
948 10.5_f32,
949 3.75_f32,
950 50.25_f32,
951 16.5_f32,
952 255.11_f32,
953 1000.158_f32,
954 575.575_f32,
955 178.125_f32,
956 10.5_f32,
957 3.75_f32,
958 50.25_f32,
959 16.5_f32,
960 255.11_f32,
961 1000.158_f32,
962 575.575_f32,
963 ];
964 let b_array = [
965 -178.125_f32,
966 -10.5_f32,
967 -3.75_f32,
968 -50.25_f32,
969 -16.5_f32,
970 -255.11_f32,
971 -1000.158_f32,
972 -575.575_f32,
973 -178.125_f32,
974 -10.5_f32,
975 -3.75_f32,
976 -50.25_f32,
977 -16.5_f32,
978 -255.11_f32,
979 -1000.158_f32,
980 -575.575_f32,
981 ];
982 let src_array: [u16; 32] = [
983 0b0_10000110_0110010,
984 0b0_10000010_0101000,
985 0b0_10000000_1110000,
986 0b0_10000100_1001001,
987 0b0_10000110_0110010,
988 0b0_10000010_0101000,
989 0b0_10000000_1110000,
990 0b0_10000100_1001001,
991 0b0_10000110_0110010,
992 0b0_10000010_0101000,
993 0b0_10000000_1110000,
994 0b0_10000100_1001001,
995 0b0_10000110_0110010,
996 0b0_10000010_0101000,
997 0b0_10000000_1110000,
998 0b0_10000100_1001001,
999 0b0_10000110_0110010,
1000 0b0_10000010_0101000,
1001 0b0_10000000_1110000,
1002 0b0_10000100_1001001,
1003 0b0_10000110_0110010,
1004 0b0_10000010_0101000,
1005 0b0_10000000_1110000,
1006 0b0_10000100_1001001,
1007 0b0_10000110_0110010,
1008 0b0_10000010_0101000,
1009 0b0_10000000_1110000,
1010 0b0_10000100_1001001,
1011 0b0_10000110_0110010,
1012 0b0_10000010_0101000,
1013 0b0_10000000_1110000,
1014 0b0_10000100_1001001,
1015 ];
1016 let src: __m512bh = transmute(src_array);
1017 let a: __m512 = transmute(a_array);
1018 let b: __m512 = transmute(b_array);
1019 let k: __mmask32 = 0xffffffff;
1020 let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
1021 let result: [u16; 32] = transmute(c.as_u16x32());
1022 #[rustfmt::skip]
1023 let expected_result: [u16; 32] = [
1024 0b1_10000110_0110010,
1025 0b1_10000010_0101000,
1026 0b1_10000000_1110000,
1027 0b1_10000100_1001001,
1028 0b1_10000011_0000100,
1029 0b1_10000110_1111111,
1030 0b1_10001000_1111010,
1031 0b1_10001000_0010000,
1032 0b1_10000110_0110010,
1033 0b1_10000010_0101000,
1034 0b1_10000000_1110000,
1035 0b1_10000100_1001001,
1036 0b1_10000011_0000100,
1037 0b1_10000110_1111111,
1038 0b1_10001000_1111010,
1039 0b1_10001000_0010000,
1040 0b0_10000110_0110010,
1041 0b0_10000010_0101000,
1042 0b0_10000000_1110000,
1043 0b0_10000100_1001001,
1044 0b0_10000011_0000100,
1045 0b0_10000110_1111111,
1046 0b0_10001000_1111010,
1047 0b0_10001000_0010000,
1048 0b0_10000110_0110010,
1049 0b0_10000010_0101000,
1050 0b0_10000000_1110000,
1051 0b0_10000100_1001001,
1052 0b0_10000011_0000100,
1053 0b0_10000110_1111111,
1054 0b0_10001000_1111010,
1055 0b0_10001000_0010000,
1056 ];
1057 assert_eq!(result, expected_result);
1058 let k: __mmask32 = 0;
1059 let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
1060 let result: [u16; 32] = transmute(c.as_u16x32());
1061 let expected_result = src_array;
1062 assert_eq!(result, expected_result);
1063 }
1064
1065 #[simd_test(enable = "avx512bf16,avx512f")]
1066 unsafe fn test_mm512_maskz_cvtne2ps_pbh() {
1067 #[rustfmt::skip]
1068 let a_array = [
1069 178.125_f32,
1070 10.5_f32,
1071 3.75_f32,
1072 50.25_f32,
1073 16.5_f32,
1074 255.11_f32,
1075 1000.158_f32,
1076 575.575_f32,
1077 178.125_f32,
1078 10.5_f32,
1079 3.75_f32,
1080 50.25_f32,
1081 16.5_f32,
1082 255.11_f32,
1083 1000.158_f32,
1084 575.575_f32,
1085 ];
1086 let b_array = [
1087 -178.125_f32,
1088 -10.5_f32,
1089 -3.75_f32,
1090 -50.25_f32,
1091 -16.5_f32,
1092 -255.11_f32,
1093 -1000.158_f32,
1094 -575.575_f32,
1095 -178.125_f32,
1096 -10.5_f32,
1097 -3.75_f32,
1098 -50.25_f32,
1099 -16.5_f32,
1100 -255.11_f32,
1101 -1000.158_f32,
1102 -575.575_f32,
1103 ];
1104 let a: __m512 = transmute(a_array);
1105 let b: __m512 = transmute(b_array);
1106 let k: __mmask32 = 0xffffffff;
1107 let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
1108 let result: [u16; 32] = transmute(c.as_u16x32());
1109 #[rustfmt::skip]
1110 let expected_result: [u16; 32] = [
1111 0b1_10000110_0110010,
1112 0b1_10000010_0101000,
1113 0b1_10000000_1110000,
1114 0b1_10000100_1001001,
1115 0b1_10000011_0000100,
1116 0b1_10000110_1111111,
1117 0b1_10001000_1111010,
1118 0b1_10001000_0010000,
1119 0b1_10000110_0110010,
1120 0b1_10000010_0101000,
1121 0b1_10000000_1110000,
1122 0b1_10000100_1001001,
1123 0b1_10000011_0000100,
1124 0b1_10000110_1111111,
1125 0b1_10001000_1111010,
1126 0b1_10001000_0010000,
1127 0b0_10000110_0110010,
1128 0b0_10000010_0101000,
1129 0b0_10000000_1110000,
1130 0b0_10000100_1001001,
1131 0b0_10000011_0000100,
1132 0b0_10000110_1111111,
1133 0b0_10001000_1111010,
1134 0b0_10001000_0010000,
1135 0b0_10000110_0110010,
1136 0b0_10000010_0101000,
1137 0b0_10000000_1110000,
1138 0b0_10000100_1001001,
1139 0b0_10000011_0000100,
1140 0b0_10000110_1111111,
1141 0b0_10001000_1111010,
1142 0b0_10001000_0010000,
1143 ];
1144 assert_eq!(result, expected_result);
1145 let k: __mmask32 = 0b1100_1010_1001_0110_1010_0011_0101_0110;
1146 let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
1147 let result: [u16; 32] = transmute(c.as_u16x32());
1148 #[rustfmt::skip]
1149 let expected_result: [u16; 32] = [
1150 0,
1151 0b1_10000010_0101000,
1152 0b1_10000000_1110000,
1153 0,
1154 0b1_10000011_0000100,
1155 0,
1156 0b1_10001000_1111010,
1157 0,
1158 0b1_10000110_0110010,
1159 0b1_10000010_0101000,
1160 0,
1161 0,
1162 0,
1163 0b1_10000110_1111111,
1164 0,
1165 0b1_10001000_0010000,
1166 0,
1167 0b0_10000010_0101000,
1168 0b0_10000000_1110000,
1169 0,
1170 0b0_10000011_0000100,
1171 0,
1172 0,
1173 0b0_10001000_0010000,
1174 0,
1175 0b0_10000010_0101000,
1176 0,
1177 0b0_10000100_1001001,
1178 0,
1179 0,
1180 0b0_10001000_1111010,
1181 0b0_10001000_0010000,
1182 ];
1183 assert_eq!(result, expected_result);
1184 }
1185
1186 #[simd_test(enable = "avx512bf16,avx512vl")]
1187 unsafe fn test_mm256_cvtneps_pbh() {
1188 #[rustfmt::skip]
1189 let a_array = [
1190 178.125_f32,
1191 10.5_f32,
1192 3.75_f32,
1193 50.25_f32,
1194 16.5_f32,
1195 255.11_f32,
1196 1000.158_f32,
1197 575.575_f32,
1198 ];
1199 let a: __m256 = transmute(a_array);
1200 let c: __m128bh = _mm256_cvtneps_pbh(a);
1201 let result: [u16; 8] = transmute(c.as_u16x8());
1202 #[rustfmt::skip]
1203 let expected_result: [u16; 8] = [
1204 0b0_10000110_0110010,
1205 0b0_10000010_0101000,
1206 0b0_10000000_1110000,
1207 0b0_10000100_1001001,
1208 0b0_10000011_0000100,
1209 0b0_10000110_1111111,
1210 0b0_10001000_1111010,
1211 0b0_10001000_0010000,
1212 ];
1213 assert_eq!(result, expected_result);
1214 }
1215
1216 #[simd_test(enable = "avx512bf16,avx512vl")]
1217 unsafe fn test_mm256_mask_cvtneps_pbh() {
1218 #[rustfmt::skip]
1219 let a_array = [
1220 178.125_f32,
1221 10.5_f32,
1222 3.75_f32,
1223 50.25_f32,
1224 16.5_f32,
1225 255.11_f32,
1226 1000.158_f32,
1227 575.575_f32,
1228 ];
1229 let src_array: [u16; 8] = [
1230 0b1_10000110_0110010,
1231 0b1_10000010_0101000,
1232 0b1_10000000_1110000,
1233 0b1_10000100_1001001,
1234 0b1_10000011_0000100,
1235 0b1_10000110_1111111,
1236 0b1_10001000_1111010,
1237 0b1_10001000_0010000,
1238 ];
1239 let src: __m128bh = transmute(src_array);
1240 let a: __m256 = transmute(a_array);
1241 let k: __mmask8 = 0xff;
1242 let b = _mm256_mask_cvtneps_pbh(src, k, a);
1243 let result: [u16; 8] = transmute(b.as_u16x8());
1244 #[rustfmt::skip]
1245 let expected_result: [u16; 8] = [
1246 0b0_10000110_0110010,
1247 0b0_10000010_0101000,
1248 0b0_10000000_1110000,
1249 0b0_10000100_1001001,
1250 0b0_10000011_0000100,
1251 0b0_10000110_1111111,
1252 0b0_10001000_1111010,
1253 0b0_10001000_0010000,
1254 ];
1255 assert_eq!(result, expected_result);
1256 let k: __mmask8 = 0x0;
1257 let b: __m128bh = _mm256_mask_cvtneps_pbh(src, k, a);
1258 let result: [u16; 8] = transmute(b.as_u16x8());
1259 let expected_result: [u16; 8] = src_array;
1260 assert_eq!(result, expected_result);
1261 }
1262
1263 #[simd_test(enable = "avx512bf16,avx512vl")]
1264 unsafe fn test_mm256_maskz_cvtneps_pbh() {
1265 #[rustfmt::skip]
1266 let a_array = [
1267 178.125_f32,
1268 10.5_f32,
1269 3.75_f32,
1270 50.25_f32,
1271 16.5_f32,
1272 255.11_f32,
1273 1000.158_f32,
1274 575.575_f32,
1275 ];
1276 let a: __m256 = transmute(a_array);
1277 let k: __mmask8 = 0xff;
1278 let b = _mm256_maskz_cvtneps_pbh(k, a);
1279 let result: [u16; 8] = transmute(b.as_u16x8());
1280 #[rustfmt::skip]
1281 let expected_result: [u16; 8] = [
1282 0b0_10000110_0110010,
1283 0b0_10000010_0101000,
1284 0b0_10000000_1110000,
1285 0b0_10000100_1001001,
1286 0b0_10000011_0000100,
1287 0b0_10000110_1111111,
1288 0b0_10001000_1111010,
1289 0b0_10001000_0010000,
1290 ];
1291 assert_eq!(result, expected_result);
1292 let k: __mmask8 = 0x6;
1293 let b: __m128bh = _mm256_maskz_cvtneps_pbh(k, a);
1294 let result: [u16; 8] = transmute(b.as_u16x8());
1295 let expected_result: [u16; 8] =
1296 [0, 0b0_10000010_0101000, 0b0_10000000_1110000, 0, 0, 0, 0, 0];
1297 assert_eq!(result, expected_result);
1298 }
1299
1300 #[simd_test(enable = "avx512bf16,avx512f")]
1301 unsafe fn test_mm512_cvtneps_pbh() {
1302 #[rustfmt::skip]
1303 let a_array = [
1304 178.125_f32,
1305 10.5_f32,
1306 3.75_f32,
1307 50.25_f32,
1308 16.5_f32,
1309 255.11_f32,
1310 1000.158_f32,
1311 575.575_f32,
1312 178.125_f32,
1313 10.5_f32,
1314 3.75_f32,
1315 50.25_f32,
1316 16.5_f32,
1317 255.11_f32,
1318 1000.158_f32,
1319 575.575_f32,
1320 ];
1321 let a: __m512 = transmute(a_array);
1322 let c: __m256bh = _mm512_cvtneps_pbh(a);
1323 let result: [u16; 16] = transmute(c.as_u16x16());
1324 #[rustfmt::skip]
1325 let expected_result: [u16; 16] = [
1326 0b0_10000110_0110010,
1327 0b0_10000010_0101000,
1328 0b0_10000000_1110000,
1329 0b0_10000100_1001001,
1330 0b0_10000011_0000100,
1331 0b0_10000110_1111111,
1332 0b0_10001000_1111010,
1333 0b0_10001000_0010000,
1334 0b0_10000110_0110010,
1335 0b0_10000010_0101000,
1336 0b0_10000000_1110000,
1337 0b0_10000100_1001001,
1338 0b0_10000011_0000100,
1339 0b0_10000110_1111111,
1340 0b0_10001000_1111010,
1341 0b0_10001000_0010000,
1342 ];
1343 assert_eq!(result, expected_result);
1344 }
1345
1346 #[simd_test(enable = "avx512bf16,avx512f")]
1347 unsafe fn test_mm512_mask_cvtneps_pbh() {
1348 #[rustfmt::skip]
1349 let a_array = [
1350 178.125_f32,
1351 10.5_f32,
1352 3.75_f32,
1353 50.25_f32,
1354 16.5_f32,
1355 255.11_f32,
1356 1000.158_f32,
1357 575.575_f32,
1358 178.125_f32,
1359 10.5_f32,
1360 3.75_f32,
1361 50.25_f32,
1362 16.5_f32,
1363 255.11_f32,
1364 1000.158_f32,
1365 575.575_f32,
1366 ];
1367 let src_array: [u16; 16] = [
1368 0b1_10000110_0110010,
1369 0b1_10000010_0101000,
1370 0b1_10000000_1110000,
1371 0b1_10000100_1001001,
1372 0b1_10000011_0000100,
1373 0b1_10000110_1111111,
1374 0b1_10001000_1111010,
1375 0b1_10001000_0010000,
1376 0b1_10000110_0110010,
1377 0b1_10000010_0101000,
1378 0b1_10000000_1110000,
1379 0b1_10000100_1001001,
1380 0b1_10000011_0000100,
1381 0b1_10000110_1111111,
1382 0b1_10001000_1111010,
1383 0b1_10001000_0010000,
1384 ];
1385 let src: __m256bh = transmute(src_array);
1386 let a: __m512 = transmute(a_array);
1387 let k: __mmask16 = 0xffff;
1388 let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
1389 let result: [u16; 16] = transmute(c.as_u16x16());
1390 #[rustfmt::skip]
1391 let expected_result: [u16; 16] = [
1392 0b0_10000110_0110010,
1393 0b0_10000010_0101000,
1394 0b0_10000000_1110000,
1395 0b0_10000100_1001001,
1396 0b0_10000011_0000100,
1397 0b0_10000110_1111111,
1398 0b0_10001000_1111010,
1399 0b0_10001000_0010000,
1400 0b0_10000110_0110010,
1401 0b0_10000010_0101000,
1402 0b0_10000000_1110000,
1403 0b0_10000100_1001001,
1404 0b0_10000011_0000100,
1405 0b0_10000110_1111111,
1406 0b0_10001000_1111010,
1407 0b0_10001000_0010000,
1408 ];
1409 assert_eq!(result, expected_result);
1410 let k: __mmask16 = 0;
1411 let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
1412 let result: [u16; 16] = transmute(c.as_u16x16());
1413 let expected_result = src_array;
1414 assert_eq!(result, expected_result);
1415 }
1416
1417 #[simd_test(enable = "avx512bf16,avx512f")]
1418 unsafe fn test_mm512_maskz_cvtneps_pbh() {
1419 #[rustfmt::skip]
1420 let a_array = [
1421 178.125_f32,
1422 10.5_f32,
1423 3.75_f32,
1424 50.25_f32,
1425 16.5_f32,
1426 255.11_f32,
1427 1000.158_f32,
1428 575.575_f32,
1429 178.125_f32,
1430 10.5_f32,
1431 3.75_f32,
1432 50.25_f32,
1433 16.5_f32,
1434 255.11_f32,
1435 1000.158_f32,
1436 575.575_f32,
1437 ];
1438 let a: __m512 = transmute(a_array);
1439 let k: __mmask16 = 0xffff;
1440 let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
1441 let result: [u16; 16] = transmute(c.as_u16x16());
1442 #[rustfmt::skip]
1443 let expected_result: [u16; 16] = [
1444 0b0_10000110_0110010,
1445 0b0_10000010_0101000,
1446 0b0_10000000_1110000,
1447 0b0_10000100_1001001,
1448 0b0_10000011_0000100,
1449 0b0_10000110_1111111,
1450 0b0_10001000_1111010,
1451 0b0_10001000_0010000,
1452 0b0_10000110_0110010,
1453 0b0_10000010_0101000,
1454 0b0_10000000_1110000,
1455 0b0_10000100_1001001,
1456 0b0_10000011_0000100,
1457 0b0_10000110_1111111,
1458 0b0_10001000_1111010,
1459 0b0_10001000_0010000,
1460 ];
1461 assert_eq!(result, expected_result);
1462 let k: __mmask16 = 0x653a;
1463 let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
1464 let result: [u16; 16] = transmute(c.as_u16x16());
1465 #[rustfmt::skip]
1466 let expected_result: [u16; 16] = [
1467 0,
1468 0b0_10000010_0101000,
1469 0,
1470 0b0_10000100_1001001,
1471 0b0_10000011_0000100,
1472 0b0_10000110_1111111,
1473 0,
1474 0,
1475 0b0_10000110_0110010,
1476 0,
1477 0b0_10000000_1110000,
1478 0,
1479 0,
1480 0b0_10000110_1111111,
1481 0b0_10001000_1111010,
1482 0,
1483 ];
1484 assert_eq!(result, expected_result);
1485 }
1486
1487 #[simd_test(enable = "avx512bf16,avx512vl")]
1488 unsafe fn test_mm_dpbf16_ps() {
1489 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1490 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1491 let a1: __m128 = transmute(a_array);
1492 let b1: __m128 = transmute(b_array);
1493 let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
1494 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1495 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1496 let c: __m128 = _mm_dpbf16_ps(src, a, b);
1497 let result: [f32; 4] = transmute(c.as_f32x4());
1498 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1499 assert_eq!(result, expected_result);
1500 }
1501
1502 #[simd_test(enable = "avx512bf16,avx512vl")]
1503 unsafe fn test_mm_mask_dpbf16_ps() {
1504 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1505 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1506 let a1: __m128 = transmute(a_array);
1507 let b1: __m128 = transmute(b_array);
1508 let k: __mmask8 = 0xf3;
1509 let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
1510 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1511 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1512 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1513 let result: [f32; 4] = transmute(c.as_f32x4());
1514 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32];
1515 assert_eq!(result, expected_result);
1516 let k: __mmask8 = 0xff;
1517 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1518 let result: [f32; 4] = transmute(c.as_f32x4());
1519 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1520 assert_eq!(result, expected_result);
1521 let k: __mmask8 = 0;
1522 let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
1523 let result: [f32; 4] = transmute(c.as_f32x4());
1524 let expected_result: [f32; 4] = [1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32];
1525 assert_eq!(result, expected_result);
1526 }
1527
1528 #[simd_test(enable = "avx512bf16,avx512vl")]
1529 unsafe fn test_mm_maskz_dpbf16_ps() {
1530 let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
1531 let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
1532 let a1: __m128 = transmute(a_array);
1533 let b1: __m128 = transmute(b_array);
1534 let k: __mmask8 = 0xf3;
1535 let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
1536 let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
1537 let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
1538 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1539 let result: [f32; 4] = transmute(c.as_f32x4());
1540 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 0.0, 0.0];
1541 assert_eq!(result, expected_result);
1542 let k: __mmask8 = 0xff;
1543 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1544 let result: [f32; 4] = transmute(c.as_f32x4());
1545 let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
1546 assert_eq!(result, expected_result);
1547 let k: __mmask8 = 0;
1548 let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
1549 let result: [f32; 4] = transmute(c.as_f32x4());
1550 let expected_result: [f32; 4] = [0.0, 0.0, 0.0, 0.0];
1551 assert_eq!(result, expected_result);
1552 }
1553
1554 #[simd_test(enable = "avx512bf16,avx512vl")]
1555 unsafe fn test_mm256_dpbf16_ps() {
1556 #[rustfmt::skip]
1557 let a_array = [
1558 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1559 ];
1560 let b_array = [
1561 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1562 ];
1563 let a1: __m256 = transmute(a_array);
1564 let b1: __m256 = transmute(b_array);
1565 #[rustfmt::skip]
1566 let src: __m256 = transmute([
1567 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1568 ]);
1569 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1570 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1571 let c: __m256 = _mm256_dpbf16_ps(src, a, b);
1572 let result: [f32; 8] = transmute(c.as_f32x8());
1573 #[rustfmt::skip]
1574 let expected_result: [f32; 8] = [
1575 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1576 ];
1577 assert_eq!(result, expected_result);
1578 }
1579
1580 #[simd_test(enable = "avx512bf16,avx512vl")]
1581 unsafe fn test_mm256_mask_dpbf16_ps() {
1582 #[rustfmt::skip]
1583 let a_array = [
1584 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1585 ];
1586 let b_array = [
1587 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1588 ];
1589 let a1: __m256 = transmute(a_array);
1590 let b1: __m256 = transmute(b_array);
1591 let k: __mmask8 = 0x33;
1592 #[rustfmt::skip]
1593 let src: __m256 = transmute([
1594 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1595 ]);
1596 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1597 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1598 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1599 let result: [f32; 8] = transmute(c.as_f32x8());
1600 #[rustfmt::skip]
1601 let expected_result: [f32; 8] = [
1602 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1603 ];
1604 assert_eq!(result, expected_result);
1605 let k: __mmask8 = 0xff;
1606 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1607 let result: [f32; 8] = transmute(c.as_f32x8());
1608 #[rustfmt::skip]
1609 let expected_result: [f32; 8] = [
1610 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1611 ];
1612 assert_eq!(result, expected_result);
1613 let k: __mmask8 = 0;
1614 let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
1615 let result: [f32; 8] = transmute(c.as_f32x8());
1616 #[rustfmt::skip]
1617 let expected_result: [f32; 8] = [
1618 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1619 ];
1620 assert_eq!(result, expected_result);
1621 }
1622
1623 #[simd_test(enable = "avx512bf16,avx512vl")]
1624 unsafe fn test_mm256_maskz_dpbf16_ps() {
1625 #[rustfmt::skip]
1626 let a_array = [
1627 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1628 ];
1629 let b_array = [
1630 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1631 ];
1632 let a1: __m256 = transmute(a_array);
1633 let b1: __m256 = transmute(b_array);
1634 let k: __mmask8 = 0x33;
1635 #[rustfmt::skip]
1636 let src: __m256 = transmute([
1637 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1638 ]);
1639 let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
1640 let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
1641 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1642 let result: [f32; 8] = transmute(c.as_f32x8());
1643 #[rustfmt::skip]
1644 let expected_result: [f32; 8] = [
1645 -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
1646 ];
1647 assert_eq!(result, expected_result);
1648 let k: __mmask8 = 0xff;
1649 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1650 let result: [f32; 8] = transmute(c.as_f32x8());
1651 #[rustfmt::skip]
1652 let expected_result: [f32; 8] = [
1653 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1654 ];
1655 assert_eq!(result, expected_result);
1656 let k: __mmask8 = 0;
1657 let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
1658 let result: [f32; 8] = transmute(c.as_f32x8());
1659 let expected_result: [f32; 8] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1660 assert_eq!(result, expected_result);
1661 }
1662
1663 #[simd_test(enable = "avx512bf16,avx512f")]
1664 unsafe fn test_mm512_dpbf16_ps() {
1665 #[rustfmt::skip]
1666 let a_array = [
1667 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1668 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1669 ];
1670 let b_array = [
1671 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1672 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1673 ];
1674 let a1: __m512 = transmute(a_array);
1675 let b1: __m512 = transmute(b_array);
1676 let src: __m512 = transmute([
1677 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1678 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1679 ]);
1680 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1681 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1682 let c: __m512 = _mm512_dpbf16_ps(src, a, b);
1683 let result: [f32; 16] = transmute(c.as_f32x16());
1684 #[rustfmt::skip]
1685 let expected_result: [f32; 16] = [
1686 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1687 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1688 ];
1689 assert_eq!(result, expected_result);
1690 }
1691
1692 #[simd_test(enable = "avx512bf16,avx512f")]
1693 unsafe fn test_mm512_mask_dpbf16_ps() {
1694 #[rustfmt::skip]
1695 let a_array = [
1696 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1697 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1698 ];
1699 let b_array = [
1700 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1701 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1702 ];
1703 let a1: __m512 = transmute(a_array);
1704 let b1: __m512 = transmute(b_array);
1705 let k: __mmask16 = 0x3333;
1706 #[rustfmt::skip]
1707 let src: __m512 = transmute([
1708 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1709 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1710 ]);
1711 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1712 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1713 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1714 let result: [f32; 16] = transmute(c.as_f32x16());
1715 #[rustfmt::skip]
1716 let expected_result: [f32; 16] = [
1717 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1718 -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
1719 ];
1720 assert_eq!(result, expected_result);
1721 let k: __mmask16 = 0xffff;
1722 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1723 let result: [f32; 16] = transmute(c.as_f32x16());
1724 #[rustfmt::skip]
1725 let expected_result: [f32; 16] = [
1726 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1727 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1728 ];
1729 assert_eq!(result, expected_result);
1730 let k: __mmask16 = 0;
1731 let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
1732 let result: [f32; 16] = transmute(c.as_f32x16());
1733 #[rustfmt::skip]
1734 let expected_result: [f32; 16] = [
1735 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1736 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1737 ];
1738 assert_eq!(result, expected_result);
1739 }
1740
1741 #[simd_test(enable = "avx512bf16,avx512f")]
1742 unsafe fn test_mm512_maskz_dpbf16_ps() {
1743 #[rustfmt::skip]
1744 let a_array = [
1745 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1746 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
1747 ];
1748 let b_array = [
1749 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1750 -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
1751 ];
1752 let a1: __m512 = transmute(a_array);
1753 let b1: __m512 = transmute(b_array);
1754 let k: __mmask16 = 0x3333;
1755 #[rustfmt::skip]
1756 let src: __m512 = transmute([
1757 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
1758 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
1759 ]);
1760 let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
1761 let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
1762 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1763 let result: [f32; 16] = transmute(c.as_f32x16());
1764 #[rustfmt::skip]
1765 let expected_result: [f32; 16] = [
1766 -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32,
1767 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
1768 ];
1769 assert_eq!(result, expected_result);
1770 let k: __mmask16 = 0xffff;
1771 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1772 let result: [f32; 16] = transmute(c.as_f32x16());
1773 #[rustfmt::skip]
1774 let expected_result: [f32; 16] = [
1775 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1776 -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
1777 ];
1778 assert_eq!(result, expected_result);
1779 let k: __mmask16 = 0;
1780 let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
1781 let result: [f32; 16] = transmute(c.as_f32x16());
1782 #[rustfmt::skip]
1783 let expected_result: [f32; 16] = [
1784 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
1785 ];
1786 assert_eq!(result, expected_result);
1787 }
1788
// Raw 16-bit patterns for the values 1.0 through 8.0. Each pattern equals the
// upper 16 bits of the corresponding `f32` bit pattern
// (1 sign bit, 8 exponent bits, 7 mantissa bits).
const BF16_ONE: u16 = 0x3F80; // 0b0_01111111_0000000
const BF16_TWO: u16 = 0x4000; // 0b0_10000000_0000000
const BF16_THREE: u16 = 0x4040; // 0b0_10000000_1000000
const BF16_FOUR: u16 = 0x4080; // 0b0_10000001_0000000
const BF16_FIVE: u16 = 0x40A0; // 0b0_10000001_0100000
const BF16_SIX: u16 = 0x40C0; // 0b0_10000001_1000000
const BF16_SEVEN: u16 = 0x40E0; // 0b0_10000001_1100000
const BF16_EIGHT: u16 = 0x4100; // 0b0_10000010_0000000
1797
1798 #[simd_test(enable = "avx512bf16")]
1799 unsafe fn test_mm512_cvtpbh_ps() {
1800 let a = __m256bh([
1801 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1802 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1803 ]);
1804 let r = _mm512_cvtpbh_ps(a);
1805 let e = _mm512_setr_ps(
1806 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
1807 );
1808 assert_eq_m512(r, e);
1809 }
1810
1811 #[simd_test(enable = "avx512bf16")]
1812 unsafe fn test_mm512_mask_cvtpbh_ps() {
1813 let a = __m256bh([
1814 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1815 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1816 ]);
1817 let src = _mm512_setr_ps(
1818 9., 10., 11., 12., 13., 14., 15., 16., 9., 10., 11., 12., 13., 14., 15., 16.,
1819 );
1820 let k = 0b1010_1010_1010_1010;
1821 let r = _mm512_mask_cvtpbh_ps(src, k, a);
1822 let e = _mm512_setr_ps(
1823 9., 2., 11., 4., 13., 6., 15., 8., 9., 2., 11., 4., 13., 6., 15., 8.,
1824 );
1825 assert_eq_m512(r, e);
1826 }
1827
1828 #[simd_test(enable = "avx512bf16")]
1829 unsafe fn test_mm512_maskz_cvtpbh_ps() {
1830 let a = __m256bh([
1831 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1832 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1833 ]);
1834 let k = 0b1010_1010_1010_1010;
1835 let r = _mm512_maskz_cvtpbh_ps(k, a);
1836 let e = _mm512_setr_ps(
1837 0., 2., 0., 4., 0., 6., 0., 8., 0., 2., 0., 4., 0., 6., 0., 8.,
1838 );
1839 assert_eq_m512(r, e);
1840 }
1841
1842 #[simd_test(enable = "avx512bf16,avx512vl")]
1843 unsafe fn test_mm256_cvtpbh_ps() {
1844 let a = __m128bh([
1845 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1846 ]);
1847 let r = _mm256_cvtpbh_ps(a);
1848 let e = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
1849 assert_eq_m256(r, e);
1850 }
1851
1852 #[simd_test(enable = "avx512bf16,avx512vl")]
1853 unsafe fn test_mm256_mask_cvtpbh_ps() {
1854 let a = __m128bh([
1855 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1856 ]);
1857 let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
1858 let k = 0b1010_1010;
1859 let r = _mm256_mask_cvtpbh_ps(src, k, a);
1860 let e = _mm256_setr_ps(9., 2., 11., 4., 13., 6., 15., 8.);
1861 assert_eq_m256(r, e);
1862 }
1863
1864 #[simd_test(enable = "avx512bf16,avx512vl")]
1865 unsafe fn test_mm256_maskz_cvtpbh_ps() {
1866 let a = __m128bh([
1867 BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
1868 ]);
1869 let k = 0b1010_1010;
1870 let r = _mm256_maskz_cvtpbh_ps(k, a);
1871 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
1872 assert_eq_m256(r, e);
1873 }
1874
1875 #[simd_test(enable = "avx512bf16,avx512vl")]
1876 unsafe fn test_mm_cvtpbh_ps() {
1877 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1878 let r = _mm_cvtpbh_ps(a);
1879 let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1880 assert_eq_m128(r, e);
1881 }
1882
1883 #[simd_test(enable = "avx512bf16,avx512vl")]
1884 unsafe fn test_mm_mask_cvtpbh_ps() {
1885 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1886 let src = _mm_setr_ps(9., 10., 11., 12.);
1887 let k = 0b1010;
1888 let r = _mm_mask_cvtpbh_ps(src, k, a);
1889 let e = _mm_setr_ps(9., 2., 11., 4.);
1890 assert_eq_m128(r, e);
1891 }
1892
1893 #[simd_test(enable = "avx512bf16,avx512vl")]
1894 unsafe fn test_mm_maskz_cvtpbh_ps() {
1895 let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
1896 let k = 0b1010;
1897 let r = _mm_maskz_cvtpbh_ps(k, a);
1898 let e = _mm_setr_ps(0., 2., 0., 4.);
1899 assert_eq_m128(r, e);
1900 }
1901
1902 #[simd_test(enable = "avx512bf16")]
1903 unsafe fn test_mm_cvtsbh_ss() {
1904 let r = _mm_cvtsbh_ss(bf16::from_bits(BF16_ONE));
1905 assert_eq!(r, 1.);
1906 }
1907
1908 #[simd_test(enable = "avx512bf16,avx512vl")]
1909 unsafe fn test_mm_cvtneps_pbh() {
1910 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1911 let r: u16x4 = transmute_copy(&_mm_cvtneps_pbh(a));
1912 let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
1913 assert_eq!(r, e);
1914 }
1915
1916 #[simd_test(enable = "avx512bf16,avx512vl")]
1917 unsafe fn test_mm_mask_cvtneps_pbh() {
1918 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1919 let src = __m128bh([5, 6, 7, 8, !0, !0, !0, !0]);
1920 let k = 0b1010;
1921 let r: u16x4 = transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a));
1922 let e = u16x4::new(5, BF16_TWO, 7, BF16_FOUR);
1923 assert_eq!(r, e);
1924 }
1925
1926 #[simd_test(enable = "avx512bf16,avx512vl")]
1927 unsafe fn test_mm_maskz_cvtneps_pbh() {
1928 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1929 let k = 0b1010;
1930 let r: u16x4 = transmute_copy(&_mm_maskz_cvtneps_pbh(k, a));
1931 let e = u16x4::new(0, BF16_TWO, 0, BF16_FOUR);
1932 assert_eq!(r, e);
1933 }
1934
1935 #[simd_test(enable = "avx512bf16,avx512vl")]
1936 unsafe fn test_mm_cvtness_sbh() {
1937 let r = _mm_cvtness_sbh(1.);
1938 assert_eq!(r.to_bits(), BF16_ONE);
1939 }
1940}