Add autocasts for bf16 and bf16xN

sayantn · sayantn · commit 2384f28c3784 · 2025-08-28T12:09:37.000+05:30
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -376,6 +376,8 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         }
 
         match self.type_kind(llvm_ty) {
+            TypeKind::BFloat => rust_ty == self.type_i16(),
+
             // Some LLVM intrinsics return **non-packed** structs, but they can't be mimicked from Rust
             // due to auto field-alignment in non-packed structs (packed structs are represented in LLVM
             // as, well, packed structs, so they won't match with those either)
@@ -393,11 +395,18 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
                     },
                 )
             }
-            TypeKind::Vector if self.element_type(llvm_ty) == self.type_i1() => {
+            TypeKind::Vector => {
                 let element_count = self.vector_length(llvm_ty) as u64;
-                let int_width = element_count.next_power_of_two().max(8);
+                let llvm_element_ty = self.element_type(llvm_ty);
 
-                rust_ty == self.type_ix(int_width)
+                if llvm_element_ty == self.type_bf16() {
+                    rust_ty == self.type_vector(self.type_i16(), element_count)
+                } else if llvm_element_ty == self.type_i1() {
+                    let int_width = element_count.next_power_of_two().max(8);
+                    rust_ty == self.type_ix(int_width)
+                } else {
+                    false
+                }
             }
             _ => false,
         }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1755,7 +1755,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
                     self.zext_i1_vector_to_int(val, src_ty, dest_ty)
                 }
             }
-            _ => unreachable!(),
+            _ => self.bitcast(val, dest_ty), // for `bf16(xN)` <-> `u16(xN)`
         }
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1101,6 +1101,9 @@ unsafe extern "C" {
     pub(crate) fn LLVMDoubleTypeInContext(C: &Context) -> &Type;
     pub(crate) fn LLVMFP128TypeInContext(C: &Context) -> &Type;
 
+    // Operations on non-IEEE real types
+    pub(crate) fn LLVMBFloatTypeInContext(C: &Context) -> &Type;
+
     // Operations on function types
     pub(crate) fn LLVMFunctionType<'a>(
         ReturnType: &'a Type,
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -176,6 +176,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
             )
         }
     }
+
+    pub(crate) fn type_bf16(&self) -> &'ll Type {
+        unsafe { llvm::LLVMBFloatTypeInContext(self.llcx()) }
+    }
 }
 
 impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {
@@ -249,7 +253,7 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {
 
     fn float_width(&self, ty: &'ll Type) -> usize {
         match self.type_kind(ty) {
-            TypeKind::Half => 16,
+            TypeKind::Half | TypeKind::BFloat => 16,
             TypeKind::Float => 32,
             TypeKind::Double => 64,
             TypeKind::X86_FP80 => 80,
diff --git a/tests/codegen-llvm/inject-autocast.rs b/tests/codegen-llvm/inject-autocast.rs
@@ -4,7 +4,7 @@
 #![feature(link_llvm_intrinsics, abi_unadjusted, repr_simd, simd_ffi, portable_simd, f16)]
 #![crate_type = "lib"]
 
-use std::simd::i64x2;
+use std::simd::{f32x4, i16x8, i64x2};
 
 #[repr(simd)]
 pub struct Tile([i8; 1024]);
@@ -36,6 +36,19 @@ pub unsafe fn struct_with_i1_vector_autocast(a: i64x2, b: i64x2) -> (u8, u8) {
     foo(a, b)
 }
 
+// CHECK-LABEL: @bf16_vector_autocast
+#[no_mangle]
+pub unsafe fn bf16_vector_autocast(a: f32x4) -> i16x8 {
+    extern "unadjusted" {
+        #[link_name = "llvm.x86.vcvtneps2bf16128"]
+        fn foo(a: f32x4) -> i16x8;
+    }
+
+    // CHECK: %1 = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %0)
+    // CHECK-NEXT: %2 = bitcast <8 x bfloat> %1 to <8 x i16>
+    foo(a)
+}
+
 // CHECK-LABEL: @struct_autocast
 #[no_mangle]
 pub unsafe fn struct_autocast(key_metadata: u32, key: i64x2) -> Bar {
@@ -77,6 +90,8 @@ pub unsafe fn i1_vector_autocast(a: f16x8) -> u8 {
 
 // CHECK: declare { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64>, <2 x i64>)
 
+// CHECK: declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float>)
+
 // CHECK: declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
 
 // CHECK: declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32 immarg)

Original file line number	Diff line number	Diff line change
`@@ -376,6 +376,8 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {`
`376`	`376`	`}`
`377`	`377`
`378`	`378`	`match self.type_kind(llvm_ty) {`
	`379`	`+ TypeKind::BFloat => rust_ty == self.type_i16(),`
	`380`	`+`
`379`	`381`	`// Some LLVM intrinsics return **non-packed** structs, but they can't be mimicked from Rust`
`380`	`382`	`// due to auto field-alignment in non-packed structs (packed structs are represented in LLVM`
`381`	`383`	`// as, well, packed structs, so they won't match with those either)`
`@@ -393,11 +395,18 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {`
`393`	`395`	`},`
`394`	`396`	`)`
`395`	`397`	`}`
`396`		`- TypeKind::Vector if self.element_type(llvm_ty) == self.type_i1() => {`
	`398`	`+ TypeKind::Vector => {`
`397`	`399`	`let element_count = self.vector_length(llvm_ty) as u64;`
`398`		`- let int_width = element_count.next_power_of_two().max(8);`
	`400`	`+ let llvm_element_ty = self.element_type(llvm_ty);`
`399`	`401`
`400`		`- rust_ty == self.type_ix(int_width)`
	`402`	`+ if llvm_element_ty == self.type_bf16() {`
	`403`	`+ rust_ty == self.type_vector(self.type_i16(), element_count)`
	`404`	`+ } else if llvm_element_ty == self.type_i1() {`
	`405`	`+ let int_width = element_count.next_power_of_two().max(8);`
	`406`	`+ rust_ty == self.type_ix(int_width)`
	`407`	`+ } else {`
	`408`	`+ false`
	`409`	`+ }`
`401`	`410`	`}`
`402`	`411`	`_ => false,`
`403`	`412`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1755,7 +1755,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {`
`1755`	`1755`	`self.zext_i1_vector_to_int(val, src_ty, dest_ty)`
`1756`	`1756`	`}`
`1757`	`1757`	`}`
`1758`		`- _ => unreachable!(),`
	`1758`	`` + _ => self.bitcast(val, dest_ty), // for `bf16(xN)` <-> `u16(xN)` ``
`1759`	`1759`	`}`
`1760`	`1760`	`}`
`1761`	`1761`
Original file line number	Diff line number	Diff line change
`@@ -176,6 +176,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {`
`176`	`176`	`)`
`177`	`177`	`}`
`178`	`178`	`}`
	`179`	`+`
	`180`	`+ pub(crate) fn type_bf16(&self) -> &'ll Type {`
	`181`	`+ unsafe { llvm::LLVMBFloatTypeInContext(self.llcx()) }`
	`182`	`+ }`
`179`	`183`	`}`
`180`	`184`
`181`	`185`	`impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {`
`@@ -249,7 +253,7 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {`
`249`	`253`
`250`	`254`	`fn float_width(&self, ty: &'ll Type) -> usize {`
`251`	`255`	`match self.type_kind(ty) {`
`252`		`- TypeKind::Half => 16,`
	`256`	`+ TypeKind::Half \| TypeKind::BFloat => 16,`
`253`	`257`	`TypeKind::Float => 32,`
`254`	`258`	`TypeKind::Double => 64,`
`255`	`259`	`TypeKind::X86_FP80 => 80,`