From bff8cec56c96730a789bc97b214b54290ef54b46 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Fri, 14 Jun 2024 17:45:59 +0100 Subject: [PATCH 1/4] std_detect: Add aarch64/linux/LLVM features Add detection for various aarch64 CPU features already supported by LLVM and Linux. This commit adds feature detection for the following features: - FEAT_CSSC - FEAT_ECV - FEAT_FAMINMAX - FEAT_FLAGM2 - FEAT_FP8 - FEAT_FP8DOT2 - FEAT_FP8DOT4 - FEAT_FP8FMA - FEAT_HBC - FEAT_LSE128 - FEAT_LUT - FEAT_MOPS - FEAT_LRCPC3 - FEAT_SVE_B16B16 - FEAT_SVE2p1 - FEAT_WFxT It also adds feature detection for FEAT_FPMR. It is somewhat of a special case because FPMR only exists as a feature in LLVM 18; it has since been removed from upstream LLVM. On that account, the intention is for it to be detectable at runtime through stdarch but not have a corresponding compile-time Rust target feature. Linux features: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h LLVM features: llvm-project/llvm/lib/Target/AArch64/AArch64.td --- crates/std_detect/src/detect/arch/aarch64.rs | 63 +++++++++- .../std_detect/src/detect/os/linux/aarch64.rs | 117 ++++++++++++++++-- crates/std_detect/tests/cpu-detection.rs | 18 +++ .../std_detect/tests/macro_trailing_commas.rs | 1 + 4 files changed, 190 insertions(+), 9 deletions(-) diff --git a/crates/std_detect/src/detect/arch/aarch64.rs b/crates/std_detect/src/detect/arch/aarch64.rs index 742c2bebd9..2b833599b5 100644 --- a/crates/std_detect/src/detect/arch/aarch64.rs +++ b/crates/std_detect/src/detect/arch/aarch64.rs @@ -22,22 +22,27 @@ features! 
{ /// * `"crc"` - FEAT_CRC /// * `"lse"` - FEAT_LSE /// * `"lse2"` - FEAT_LSE2 + /// * `"lse128"` - FEAT_LSE128 /// * `"rdm"` - FEAT_RDM /// * `"rcpc"` - FEAT_LRCPC /// * `"rcpc2"` - FEAT_LRCPC2 + /// * `"rcpc3"` - FEAT_LRCPC3 /// * `"dotprod"` - FEAT_DotProd /// * `"tme"` - FEAT_TME /// * `"fhm"` - FEAT_FHM /// * `"dit"` - FEAT_DIT /// * `"flagm"` - FEAT_FLAGM + /// * `"flagm2"` - FEAT_FLAGM2 /// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2 /// * `"sb"` - FEAT_SB /// * `"paca"` - FEAT_PAuth (address authentication) /// * `"pacg"` - FEAT_Pauth (generic authentication) /// * `"dpb"` - FEAT_DPB /// * `"dpb2"` - FEAT_DPB2 + /// * `"sve-b16b16"` - FEAT_SVE_B16B16 /// * `"sve2"` - FEAT_SVE2 - /// * `"sve2-aes"` - FEAT_SVE2_AES + /// * `"sve2p1"` - FEAT_SVE2p1 + /// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) /// * `"sve2-sm4"` - FEAT_SVE2_SM4 /// * `"sve2-sha3"` - FEAT_SVE2_SHA3 /// * `"sve2-bitperm"` - FEAT_SVE2_BitPerm @@ -55,6 +60,18 @@ features! { /// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256 /// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3 /// * `"sm4"` - FEAT_SM3 & FEAT_SM4 + /// * `"hbc"` - FEAT_HBC + /// * `"mops"` - FEAT_MOPS + /// * `"ecv"` - FEAT_ECV + /// * `"cssc"` - FEAT_CSSC + /// * `"fpmr"` - FEAT_FPMR + /// * `"lut"` - FEAT_LUT + /// * `"faminmax"` - FEAT_FAMINMAX + /// * `"fp8"` - FEAT_FP8 + /// * `"fp8fma"` - FEAT_FP8FMA + /// * `"fp8dot4"` - FEAT_FP8DOT4 + /// * `"fp8dot2"` - FEAT_FP8DOT2 + /// * `"wfxt"` - FEAT_WFxT /// /// [docs]: https://developer.arm.com/documentation/ddi0487/latest #[stable(feature = "simd_aarch64", since = "1.60.0")] @@ -67,6 +84,14 @@ features! 
{ @NO_RUNTIME_DETECTION: "v8.5a"; @NO_RUNTIME_DETECTION: "v8.6a"; @NO_RUNTIME_DETECTION: "v8.7a"; + @NO_RUNTIME_DETECTION: "v8.8a"; + @NO_RUNTIME_DETECTION: "v8.9a"; + @NO_RUNTIME_DETECTION: "v9.1a"; + @NO_RUNTIME_DETECTION: "v9.2a"; + @NO_RUNTIME_DETECTION: "v9.3a"; + @NO_RUNTIME_DETECTION: "v9.4a"; + @NO_RUNTIME_DETECTION: "v9.5a"; + @NO_RUNTIME_DETECTION: "v9a"; @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] asimd: "neon"; /// FEAT_AdvSIMD (Advanced SIMD/NEON) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pmull: "pmull"; @@ -85,12 +110,16 @@ features! { /// FEAT_LSE (Large System Extension - atomics) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2"; /// FEAT_LSE2 (unaligned and register-pair atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128"; + /// FEAT_LSE128 (128-bit atomics) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm"; /// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc"; /// FEAT_LRCPC (Release consistent Processor consistent) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2"; /// FEAT_LRCPC2 (RCPC with immediate offsets) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3"; + /// FEAT_LRCPC3 (RCPC Instructions v3) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod"; /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme"; @@ -101,6 +130,8 @@ features! 
{ /// FEAT_DIT (Data Independent Timing instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm"; /// FEAT_FLAGM (flag manipulation instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] flagm2: "flagm2"; + /// FEAT_FLAGM2 (flag manipulation instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs"; /// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb"; @@ -115,14 +146,18 @@ features! { /// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2"; /// FEAT_SVE2 (Scalable Vector Extension 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1"; + /// FEAT_SVE2p1 (Scalable Vector Extension 2.1) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes"; - /// FEAT_SVE_AES (SVE2 AES crypto) + /// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4"; /// FEAT_SVE_SM4 (SVE2 SM4 crypto) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3"; /// FEAT_SVE_SHA3 (SVE2 SHA3 crypto) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm"; /// FEAT_SVE_BitPerm (SVE2 bit permutation instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve_b16b16: "sve-b16b16"; + /// FEAT_SVE_B16B16 (SVE or SME Instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts"; /// FEAT_FRINTTS (float to integer rounding instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm"; @@ -151,4 +186,28 @@ features! 
{ /// FEAT_SHA512 & FEAT_SHA3 (SHA2-512 & SHA3 instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4"; /// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc"; + /// FEAT_HBC (Hinted conditional branches) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops"; + /// FEAT_MOPS (Standardization of memory operations) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv"; + /// FEAT_ECV (Enhanced Counter Virtualization) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc"; + /// FEAT_CSSC (Common Short Sequence Compression instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr"; + /// FEAT_FPMR (Special-purpose AArch64-FPMR register) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut"; + /// FEAT_LUT (Lookup Table Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax"; + /// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8"; + /// FEAT_FP8 (F8CVT Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma"; + /// FEAT_FP8FMA (F8FMA Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4"; + /// FEAT_FP8DOT4 (F8DP4 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2"; + /// FEAT_FP8DOT2 (F8DP2 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt"; + /// FEAT_WFxT (WFET and 
WFIT Instructions) } diff --git a/crates/std_detect/src/detect/os/linux/aarch64.rs b/crates/std_detect/src/detect/os/linux/aarch64.rs index 01ce47806c..e8053b644a 100644 --- a/crates/std_detect/src/detect/os/linux/aarch64.rs +++ b/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -83,11 +83,11 @@ struct AtHwcap { dcpodp: bool, sve2: bool, sveaes: bool, - // svepmull: No LLVM support. + svepmull: bool, svebitperm: bool, svesha3: bool, svesm4: bool, - // flagm2: No LLVM support. + flagm2: bool, frint: bool, // svei8mm: See i8mm feature. svef32mm: bool, @@ -99,6 +99,31 @@ struct AtHwcap { rng: bool, bti: bool, mte: bool, + ecv: bool, + // afp: bool, + // rpres: bool, + // mte3: bool, + wfxt: bool, + // ebf16: bool, + // sveebf16: bool, + cssc: bool, + // rprfm: bool, + sve2p1: bool, + smeb16b16: bool, + mops: bool, + hbc: bool, + sveb16b16: bool, + lrcpc3: bool, + lse128: bool, + fpmr: bool, + lut: bool, + faminmax: bool, + f8cvt: bool, + f8fma: bool, + f8dp4: bool, + f8dp2: bool, + f8e4m3: bool, + f8e5m2: bool, } impl From for AtHwcap { @@ -137,14 +162,16 @@ impl From for AtHwcap { sb: bit::test(auxv.hwcap, 29), paca: bit::test(auxv.hwcap, 30), pacg: bit::test(auxv.hwcap, 31), + + // AT_HWCAP2 dcpodp: bit::test(auxv.hwcap2, 0), sve2: bit::test(auxv.hwcap2, 1), sveaes: bit::test(auxv.hwcap2, 2), - // svepmull: bit::test(auxv.hwcap2, 3), + svepmull: bit::test(auxv.hwcap2, 3), svebitperm: bit::test(auxv.hwcap2, 4), svesha3: bit::test(auxv.hwcap2, 5), svesm4: bit::test(auxv.hwcap2, 6), - // flagm2: bit::test(auxv.hwcap2, 7), + flagm2: bit::test(auxv.hwcap2, 7), frint: bit::test(auxv.hwcap2, 8), // svei8mm: bit::test(auxv.hwcap2, 9), svef32mm: bit::test(auxv.hwcap2, 10), @@ -156,6 +183,31 @@ impl From for AtHwcap { rng: bit::test(auxv.hwcap2, 16), bti: bit::test(auxv.hwcap2, 17), mte: bit::test(auxv.hwcap2, 18), + ecv: bit::test(auxv.hwcap2, 19), + // afp: bit::test(auxv.hwcap2, 20), + // rpres: bit::test(auxv.hwcap2, 21), + // mte3: bit::test(auxv.hwcap2, 22), + wfxt: 
bit::test(auxv.hwcap2, 31), + // ebf16: bit::test(auxv.hwcap2, 32), + // sveebf16: bit::test(auxv.hwcap2, 33), + cssc: bit::test(auxv.hwcap2, 34), + // rprfm: bit::test(auxv.hwcap2, 35), + sve2p1: bit::test(auxv.hwcap2, 36), + smeb16b16: bit::test(auxv.hwcap2, 41), + mops: bit::test(auxv.hwcap2, 43), + hbc: bit::test(auxv.hwcap2, 44), + sveb16b16: bit::test(auxv.hwcap2, 45), + lrcpc3: bit::test(auxv.hwcap2, 46), + lse128: bit::test(auxv.hwcap2, 47), + fpmr: bit::test(auxv.hwcap2, 48), + lut: bit::test(auxv.hwcap2, 49), + faminmax: bit::test(auxv.hwcap2, 50), + f8cvt: bit::test(auxv.hwcap2, 51), + f8fma: bit::test(auxv.hwcap2, 52), + f8dp4: bit::test(auxv.hwcap2, 53), + f8dp2: bit::test(auxv.hwcap2, 54), + f8e4m3: bit::test(auxv.hwcap2, 55), + f8e5m2: bit::test(auxv.hwcap2, 56), } } } @@ -201,14 +253,16 @@ impl From for AtHwcap { sb: f.has("sb"), paca: f.has("paca"), pacg: f.has("pacg"), + + // AT_HWCAP2 dcpodp: f.has("dcpodp"), sve2: f.has("sve2"), sveaes: f.has("sveaes"), - // svepmull: f.has("svepmull"), + svepmull: f.has("svepmull"), svebitperm: f.has("svebitperm"), svesha3: f.has("svesha3"), svesm4: f.has("svesm4"), - // flagm2: f.has("flagm2"), + flagm2: f.has("flagm2"), frint: f.has("frint"), // svei8mm: f.has("svei8mm"), svef32mm: f.has("svef32mm"), @@ -220,6 +274,31 @@ impl From for AtHwcap { rng: f.has("rng"), bti: f.has("bti"), mte: f.has("mte"), + ecv: f.has("ecv"), + // afp: f.has("afp"), + // rpres: f.has("rpres"), + // mte3: f.has("mte3"), + wfxt: f.has("wfxt"), + // ebf16: f.has("ebf16"), + // sveebf16: f.has("sveebf16"), + cssc: f.has("cssc"), + // rprfm: f.has("rprfm"), + sve2p1: f.has("sve2p1"), + smeb16b16: f.has("smeb16b16"), + mops: f.has("mops"), + hbc: f.has("hbc"), + sveb16b16: f.has("sveb16b16"), + lrcpc3: f.has("lrcpc3"), + lse128: f.has("lse128"), + fpmr: f.has("fpmr"), + lut: f.has("lut"), + faminmax: f.has("faminmax"), + f8cvt: f.has("f8cvt"), + f8fma: f.has("f8fma"), + f8dp4: f.has("f8dp4"), + f8dp2: f.has("f8dp2"), + f8e4m3: 
f.has("f8e4m3"), + f8e5m2: f.has("f8e5m2"), } } } @@ -267,11 +346,14 @@ impl AtHwcap { enable_feature(Feature::crc, self.crc32); enable_feature(Feature::lse, self.atomics); enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::lse128, self.lse128); enable_feature(Feature::rcpc, self.lrcpc); // RCPC2 (rcpc-immo in LLVM) requires RCPC support enable_feature(Feature::rcpc2, self.ilrcpc && self.lrcpc); + enable_feature(Feature::rcpc3, self.lrcpc3); enable_feature(Feature::dit, self.dit); enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::flagm2, self.flagm2); enable_feature(Feature::ssbs, self.ssbs); enable_feature(Feature::sb, self.sb); enable_feature(Feature::paca, self.paca); @@ -317,8 +399,12 @@ impl AtHwcap { // SVE2 requires SVE let sve2 = self.sve2 && self.sve && asimd; enable_feature(Feature::sve2, sve2); + enable_feature(Feature::sve2p1, self.sve2p1); // SVE2 extensions require SVE2 and crypto features - enable_feature(Feature::sve2_aes, self.sveaes && sve2 && self.aes); + enable_feature( + Feature::sve2_aes, + self.sveaes && self.svepmull && sve2 && self.aes, + ); enable_feature( Feature::sve2_sm4, self.svesm4 && sve2 && self.sm3 && self.sm4, @@ -328,6 +414,23 @@ impl AtHwcap { self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, ); enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + // SVE_B16B16 can be implemented either for SVE or SME + enable_feature( + Feature::sve_b16b16, + self.bf16 && (self.sveb16b16 || self.smeb16b16), + ); + enable_feature(Feature::hbc, self.hbc); + enable_feature(Feature::mops, self.mops); + enable_feature(Feature::ecv, self.ecv); + enable_feature(Feature::lut, self.lut); + enable_feature(Feature::cssc, self.cssc); + enable_feature(Feature::fpmr, self.fpmr); + enable_feature(Feature::faminmax, self.faminmax); + enable_feature(Feature::fp8, self.f8cvt); + enable_feature(Feature::fp8fma, self.f8fma); + enable_feature(Feature::fp8dot4, self.f8dp4); + 
enable_feature(Feature::fp8dot2, self.f8dp2); + enable_feature(Feature::wfxt, self.wfxt); } value } diff --git a/crates/std_detect/tests/cpu-detection.rs b/crates/std_detect/tests/cpu-detection.rs index 615268e876..b43449c7f6 100644 --- a/crates/std_detect/tests/cpu-detection.rs +++ b/crates/std_detect/tests/cpu-detection.rs @@ -1,6 +1,7 @@ #![allow(internal_features)] #![feature(stdarch_internal)] #![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] +#![cfg_attr(target_arch = "aarch64", feature(stdarch_aarch64_feature_detection))] #![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))] #![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))] #![cfg_attr( @@ -67,21 +68,26 @@ fn aarch64_linux() { println!("crc: {}", is_aarch64_feature_detected!("crc")); println!("lse: {}", is_aarch64_feature_detected!("lse")); println!("lse2: {}", is_aarch64_feature_detected!("lse2")); + println!("lse128: {}", is_aarch64_feature_detected!("lse128")); println!("rdm: {}", is_aarch64_feature_detected!("rdm")); println!("rcpc: {}", is_aarch64_feature_detected!("rcpc")); println!("rcpc2: {}", is_aarch64_feature_detected!("rcpc2")); + println!("rcpc3: {}", is_aarch64_feature_detected!("rcpc3")); println!("dotprod: {}", is_aarch64_feature_detected!("dotprod")); println!("tme: {}", is_aarch64_feature_detected!("tme")); println!("fhm: {}", is_aarch64_feature_detected!("fhm")); println!("dit: {}", is_aarch64_feature_detected!("dit")); println!("flagm: {}", is_aarch64_feature_detected!("flagm")); + println!("flagm2: {}", is_aarch64_feature_detected!("flagm2")); println!("ssbs: {}", is_aarch64_feature_detected!("ssbs")); println!("sb: {}", is_aarch64_feature_detected!("sb")); println!("paca: {}", is_aarch64_feature_detected!("paca")); println!("pacg: {}", is_aarch64_feature_detected!("pacg")); println!("dpb: {}", is_aarch64_feature_detected!("dpb")); println!("dpb2: {}", is_aarch64_feature_detected!("dpb2")); + 
println!("sve-b16b16: {}", is_aarch64_feature_detected!("sve-b16b16")); println!("sve2: {}", is_aarch64_feature_detected!("sve2")); + println!("sve2p1: {}", is_aarch64_feature_detected!("sve2p1")); println!("sve2-aes: {}", is_aarch64_feature_detected!("sve2-aes")); println!("sve2-sm4: {}", is_aarch64_feature_detected!("sve2-sm4")); println!("sve2-sha3: {}", is_aarch64_feature_detected!("sve2-sha3")); @@ -103,6 +109,18 @@ fn aarch64_linux() { println!("sha2: {}", is_aarch64_feature_detected!("sha2")); println!("sha3: {}", is_aarch64_feature_detected!("sha3")); println!("sm4: {}", is_aarch64_feature_detected!("sm4")); + println!("hbc: {}", is_aarch64_feature_detected!("hbc")); + println!("mops: {}", is_aarch64_feature_detected!("mops")); + println!("ecv: {}", is_aarch64_feature_detected!("ecv")); + println!("cssc: {}", is_aarch64_feature_detected!("cssc")); + println!("fpmr: {}", is_aarch64_feature_detected!("fpmr")); + println!("lut: {}", is_aarch64_feature_detected!("lut")); + println!("faminmax: {}", is_aarch64_feature_detected!("faminmax")); + println!("fp8: {}", is_aarch64_feature_detected!("fp8")); + println!("fp8fma: {}", is_aarch64_feature_detected!("fp8fma")); + println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4")); + println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2")); + println!("wfxt: {}", is_aarch64_feature_detected!("wfxt")); } #[test] diff --git a/crates/std_detect/tests/macro_trailing_commas.rs b/crates/std_detect/tests/macro_trailing_commas.rs index 1d1ed4d386..9f6ef074d0 100644 --- a/crates/std_detect/tests/macro_trailing_commas.rs +++ b/crates/std_detect/tests/macro_trailing_commas.rs @@ -12,6 +12,7 @@ feature(stdarch_internal) )] #![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] +#![cfg_attr(target_arch = "aarch64", feature(stdarch_aarch64_feature_detection))] #![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))] #![cfg_attr(target_arch = "powerpc64", 
feature(stdarch_powerpc_feature_detection))] #![allow(clippy::unwrap_used, clippy::use_debug, clippy::print_stdout)] From 60ad653d691eb17a5ddfc0ed54a526c769b8ab7e Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Mon, 17 Jun 2024 14:25:16 +0100 Subject: [PATCH 2/4] std_detect: Add aarch64/linux/LLVM SME features Add detection for SME features supported by LLVM and the Linux Kernel. Include commented-out hwcap fields for features supported by Linux but not by LLVM. This commit adds feature detection for the following features: - FEAT_SME - FEAT_SME_F16F16 - FEAT_SME_F64F64 - FEAT_SME_F8F16 - FEAT_SME_F8F32 - FEAT_SME_FA64 - FEAT_SME_I16I64 - FEAT_SME_LUTv2 - FEAT_SME2 - FEAT_SME2p1 - FEAT_SSVE_FP8DOT2 - FEAT_SSVE_FP8DOT4 - FEAT_SSVE_FP8FMA Linux features: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h LLVM features: llvm-project/llvm/lib/Target/AArch64/AArch64.td --- crates/std_detect/src/detect/arch/aarch64.rs | 39 ++++++++++ .../std_detect/src/detect/os/linux/aarch64.rs | 76 +++++++++++++++++++ crates/std_detect/tests/cpu-detection.rs | 22 ++++++ 3 files changed, 137 insertions(+) diff --git a/crates/std_detect/src/detect/arch/aarch64.rs b/crates/std_detect/src/detect/arch/aarch64.rs index 2b833599b5..ad64bb3588 100644 --- a/crates/std_detect/src/detect/arch/aarch64.rs +++ b/crates/std_detect/src/detect/arch/aarch64.rs @@ -72,6 +72,19 @@ features! 
{ /// * `"fp8dot4"` - FEAT_FP8DOT4 /// * `"fp8dot2"` - FEAT_FP8DOT2 /// * `"wfxt"` - FEAT_WFxT + /// * `"sme"` - FEAT_SME + /// * `"sme-i16i64"` - FEAT_SME_I16I64 + /// * `"sme-f64f64"` - FEAT_SME_F64F64 + /// * `"sme-fa64"` - FEAT_SME_FA64 + /// * `"sme2"` - FEAT_SME2 + /// * `"sme2p1"` - FEAT_SME2p1 + /// * `"sme-f16f16"` - FEAT_SME_F16F16 + /// * `"sme-lutv2"` - FEAT_SME_LUTv2 + /// * `"sme-f8f16"` - FEAT_SME_F8F16 + /// * `"sme-f8f32"` - FEAT_SME_F8F32 + /// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA + /// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4 + /// * `"ssve-fp8dot2"` - FEAT_SSVE_FP8DOT2 /// /// [docs]: https://developer.arm.com/documentation/ddi0487/latest #[stable(feature = "simd_aarch64", since = "1.60.0")] @@ -210,4 +223,30 @@ features! { /// FEAT_FP8DOT2 (F8DP2 Instructions) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt"; /// FEAT_WFxT (WFET and WFIT Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme: "sme"; + /// FEAT_SME (Scalable Matrix Extension) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64"; + /// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64"; + /// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64"; + /// FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2: "sme2"; + /// FEAT_SME2 (SME Version 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2p1: "sme2p1"; + /// FEAT_SME2p1 (SME Version 2.1) + @FEATURE: #[unstable(feature = 
"stdarch_aarch64_feature_detection", issue = "127764")] sme_f16f16: "sme-f16f16"; + /// FEAT_SME_F16F16 (Non-widening half-precision FP16 to FP16 arithmetic for SME2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2"; + /// FEAT_SME_LUTv2 (LUTI4 Instruction) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f16: "sme-f8f16"; + /// FEAT_SME_F8F16 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f32: "sme-f8f32"; + /// FEAT_SME_F8F32 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma"; + /// FEAT_SSVE_FP8FMA + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4"; + /// FEAT_SSVE_FP8DOT4 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot2: "ssve-fp8dot2"; + /// FEAT_SSVE_FP8DOT2 } diff --git a/crates/std_detect/src/detect/os/linux/aarch64.rs b/crates/std_detect/src/detect/os/linux/aarch64.rs index e8053b644a..6209a4a344 100644 --- a/crates/std_detect/src/detect/os/linux/aarch64.rs +++ b/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -103,13 +103,26 @@ struct AtHwcap { // afp: bool, // rpres: bool, // mte3: bool, + sme: bool, + smei16i64: bool, + smef64f64: bool, + // smei8i32: bool, + // smef16f32: bool, + // smeb16f32: bool, + // smef32f32: bool, + smefa64: bool, wfxt: bool, // ebf16: bool, // sveebf16: bool, cssc: bool, // rprfm: bool, sve2p1: bool, + sme2: bool, + sme2p1: bool, + // smei16i32: bool, + // smebi32i32: bool, smeb16b16: bool, + smef16f16: bool, mops: bool, hbc: bool, sveb16b16: bool, @@ -124,6 +137,12 @@ struct AtHwcap { f8dp2: bool, f8e4m3: bool, f8e5m2: bool, + smelutv2: bool, + smef8f16: bool, + smef8f32: bool, + smesf8fma: bool, + smesf8dp4: bool, + smesf8dp2: bool, } impl From for AtHwcap { @@ -187,13 +206,26 @@ impl 
From for AtHwcap { // afp: bit::test(auxv.hwcap2, 20), // rpres: bit::test(auxv.hwcap2, 21), // mte3: bit::test(auxv.hwcap2, 22), + sme: bit::test(auxv.hwcap2, 23), + smei16i64: bit::test(auxv.hwcap2, 24), + smef64f64: bit::test(auxv.hwcap2, 25), + // smei8i32: bit::test(auxv.hwcap2, 26), + // smef16f32: bit::test(auxv.hwcap2, 27), + // smeb16f32: bit::test(auxv.hwcap2, 28), + // smef32f32: bit::test(auxv.hwcap2, 29), + smefa64: bit::test(auxv.hwcap2, 30), wfxt: bit::test(auxv.hwcap2, 31), // ebf16: bit::test(auxv.hwcap2, 32), // sveebf16: bit::test(auxv.hwcap2, 33), cssc: bit::test(auxv.hwcap2, 34), // rprfm: bit::test(auxv.hwcap2, 35), sve2p1: bit::test(auxv.hwcap2, 36), + sme2: bit::test(auxv.hwcap2, 37), + sme2p1: bit::test(auxv.hwcap2, 38), + // smei16i32: bit::test(auxv.hwcap2, 39), + // smebi32i32: bit::test(auxv.hwcap2, 40), smeb16b16: bit::test(auxv.hwcap2, 41), + smef16f16: bit::test(auxv.hwcap2, 42), mops: bit::test(auxv.hwcap2, 43), hbc: bit::test(auxv.hwcap2, 44), sveb16b16: bit::test(auxv.hwcap2, 45), @@ -208,6 +240,12 @@ impl From for AtHwcap { f8dp2: bit::test(auxv.hwcap2, 54), f8e4m3: bit::test(auxv.hwcap2, 55), f8e5m2: bit::test(auxv.hwcap2, 56), + smelutv2: bit::test(auxv.hwcap2, 57), + smef8f16: bit::test(auxv.hwcap2, 58), + smef8f32: bit::test(auxv.hwcap2, 59), + smesf8fma: bit::test(auxv.hwcap2, 60), + smesf8dp4: bit::test(auxv.hwcap2, 61), + smesf8dp2: bit::test(auxv.hwcap2, 62), } } } @@ -278,13 +316,26 @@ impl From for AtHwcap { // afp: f.has("afp"), // rpres: f.has("rpres"), // mte3: f.has("mte3"), + sme: f.has("sme"), + smei16i64: f.has("smei16i64"), + smef64f64: f.has("smef64f64"), + // smei8i32: f.has("smei8i32"), + // smef16f32: f.has("smef16f32"), + // smeb16f32: f.has("smeb16f32"), + // smef32f32: f.has("smef32f32"), + smefa64: f.has("smefa64"), wfxt: f.has("wfxt"), // ebf16: f.has("ebf16"), // sveebf16: f.has("sveebf16"), cssc: f.has("cssc"), // rprfm: f.has("rprfm"), sve2p1: f.has("sve2p1"), + sme2: f.has("sme2"), + sme2p1: 
f.has("sme2p1"), + // smei16i32: f.has("smei16i32"), + // smebi32i32: f.has("smebi32i32"), smeb16b16: f.has("smeb16b16"), + smef16f16: f.has("smef16f16"), mops: f.has("mops"), hbc: f.has("hbc"), sveb16b16: f.has("sveb16b16"), @@ -299,6 +350,12 @@ impl From for AtHwcap { f8dp2: f.has("f8dp2"), f8e4m3: f.has("f8e4m3"), f8e5m2: f.has("f8e5m2"), + smelutv2: f.has("smelutv2"), + smef8f16: f.has("smef8f16"), + smef8f32: f.has("smef8f32"), + smesf8fma: f.has("smesf8fma"), + smesf8dp4: f.has("smesf8dp4"), + smesf8dp2: f.has("smesf8dp2"), } } } @@ -431,6 +488,25 @@ impl AtHwcap { enable_feature(Feature::fp8dot4, self.f8dp4); enable_feature(Feature::fp8dot2, self.f8dp2); enable_feature(Feature::wfxt, self.wfxt); + enable_feature(Feature::sme, self.sme && self.bf16); + enable_feature(Feature::sme_i16i64, self.smei16i64 && self.sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && self.sme); + // enable_feature(Feature::sme_i8i32, self.smei8i32); + // enable_feature(Feature::sme_f16f32, self.smef16f32); + // enable_feature(Feature::sme_b16f32, self.smeb16f32); + // enable_feature(Feature::sme_f32f32, self.smef32f32); + enable_feature(Feature::sme_fa64, self.smefa64 && self.sme && sve2); + enable_feature(Feature::sme2, self.sme2 && self.sme); + enable_feature(Feature::sme2p1, self.sme2p1 && self.sme2 && self.sme); + // enable_feature(Feature::sme_i16i32, self.smei16i32); + // enable_feature(Feature::sme_bi32i32, self.smebi32i32); + enable_feature(Feature::sme_f16f16, self.smef16f16); + enable_feature(Feature::sme_lutv2, self.smelutv2); + enable_feature(Feature::sme_f8f16, self.smef8f16 && self.sme2 && self.f8cvt); + enable_feature(Feature::sme_f8f32, self.smef8f32 && self.sme2 && self.f8cvt); + enable_feature(Feature::ssve_fp8fma, self.smesf8fma && self.sme2); + enable_feature(Feature::ssve_fp8dot4, self.smesf8dp4 && self.sme2); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && self.sme2); } value } diff --git a/crates/std_detect/tests/cpu-detection.rs 
b/crates/std_detect/tests/cpu-detection.rs index b43449c7f6..cecc53afa4 100644 --- a/crates/std_detect/tests/cpu-detection.rs +++ b/crates/std_detect/tests/cpu-detection.rs @@ -121,6 +121,28 @@ fn aarch64_linux() { println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4")); println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2")); println!("wfxt: {}", is_aarch64_feature_detected!("wfxt")); + println!("sme: {}", is_aarch64_feature_detected!("sme")); + println!("sme-i16i64: {}", is_aarch64_feature_detected!("sme-i16i64")); + println!("sme-f64f64: {}", is_aarch64_feature_detected!("sme-f64f64")); + println!("sme-fa64: {}", is_aarch64_feature_detected!("sme-fa64")); + println!("sme2: {}", is_aarch64_feature_detected!("sme2")); + println!("sme2p1: {}", is_aarch64_feature_detected!("sme2p1")); + println!("sme-f16f16: {}", is_aarch64_feature_detected!("sme-f16f16")); + println!("sme-lutv2: {}", is_aarch64_feature_detected!("sme-lutv2")); + println!("sme-f8f16: {}", is_aarch64_feature_detected!("sme-f8f16")); + println!("sme-f8f32: {}", is_aarch64_feature_detected!("sme-f8f32")); + println!( + "ssve-fp8fma: {}", + is_aarch64_feature_detected!("ssve-fp8fma") + ); + println!( + "ssve-fp8dot4: {}", + is_aarch64_feature_detected!("ssve-fp8dot4") + ); + println!( + "ssve-fp8dot2: {}", + is_aarch64_feature_detected!("ssve-fp8dot2") + ); } #[test] From 817e02b1c6d1d8d5741eec60758b6f19732228c8 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Tue, 2 Jul 2024 17:30:53 +0100 Subject: [PATCH 3/4] std_detect: Sort aarch64 features Alphabetically sort the list of aarch64 features. The list was getting a bit too chaotic so it was worth properly sorting. 
--- crates/std_detect/src/detect/arch/aarch64.rs | 306 +++++++++---------- 1 file changed, 153 insertions(+), 153 deletions(-) diff --git a/crates/std_detect/src/detect/arch/aarch64.rs b/crates/std_detect/src/detect/arch/aarch64.rs index ad64bb3588..b75fe933d2 100644 --- a/crates/std_detect/src/detect/arch/aarch64.rs +++ b/crates/std_detect/src/detect/arch/aarch64.rs @@ -14,77 +14,77 @@ features! { /// /// ## Supported arguments /// + /// * `"aes"` - FEAT_AES & FEAT_PMULL /// * `"asimd"` or "neon" - FEAT_AdvSIMD - /// * `"pmull"` - FEAT_PMULL - /// * `"fp"` - FEAT_FP - /// * `"fp16"` - FEAT_FP16 - /// * `"sve"` - FEAT_SVE + /// * `"bf16"` - FEAT_BF16 + /// * `"bti"` - FEAT_BTI /// * `"crc"` - FEAT_CRC - /// * `"lse"` - FEAT_LSE - /// * `"lse2"` - FEAT_LSE2 - /// * `"lse128"` - FEAT_LSE128 - /// * `"rdm"` - FEAT_RDM - /// * `"rcpc"` - FEAT_LRCPC - /// * `"rcpc2"` - FEAT_LRCPC2 - /// * `"rcpc3"` - FEAT_LRCPC3 + /// * `"cssc"` - FEAT_CSSC + /// * `"dit"` - FEAT_DIT /// * `"dotprod"` - FEAT_DotProd - /// * `"tme"` - FEAT_TME + /// * `"dpb"` - FEAT_DPB + /// * `"dpb2"` - FEAT_DPB2 + /// * `"ecv"` - FEAT_ECV + /// * `"f32mm"` - FEAT_F32MM + /// * `"f64mm"` - FEAT_F64MM + /// * `"faminmax"` - FEAT_FAMINMAX + /// * `"fcma"` - FEAT_FCMA /// * `"fhm"` - FEAT_FHM - /// * `"dit"` - FEAT_DIT /// * `"flagm"` - FEAT_FLAGM /// * `"flagm2"` - FEAT_FLAGM2 - /// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2 - /// * `"sb"` - FEAT_SB - /// * `"paca"` - FEAT_PAuth (address authentication) - /// * `"pacg"` - FEAT_Pauth (generic authentication) - /// * `"dpb"` - FEAT_DPB - /// * `"dpb2"` - FEAT_DPB2 - /// * `"sve-b16b16"` - FEAT_SVE_B16B16 - /// * `"sve2"` - FEAT_SVE2 - /// * `"sve2p1"` - FEAT_SVE2p1 - /// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) - /// * `"sve2-sm4"` - FEAT_SVE2_SM4 - /// * `"sve2-sha3"` - FEAT_SVE2_SHA3 - /// * `"sve2-bitperm"` - FEAT_SVE2_BitPerm + /// * `"fp"` - FEAT_FP + /// * `"fp16"` - FEAT_FP16 + /// * `"fp8"` - FEAT_FP8 + /// * `"fp8dot2"` - 
FEAT_FP8DOT2 + /// * `"fp8dot4"` - FEAT_FP8DOT4 + /// * `"fp8fma"` - FEAT_FP8FMA + /// * `"fpmr"` - FEAT_FPMR /// * `"frintts"` - FEAT_FRINTTS + /// * `"hbc"` - FEAT_HBC /// * `"i8mm"` - FEAT_I8MM - /// * `"f32mm"` - FEAT_F32MM - /// * `"f64mm"` - FEAT_F64MM - /// * `"bf16"` - FEAT_BF16 - /// * `"rand"` - FEAT_RNG - /// * `"bti"` - FEAT_BTI - /// * `"mte"` - FEAT_MTE & FEAT_MTE2 /// * `"jsconv"` - FEAT_JSCVT - /// * `"fcma"` - FEAT_FCMA - /// * `"aes"` - FEAT_AES & FEAT_PMULL + /// * `"lse"` - FEAT_LSE + /// * `"lse128"` - FEAT_LSE128 + /// * `"lse2"` - FEAT_LSE2 + /// * `"lut"` - FEAT_LUT + /// * `"mops"` - FEAT_MOPS + /// * `"mte"` - FEAT_MTE & FEAT_MTE2 + /// * `"paca"` - FEAT_PAuth (address authentication) + /// * `"pacg"` - FEAT_Pauth (generic authentication) + /// * `"pmull"` - FEAT_PMULL + /// * `"rand"` - FEAT_RNG + /// * `"rcpc"` - FEAT_LRCPC + /// * `"rcpc2"` - FEAT_LRCPC2 + /// * `"rcpc3"` - FEAT_LRCPC3 + /// * `"rdm"` - FEAT_RDM + /// * `"sb"` - FEAT_SB /// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256 /// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3 /// * `"sm4"` - FEAT_SM3 & FEAT_SM4 - /// * `"hbc"` - FEAT_HBC - /// * `"mops"` - FEAT_MOPS - /// * `"ecv"` - FEAT_ECV - /// * `"cssc"` - FEAT_CSSC - /// * `"fpmr"` - FEAT_FPMR - /// * `"lut"` - FEAT_LUT - /// * `"faminmax"` - FEAT_FAMINMAX - /// * `"fp8"` - FEAT_FP8 - /// * `"fp8fma"` - FEAT_FP8FMA - /// * `"fp8dot4"` - FEAT_FP8DOT4 - /// * `"fp8dot2"` - FEAT_FP8DOT2 - /// * `"wfxt"` - FEAT_WFxT /// * `"sme"` - FEAT_SME - /// * `"sme-i16i64"` - FEAT_SME_I16I64 + /// * `"sme-f16f16"` - FEAT_SME_F16F16 /// * `"sme-f64f64"` - FEAT_SME_F64F64 + /// * `"sme-f8f16"` - FEAT_SME_F8F16 + /// * `"sme-f8f32"` - FEAT_SME_F8F32 /// * `"sme-fa64"` - FEAT_SME_FA64 + /// * `"sme-i16i64"` - FEAT_SME_I16I64 + /// * `"sme-lutv2"` - FEAT_SME_LUTv2 /// * `"sme2"` - FEAT_SME2 /// * `"sme2p1"` - FEAT_SME2p1 - /// * `"sme-f16f16"` - FEAT_SME_F16F16 - /// * `"sme-lutv2"` - FEAT_SME_LUTv2 - /// * `"sme-f8f16"` - FEAT_SME_F8F16 - /// * `"sme-f8f32"` 
- FEAT_SME_F8F32 - /// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA - /// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4 + /// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2 /// * `"ssve-fp8dot2"` - FEAT_SSVE_FP8DOT2 + /// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4 + /// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA + /// * `"sve"` - FEAT_SVE + /// * `"sve-b16b16"` - FEAT_SVE_B16B16 + /// * `"sve2"` - FEAT_SVE2 + /// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + /// * `"sve2-bitperm"` - FEAT_SVE2_BitPerm + /// * `"sve2-sha3"` - FEAT_SVE2_SHA3 + /// * `"sve2-sm4"` - FEAT_SVE2_SM4 + /// * `"sve2p1"` - FEAT_SVE2p1 + /// * `"tme"` - FEAT_TME + /// * `"wfxt"` - FEAT_WFxT /// /// [docs]: https://developer.arm.com/documentation/ddi0487/latest #[stable(feature = "simd_aarch64", since = "1.60.0")] @@ -113,140 +113,140 @@ features! { @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp: "fp"; implied by target_features: ["neon"]; /// FEAT_FP (Floating point support) - Implied by `neon` target_feature - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp16: "fp16"; - /// FEAT_FP16 (Half-float support) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve: "sve"; - /// FEAT_SVE (Scalable Vector Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] aes: "aes"; + /// FEAT_AES (AES SIMD instructions) & FEAT_PMULL (PMULL{2}, 64-bit operand variants) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bf16: "bf16"; + /// FEAT_BF16 (BFloat16 type, plus MM instructions, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bti: "bti"; + /// FEAT_BTI (Branch Target Identification) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] crc: "crc"; /// FEAT_CRC32 (Cyclic Redundancy Check) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse: "lse"; - /// FEAT_LSE (Large System Extension - atomics) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] 
lse2: "lse2"; - /// FEAT_LSE2 (unaligned and register-pair atomics) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128"; - /// FEAT_LSE128 (128-bit atomics) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm"; - /// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc"; - /// FEAT_LRCPC (Release consistent Processor consistent) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2"; - /// FEAT_LRCPC2 (RCPC with immediate offsets) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3"; - /// FEAT_LRCPC3 (RCPC Instructions v3) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc"; + /// FEAT_CSSC (Common Short Sequence Compression instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dit: "dit"; + /// FEAT_DIT (Data Independent Timing instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb: "dpb"; + /// FEAT_DPB (aka dcpop - data cache clean to point of persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb2: "dpb2"; + /// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod"; /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme"; - /// FEAT_TME (Transactional Memory Extensions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv"; + /// FEAT_ECV (Enhanced Counter Virtualization) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f32mm: "f32mm"; + /// FEAT_F32MM (single-precision matrix multiplication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f64mm: "f64mm"; + 
/// FEAT_F64MM (double-precision matrix multiplication) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax"; + /// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fcma: "fcma"; + /// FEAT_FCMA (float complex number operations) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fhm: "fhm"; /// FEAT_FHM (fp16 multiplication instructions) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dit: "dit"; - /// FEAT_DIT (Data Independent Timing instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm"; /// FEAT_FLAGM (flag manipulation instructions) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] flagm2: "flagm2"; /// FEAT_FLAGM2 (flag manipulation instructions) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs"; - /// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb"; - /// FEAT_SB (speculation barrier) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] paca: "paca"; - /// FEAT_PAuth (address authentication) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pacg: "pacg"; - /// FEAT_PAuth (generic authentication) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb: "dpb"; - /// FEAT_DPB (aka dcpop - data cache clean to point of persistence) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb2: "dpb2"; - /// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2"; - /// FEAT_SVE2 (Scalable Vector Extension 2) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1"; - /// FEAT_SVE2p1 (Scalable Vector Extension 2.1) - @FEATURE: 
#[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes"; - /// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4"; - /// FEAT_SVE_SM4 (SVE2 SM4 crypto) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3"; - /// FEAT_SVE_SHA3 (SVE2 SHA3 crypto) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm"; - /// FEAT_SVE_BitPerm (SVE2 bit permutation instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve_b16b16: "sve-b16b16"; - /// FEAT_SVE_B16B16 (SVE or SME Instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp16: "fp16"; + /// FEAT_FP16 (Half-float support) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8"; + /// FEAT_FP8 (F8CVT Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2"; + /// FEAT_FP8DOT2 (F8DP2 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4"; + /// FEAT_FP8DOT4 (F8DP4 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma"; + /// FEAT_FP8FMA (F8FMA Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr"; + /// FEAT_FPMR (Special-purpose AArch64-FPMR register) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts"; /// FEAT_FRINTTS (float to integer rounding instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc"; + /// FEAT_HBC (Hinted conditional branches) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm"; /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) - 
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f32mm: "f32mm"; - /// FEAT_F32MM (single-precision matrix multiplication) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f64mm: "f64mm"; - /// FEAT_F64MM (double-precision matrix multiplication) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bf16: "bf16"; - /// FEAT_BF16 (BFloat16 type, plus MM instructions, plus ASIMD support) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rand: "rand"; - /// FEAT_RNG (Random Number Generator) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bti: "bti"; - /// FEAT_BTI (Branch Target Identification) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] mte: "mte"; - /// FEAT_MTE & FEAT_MTE2 (Memory Tagging Extension) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] jsconv: "jsconv"; /// FEAT_JSCVT (JavaScript float conversion instructions) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fcma: "fcma"; - /// FEAT_FCMA (float complex number operations) - @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] aes: "aes"; - /// FEAT_AES (AES SIMD instructions) & FEAT_PMULL (PMULL{2}, 64-bit operand variants) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse: "lse"; + /// FEAT_LSE (Large System Extension - atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128"; + /// FEAT_LSE128 (128-bit atomics) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2"; + /// FEAT_LSE2 (unaligned and register-pair atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut"; + /// FEAT_LUT (Lookup Table Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops"; + /// FEAT_MOPS (Standardization of memory operations) + @FEATURE: #[stable(feature = "simd_aarch64", 
since = "1.60.0")] mte: "mte"; + /// FEAT_MTE & FEAT_MTE2 (Memory Tagging Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] paca: "paca"; + /// FEAT_PAuth (address authentication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pacg: "pacg"; + /// FEAT_PAuth (generic authentication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rand: "rand"; + /// FEAT_RNG (Random Number Generator) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc"; + /// FEAT_LRCPC (Release consistent Processor consistent) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2"; + /// FEAT_LRCPC2 (RCPC with immediate offsets) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3"; + /// FEAT_LRCPC3 (RCPC Instructions v3) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm"; + /// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb"; + /// FEAT_SB (speculation barrier) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha2: "sha2"; /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha3: "sha3"; /// FEAT_SHA512 & FEAT_SHA3 (SHA2-512 & SHA3 instructions) @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4"; /// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc"; - /// FEAT_HBC (Hinted conditional branches) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops"; - /// FEAT_MOPS (Standardization of memory operations) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv"; - /// FEAT_ECV (Enhanced Counter Virtualization) - @FEATURE: #[unstable(feature = 
"stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc"; - /// FEAT_CSSC (Common Short Sequence Compression instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr"; - /// FEAT_FPMR (Special-purpose AArch64-FPMR register) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut"; - /// FEAT_LUT (Lookup Table Instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax"; - /// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8"; - /// FEAT_FP8 (F8CVT Instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma"; - /// FEAT_FP8FMA (F8FMA Instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4"; - /// FEAT_FP8DOT4 (F8DP4 Instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2"; - /// FEAT_FP8DOT2 (F8DP2 Instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt"; - /// FEAT_WFxT (WFET and WFIT Instructions) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme: "sme"; /// FEAT_SME (Scalable Matrix Extension) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64"; - /// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64"; - /// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64"; - /// 
FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2: "sme2"; /// FEAT_SME2 (SME Version 2) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2p1: "sme2p1"; /// FEAT_SME2p1 (SME Version 2.1) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f16f16: "sme-f16f16"; /// FEAT_SME_F16F16 (Non-widening half-precision FP16 to FP16 arithmetic for SME2) - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2"; - /// FEAT_SME_LUTv2 (LUTI4 Instruction) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64"; + /// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f16: "sme-f8f16"; /// FEAT_SME_F8F16 @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f32: "sme-f8f32"; /// FEAT_SME_F8F32 - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma"; - /// FEAT_SSVE_FP8FMA - @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4"; - /// FEAT_SSVE_FP8DOT4 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64"; + /// FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64"; + /// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2"; + /// FEAT_SME_LUTv2 (LUTI4 Instruction) + @FEATURE: 
#[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs"; + /// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe) @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot2: "ssve-fp8dot2"; /// FEAT_SSVE_FP8DOT2 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4"; + /// FEAT_SSVE_FP8DOT4 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma"; + /// FEAT_SSVE_FP8FMA + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve: "sve"; + /// FEAT_SVE (Scalable Vector Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2"; + /// FEAT_SVE2 (Scalable Vector Extension 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1"; + /// FEAT_SVE2p1 (Scalable Vector Extension 2.1) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes"; + /// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve_b16b16: "sve-b16b16"; + /// FEAT_SVE_B16B16 (SVE or SME Instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm"; + /// FEAT_SVE_BitPerm (SVE2 bit permutation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3"; + /// FEAT_SVE_SHA3 (SVE2 SHA3 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4"; + /// FEAT_SVE_SM4 (SVE2 SM4 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme"; + /// FEAT_TME (Transactional Memory Extensions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt"; + /// FEAT_WFxT (WFET and WFIT Instructions) } From 18fab17d53bf7b7c2c4d13ec90b5a2ddc8a30bc1 Mon 
Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Wed, 3 Jul 2024 12:52:55 +0100 Subject: [PATCH 4/4] std_detect: Update aarch64 feature dependencies to LLVM upstream Feature dependencies for newer aarch64 features differ between LLVM 18 in the Rust tree and upstream LLVM 19. This commit updates those dependencies to reflect new LLVM upstream changes. --- .../std_detect/src/detect/os/linux/aarch64.rs | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/crates/std_detect/src/detect/os/linux/aarch64.rs b/crates/std_detect/src/detect/os/linux/aarch64.rs index 6209a4a344..9ebd98dad8 100644 --- a/crates/std_detect/src/detect/os/linux/aarch64.rs +++ b/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -403,11 +403,12 @@ impl AtHwcap { enable_feature(Feature::crc, self.crc32); enable_feature(Feature::lse, self.atomics); enable_feature(Feature::lse2, self.uscat); - enable_feature(Feature::lse128, self.lse128); + enable_feature(Feature::lse128, self.lse128 && self.atomics); enable_feature(Feature::rcpc, self.lrcpc); // RCPC2 (rcpc-immo in LLVM) requires RCPC support - enable_feature(Feature::rcpc2, self.ilrcpc && self.lrcpc); - enable_feature(Feature::rcpc3, self.lrcpc3); + let rcpc2 = self.ilrcpc && self.lrcpc; + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2); enable_feature(Feature::dit, self.dit); enable_feature(Feature::flagm, self.flagm); enable_feature(Feature::flagm2, self.flagm2); @@ -456,7 +457,7 @@ impl AtHwcap { // SVE2 requires SVE let sve2 = self.sve2 && self.sve && asimd; enable_feature(Feature::sve2, sve2); - enable_feature(Feature::sve2p1, self.sve2p1); + enable_feature(Feature::sve2p1, self.sve2p1 && sve2); // SVE2 extensions require SVE2 and crypto features enable_feature( Feature::sve2_aes, @@ -483,30 +484,32 @@ impl AtHwcap { enable_feature(Feature::cssc, self.cssc); enable_feature(Feature::fpmr, self.fpmr); enable_feature(Feature::faminmax, self.faminmax); - 
enable_feature(Feature::fp8, self.f8cvt); - enable_feature(Feature::fp8fma, self.f8fma); - enable_feature(Feature::fp8dot4, self.f8dp4); - enable_feature(Feature::fp8dot2, self.f8dp2); + let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16; + enable_feature(Feature::fp8, fp8); + let fp8fma = self.f8fma && fp8; + enable_feature(Feature::fp8fma, fp8fma); + let fp8dot4 = self.f8dp4 && fp8fma; + enable_feature(Feature::fp8dot4, fp8dot4); + enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4); enable_feature(Feature::wfxt, self.wfxt); - enable_feature(Feature::sme, self.sme && self.bf16); - enable_feature(Feature::sme_i16i64, self.smei16i64 && self.sme); - enable_feature(Feature::sme_f64f64, self.smef64f64 && self.sme); - // enable_feature(Feature::sme_i8i32, self.smei8i32); - // enable_feature(Feature::sme_f16f32, self.smef16f32); - // enable_feature(Feature::sme_b16f32, self.smeb16f32); - // enable_feature(Feature::sme_f32f32, self.smef32f32); - enable_feature(Feature::sme_fa64, self.smefa64 && self.sme && sve2); - enable_feature(Feature::sme2, self.sme2 && self.sme); - enable_feature(Feature::sme2p1, self.sme2p1 && self.sme2 && self.sme); - // enable_feature(Feature::sme_i16i32, self.smei16i32); - // enable_feature(Feature::sme_bi32i32, self.smebi32i32); - enable_feature(Feature::sme_f16f16, self.smef16f16); + let sme = self.sme && self.bf16; + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme_i16i64, self.smei16i64 && sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && sme); + enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2); + let sme2 = self.sme2 && sme; + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme2p1, self.sme2p1 && sme2); + enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2); enable_feature(Feature::sme_lutv2, self.smelutv2); - enable_feature(Feature::sme_f8f16, self.smef8f16 && self.sme2 && self.f8cvt); - enable_feature(Feature::sme_f8f32, self.smef8f32 && self.sme2 && 
self.f8cvt); - enable_feature(Feature::ssve_fp8fma, self.smesf8fma && self.sme2); - enable_feature(Feature::ssve_fp8dot4, self.smesf8dp4 && self.sme2); - enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && self.sme2); + let sme_f8f32 = self.smef8f32 && sme2 && fp8; + enable_feature(Feature::sme_f8f32, sme_f8f32); + enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32); + let ssve_fp8fma = self.smesf8fma && sme2 && fp8; + enable_feature(Feature::ssve_fp8fma, ssve_fp8fma); + let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma; + enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4); } value }