Skip to content

Add runtime detection for APX-F and AVX10 #145531

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions library/std_detect/src/detect/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ features! {
/// AMX-TF32 (TensorFloat32 Operations)
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose";
/// AMX-TRANSPOSE (Matrix Transpose Operations)
@FEATURE: #[unstable(feature = "apx_target_feature", issue = "139284")] apxf: "apxf";
/// APX-F (Advanced Performance Extensions - Foundation)
@FEATURE: #[unstable(feature = "avx10_target_feature", issue = "138843")] avx10_1: "avx10.1";
/// AVX10.1
@FEATURE: #[unstable(feature = "avx10_target_feature", issue = "138843")] avx10_2: "avx10.2";
/// AVX10.2
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c";
/// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma";
Expand Down
66 changes: 41 additions & 25 deletions library/std_detect/src/detect/os/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,32 @@ pub(crate) fn detect_features() -> cache::Initializer {
enable(ebx, 2, Feature::widekl);
}

// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
// On intel CPUs with popcnt, lzcnt implements the
// "missing part" of ABM, so we map both to the same
// internal feature.
//
// The `is_x86_feature_detected!("lzcnt")` macro then
// internally maps to Feature::abm.
enable(extended_proc_info_ecx, 5, Feature::lzcnt);

// As Hygon Dhyana originates from AMD technology and shares most of the architecture with
// AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series
// number(Family 18h).
//
// For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD
// family 17h.
//
// Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
// Related Hygon kernel patch can be found on
// http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
// These features are available on AMD arch CPUs:
enable(extended_proc_info_ecx, 6, Feature::sse4a);
enable(extended_proc_info_ecx, 21, Feature::tbm);
enable(extended_proc_info_ecx, 11, Feature::xop);
}

// `XSAVE` and `AVX` support:
let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
if cpu_xsave {
Expand All @@ -161,6 +187,7 @@ pub(crate) fn detect_features() -> cache::Initializer {
// * AVX -> `XCR0.AVX[2]`
// * AVX-512 -> `XCR0.AVX-512[7:5]`.
// * AMX -> `XCR0.AMX[18:17]`
// * APX -> `XCR0.APX[19]`
//
// by setting the corresponding bits of `XCR0` to `1`.
//
Expand All @@ -173,6 +200,8 @@ pub(crate) fn detect_features() -> cache::Initializer {
let os_avx512_support = xcr0 & 0xe0 == 0xe0;
// Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000`
let os_amx_support = xcr0 & 0x60000 == 0x60000;
// Test `XCR0.APX[19]` with the mask `0b1000_0000_0000_0000_0000 == 0x80000`
let os_apx_support = xcr0 & 0x80000 == 0x80000;

// Only if the OS and the CPU support saving/restoring the AVX
// registers we enable `xsave` support:
Expand Down Expand Up @@ -262,33 +291,20 @@ pub(crate) fn detect_features() -> cache::Initializer {
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
}
}
}
}

// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
// On intel CPUs with popcnt, lzcnt implements the
// "missing part" of ABM, so we map both to the same
// internal feature.
//
// The `is_x86_feature_detected!("lzcnt")` macro then
// internally maps to Feature::abm.
enable(extended_proc_info_ecx, 5, Feature::lzcnt);
if os_apx_support {
enable(extended_features_edx_leaf_1, 21, Feature::apxf);
}

// As Hygon Dhyana originates from AMD technology and shares most of the architecture with
// AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series
// number(Family 18h).
//
// For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD
// family 17h.
//
// Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
// Related Hygon kernel patch can be found on
// http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
// These features are available on AMD arch CPUs:
enable(extended_proc_info_ecx, 6, Feature::sse4a);
enable(extended_proc_info_ecx, 21, Feature::tbm);
enable(extended_proc_info_ecx, 11, Feature::xop);
let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1);
if avx10_1 {
let CpuidResult { ebx, .. } = unsafe { __cpuid(0x24) };
let avx10_version = ebx & 0xff;
if avx10_version >= 2 {
value.set(Feature::avx10_2 as u32);
}
}
}
}
}

Expand Down
Loading