Skip to content

Commit fc5d883

Browse files
committed
[AArch64] Improve host feature detection.
SVE depends on a combination of host support and operating system support. Sometimes those don't line up with the detected host CPU name; make sure SVE is disabled when it isn't available. Implement this for both Windows and Linux. (We don't have a codepath for other operating systems. If someone wants to implement this, it should be possible to adapt the FMV code from compiler-rt.) While I'm here, also add support for detecting other Windows CPU features. For Windows, declare the constants ourselves so the code builds on older SDKs; we also do this in compiler-rt.
1 parent d136fbd commit fc5d883

File tree

1 file changed

+62
-2
lines changed

1 file changed

+62
-2
lines changed

llvm/lib/TargetParser/Host.cpp

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2260,20 +2260,80 @@ StringMap<bool> sys::getHostCPUFeatures() {
22602260
uint32_t Sha2 = CAP_SHA1 | CAP_SHA2;
22612261
Features["aes"] = (crypto & Aes) == Aes;
22622262
Features["sha2"] = (crypto & Sha2) == Sha2;
2263+
2264+
// SVE support may be disabled on cores which are identified as supporting
2265+
// SVE; disable SVE if we don't detect support at runtime.
2266+
if (!Features.contains("sve"))
2267+
Features["sve"] = false;
22632268
#endif
22642269

22652270
return Features;
22662271
}
22672272
#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64) || \
22682273
defined(__arm64ec__) || defined(_M_ARM64EC))
2274+
#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
2275+
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
2276+
#endif
2277+
#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
2278+
#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
2279+
#endif
2280+
#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
2281+
#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
2282+
#endif
2283+
#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
2284+
#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46
2285+
#endif
2286+
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
2287+
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
2288+
#endif
2289+
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
2290+
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
2291+
#endif
2292+
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
2293+
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
2294+
#endif
2295+
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
2296+
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
2297+
#endif
2298+
#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
2299+
#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
2300+
#endif
2301+
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
2302+
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
2303+
#endif
2304+
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
2305+
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
2306+
#endif
22692307
StringMap<bool> sys::getHostCPUFeatures() {
22702308
StringMap<bool> Features;
22712309

22722310
// If we're asking the OS at runtime, believe what the OS says
2273-
Features["neon"] =
2274-
IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
22752311
Features["crc"] =
22762312
IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
2313+
Features["lse"] =
2314+
IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
2315+
Features["dotprod"] =
2316+
IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
2317+
Features["jscvt"] =
2318+
IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE);
2319+
Features["rcpc"] =
2320+
IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE);
2321+
Features["sve"] =
2322+
IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE);
2323+
Features["sve2"] =
2324+
IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE);
2325+
Features["sve-aes"] =
2326+
IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE);
2327+
Features["sve-sha3"] =
2328+
IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE);
2329+
Features["sve-sm4"] =
2330+
IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE);
2331+
Features["f32mm"] =
2332+
IsProcessorFeaturePresent(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE);
2333+
Features["f64mm"] =
2334+
IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE);
2335+
Features["i8mm"] =
2336+
IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE);
22772337

22782338
// Avoid inferring "crypto" means more than the traditional AES + SHA2
22792339
bool TradCrypto =

0 commit comments

Comments
 (0)