|
1 | 1 | /*
|
2 |
| - * Naive CPU SIMD features detection. |
| 2 | + * Python CPU SIMD features detection. |
3 | 3 | *
|
4 |
| - * See Modules/black2module.c. |
| 4 | + * See https://en.wikipedia.org/wiki/CPUID for details. |
5 | 5 | */
|
6 | 6 |
|
7 | 7 | #include "Python.h"
|
8 | 8 | #include "pycore_cpuinfo.h"
|
9 | 9 |
|
10 |
| -#include <stdbool.h> |
| 10 | +#define CPUID_REG(ARG) ARG |
11 | 11 |
|
| 12 | +/* |
| 13 | + * For simplicity, we only enable SIMD instructions for x86-64 CPUs, |
| 14 | + * even though we could support ARM NEON and POWER. |
| 15 | + */ |
12 | 16 | #if defined(__x86_64__) && defined(__GNUC__)
|
13 | 17 | # include <cpuid.h>
|
14 | 18 | #elif defined(_M_X64)
|
15 | 19 | # include <intrin.h>
|
| 20 | +#else |
| 21 | +# undef CPUID_REG |
| 22 | +# define CPUID_REG(ARG) Py_UNUSED(ARG) |
16 | 23 | #endif
|
17 | 24 |
|
18 | 25 | // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64).
|
|
24 | 31 | # undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
|
25 | 32 | #endif
|
26 | 33 |
|
| 34 | +/* |
| 35 | + * The macros below describe masks to apply on CPUID output registers. |
| 36 | + * |
| 37 | + * Each macro is of the form [REGISTER][PAGE]_[FEATURE] where |
| 38 | + * |
| 39 | + * - REGISTER is either EBX, ECX or EDX, |
| 40 | + * - PAGE is either 1 or 7, depending on the CPUID input EAX value, and |
| 41 | + * - FEATURE is a SIMD instruction set. |
| 42 | + */ |
27 | 43 | #define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25
|
28 | 44 | #define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26
|
#define ECX1_SSE3 (1 << 0) // sse3, ECX, page 1, bit 0 (bit 9 is SSSE3)
|
|
33 | 49 | #define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5
|
34 | 50 | #define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1
|
35 | 51 |
|
36 |
| -void |
37 |
| -_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags) |
38 |
| -{ |
39 |
| - if (flags->done) { |
40 |
| - return; |
41 |
| - } |
/*
 * Evaluate to 1 if at least one bit of MASK is set in REGISTER,
 * and to 0 otherwise.
 *
 * The whole expansion is parenthesized so that the macro can safely be
 * embedded in a larger expression (e.g., compared or multiplied).
 */
#define CHECK_CPUID_REGISTER(REGISTER, MASK) \
    ((((REGISTER) & (MASK)) == 0) ? 0 : 1)
42 | 53 |
|
43 |
| - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; |
44 |
| - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; |
| 54 | +/* |
| 55 | + * Indicate whether the CPUID input EAX=1 may be needed to |
| 56 | + * detect SIMD basic features (e.g., SSE). |
| 57 | + */ |
| 58 | +#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ |
| 59 | + || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ |
| 60 | + || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ |
| 61 | + || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ |
| 62 | + || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ |
| 63 | + || defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) |
| 64 | +# define MAY_DETECT_CPUID_SIMD_FEATURES |
| 65 | +#endif |
| 66 | + |
| 67 | +/* |
| 68 | + * Indicate whether the CPUID input EAX=7 may be needed to |
| 69 | + * detect SIMD extended features (e.g., AVX2 or AVX-512). |
| 70 | + */ |
| 71 | +#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ |
| 72 | + || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) |
| 73 | +# define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES |
| 74 | +#endif |
| 75 | + |
| 76 | +static inline void |
| 77 | +get_cpuid_info(int32_t level /* input eax */, |
| 78 | + int32_t count /* input ecx */, |
| 79 | + int32_t *CPUID_REG(eax), |
| 80 | + int32_t *CPUID_REG(ebx), |
| 81 | + int32_t *CPUID_REG(ecx), |
| 82 | + int32_t *CPUID_REG(edx)) |
| 83 | +{ |
45 | 84 | #if defined(__x86_64__) && defined(__GNUC__)
|
46 |
| - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); |
47 |
| - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); |
| 85 | + __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); |
48 | 86 | #elif defined(_M_X64)
|
49 |
| - int info1[4] = {0}; |
50 |
| - __cpuidex(info1, 1, 0); |
51 |
| - eax1 = info1[0]; |
52 |
| - ebx1 = info1[1]; |
53 |
| - ecx1 = info1[2]; |
54 |
| - edx1 = info1[3]; |
55 |
| - |
56 |
| - int info7[4] = {0}; |
57 |
| - __cpuidex(info7, 7, 0); |
58 |
| - eax7 = info7[0]; |
59 |
| - ebx7 = info7[1]; |
60 |
| - ecx7 = info7[2]; |
61 |
| - edx7 = info7[3]; |
62 |
| -#else |
63 |
| - // use (void) expressions to avoid warnings |
64 |
| - (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; |
65 |
| - (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; |
| 87 | + int32_t info[4] = {0}; |
| 88 | + __cpuidex(info, page, count); |
| 89 | + *eax = info[0]; |
| 90 | + *ebx = info[1]; |
| 91 | + *ecx = info[2]; |
| 92 | + *edx = info[3]; |
66 | 93 | #endif
|
| 94 | +} |
67 | 95 |
|
| 96 | +/* Processor Info and Feature Bits (EAX=1, ECX=0). */ |
| 97 | +static inline void |
| 98 | +detect_cpu_simd_features(py_cpu_simd_flags *flags) |
| 99 | +{ |
| 100 | + int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; |
| 101 | + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); |
68 | 102 | #ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS
|
69 |
| - flags->sse = (edx1 & EDX1_SSE) != 0; |
70 |
| -#else |
71 |
| - flags->sse = false; |
| 103 | + flags->sse = CHECK_CPUID_REGISTER(edx, EDX1_SSE); |
72 | 104 | #endif
|
73 | 105 | #ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS
|
74 |
| - flags->sse2 = (edx1 & EDX1_SSE2) != 0; |
75 |
| -#else |
76 |
| - flags->sse2 = false; |
| 106 | + flags->sse2 = CHECK_CPUID_REGISTER(edx, EDX1_SSE2); |
77 | 107 | #endif
|
78 | 108 | #ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS
|
79 |
| - flags->sse3 = (ecx1 & ECX1_SSE3) != 0; |
80 |
| - #else |
| 109 | + flags->sse3 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE3); |
81 | 110 | #endif
|
82 |
| - flags->sse3 = false; |
83 | 111 | #ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS
|
84 |
| - flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0; |
85 |
| -#else |
86 |
| - flags->sse41 = false; |
| 112 | + flags->sse41 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_1); |
87 | 113 | #endif
|
88 | 114 | #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS
|
89 |
| - flags->sse42 = (ecx1 & ECX1_SSE4_2) != 0; |
90 |
| -#else |
91 |
| - flags->sse42 = false; |
| 115 | + flags->sse42 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_2); |
92 | 116 | #endif
|
93 | 117 | #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS
|
94 |
| - flags->avx = (ecx1 & ECX1_AVX) != 0; |
95 |
| -#else |
96 |
| - flags->avx = false; |
| 118 | + flags->avx = CHECK_CPUID_REGISTER(ecx, ECX1_AVX); |
97 | 119 | #endif
|
| 120 | +} |
| 121 | + |
| 122 | +/* Extended feature bits (EAX=7, ECX=0). */ |
| 123 | +static inline void |
| 124 | +detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) |
| 125 | +{ |
| 126 | + int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; |
| 127 | + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); |
98 | 128 | #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
|
99 |
| - flags->avx2 = (ebx7 & EBX7_AVX2) != 0; |
100 |
| -#else |
101 |
| - flags->avx2 = false; |
| 129 | + flags->avx2 = CHECK_CPUID_REGISTER(ebx, EBX7_AVX2); |
102 | 130 | #endif
|
103 | 131 | #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
|
104 |
| - flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0; |
105 |
| -#else |
106 |
| - flags->avx512vbmi = false; |
| 132 | + flags->avx512vbmi = CHECK_CPUID_REGISTER(ecx, ECX7_AVX512_VBMI); |
107 | 133 | #endif
|
| 134 | +} |
108 | 135 |
|
109 |
| - flags->done = true; |
| 136 | +void |
| 137 | +_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags) |
| 138 | +{ |
| 139 | + if (flags->done) { |
| 140 | + return; |
| 141 | + } |
| 142 | +#ifdef MAY_DETECT_CPUID_SIMD_FEATURES |
| 143 | + detect_cpu_simd_features(flags); |
| 144 | +#else |
| 145 | + flags->sse = flags->sse2 = flags->sse3 = flags->sse41 = flags->sse42 = 0; |
| 146 | + flags->avx = 0; |
| 147 | +#endif |
| 148 | +#ifdef MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES |
| 149 | + detect_cpu_simd_extended_features(flags); |
| 150 | +#else |
| 151 | + flags->avx2 = flags->avx512vbmi = 0; |
| 152 | +#endif |
| 153 | + flags->done = 1; |
110 | 154 | }
|
0 commit comments