Skip to content

Commit c33fb91

Browse files
committed
make the interface friendlier for future adjustments
1 parent 5006686 commit c33fb91

File tree

2 files changed

+115
-61
lines changed

2 files changed

+115
-61
lines changed

Include/internal/pycore_cpuinfo.h

+16-6
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,25 @@ extern "C" {
99
# error "this header requires Py_BUILD_CORE define"
1010
#endif
1111

12-
#include <stdbool.h>
13-
1412
typedef struct {
15-
bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi;
16-
bool done;
17-
} _py_cpu_simd_flags;
13+
/* Streaming SIMD Extensions */
14+
uint8_t sse: 1;
15+
uint8_t sse2: 1;
16+
uint8_t sse3: 1;
17+
uint8_t sse41: 1; // SSE4.1
18+
uint8_t sse42: 1; // SSE4.2
19+
20+
/* Advanced Vector Extensions */
21+
uint8_t avx: 1;
22+
uint8_t avx2: 1;
23+
uint8_t avx512vbmi: 1; // AVX-512 Vector Byte Manipulation Instructions
24+
25+
uint8_t done; // indicate whether the structure was filled or not
26+
} py_cpu_simd_flags;
1827

28+
/* Detect the available SIMD features on this machine. */
1929
extern void
20-
_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags);
30+
_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags);
2131

2232
#ifdef __cplusplus
2333
}

Python/cpuinfo.c

+99-55
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
/*
2-
* Naive CPU SIMD features detection.
2+
* Python CPU SIMD features detection.
33
*
4-
* See Modules/black2module.c.
4+
* See https://en.wikipedia.org/wiki/CPUID for details.
55
*/
66

77
#include "Python.h"
88
#include "pycore_cpuinfo.h"
99

10-
#include <stdbool.h>
10+
#define CPUID_REG(ARG) ARG
1111

12+
/*
13+
* For simplicity, we only enable SIMD instructions for x86-64 CPUs,
14+
* even though we could support ARM NEON and POWER.
15+
*/
1216
#if defined(__x86_64__) && defined(__GNUC__)
1317
# include <cpuid.h>
1418
#elif defined(_M_X64)
1519
# include <intrin.h>
20+
#else
21+
# undef CPUID_REG
22+
# define CPUID_REG(ARG) Py_UNUSED(ARG)
1623
#endif
1724

1825
// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64).
@@ -24,6 +31,15 @@
2431
# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
2532
#endif
2633

34+
/*
35+
* The macros below describe masks to apply on CPUID output registers.
36+
*
37+
* Each macro is of the form [REGISTER][PAGE]_[FEATURE] where
38+
*
39+
* - REGISTER is either EBX, ECX or EDX,
40+
* - PAGE is either 1 or 7, depending on the CPUID input EAX value, and
41+
* - FEATURE is an SIMD instruction set.
42+
*/
2743
#define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25
2844
#define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26
2945
#define ECX1_SSE3 (1 << 0) // sse3, ECX, page 1, bit 0 (bit 9 is SSSE3, not SSE3)
@@ -33,78 +49,106 @@
3349
#define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5
3450
#define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1
3551

36-
void
37-
_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags)
38-
{
39-
if (flags->done) {
40-
return;
41-
}
52+
/*
 * Evaluate to 1 if at least one bit of MASK is set in REGISTER, 0 otherwise.
 * The whole expansion is parenthesized so that the macro can be safely used
 * inside larger expressions.
 */
#define CHECK_CPUID_REGISTER(REGISTER, MASK) ((((REGISTER) & (MASK)) == 0) ? 0 : 1)
4253

43-
int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0;
44-
int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
54+
/*
55+
* Indicate whether the CPUID input EAX=1 may be needed to
56+
* detect SIMD basic features (e.g., SSE).
57+
*/
58+
#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \
59+
|| defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \
60+
|| defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \
61+
|| defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \
62+
|| defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \
63+
|| defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS)
64+
# define MAY_DETECT_CPUID_SIMD_FEATURES
65+
#endif
66+
67+
/*
68+
* Indicate whether the CPUID input EAX=7 may be needed to
69+
* detect SIMD extended features (e.g., AVX2 or AVX-512).
70+
*/
71+
#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \
72+
|| defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS)
73+
# define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES
74+
#endif
75+
76+
static inline void
77+
get_cpuid_info(int32_t level /* input eax */,
78+
int32_t count /* input ecx */,
79+
int32_t *CPUID_REG(eax),
80+
int32_t *CPUID_REG(ebx),
81+
int32_t *CPUID_REG(ecx),
82+
int32_t *CPUID_REG(edx))
83+
{
4584
#if defined(__x86_64__) && defined(__GNUC__)
46-
__cpuid_count(1, 0, eax1, ebx1, ecx1, edx1);
47-
__cpuid_count(7, 0, eax7, ebx7, ecx7, edx7);
85+
__cpuid_count(level, count, *eax, *ebx, *ecx, *edx);
4886
#elif defined(_M_X64)
49-
int info1[4] = {0};
50-
__cpuidex(info1, 1, 0);
51-
eax1 = info1[0];
52-
ebx1 = info1[1];
53-
ecx1 = info1[2];
54-
edx1 = info1[3];
55-
56-
int info7[4] = {0};
57-
__cpuidex(info7, 7, 0);
58-
eax7 = info7[0];
59-
ebx7 = info7[1];
60-
ecx7 = info7[2];
61-
edx7 = info7[3];
62-
#else
63-
// use (void) expressions to avoid warnings
64-
(void) eax1; (void) ebx1; (void) ecx1; (void) edx1;
65-
(void) eax7; (void) ebx7; (void) ecx7; (void) edx7;
87+
int32_t info[4] = {0};
88+
__cpuidex(info, level, count); // leaf = level (input EAX), sub-leaf = count (input ECX)
89+
*eax = info[0];
90+
*ebx = info[1];
91+
*ecx = info[2];
92+
*edx = info[3];
6693
#endif
94+
}
6795

96+
/* Processor Info and Feature Bits (EAX=1, ECX=0). */
97+
static inline void
98+
detect_cpu_simd_features(py_cpu_simd_flags *flags)
99+
{
100+
int32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
101+
get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx);
68102
#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS
69-
flags->sse = (edx1 & EDX1_SSE) != 0;
70-
#else
71-
flags->sse = false;
103+
flags->sse = CHECK_CPUID_REGISTER(edx, EDX1_SSE);
72104
#endif
73105
#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS
74-
flags->sse2 = (edx1 & EDX1_SSE2) != 0;
75-
#else
76-
flags->sse2 = false;
106+
flags->sse2 = CHECK_CPUID_REGISTER(edx, EDX1_SSE2);
77107
#endif
78108
#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS
79-
flags->sse3 = (ecx1 & ECX1_SSE3) != 0;
80-
#else
109+
flags->sse3 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE3);
81110
#endif
82-
flags->sse3 = false;
83111
#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS
84-
flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0;
85-
#else
86-
flags->sse41 = false;
112+
flags->sse41 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_1);
87113
#endif
88114
#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS
89-
flags->sse42 = (ecx1 & ECX1_SSE4_2) != 0;
90-
#else
91-
flags->sse42 = false;
115+
flags->sse42 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_2);
92116
#endif
93117
#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS
94-
flags->avx = (ecx1 & ECX1_AVX) != 0;
95-
#else
96-
flags->avx = false;
118+
flags->avx = CHECK_CPUID_REGISTER(ecx, ECX1_AVX);
97119
#endif
120+
}
121+
122+
/* Extended feature bits (EAX=7, ECX=0). */
123+
static inline void
124+
detect_cpu_simd_extended_features(py_cpu_simd_flags *flags)
125+
{
126+
int32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
127+
get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx);
98128
#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
99-
flags->avx2 = (ebx7 & EBX7_AVX2) != 0;
100-
#else
101-
flags->avx2 = false;
129+
flags->avx2 = CHECK_CPUID_REGISTER(ebx, EBX7_AVX2);
102130
#endif
103131
#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
104-
flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0;
105-
#else
106-
flags->avx512vbmi = false;
132+
flags->avx512vbmi = CHECK_CPUID_REGISTER(ecx, ECX7_AVX512_VBMI);
107133
#endif
134+
}
108135

109-
flags->done = true;
136+
void
137+
_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags)
138+
{
139+
if (flags->done) {
140+
return;
141+
}
142+
#ifdef MAY_DETECT_CPUID_SIMD_FEATURES
143+
detect_cpu_simd_features(flags);
144+
#else
145+
flags->sse = flags->sse2 = flags->sse3 = flags->sse41 = flags->sse42 = 0;
146+
flags->avx = 0;
147+
#endif
148+
#ifdef MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES
149+
detect_cpu_simd_extended_features(flags);
150+
#else
151+
flags->avx2 = flags->avx512vbmi = 0;
152+
#endif
153+
flags->done = 1;
110154
}

0 commit comments

Comments
 (0)