File tree Expand file tree Collapse file tree 5 files changed +21
-7
lines changed
.ci/docker/ci_commit_pins
runtime/core/portable_type/c10/c10 Expand file tree Collapse file tree 5 files changed +21
-7
lines changed Original file line number Diff line number Diff line change 1
- 59d5cf083b4f860dea76fe8936076177f9367f10
1
+ 01f1cc44cbbfdf6307aa01b803a4ee22f9ade946
Original file line number Diff line number Diff line change @@ -35,7 +35,11 @@ set(XNNPACK_BUILD_TESTS
35
35
set (XNNPACK_ENABLE_AVXVNNI
36
36
OFF
37
37
CACHE BOOL ""
38
- )
38
+ )
39
+ # Work around observed failure: https://github.com/pytorch/executorch/pull/10362#issuecomment-2906391232
40
+ set (XNNPACK_ENABLE_AVX512VNNIGFNI
41
+ OFF
42
+ CACHE BOOL "" )
39
43
40
44
if (EXECUTORCH_XNNPACK_ENABLE_KLEIDI )
41
45
set (XNNPACK_ENABLE_KLEIDIAI
Original file line number Diff line number Diff line change @@ -71,7 +71,7 @@ def python_is_compatible():
71
71
#
72
72
# NOTE: If you're changing this, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt
73
73
# by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/
74
- NIGHTLY_VERSION = "dev20250524 "
74
+ NIGHTLY_VERSION = "dev20250422 "
75
75
76
76
77
77
def install_requirements (use_pytorch_nightly ):
Original file line number Diff line number Diff line change @@ -508,4 +508,14 @@ __host__ __device__
508
508
509
509
#endif
510
510
511
+ // This macro flags older C++ compilers (__GNUC__ or __clang_major__ below 13)
512
+ // that don't support move optimization for return values.
513
+
514
+ #if (defined(__GNUC__) && __GNUC__ < 13) || \
515
+ (defined(__clang_major__) && __clang_major__ < 13 )
516
+ #define C10_RETURN_MOVE_IF_OLD_COMPILER 1
517
+ #else
518
+ #define C10_RETURN_MOVE_IF_OLD_COMPILER 0
519
+ #endif
520
+
511
521
#endif // C10_MACROS_MACROS_H_
Original file line number Diff line number Diff line change @@ -31,7 +31,7 @@ inline C10_HOST_DEVICE float f32_from_bits(uint16_t src) {
31
31
uint32_t tmp = src;
32
32
tmp <<= 16 ;
33
33
34
- #if defined(USE_ROCM)
34
+ #if defined(USE_ROCM) && defined(__HIPCC__)
35
35
float * tempRes;
36
36
37
37
// We should be using memcpy in order to respect the strict aliasing rule
@@ -48,7 +48,7 @@ inline C10_HOST_DEVICE float f32_from_bits(uint16_t src) {
48
48
inline C10_HOST_DEVICE uint16_t bits_from_f32 (float src) {
49
49
uint32_t res = 0 ;
50
50
51
- #if defined(USE_ROCM)
51
+ #if defined(USE_ROCM) && defined(__HIPCC__)
52
52
// We should be using memcpy in order to respect the strict aliasing rule
53
53
// but it fails in the HIP environment.
54
54
uint32_t * tempRes = reinterpret_cast <uint32_t *>(&src);
@@ -61,7 +61,7 @@ inline C10_HOST_DEVICE uint16_t bits_from_f32(float src) {
61
61
}
62
62
63
63
inline C10_HOST_DEVICE uint16_t round_to_nearest_even (float src) {
64
- #if defined(USE_ROCM)
64
+ #if defined(USE_ROCM) && defined(__HIPCC__)
65
65
if (src != src) {
66
66
#elif defined(_MSC_VER)
67
67
if (isnan (src)) {
@@ -87,7 +87,7 @@ struct alignas(2) BFloat16 {
87
87
uint16_t x;
88
88
89
89
// HIP wants __host__ __device__ tag, CUDA does not
90
- #if defined(USE_ROCM)
90
+ #if defined(USE_ROCM) && defined(__HIPCC__)
91
91
C10_HOST_DEVICE BFloat16 () = default ;
92
92
#else
93
93
BFloat16 () = default ;
You can’t perform that action at this time.
0 commit comments