Skip to content

Commit f0927c2

Browse files
authored
[release/2.5] Added rocm specific logic to is_big_gpu check for inductor (#1752)
1 parent c3ba1e8 commit f0927c2

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

torch/_inductor/utils.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1042,8 +1042,20 @@ def __len__(self):
10421042

10431043
@functools.lru_cache(None)
10441044
def is_big_gpu(index) -> bool:
1045+
prop = torch.cuda.get_device_properties(index)
1046+
1047+
# SM logic is not relevant to ROCm gpus
1048+
# Arbitrarily skipping the older models
1049+
if torch.version.hip is not None:
1050+
if prop.major < 9 or prop.major == 10:
1051+
log.warning(
1052+
"GPU arch does not support max_autotune_gemm mode usage"
1053+
)
1054+
return False
1055+
return True
1056+
10451057
min_sms = 68 # 3080
1046-
avail_sms = torch.cuda.get_device_properties(index).multi_processor_count
1058+
avail_sms = prop.multi_processor_count
10471059
if avail_sms < min_sms:
10481060
log.warning(
10491061
"Not enough SMs to use max_autotune_gemm mode",

0 commit comments

Comments
 (0)