Skip to content

Commit 1290efe

Browse files
jagadish-amdpruthvistony
authored and committed
ROCm: Enable tf32 testing on test_nn (#55)
* Add trailing comma for consistency in gfx architecture list Signed-off-by: Jagadish Krishnamoorthy <[email protected]> * ROCm: Enable tf32 testing on test_nn Signed-off-by: Jagadish Krishnamoorthy <[email protected]> --------- Signed-off-by: Jagadish Krishnamoorthy <[email protected]>
1 parent 8ccfc47 commit 1290efe

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

torch/cuda/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,21 @@ def _check_bf16_tensor_supported(device: _device_t):
168168
return False
169169

170170

171+
def is_tf32_supported() -> bool:
    r"""Return a bool indicating if the current CUDA/ROCm device supports dtype tf32."""
    if torch.version.hip:
        # On ROCm, tf32 is available only on gfx94*/gfx95* accelerators;
        # check the current device's GCN architecture name for those prefixes.
        gcn_arch = torch.cuda.get_device_properties().gcnArchName
        return any(prefix in gcn_arch for prefix in ("gfx94", "gfx95"))

    # Otherwise, tf32 is supported on CUDA platforms that natively (i.e. no emulation)
    # support bfloat16.
    return is_bf16_supported(including_emulation=False)
184+
185+
171186
def _sleep(cycles):
172187
torch._C._cuda_sleep(cycles)
173188

torch/testing/_internal/common_cuda.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ def tf32_off():
150150

151151
@contextlib.contextmanager
152152
def tf32_on(self, tf32_precision=1e-5):
153+
if torch.version.hip:
154+
hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
155+
os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
153156
old_allow_tf32_matmul = torch.backends.cuda.matmul.allow_tf32
154157
old_precision = self.precision
155158
try:
@@ -158,6 +161,11 @@ def tf32_on(self, tf32_precision=1e-5):
158161
with torch.backends.cudnn.flags(enabled=None, benchmark=None, deterministic=None, allow_tf32=True):
159162
yield
160163
finally:
164+
if torch.version.hip:
165+
if hip_allow_tf32 is not None:
166+
os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
167+
else:
168+
del os.environ["HIPBLASLT_ALLOW_TF32"]
161169
torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32_matmul
162170
self.precision = old_precision
163171

0 commit comments

Comments
 (0)