Skip to content

Commit 1586ea1

Browse files
jagadish-amd and dnikolaev-amd
authored and committed
ROCm: Enable tf32 testing on test_nn (#55)
* Add trailing comma for consistency in gfx architecture list Signed-off-by: Jagadish Krishnamoorthy <[email protected]> * ROCm: Enable tf32 testing on test_nn Signed-off-by: Jagadish Krishnamoorthy <[email protected]> --------- Signed-off-by: Jagadish Krishnamoorthy <[email protected]> (cherry picked from commit 00a0d8b3ff035b560c320b082ac3e0158e4ee1c4)
1 parent a2df696 commit 1586ea1

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

torch/cuda/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,12 @@ def _check_bf16_tensor_supported(device: _device_t):
217217

218218
def is_tf32_supported() -> bool:
219219
r"""Return a bool indicating if the current CUDA/ROCm device supports dtype tf32."""
220-
# Check for ROCm. If true, return false, since PyTorch does not currently support
221-
# tf32 on ROCm.
222220
if torch.version.hip:
221+
prop_name = torch.cuda.get_device_properties().gcnArchName
222+
archs = ("gfx94", "gfx95")
223+
for arch in archs:
224+
if arch in prop_name:
225+
return True
223226
return False
224227

225228
# Otherwise, tf32 is supported on CUDA platforms that natively (i.e. no emulation)

torch/testing/_internal/common_cuda.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ def tf32_off():
145145

146146
@contextlib.contextmanager
147147
def tf32_on(self, tf32_precision=1e-5):
148+
if torch.version.hip:
149+
hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
150+
os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
148151
old_allow_tf32_matmul = torch.backends.cuda.matmul.allow_tf32
149152
old_precision = self.precision
150153
try:
@@ -153,6 +156,11 @@ def tf32_on(self, tf32_precision=1e-5):
153156
with torch.backends.cudnn.flags(enabled=None, benchmark=None, deterministic=None, allow_tf32=True):
154157
yield
155158
finally:
159+
if torch.version.hip:
160+
if hip_allow_tf32 is not None:
161+
os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
162+
else:
163+
del os.environ["HIPBLASLT_ALLOW_TF32"]
156164
torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32_matmul
157165
self.precision = old_precision
158166

0 commit comments

Comments (0)