diff --git a/aten/src/ATen/cuda/CUDAContext.cpp b/aten/src/ATen/cuda/CUDAContext.cpp
index 58248acfe1795..0a4649d9c41ad 100644
--- a/aten/src/ATen/cuda/CUDAContext.cpp
+++ b/aten/src/ATen/cuda/CUDAContext.cpp
@@ -54,15 +54,13 @@ Allocator* getCUDADeviceAllocator() {
 }
 
 /* Handles */
-#ifndef __HIP_PLATFORM_HCC__
-  cusparseHandle_t getCurrentCUDASparseHandle() {
-    return THCState_getCurrentSparseHandle(at::globalContext().getTHCState());
-  }
+cusparseHandle_t getCurrentCUDASparseHandle() {
+  return THCState_getCurrentSparseHandle(at::globalContext().getTHCState());
+}
 
-  cublasHandle_t getCurrentCUDABlasHandle() {
-    return THCState_getCurrentBlasHandle(at::globalContext().getTHCState());
-  }
-#endif
+cublasHandle_t getCurrentCUDABlasHandle() {
+  return THCState_getCurrentBlasHandle(at::globalContext().getTHCState());
+}
 
 } // namespace cuda
 
diff --git a/aten/src/ATen/cuda/CUDAContext.h b/aten/src/ATen/cuda/CUDAContext.h
index 83a890da4d535..3a480d2ca4e4e 100644
--- a/aten/src/ATen/cuda/CUDAContext.h
+++ b/aten/src/ATen/cuda/CUDAContext.h
@@ -59,10 +59,8 @@ CAFFE2_API void uncheckedSetCurrentCUDAStream(CUDAStream stream);
 CAFFE2_API Allocator* getCUDADeviceAllocator();
 
 /* Handles */
-#ifndef __HIP_PLATFORM_HCC__
 CAFFE2_API cusparseHandle_t getCurrentCUDASparseHandle();
 CAFFE2_API cublasHandle_t getCurrentCUDABlasHandle();
-#endif
 
 } // namespace cuda
 
diff --git a/test/test_autograd.py b/test/test_autograd.py
index f9ccfb6c958e9..0642e87399c67 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -1406,6 +1406,7 @@ def test_unused_output(self):
         expected_grad[:2] = grad_output
         self.assertEqual(x.grad.data, expected_grad)
 
+    @skipIfRocm
     def test_ctc_loss(self):
         batch_size = 64
         num_labels = 101
diff --git a/test/test_cuda.py b/test/test_cuda.py
index cdf8d46ce236c..2c647b08cbd60 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -268,11 +268,11 @@ def tmp(t):
     ('div', small_3d, lambda t: [number(3.14, 3, t)], '', types, False,
         "skipIfRocm:ByteTensor,CharTensor,FloatTensor,HalfTensor,ShortTensor"),
     ('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
-    ('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types, False, "skipIfRocm:HalfTensor"),
-    ('pow', small_3d, lambda t: [number(1., 1, t)], 'pow1', types, False, "skipIfRocm:HalfTensor"),
-    ('pow', small_3d, lambda t: [number(2., 2, t)], 'pow2', types, False, "skipIfRocm:HalfTensor"),
-    ('pow', small_3d, lambda t: [number(3., 3, t)], 'pow3', types, False, "skipIfRocm:HalfTensor"),
-    ('pow', small_3d, lambda t: [number(-1., -1, t)], 'pow-1', float_types, False, "skipIfRocm:HalfTensor"),
+    ('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
+    ('pow', small_3d, lambda t: [number(1., 1, t)], 'pow1'),
+    ('pow', small_3d, lambda t: [number(2., 2, t)], 'pow2'),
+    ('pow', small_3d, lambda t: [number(3., 3, t)], 'pow3'),
+    ('pow', small_3d, lambda t: [number(-1., -1, t)], 'pow-1', float_types),
     # HalfTensor gives bad result at pow-2 with data sampled from torch.randn
     ('pow', small_3d, lambda t: [number(-2., -2, t)], 'pow-2', float_types_no_half, False,
         "skipIfRocm:HalfTensor,FloatTensor"),
diff --git a/test/test_dataloader.py b/test/test_dataloader.py
index 020486c1fbda3..3d9af20c85965 100644
--- a/test/test_dataloader.py
+++ b/test/test_dataloader.py
@@ -371,6 +371,7 @@ def test_segfault(self):
         finally:
             p.terminate()
 
+    @skipIfRocm
     def test_timeout(self):
         p = ErrorTrackingProcess(target=_test_timeout)
         p.start()
diff --git a/test/test_jit.py b/test/test_jit.py
index 22e7a5f69b467..b2a83e00bcc46 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -7243,6 +7243,7 @@ def test_dcgan_models(self):
         self._test_dcgan_models(self, device='cpu')
 
     @unittest.skipIf(not RUN_CUDA, "no CUDA")
+    @skipIfRocm
     def test_dcgan_models_cuda(self):
         # XXX: export_import on CUDA modules doesn't work (#11480)
         self._test_dcgan_models(self, device='cuda', check_export_import=False)
@@ -7365,11 +7366,13 @@ def test_mnist(self):
         self._test_mnist(self, device='cpu')
 
     @unittest.skipIf(not RUN_CUDA, "no CUDA")
+    @skipIfRocm
     def test_mnist_cuda(self):
         # XXX: export_import on CUDA modules doesn't work (#11480)
         self._test_mnist(self, device='cuda', check_export_import=False)
 
     @unittest.skipIf(not RUN_CUDA, "no CUDA")
+    @skipIfRocm
     def test_mnist_training_leaks_no_memory_cuda(self):
         net = MnistNet().cuda()
         # MnistNet uses dropout, don't check its trace
diff --git a/test/test_nn.py b/test/test_nn.py
index 0d61d72f3ceb6..eee4e3a7c7475 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -4202,6 +4202,7 @@ def get_inputs(input_shape, hidden_shape, mode):
             test(input_shape, hidden_shape, mode)
 
     @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
+    @skipIfRocm
     def test_rnn_check_device(self):
         input_size = 3
         hidden_size = 5
diff --git a/test/test_sparse.py b/test/test_sparse.py
index 0e91dca37d4c3..f95d7256c4042 100644
--- a/test/test_sparse.py
+++ b/test/test_sparse.py
@@ -1033,6 +1033,7 @@ def _all_narrow_combs(self, shape):
                 for length in range(dim_sz - start):
                     yield [dim, start, length]
 
+    @skipIfRocm
     def test_narrow(self):
         shape = [3, 3, 4, 2]
         input, _, _ = self._gen_sparse(4, 19, shape)
@@ -1437,6 +1438,7 @@ def test_tensor(indices, values, indices_equal, values_equal):
         test_tensor(indices, values, False, True)  # An empty tensor's data_ptr is always equal to 0
 
     @cpu_only  # just run once, we test both cpu and cuda
+    @skipIfRocm
     def test_constructor_device_legacy(self):
         i = torch.tensor([[0, 1, 1], [2, 0, 2]])
         v = torch.tensor([3., 4., 5.])
@@ -1583,6 +1585,7 @@ def test_resize(self):
         self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                 [1, 1], [1, 2, 0], [2, 2, 0])
 
+    @skipIfRocm
     def test_is_nonzero(self):
         self.assertTrue(torch.sparse_coo_tensor(([0],), 1., (1,)).is_nonzero())
         self.assertFalse(torch.sparse_coo_tensor(([0],), 0., (1,)).is_nonzero())
diff --git a/test/test_torch.py b/test/test_torch.py
index 84ef8a22e050b..3026548b99043 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -3999,6 +3999,7 @@ def test_is_signed_cuda(self):
         self.assertEqual(torch.cuda.HalfTensor(10).is_signed(), True)
 
     @skipIfNoLapack
+    @skipIfRocm
     def test_gesv(self):
         a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23),
                           (-6.05, -3.30, 5.36, -4.44, 1.08),
@@ -4130,6 +4131,7 @@ def test_gesv_batched_dims(self):
         self._test_gesv_batched_dims(self, lambda t: t)
 
     @skipIfNoLapack
+    @skipIfRocm
     def test_qr(self):
 
         # Since the QR decomposition is unique only up to the signs of the rows of
@@ -4312,10 +4314,12 @@ def _test_trtrs(self, cast):
         self.assertEqual(res1, tb, 0)
 
     @skipIfNoLapack
+    @skipIfRocm
     def test_trtrs(self):
         self._test_trtrs(self, lambda t: t)
 
     @skipIfNoLapack
+    @skipIfRocm
     def test_gels(self):
         def _test_underdetermined(a, b, expectedNorm):
             m = a.size()[0]
@@ -4431,6 +4435,7 @@ def check_norm(a, b, expected_norm, gels_result):
         self.assertEqual((torch.mm(a, tb) - b).norm(), expectedNorm, 1e-8)
 
     @skipIfNoLapack
+    @skipIfRocm
     def test_eig(self):
         a = torch.Tensor(((1.96, 0.00, 0.00, 0.00, 0.00),
                           (-6.49, 3.80, 0.00, 0.00, 0.00),
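
Note on the Python hunks above: each one either stacks a @skipIfRocm decorator onto a test or drops a "skipIfRocm:..." exclusion string from the test_cuda.py test table. The decorator itself comes from the repo's shared test utilities (common_utils.py at the time of this diff). For readers unfamiliar with it, below is a minimal sketch of how such a skip decorator can be written; the PYTORCH_TEST_WITH_ROCM environment variable name and the skip message are assumptions for illustration, not a verbatim copy of the helper.

import os
import unittest
from functools import wraps

# Assumption: CI signals a ROCm run via an environment variable.
TEST_WITH_ROCM = os.getenv('PYTORCH_TEST_WITH_ROCM', '0') == '1'


def skipIfRocm(fn):
    # Skip the decorated test when running on the ROCm stack,
    # leaving CUDA and CPU runs untouched.
    @wraps(fn)
    def wrapper(*args, **kwargs):
        if TEST_WITH_ROCM:
            raise unittest.SkipTest("test doesn't currently work on ROCm")
        return fn(*args, **kwargs)
    return wrapper

Used as in the hunks above: placing @skipIfRocm beneath @unittest.skipIf(not RUN_CUDA, "no CUDA") skips the test on ROCm builds while keeping the existing CUDA gate intact.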