Enable sparse functionality on ROCm (#241)

iotamudelta · web-flow · commit f8aa318e0ea5 · 2018-10-04T08:35:27.000-05:00
* Enable sparse functions for ROCm
* Reenable test_sparse unit tests that are now passing in ROCm (#208)
* Reenable test_sparse unit tests that are now passing
* It's a flaky test for us - skip.
diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu
@@ -9,7 +9,6 @@
 
 namespace at { namespace native { namespace sparse { namespace cuda {
 
-#ifndef __HIP_PLATFORM_HCC__
 
 std::string cusparseGetErrorString(cusparseStatus_t status) {
   switch(status)
@@ -224,6 +223,5 @@ void XcoosortByRow(int64_t m, int64_t n, int64_t nnz, int *cooRows, int *cooCols
   CUSPARSE_CHECK(cusparseXcoosortByRow(handle, i_m, i_n, i_nnz, cooRows, cooCols, P, pBuffer));
 }
 
-#endif
 
 }}}} // namespace at::native::sparse::cuda
diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu
@@ -25,7 +25,6 @@
 namespace at { namespace native {
 
 SparseTensor coalesce_sparse_cuda(const SparseTensor& self) {
-#ifndef __HIP_PLATFORM_HCC__
   int64_t nnz = self._nnz();
   if (self.is_coalesced()) {
     return self;
@@ -151,9 +150,6 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) {
 
   THCudaCheck(cudaGetLastError());
   return dst;
-#else
-  AT_ERROR("coalesce_sparse_cuda: HIP not supported");
-#endif
 }
 
 }} // namespace at::native
diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu
@@ -22,7 +22,6 @@ namespace at { namespace native {
 // Utility functions
 // --------------------------------------------------------------------
 
-#ifndef __HIP_PLATFORM_HCC__
 namespace {
   IntTensor _to_csr_int(const LongTensor& rowIndices, int64_t dim, int64_t nnz) {
     IntTensor csr = at::empty({dim+1}, CUDA(kInt));
@@ -32,7 +31,6 @@ namespace {
     return csr;
   }
 }
-#endif
 
 // NB: Deleted spaddcmul (aka addcmul_, but not actually wired up), spaddcdiv (not
 // wired at all)
@@ -42,7 +40,6 @@ namespace {
 // --------------------------------------------------------------------
 
 Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseTensor& sparse_, const Tensor& dense, Scalar beta, Scalar alpha) {
-#ifndef __HIP_PLATFORM_HCC__
   AT_ASSERT(t.is_cuda()); // dispatch argument
   AT_CHECK(r_.is_cuda(), "addmm: expected 'out' to be CUDA, but got CPU");
   AT_CHECK(sparse_.is_cuda(), "addmm: expected 'mat1' to be CUDA, but got CPU");
@@ -142,9 +139,6 @@ Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseT
 
   r_.copy_(r__);
   return r_;
-#else
-  AT_ERROR("s_addmm_out_sparse_dense_cuda: HIP not supported");
-#endif
 }
 
 Tensor s_addmm_sparse_dense_cuda(
@@ -176,7 +170,6 @@ Tensor& s_addmm_sparse_dense_cuda_(
 // --------------------------------------------------------------------
 
 SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse_, const Tensor& dense/* , Scalar alpha */) {
-#ifndef __HIP_PLATFORM_HCC__
   AT_ASSERT(sparse_.is_cuda()); // dispatch argument
   AT_CHECK(r_.is_cuda(), "hspmm: expected 'out' to be CUDA, but got CPU");
   AT_CHECK(dense.is_cuda(), "hspmm: expected 'mat2' to be CUDA, but got CPU");
@@ -232,9 +225,6 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse
   _get_sparse_impl(r_)->set_indices_and_values_unsafe(indices, values);
 
   return r_;
-#else
-  AT_ERROR("hspmm_out_sparse_cuda: HIP not supported");
-#endif
 }
 
 SparseTensor hspmm_sparse_cuda(const SparseTensor& sparse, const Tensor& dense) {
@@ -249,7 +239,6 @@ SparseTensor hspmm_sparse_cuda(const SparseTensor& sparse, const Tensor& dense)
 // --------------------------------------------------------------------
 
 Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorRef sparse_, at::Scalar value) {
-#ifndef __HIP_PLATFORM_HCC__
   const SparseTensor& sparse = sparse_.tref;
 
   AT_ASSERT(dense.is_cuda()); // dispatch argument
@@ -344,17 +333,13 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR
   THCudaCheck(cudaGetLastError());
 
   return r_;
-#else
-  AT_ERROR("add_out_dense_sparse_cuda: HIP not supported");
-#endif
 }
 
 // --------------------------------------------------------------------
 // add(SparseTensor, SparseTensor, Scalar)  [broadcasts]
 // --------------------------------------------------------------------
 
 SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const SparseTensor& src, Scalar value) {
-#ifndef __HIP_PLATFORM_HCC__
   AT_ASSERT(t.is_cuda()); // dispatch argument
   AT_CHECK(src.is_cuda(), "add: expected 'other' to be CUDA, but got CPU");
   AT_CHECK(r_.is_cuda(), "add: expected 'out' to be CUDA, but got CPU");
@@ -401,17 +386,13 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const
   //   }
 
   return r_;
-#else
-  AT_ERROR("s_add_out_sparse_cuda: HIP not supported");
-#endif
 }
 
 // --------------------------------------------------------------------
 // mul(SparseTensor, SparseTensor)  [broadcasts]
 // --------------------------------------------------------------------
 
 SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, const SparseTensor& src_) {
-#ifndef __HIP_PLATFORM_HCC__
   if (src_.dim() == 0) {
     return mul_out_sparse_zerodim(r_, t_, src_);
   } else if (t_.dim() == 0) {
@@ -480,9 +461,6 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons
   _get_sparse_impl(r_)->set_coalesced(true);
 
   return r_;
-#else
-  AT_ERROR("mul_out_sparse_cuda: HIP not supported");
-#endif
 }
 
 }} // namespace at::native
diff --git a/test/test_sparse.py b/test/test_sparse.py
@@ -479,7 +479,6 @@ def test_tensor(x, exp_i, exp_v):
         exp_v = self.ValueTensor(2, 3, 0)
         test_tensor(x, exp_i, exp_v)
 
-    @skipIfRocm
     def test_clone(self):
         def test_shape(sparse_dims, nnz, with_size):
             x = self._gen_sparse(sparse_dims, nnz, with_size)[0]
@@ -824,7 +823,6 @@ def test_spadd_hybrid(self):
         self._test_spadd_shape(0, [50, 30, 0], [2, 0])
         self._test_spadd_shape(10, [50, 30, 20], [2, 0])
 
-    @skipIfRocm
     def test_norm(self):
         def test_shape(sparse_dims, nnz, with_size):
             x, _, _ = self._gen_sparse(sparse_dims, nnz, with_size)
@@ -924,7 +922,6 @@ def test_basic_ops_hybrid(self):
         self._test_basic_ops_shape(0, 0, [10, 10, 10], [2, 0])
         self._test_basic_ops_shape(0, 0, [10, 10, 0], [2, 0])
 
-    @skipIfRocm
     def test_add_dense_sparse_mismatch(self):
         def test_shape(dense_size, sparse_dims_shape, dense_dims_shape, sparse_size):
             x = torch.zeros(dense_size, dtype=self.value_dtype, device=self.device)
@@ -1198,7 +1195,6 @@ def test_storage_not_null(self):
 
     @cuda_only
     @unittest.skipIf(torch.cuda.device_count() < 2, "only one GPU detected")
-    @skipIfRocm
     def test_same_gpu(self):
         def check_device(x, device_id):
             self.assertEqual(x.get_device(), device_id)
@@ -1308,7 +1304,6 @@ def test_factory(self):
                                 self.assertEqual(device, sparse_tensor._values().device)
                             self.assertEqual(True, sparse_tensor.requires_grad)
 
-    @skipIfRocm
     def test_factory_size_check(self):
         indices = self.IndexTensor([[1, 2],
                                     [0, 2]])
@@ -1374,7 +1369,6 @@ def test_factory_empty_indices(self):
         expected_indices = torch.empty((4, 0), dtype=torch.long, device=device)
         self.assertEqual(tensor._indices(), expected_indices)
 
-    @skipIfRocm
     def test_factory_nnz(self):
         indices = self.IndexTensor([[0]])  # (sparseDims, nnz): (1, 1)
         values = self.ValueTensor([[1, 1], [1, 1]])  # (nnz, ...): (2, 2)
@@ -1408,7 +1402,6 @@ def test_shape(i_shape, v_shape, size, expected_size):
         test_shape([3, 0], [0, 2, 4, 0], [0, 0, 0, 2, 4, 0], [0, 0, 0, 2, 4, 0])
         test_shape([3, 0], [0, 2, 4, 0], [1, 2, 3, 2, 4, 0], [1, 2, 3, 2, 4, 0])
 
-    @skipIfRocm
     def test_factory_dense_dims(self):
         indices = self.IndexTensor([[0]])
         values = self.ValueTensor([[[1, 1, 1], [1, 1, 1]]])
@@ -1439,7 +1432,6 @@ def test_factory_type_inference(self):
         self.assertEqual(torch.int64, t.dtype)
 
     @cuda_only
-    @skipIfRocm
     def test_factory_device_type_inference(self):
         # both indices/values are CUDA
         shape = (1, 3)
@@ -1552,7 +1544,6 @@ def test_empty_full(self):
             TestTorch._test_empty_full(self, all_sparse_dtypes, torch.sparse_coo, None)
             TestTorch._test_empty_full(self, all_sparse_dtypes, torch.sparse_coo, torch.device('cuda:0'))
 
-    @skipIfRocm
     def test_is_sparse(self):
         x = torch.randn(3, 3)
         self.assertFalse(x.is_sparse)
@@ -1602,7 +1593,6 @@ def _test_resize_shape(self, x_i, x_v, x_size, y_i, y_v, y_size):
         self.assertEqual(x.to_dense().view(-1)[0:x_v_numel].view(x_v),
                          x_dense.view(-1)[0:x_v_numel].view(x_v))
 
-    @skipIfRocm
     def test_resize(self):
         # 1. Expand the size of some dense dimensions [Supported]
         self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
@@ -1693,7 +1683,6 @@ def setUp(self):
 
 class TestSparseOneOff(TestCase):
     @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
-    @skipIfRocm
     def test_cuda_from_cpu(self):
         with self.assertRaisesRegex(
                 RuntimeError,
@@ -1717,7 +1706,6 @@ def test_cuda_from_cpu(self):
                                      [0, 4, 4, 0])
 
     @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
-    @skipIfRocm
     def test_cuda_sparse_cpu_dense_add(self):
         x = torch.zeros(3, 4, 4)
         sparse_y = torch.cuda.sparse.FloatTensor(torch.zeros(1, 4).long().cuda(),
diff --git a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py
@@ -2174,6 +2174,18 @@
     ("cusparseOperation_t", ("hipsparseOperation_t", CONV_TYPE, API_SPARSE)),
     ("cusparseCreate", ("hipsparseCreate", CONV_MATH_FUNC, API_SPARSE)),
     ("cusparseDestroy", ("hipsparseDestroy", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseXcoo2csr", ("hipsparseXcoo2csr", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseMatDescr_t", ("hipsparseMatDescr_t", CONV_TYPE, API_SPARSE)),  # opaque descriptor type, so CONV_TYPE like cusparseOperation_t
+    ("cusparseCreateMatDescr", ("hipsparseCreateMatDescr", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseScsrmm2", ("hipsparseScsrmm2", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseDcsrmm2", ("hipsparseDcsrmm2", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseXcsrsort_bufferSizeExt", ("hipsparseXcsrsort_bufferSizeExt", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseXcsrsort", ("hipsparseXcsrsort", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseXcoosort_bufferSizeExt", ("hipsparseXcoosort_bufferSizeExt", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseXcoosortByRow", ("hipsparseXcoosortByRow", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseSetStream", ("hipsparseSetStream", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseCreateIdentityPermutation", ("hipsparseCreateIdentityPermutation", CONV_MATH_FUNC, API_SPARSE)),
+    ("cusparseSetMatIndexBase", ("hipsparseSetMatIndexBase", CONV_MATH_FUNC, API_SPARSE)),
     ("CUSPARSE_STATUS_SUCCESS", ("HIPSPARSE_STATUS_SUCCESS", CONV_NUMERIC_LITERAL, API_SPARSE)),
     ("CUSPARSE_STATUS_NOT_INITIALIZED", ("HIPSPARSE_STATUS_NOT_INITIALIZED", CONV_NUMERIC_LITERAL, API_SPARSE)),
     ("CUSPARSE_STATUS_ALLOC_FAILED", ("HIPSPARSE_STATUS_ALLOC_FAILED", CONV_NUMERIC_LITERAL, API_SPARSE)),
@@ -2183,6 +2195,12 @@
     ("CUSPARSE_STATUS_INTERNAL_ERROR", ("HIPSPARSE_STATUS_INTERNAL_ERROR", CONV_NUMERIC_LITERAL, API_SPARSE)),
     ("CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", ("HIPSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", CONV_NUMERIC_LITERAL, API_SPARSE)),
     ("CUSPARSE_STATUS_ARCH_MISMATCH", ("HIPSPARSE_STATUS_ARCH_MISMATCH", CONV_NUMERIC_LITERAL, API_SPARSE)),
+    ("CUSPARSE_STATUS_ZERO_PIVOT", ("HIPSPARSE_STATUS_ZERO_PIVOT", CONV_NUMERIC_LITERAL, API_SPARSE)),
+    ("CUSPARSE_OPERATION_TRANSPOSE", ("HIPSPARSE_OPERATION_TRANSPOSE", CONV_NUMERIC_LITERAL, API_SPARSE)),
+    ("CUSPARSE_OPERATION_NON_TRANSPOSE", ("HIPSPARSE_OPERATION_NON_TRANSPOSE", CONV_NUMERIC_LITERAL, API_SPARSE)),
+    ("CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE", ("HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE", CONV_NUMERIC_LITERAL, API_SPARSE)),
+    ("CUSPARSE_INDEX_BASE_ZERO", ("HIPSPARSE_INDEX_BASE_ZERO", CONV_NUMERIC_LITERAL, API_SPARSE)),
+    ("CUSPARSE_INDEX_BASE_ONE", ("HIPSPARSE_INDEX_BASE_ONE", CONV_NUMERIC_LITERAL, API_SPARSE)),
 ])
 
 PYTORCH_SPECIFIC_MAPPINGS = collections.OrderedDict([

Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,6 @@`
`9`	`9`
`10`	`10`	`namespace at { namespace native { namespace sparse { namespace cuda {`
`11`	`11`
`12`		`-#ifndef __HIP_PLATFORM_HCC__`
`13`	`12`
`14`	`13`	`std::string cusparseGetErrorString(cusparseStatus_t status) {`
`15`	`14`	`switch(status)`
`@@ -224,6 +223,5 @@ void XcoosortByRow(int64_t m, int64_t n, int64_t nnz, int *cooRows, int *cooCols`
`224`	`223`	`CUSPARSE_CHECK(cusparseXcoosortByRow(handle, i_m, i_n, i_nnz, cooRows, cooCols, P, pBuffer));`
`225`	`224`	`}`
`226`	`225`
`227`		`-#endif`
`228`	`226`
`229`	`227`	`}}}} // namespace at::native::sparse::cuda`