Skip to content

Commit e68c78c

Browse files
authored
Merge pull request #22 from mwootton/miopen_integration
Add support for MIOpen
2 parents 633c416 + 7e3319f commit e68c78c

32 files changed

+1839
-11
lines changed

aten/src/ATen/CMakeLists.txt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ FILE(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp")
5050
FILE(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu")
5151
FILE(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh")
5252
FILE(GLOB cudnn_cpp "cudnn/*.cpp")
53+
FILE(GLOB miopen_h "miopen/*.h")
54+
FILE(GLOB miopen_cpp "miopen/*.cpp")
5355
FILE(GLOB mkl_cpp "mkl/*.cpp")
5456
FILE(GLOB mkldnn_cpp "mkldnn/*.cpp")
5557

@@ -58,6 +60,7 @@ FILE(GLOB native_sparse_cpp "native/sparse/*.cpp")
5860
FILE(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
5961
FILE(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
6062
FILE(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
63+
FILE(GLOB native_miopen_cpp "native/miopen/*.cpp")
6164
FILE(GLOB native_cuda_cu "native/cuda/*.cu")
6265
FILE(GLOB native_cuda_cpp "native/cuda/*.cpp")
6366
FILE(GLOB native_mkl_cpp "native/mkl/*.cpp")
@@ -74,9 +77,14 @@ endif()
7477
IF(USE_CUDA OR USE_ROCM)
7578
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda)
7679
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} ${cuda_cu} ${native_cuda_cu} ${native_sparse_cuda_cu})
77-
set(all_cuda_cpp ${native_cudnn_cpp} ${native_sparse_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS})
78-
IF(CUDNN_FOUND)
79-
SET(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp})
80+
set(all_cuda_cpp ${native_sparse_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS})
81+
IF(USE_CUDA)
82+
SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${all_cuda_cpp})
83+
IF(CUDNN_FOUND)
84+
SET(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp})
85+
ENDIF()
86+
ELSEIF(USE_ROCM)
87+
SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${miopen_cpp} ${all_cuda_cpp})
8088
ENDIF()
8189
endif()
8290

aten/src/ATen/cuda/CUDAConfig.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined
66

77
#define AT_CUDNN_ENABLED() @AT_CUDNN_ENABLED@
8+
#define AT_MIOPEN_ENABLED() @AT_MIOPEN_ENABLED@

aten/src/ATen/cuda/detail/CUDAHooks.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ bool CUDAHooks::compiledWithCuDNN() const {
119119
return AT_CUDNN_ENABLED();
120120
}
121121

122+
// Reports whether this ATen build was compiled with MIOpen (ROCm's DNN
// library); mirrors compiledWithCuDNN() above. The value is fixed at build
// time via the AT_MIOPEN_ENABLED macro from CUDAConfig.h.
bool CUDAHooks::compiledWithMIOpen() const {
  return AT_MIOPEN_ENABLED();
}
125+
122126
bool CUDAHooks::supportsDilatedConvolutionWithCuDNN() const {
123127
#if AT_CUDNN_ENABLED()
124128
cudaDeviceProp* prop =

aten/src/ATen/cuda/detail/CUDAHooks.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
1818
Allocator* getPinnedMemoryAllocator() const override;
1919
void registerCUDATypes(Context*) const override;
2020
bool compiledWithCuDNN() const override;
21+
bool compiledWithMIOpen() const override;
2122
bool supportsDilatedConvolutionWithCuDNN() const override;
2223
long versionCuDNN() const override;
2324
double batchnormMinEpsilonCuDNN() const override;

aten/src/ATen/cudnn/Descriptors.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ union Constant
339339
double d;
340340
Constant(cudnnDataType_t dataType, double value) {
341341
if (dataType == CUDNN_DATA_HALF || dataType == CUDNN_DATA_FLOAT) {
342-
f = (float) value;
342+
f = static_cast<float>(value);
343343
} else {
344344
d = value;
345345
}

aten/src/ATen/cudnn/Handles.cpp renamed to aten/src/ATen/cudnn/Handle.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "Handles.h"
1+
#include "Handle.h"
22

33
#include "ATen/cuda/Exceptions.h"
44

File renamed without changes.

aten/src/ATen/cudnn/Utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include "ATen/cuda/Exceptions.h"
55
#include "THC/THC.h"
66
#include "cudnn-wrapper.h"
7-
#include "Handles.h"
7+
#include "Handle.h"
88

99
namespace at { namespace native {
1010

aten/src/ATen/detail/CUDAHooksInterface.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ struct AT_API CUDAHooksInterface {
8585
return false;
8686
}
8787

88+
  // Default for CPU-only builds: MIOpen is unavailable. Overridden by
  // CUDAHooks::compiledWithMIOpen() when the CUDA/ROCm backend is linked in.
  virtual bool compiledWithMIOpen() const {
    return false;
  }
91+
8892
virtual bool supportsDilatedConvolutionWithCuDNN() const {
8993
return false;
9094
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#include "Descriptors.h"

#include <ATen/ATen.h>

#include <algorithm>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
3+
4+
namespace at { namespace native {
5+
6+
namespace {
7+
8+
// Translate an ATen type into the matching MIOpen data type.
// MIOpen descriptors here only accept float32 and float16.
inline miopenDataType_t getDataType(const at::Type& t) {
  const auto scalar_type = t.scalarType();
  if (scalar_type != at::kFloat && scalar_type != at::kHalf) {
    throw std::runtime_error("TensorDescriptor only supports float and half tensors");
  }
  return (scalar_type == at::kFloat) ? miopenFloat : miopenHalf;
}
17+
18+
// Convenience overload: forwards to the at::Type-based mapping above.
inline miopenDataType_t getDataType(const at::Tensor& t) {
  return getDataType(t.type());
}
21+
22+
} // anonymous namespace
23+
24+
25+
// Configure this descriptor from a tensor, padding the rank up to `pad`
// with size-1/stride-1 trailing dimensions (see the IntList overload).
void TensorDescriptor::set(const at::Tensor &t, size_t pad) {
  set(getDataType(t), t.sizes(), t.strides(), pad);
}
28+
29+
// Maximum tensor rank supported by these MIOpen descriptors. Must be a
// constant expression: it is used as an array bound below, and a non-const
// `static int` only compiled via the GCC VLA extension (ill-formed ISO C++).
// `constexpr` keeps internal linkage while making the bound a true constant.
constexpr int MIOPEN_DIM_MAX = 4;
30+
31+
// Configure this tensor descriptor with explicit sizes/strides, padding the
// rank up to `pad` with size-1/stride-1 trailing dimensions.
// Throws std::runtime_error when either the tensor rank or the requested
// padding exceeds MIOPEN_DIM_MAX.
void TensorDescriptor::set(miopenDataType_t datatype, IntList t_sizes, IntList t_strides, size_t pad) {
  size_t dim = t_sizes.size();
  if (dim > static_cast<size_t>(MIOPEN_DIM_MAX) || pad > static_cast<size_t>(MIOPEN_DIM_MAX)) {
    // NOTE: the previous code stringized the limit with `#X` on a *variable*,
    // which produced the literal text "MIOPEN_DIM_MAX" in the message instead
    // of its value; std::to_string reports the actual number.
    throw std::runtime_error(
        "MIOpen supports only up to " + std::to_string(MIOPEN_DIM_MAX) + " dimensions");
  }
  int size[MIOPEN_DIM_MAX];
  int stride[MIOPEN_DIM_MAX];
  for (size_t i = 0; i < dim; ++i) {
    size[i] = static_cast<int>(t_sizes[i]);
    stride[i] = static_cast<int>(t_strides[i]);
  }
  // Pad out to `pad` dims with degenerate (size 1, stride 1) dimensions.
  for (size_t i = dim; i < pad; ++i) {
    size[i] = 1;
    stride[i] = 1;
  }
  set(datatype, static_cast<int>(std::max(dim, pad)), size, stride);
}
51+
52+
// Human-readable name of a MIOpen data type, for logging/diagnostics.
// Unknown enumerators are rendered with their numeric value.
std::string miopenTypeToString(miopenDataType_t dtype) {
  if (dtype == miopenFloat) {
    return "miopenFloat";
  }
  if (dtype == miopenHalf) {
    return "miopenHalf";
  }
  std::ostringstream oss;
  oss << "(unknown data-type " << static_cast<int>(dtype) << ")";
  return oss.str();
}
64+
65+
// Debug formatter: queries the underlying MIOpen descriptor and prints its
// data type, dimension count, sizes, and strides.
std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) {
  out << "TensorDescriptor " << static_cast<void*>(d.desc()) << "\n";
  // NOTE(review): nbDims is hard-coded to 4 rather than read back from the
  // descriptor — presumably because descriptors here are padded to at most
  // MIOPEN_DIM_MAX (= 4) dims; confirm against miopenGetTensorDescriptor,
  // which (unlike cuDNN's getter) does not report the rank in this call.
  int nbDims = 4;
  int dimA[MIOPEN_DIM_MAX];
  int strideA[MIOPEN_DIM_MAX];
  miopenDataType_t dtype;
  miopenGetTensorDescriptor(d.desc(), &dtype, dimA, strideA);
  out << " type = " << miopenTypeToString(dtype) << "\n";
  out << " nbDims = " << nbDims << "\n";
  // Read out only nbDims of the arrays!
  out << " dimA = ";
  for (auto i : ArrayRef<int>{dimA, static_cast<size_t>(nbDims)}) {
    out << i << ", ";
  }
  out << "\n";
  out << " strideA = ";
  for (auto i : ArrayRef<int>{strideA, static_cast<size_t>(nbDims)}) {
    out << i << ", ";
  }
  out << "\n";
  return out;
}
87+
88+
// Debug helper: dump this descriptor to stdout via operator<< above.
void TensorDescriptor::print() { std::cout << *this; }
89+
90+
// Configure this filter (weight) descriptor from a tensor, padding the rank
// up to `pad` with size-1 dimensions. Filters must be contiguous; strides
// are derived from the (padded) sizes under that assumption.
// Throws std::runtime_error on too-large rank/pad or non-contiguous input.
void FilterDescriptor::set(const at::Tensor &t, int64_t pad) {
  auto dim = t.ndimension();
  if (dim > MIOPEN_DIM_MAX || pad > MIOPEN_DIM_MAX) {
    // std::to_string reports the actual limit; the previous `#X` stringize of
    // a variable emitted the literal text "MIOPEN_DIM_MAX" in the message.
    throw std::runtime_error(
        "MIOpen supports only up to " + std::to_string(MIOPEN_DIM_MAX) + " dimensions");
  }
  if (!t.is_contiguous()) {
    throw std::runtime_error("MIOpen filters (a.k.a. weights) must be contiguous");
  }
  int size[MIOPEN_DIM_MAX];
  int stride[MIOPEN_DIM_MAX];
  for (int i = 0; i < dim; ++i) {
    size[i] = static_cast<int>(t.size(i));
  }
  for (int i = dim; i < pad; ++i) {
    size[i] = 1;
  }
  // Pad first, then derive contiguous strides over the FULL padded rank.
  // The previous code computed strides only for the unpadded dims and then
  // grew `dim`, passing uninitialized stride entries for the padded dims to
  // MIOpen. With trailing size-1 dims the recurrence below yields the same
  // stride values for the original dims, so behavior is otherwise unchanged.
  dim = std::max(dim, pad);
  for (int i = dim - 1; i >= 0; --i) {
    stride[i] = (i == dim - 1) ? 1 : stride[i + 1] * size[i + 1];
  }
  set(getDataType(t), static_cast<int>(dim), size, stride);
}
115+
116+
}}

0 commit comments

Comments
 (0)