Add support for MIOpen #22

Merged · 17 commits · Aug 13, 2018
14 changes: 11 additions & 3 deletions aten/src/ATen/CMakeLists.txt
@@ -50,6 +50,8 @@ FILE(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp")
FILE(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu")
FILE(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh")
FILE(GLOB cudnn_cpp "cudnn/*.cpp")
+FILE(GLOB miopen_h "miopen/*.h")
+FILE(GLOB miopen_cpp "miopen/*.cpp")
FILE(GLOB mkl_cpp "mkl/*.cpp")
FILE(GLOB mkldnn_cpp "mkldnn/*.cpp")

@@ -58,6 +60,7 @@ FILE(GLOB native_sparse_cpp "native/sparse/*.cpp")
FILE(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
FILE(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
FILE(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
+FILE(GLOB native_miopen_cpp "native/miopen/*.cpp")
FILE(GLOB native_cuda_cu "native/cuda/*.cu")
FILE(GLOB native_cuda_cpp "native/cuda/*.cpp")
FILE(GLOB native_mkl_cpp "native/mkl/*.cpp")
@@ -74,9 +77,14 @@ endif()
IF(USE_CUDA OR USE_ROCM)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda)
  set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} ${cuda_cu} ${native_cuda_cu} ${native_sparse_cuda_cu})
-  set(all_cuda_cpp ${native_cudnn_cpp} ${native_sparse_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS})
-  IF(CUDNN_FOUND)
-    SET(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp})
+  set(all_cuda_cpp ${native_sparse_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS})
+  IF(USE_CUDA)
+    SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${all_cuda_cpp})
+    IF(CUDNN_FOUND)
+      SET(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp})
+    ENDIF()
+  ELSEIF(USE_ROCM)
+    SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${miopen_cpp} ${all_cuda_cpp})
Review comment: So, MIOpen will be mandatory, and not optional?

Reply: Yes. Since MIOpen is part of the regular, free ROCm releases and is available in the radeon repo, it is easy enough for users to install; we don't think this is a burden, and it provides better performance on our hardware than the default kernels.

Review comment: If MIOpen is mandatory, why do you need an AT_MIOPEN_ENABLED macro?

  ENDIF()
endif()

1 change: 1 addition & 0 deletions aten/src/ATen/cuda/CUDAConfig.h.in
@@ -5,3 +5,4 @@
// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined

#define AT_CUDNN_ENABLED() @AT_CUDNN_ENABLED@
+#define AT_MIOPEN_ENABLED() @AT_MIOPEN_ENABLED@
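The macro is meant to be tested in the same checked-boolean style as AT_CUDNN_ENABLED above; a minimal usage sketch (the guarded bodies are placeholders):

#include <ATen/cuda/CUDAConfig.h>

// Because AT_MIOPEN_ENABLED is function-style, forgetting the include makes
// `#if AT_MIOPEN_ENABLED()` a preprocessor error instead of silently false.
#if AT_MIOPEN_ENABLED()
// compile the MIOpen-backed path
#else
// compile the fallback path
#endif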
4 changes: 4 additions & 0 deletions aten/src/ATen/cuda/detail/CUDAHooks.cpp
@@ -119,6 +119,10 @@ bool CUDAHooks::compiledWithCuDNN() const {
  return AT_CUDNN_ENABLED();
}

+bool CUDAHooks::compiledWithMIOpen() const {
+  return AT_MIOPEN_ENABLED();
+}
+
bool CUDAHooks::supportsDilatedConvolutionWithCuDNN() const {
#if AT_CUDNN_ENABLED()
  cudaDeviceProp* prop =
1 change: 1 addition & 0 deletions aten/src/ATen/cuda/detail/CUDAHooks.h
@@ -18,6 +18,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
  Allocator* getPinnedMemoryAllocator() const override;
  void registerCUDATypes(Context*) const override;
  bool compiledWithCuDNN() const override;
+  bool compiledWithMIOpen() const override;
  bool supportsDilatedConvolutionWithCuDNN() const override;
  long versionCuDNN() const override;
  double batchnormMinEpsilonCuDNN() const override;
2 changes: 1 addition & 1 deletion aten/src/ATen/cudnn/Descriptors.h
@@ -339,7 +339,7 @@ union Constant
  double d;
  Constant(cudnnDataType_t dataType, double value) {
    if (dataType == CUDNN_DATA_HALF || dataType == CUDNN_DATA_FLOAT) {
-      f = (float) value;
+      f = static_cast<float>(value);
    } else {
      d = value;
    }
@@ -1,4 +1,4 @@
-#include "Handles.h"
+#include "Handle.h"
Review comment: Oh, ugh, this is my fault. When I made a comment about naming this Handle.h, not Handles.h, I had forgotten that the existing cuDNN code called the header Handles.h. Can you re-add a cudnn/Handles.h header that is just #include <ATen/cudnn/Handle.h>, for BC? (I know there are external users of the handles API.)
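For illustration, the requested shim would be a single forwarding header along these lines:

// aten/src/ATen/cudnn/Handles.h -- backward-compatibility shim (sketch)
#pragma once
#include <ATen/cudnn/Handle.h>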


#include "ATen/cuda/Exceptions.h"

File renamed without changes.
2 changes: 1 addition & 1 deletion aten/src/ATen/cudnn/Utils.h
@@ -4,7 +4,7 @@
#include "ATen/cuda/Exceptions.h"
#include "THC/THC.h"
#include "cudnn-wrapper.h"
-#include "Handles.h"
+#include "Handle.h"

namespace at { namespace native {

4 changes: 4 additions & 0 deletions aten/src/ATen/detail/CUDAHooksInterface.h
@@ -85,6 +85,10 @@ struct AT_API CUDAHooksInterface {
    return false;
  }

+  virtual bool compiledWithMIOpen() const {
+    return false;
+  }
+
  virtual bool supportsDilatedConvolutionWithCuDNN() const {
    return false;
  }
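A sketch of how this default is meant to be reached (the accessor spelling is illustrative, not taken from this diff): CPU-only builds dispatch to this base class and get false, while CUDA/HIP builds register the CUDAHooks override above, which reports AT_MIOPEN_ENABLED().

// Hypothetical call site:
bool has_miopen = at::globalContext().getCUDAHooks().compiledWithMIOpen();
// CPU-only build: base CUDAHooksInterface -> false
// ROCm build:     CUDAHooks override     -> AT_MIOPEN_ENABLED()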
116 changes: 116 additions & 0 deletions aten/src/ATen/miopen/Descriptors.cpp
@@ -0,0 +1,116 @@
#include "Descriptors.h"
#include <ATen/ATen.h>

namespace at { namespace native {

namespace {

inline miopenDataType_t getDataType(const at::Type& t) {
auto scalar_type = t.scalarType();
if (scalar_type == at::kFloat) {
return miopenFloat;
} else if (scalar_type == at::kHalf) {
return miopenHalf;
}
throw std::runtime_error("TensorDescriptor only supports float and half tensors");
}

inline miopenDataType_t getDataType(const at::Tensor& t) {
return getDataType(t.type());
}

} // anonymous namespace


void TensorDescriptor::set(const at::Tensor &t, size_t pad) {
set(getDataType(t), t.sizes(), t.strides(), pad);
}

static constexpr int MIOPEN_DIM_MAX = 4;  // constexpr so it can size the stack arrays below

void TensorDescriptor::set(miopenDataType_t datatype, IntList t_sizes, IntList t_strides, size_t pad) {
Review comment: All of this padding nonsense is to get around some weirdness in the cuDNN API, where you have to expand out the dimension of tensors in some cases; the API won't broadcast them for you. Is this still true for MIOpen?

Author reply: This is almost certainly needed. Leaving it around.
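To make the padding concrete, a sketch with a hypothetical 1-D bias tensor:

// A 16-element bias set with pad = 4 is described to MIOpen as 4-D;
// trailing dimensions are filled in with size 1:
//   t_sizes = {16}, t_strides = {1}
//   -> size = {16, 1, 1, 1}, stride = {1, 1, 1, 1}
TensorDescriptor bdesc;
bdesc.set(bias, 4);  // `bias` is a hypothetical 1-D float tensor on the GPU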

size_t dim = t_sizes.size();
  if (dim > static_cast<size_t>(MIOPEN_DIM_MAX) || pad > static_cast<size_t>(MIOPEN_DIM_MAX)) {
    // MIOPEN_DIM_MAX is a constant, not a preprocessor macro, so the stringize
    // trick used in the cuDNN version would print the identifier rather than
    // the value; build the message at runtime instead.
    throw std::runtime_error("MIOpen supports only up to " + std::to_string(MIOPEN_DIM_MAX) + " dimensions");
  }
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
for (size_t i = 0; i < dim; ++i) {
size[i] = static_cast<int>(t_sizes[i]);
stride[i] = static_cast<int>(t_strides[i]);
}
for (size_t i = dim; i < pad; ++i) {
size[i] = 1;
stride[i] = 1;
}
set(datatype, static_cast<int>(std::max(dim, pad)), size, stride);
}

std::string miopenTypeToString(miopenDataType_t dtype) {
switch (dtype) {
case miopenFloat:
return "miopenFloat";
case miopenHalf:
return "miopenHalf";
default:
std::ostringstream oss;
oss << "(unknown data-type " << static_cast<int>(dtype) << ")";
return oss.str();
}
}

std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) {
out << "TensorDescriptor " << static_cast<void*>(d.desc()) << "\n";
int nbDims = 4;
int dimA[MIOPEN_DIM_MAX];
int strideA[MIOPEN_DIM_MAX];
miopenDataType_t dtype;
miopenGetTensorDescriptor(d.desc(), &dtype, dimA, strideA);
out << " type = " << miopenTypeToString(dtype) << "\n";
out << " nbDims = " << nbDims << "\n";
// Read out only nbDims of the arrays!
out << " dimA = ";
for (auto i : ArrayRef<int>{dimA, static_cast<size_t>(nbDims)}) {
out << i << ", ";
}
out << "\n";
out << " strideA = ";
for (auto i : ArrayRef<int>{strideA, static_cast<size_t>(nbDims)}) {
out << i << ", ";
}
out << "\n";
return out;
}

void TensorDescriptor::print() { std::cout << *this; }

void FilterDescriptor::set(const at::Tensor &t, int64_t pad) {
auto dim = t.ndimension();
  if (dim > MIOPEN_DIM_MAX || pad > MIOPEN_DIM_MAX) {
    throw std::runtime_error("MIOpen supports only up to " + std::to_string(MIOPEN_DIM_MAX) + " dimensions");
  }
if (!t.is_contiguous()) {
throw std::runtime_error("MIOpen filters (a.k.a. weights) must be contiguous");
}
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
  for (int i = 0; i < dim; ++i) {
    size[i] = static_cast<int>(t.size(i));
  }
  for (int i = dim; i < pad; ++i) {
    size[i] = 1;
  }
  // Pad first so the stride loop also covers the padded trailing dimensions;
  // otherwise their strides would be left uninitialized.
  dim = std::max(dim, pad);
  for (int i = dim - 1; i >= 0; --i) {
    stride[i] = (i == dim - 1) ? 1 : stride[i + 1] * size[i + 1];
  }
  set(getDataType(t), static_cast<int>(dim), size, stride);
}
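A worked example of the stride computation above, assuming a standard contiguous weight layout:

// A contiguous weight of shape {8, 3, 5, 5} yields, right to left:
//   stride[3] = 1
//   stride[2] = stride[3] * size[3] = 5
//   stride[1] = stride[2] * size[2] = 25
//   stride[0] = stride[1] * size[1] = 75
// These are exactly the strides of a contiguous tensor, which is why the
// is_contiguous() check above is a precondition.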

}}
144 changes: 144 additions & 0 deletions aten/src/ATen/miopen/Descriptors.h
@@ -0,0 +1,144 @@
#pragma once

#include "Exceptions.h"

#include "miopen-wrapper.h"
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>

namespace at { namespace native {

inline int dataSize(miopenDataType_t dataType)
{
switch (dataType) {
case miopenHalf: return 2;
case miopenFloat: return 4;
default: return 8;
}
}

// For each size-1 dimension, rewrite its stride (in place) to the product of
// the sizes of all later dimensions, i.e. the stride that dimension would have
// if the tensor were contiguous. MIOpen, like cuDNN, expects this canonical
// form; strides of dimensions with size > 1 are left untouched.
static inline void fixSizeOneDimStride(int dim, const int *size, int *stride) {
  int64_t z = 1;
  for(int d = dim-1; d >= 0; d--)
  {
    if (size[d] == 1) {
      stride[d] = static_cast<int>(z);
    } else {
      z *= size[d];
    }
  }
}
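A short trace of the rewrite for a concrete shape:

// size = {2, 1, 3}, incoming stride = {3, 7, 1}
//   d = 2: size[2] == 3, so z becomes 3
//   d = 1: size[1] == 1, so stride[1] is rewritten to z = 3
//   d = 0: size[0] == 2, so z becomes 6 (stride[0] untouched)
// Result: stride = {3, 3, 1}; only size-1 dimensions are modified.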

template <typename T, miopenStatus_t (*dtor)(T*)>
struct DescriptorDeleter {
void operator()(T* x) {
if (x != nullptr) {
MIOPEN_CHECK(dtor(x));
}
}
};

// A generic class for wrapping MIOpen descriptor types. All you need
// is to give the underlying type the Descriptor_t points to (usually,
// if it's miopenTensorDescriptor_t it points to miopenTensorStruct),
// the constructor and the destructor. Subclasses are responsible
// for defining a set() function to actually set the descriptor.
//
// Descriptors default construct to a nullptr, and have a descriptor
// initialized the first time you call set() or any other initializing
// function.
template <typename T, miopenStatus_t (*ctor)(T**), miopenStatus_t (*dtor)(T*)>
class Descriptor
{
public:
// Use desc() to access the underlying descriptor pointer in
// a read-only fashion. Most client code should use this.
// If the descriptor was never initialized, this will return
// nullptr.
T* desc() const { return desc_.get(); }
T* desc() { return desc_.get(); }

  // Use mut_desc() to access the underlying descriptor pointer
// if you intend to modify what it points to (e.g., using
// miopenSetFooDescriptor). This will ensure that the descriptor
// is initialized. Code in this file will use this function.
T* mut_desc() { init(); return desc_.get(); }
protected:
void init() {
if (desc_ == nullptr) {
T* raw_desc;
MIOPEN_CHECK(ctor(&raw_desc));
desc_.reset(raw_desc);
}
}
private:
std::unique_ptr<T, DescriptorDeleter<T, dtor>> desc_;
};
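A usage sketch of the lazy-initialization contract, using the TensorDescriptor subclass defined just below (`input` is a hypothetical tensor):

TensorDescriptor td;    // holds nullptr; no MIOpen object created yet
td.set(input);          // set() -> mut_desc() -> miopenCreateTensorDescriptor
miopenTensorDescriptor_t raw = td.desc();  // read-only handle for MIOpen calls
// miopenDestroyTensorDescriptor runs automatically via DescriptorDeleter.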

class TensorDescriptor
: public Descriptor<miopenTensorDescriptor,
&miopenCreateTensorDescriptor,
&miopenDestroyTensorDescriptor>
{
public:
TensorDescriptor() {}
explicit TensorDescriptor(const at::Tensor &t, size_t pad = 0) {
set(t, pad);
}

void set(const at::Tensor &t, size_t pad = 0);
void set(miopenDataType_t dataType, IntList sizes, IntList strides, size_t pad = 0);

void print();

private:
void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
fixSizeOneDimStride(dim, size, stride);
MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
}
};

std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d);

class FilterDescriptor
: public Descriptor<miopenTensorDescriptor,
&miopenCreateTensorDescriptor,
&miopenDestroyTensorDescriptor>
{
public:
void set(const at::Tensor &t, int64_t pad = 0);

private:
void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
}
};

struct ConvolutionDescriptor
: public Descriptor<miopenConvolutionDescriptor,
&miopenCreateConvolutionDescriptor,
&miopenDestroyConvolutionDescriptor>
{
  void set(miopenDataType_t dataType, int dim, int* pad, int* stride, int* upscale /* aka dilation */, int groups) {
    // MIOpen's descriptor init is 2-D and takes scalar values, so only the
    // leading pad/stride entries are forwarded and dilation is fixed at 1.
    // mathType is computed for parity with the cuDNN descriptor (half
    // accumulates in float) but is currently unused here.
    miopenDataType_t mathType = dataType;
    if (dataType == miopenHalf) mathType = miopenFloat;
    (void)mathType;
    MIOPEN_CHECK(miopenInitConvolutionDescriptor(mut_desc(), miopenConvolution, *pad, *pad, *stride, *stride, 1, 1));
  }
};
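A sketch of describing a symmetric 2-D convolution with this wrapper; only the leading pad/stride entries reach MIOpen, and dilation is pinned to 1:

ConvolutionDescriptor cdesc;
int pad[2] = {1, 1};
int stride[2] = {2, 2};
int dilation[2] = {1, 1};  // accepted for interface parity, not forwarded
cdesc.set(miopenFloat, 2, pad, stride, dilation, /*groups=*/1);
// -> miopenInitConvolutionDescriptor(desc, miopenConvolution, 1, 1, 2, 2, 1, 1)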

union Constant
{
float f;
double d;
Constant(miopenDataType_t dataType, double value) {
if (dataType == miopenHalf || dataType == miopenFloat) {
f = static_cast<float>(value);
} else {
d = value;
}
}
};

}} // namespace
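The Constant union mirrors its cuDNN counterpart: scaling factors are handed to the library through untyped pointers, so the union stores the value at the width matching the descriptor's data type. A sketch:

Constant one(miopenFloat, 1.0);   // stores 1.0f in the float member
Constant zero(miopenFloat, 0.0);
// &one and &zero can then be passed as alpha/beta-style arguments of
// MIOpen calls that take per-datatype scaling factors.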
43 changes: 43 additions & 0 deletions aten/src/ATen/miopen/Exceptions.h
@@ -0,0 +1,43 @@
#pragma once

#include "miopen-wrapper.h"
#include <string>
#include <stdexcept>
#include <sstream>

struct THCState;

namespace at { namespace native {

class miopen_exception : public std::runtime_error {
public:
miopenStatus_t status;
miopen_exception(miopenStatus_t status, const char* msg)
: std::runtime_error(msg)
, status(status) {}
miopen_exception(miopenStatus_t status, const std::string& msg)
: std::runtime_error(msg)
, status(status) {}
};

inline void MIOPEN_CHECK(miopenStatus_t status)
{
if (status != miopenStatusSuccess) {
if (status == miopenStatusNotImplemented) {
throw miopen_exception(status, std::string(miopenGetErrorString(status)) +
". This error may appear if you passed in a non-contiguous input.");
}
throw miopen_exception(status, miopenGetErrorString(status));
}
}

inline void HIP_CHECK(hipError_t error)
{
if (error != hipSuccess) {
std::string msg("HIP error: ");
msg += hipGetErrorString(error);
throw std::runtime_error(msg);
}
}

}} // namespace at::native
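Usage sketch for the two checkers (the descriptor arguments are illustrative): every raw MIOpen or HIP call is wrapped so a failing status code surfaces as a C++ exception rather than being ignored.

MIOPEN_CHECK(miopenSetTensorDescriptor(desc, miopenFloat, 4, size, stride));
HIP_CHECK(hipDeviceSynchronize());
// The typed exception keeps the raw status available to callers:
try {
  MIOPEN_CHECK(st);
} catch (const at::native::miopen_exception& e) {
  // e.status distinguishes, e.g., miopenStatusNotImplemented
}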