Add support for MIOpen #22

Merged · 17 commits · Aug 13, 2018
14 changes: 11 additions & 3 deletions aten/src/ATen/CMakeLists.txt
@@ -50,6 +50,8 @@ FILE(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp")
FILE(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu")
FILE(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh")
FILE(GLOB cudnn_cpp "cudnn/*.cpp")
+FILE(GLOB miopen_h "miopen/*.h")
+FILE(GLOB miopen_cpp "miopen/*.cpp")
FILE(GLOB mkl_cpp "mkl/*.cpp")
FILE(GLOB mkldnn_cpp "mkldnn/*.cpp")

@@ -58,6 +60,7 @@ FILE(GLOB native_sparse_cpp "native/sparse/*.cpp")
FILE(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
FILE(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
FILE(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
+FILE(GLOB native_miopen_cpp "native/miopen/*.cpp")
FILE(GLOB native_cuda_cu "native/cuda/*.cu")
FILE(GLOB native_cuda_cpp "native/cuda/*.cpp")
FILE(GLOB native_mkl_cpp "native/mkl/*.cpp")
@@ -74,9 +77,14 @@ endif()
IF(USE_CUDA OR USE_ROCM)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda)
  set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} ${cuda_cu} ${native_cuda_cu} ${native_sparse_cuda_cu})
-  set(all_cuda_cpp ${native_cudnn_cpp} ${native_sparse_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS})
-  IF(CUDNN_FOUND)
-    SET(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp})
+  set(all_cuda_cpp ${native_sparse_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS})
+  IF(USE_CUDA)
+    SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${all_cuda_cpp})
+    IF(CUDNN_FOUND)
+      SET(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp})
+    ENDIF()
+  ELSEIF(USE_ROCM)
+    SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${miopen_cpp} ${all_cuda_cpp})
Review comment: So, MIOpen will be mandatory, and not optional?

Reply: Yes. Since MIOpen is part of the regular, free ROCm releases and is available in the radeon repo, it is easy enough for users to install; we don't think this is a burden, and it provides better performance on our hardware than the default kernels.

Review comment: If MIOpen is mandatory, why do you need an AT_MIOPEN_ENABLED macro?

  ENDIF()
endif()

1 change: 1 addition & 0 deletions aten/src/ATen/cuda/CUDAConfig.h.in
@@ -5,3 +5,4 @@
// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined

#define AT_CUDNN_ENABLED() @AT_CUDNN_ENABLED@
+#define AT_MIOPEN_ENABLED() @AT_MIOPEN_ENABLED@
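The macro is meant to be tested in the same checked-boolean style as AT_CUDNN_ENABLED above; a minimal usage sketch (the guarded bodies are placeholders):

#include <ATen/cuda/CUDAConfig.h>

// Because AT_MIOPEN_ENABLED is function-style, forgetting the include makes
// `#if AT_MIOPEN_ENABLED()` a preprocessor error instead of silently false.
#if AT_MIOPEN_ENABLED()
// compile the MIOpen-backed path
#else
// compile the fallback path
#endif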
4 changes: 4 additions & 0 deletions aten/src/ATen/cuda/detail/CUDAHooks.cpp
@@ -119,6 +119,10 @@ bool CUDAHooks::compiledWithCuDNN() const {
  return AT_CUDNN_ENABLED();
}

+bool CUDAHooks::compiledWithMIOpen() const {
+  return AT_MIOPEN_ENABLED();
+}
+
bool CUDAHooks::supportsDilatedConvolutionWithCuDNN() const {
#if AT_CUDNN_ENABLED()
  cudaDeviceProp* prop =
1 change: 1 addition & 0 deletions aten/src/ATen/cuda/detail/CUDAHooks.h
@@ -18,6 +18,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
  Allocator* getPinnedMemoryAllocator() const override;
  void registerCUDATypes(Context*) const override;
  bool compiledWithCuDNN() const override;
+  bool compiledWithMIOpen() const override;
  bool supportsDilatedConvolutionWithCuDNN() const override;
  long versionCuDNN() const override;
  double batchnormMinEpsilonCuDNN() const override;
2 changes: 1 addition & 1 deletion aten/src/ATen/cudnn/Descriptors.h
@@ -339,7 +339,7 @@ union Constant
  double d;
  Constant(cudnnDataType_t dataType, double value) {
    if (dataType == CUDNN_DATA_HALF || dataType == CUDNN_DATA_FLOAT) {
-      f = (float) value;
+      f = static_cast<float>(value);
    } else {
      d = value;
    }
@@ -1,4 +1,4 @@
-#include "Handles.h"
+#include "Handle.h"
Review comment: Oh, ugh, this is my fault. When I made a comment about naming this Handle.h, not Handles.h, I had forgotten that the existing cuDNN code called the header Handles.h. Can you re-add a cudnn/Handles.h header that is just #include <ATen/cudnn/Handle.h>, for BC? (I know there are external users of the handles API.)
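For illustration, the requested shim would be a single forwarding header along these lines:

// aten/src/ATen/cudnn/Handles.h -- backward-compatibility shim (sketch)
#pragma once
#include <ATen/cudnn/Handle.h>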


#include "ATen/cuda/Exceptions.h"

File renamed without changes.
2 changes: 1 addition & 1 deletion aten/src/ATen/cudnn/Utils.h
@@ -4,7 +4,7 @@
#include "ATen/cuda/Exceptions.h"
#include "THC/THC.h"
#include "cudnn-wrapper.h"
-#include "Handles.h"
+#include "Handle.h"

namespace at { namespace native {

4 changes: 4 additions & 0 deletions aten/src/ATen/detail/CUDAHooksInterface.h
@@ -85,6 +85,10 @@ struct AT_API CUDAHooksInterface {
    return false;
  }

+  virtual bool compiledWithMIOpen() const {
+    return false;
+  }
+
  virtual bool supportsDilatedConvolutionWithCuDNN() const {
    return false;
  }
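A sketch of how this default is meant to be reached (the accessor spelling is illustrative, not taken from this diff): CPU-only builds dispatch to this base class and get false, while CUDA/HIP builds register the CUDAHooks override above, which reports AT_MIOPEN_ENABLED().

// Hypothetical call site:
bool has_miopen = at::globalContext().getCUDAHooks().compiledWithMIOpen();
// CPU-only build: base CUDAHooksInterface -> false
// ROCm build:     CUDAHooks override     -> AT_MIOPEN_ENABLED()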
116 changes: 116 additions & 0 deletions aten/src/ATen/miopen/Descriptors.cpp
@@ -0,0 +1,116 @@
#include "Descriptors.h"
#include <ATen/ATen.h>

namespace at { namespace native {

namespace {

inline miopenDataType_t getDataType(const at::Type& t) {
auto scalar_type = t.scalarType();
if (scalar_type == at::kFloat) {
return miopenFloat;
} else if (scalar_type == at::kHalf) {
return miopenHalf;
}
throw std::runtime_error("TensorDescriptor only supports float and half tensors");
}

inline miopenDataType_t getDataType(const at::Tensor& t) {
return getDataType(t.type());
}

} // anonymous namespace


void TensorDescriptor::set(const at::Tensor &t, size_t pad) {
set(getDataType(t), t.sizes(), t.strides(), pad);
}

static constexpr int MIOPEN_DIM_MAX = 4;  // constexpr so it can size the stack arrays below

void TensorDescriptor::set(miopenDataType_t datatype, IntList t_sizes, IntList t_strides, size_t pad) {
Review comment: All of this padding nonsense is to get around some weirdness in the cuDNN API, where you have to expand out the dimension of tensors in some cases; the API won't broadcast them for you. Is this still true for MIOpen?

Author reply: This is almost certainly needed. Leaving it around.
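To make the padding concrete, a sketch with a hypothetical 1-D bias tensor:

// A 16-element bias set with pad = 4 is described to MIOpen as 4-D;
// trailing dimensions are filled in with size 1:
//   t_sizes = {16}, t_strides = {1}
//   -> size = {16, 1, 1, 1}, stride = {1, 1, 1, 1}
TensorDescriptor bdesc;
bdesc.set(bias, 4);  // `bias` is a hypothetical 1-D float tensor on the GPU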

size_t dim = t_sizes.size();
  if (dim > static_cast<size_t>(MIOPEN_DIM_MAX) || pad > static_cast<size_t>(MIOPEN_DIM_MAX)) {
    // MIOPEN_DIM_MAX is a constant, not a preprocessor macro, so the stringize
    // trick used in the cuDNN version would print the identifier rather than
    // the value; build the message at runtime instead.
    throw std::runtime_error("MIOpen supports only up to " + std::to_string(MIOPEN_DIM_MAX) + " dimensions");
  }
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
for (size_t i = 0; i < dim; ++i) {
size[i] = static_cast<int>(t_sizes[i]);
stride[i] = static_cast<int>(t_strides[i]);
}
for (size_t i = dim; i < pad; ++i) {
size[i] = 1;
stride[i] = 1;
}
set(datatype, static_cast<int>(std::max(dim, pad)), size, stride);
}

std::string miopenTypeToString(miopenDataType_t dtype) {
switch (dtype) {
case miopenFloat:
return "miopenFloat";
case miopenHalf:
return "miopenHalf";
default:
std::ostringstream oss;
oss << "(unknown data-type " << static_cast<int>(dtype) << ")";
return oss.str();
}
}

std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) {
out << "TensorDescriptor " << static_cast<void*>(d.desc()) << "\n";
int nbDims = 4;
int dimA[MIOPEN_DIM_MAX];
int strideA[MIOPEN_DIM_MAX];
miopenDataType_t dtype;
miopenGetTensorDescriptor(d.desc(), &dtype, dimA, strideA);
out << " type = " << miopenTypeToString(dtype) << "\n";
out << " nbDims = " << nbDims << "\n";
// Read out only nbDims of the arrays!
out << " dimA = ";
for (auto i : ArrayRef<int>{dimA, static_cast<size_t>(nbDims)}) {
out << i << ", ";
}
out << "\n";
out << " strideA = ";
for (auto i : ArrayRef<int>{strideA, static_cast<size_t>(nbDims)}) {
out << i << ", ";
}
out << "\n";
return out;
}

void TensorDescriptor::print() { std::cout << *this; }

void FilterDescriptor::set(const at::Tensor &t, int64_t pad) {
auto dim = t.ndimension();
  if (dim > MIOPEN_DIM_MAX || pad > MIOPEN_DIM_MAX) {
    throw std::runtime_error("MIOpen supports only up to " + std::to_string(MIOPEN_DIM_MAX) + " dimensions");
  }
if (!t.is_contiguous()) {
throw std::runtime_error("MIOpen filters (a.k.a. weights) must be contiguous");
}
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
  for (int i = 0; i < dim; ++i) {
    size[i] = static_cast<int>(t.size(i));
  }
  for (int i = dim; i < pad; ++i) {
    size[i] = 1;
  }
  // Pad first so the stride loop also covers the padded trailing dimensions;
  // otherwise their strides would be left uninitialized.
  dim = std::max(dim, pad);
  for (int i = dim - 1; i >= 0; --i) {
    stride[i] = (i == dim - 1) ? 1 : stride[i + 1] * size[i + 1];
  }
  set(getDataType(t), static_cast<int>(dim), size, stride);
}
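A worked example of the stride computation above, assuming a standard contiguous weight layout:

// A contiguous weight of shape {8, 3, 5, 5} yields, right to left:
//   stride[3] = 1
//   stride[2] = stride[3] * size[3] = 5
//   stride[1] = stride[2] * size[2] = 25
//   stride[0] = stride[1] * size[1] = 75
// These are exactly the strides of a contiguous tensor, which is why the
// is_contiguous() check above is a precondition.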

}}
144 changes: 144 additions & 0 deletions aten/src/ATen/miopen/Descriptors.h
@@ -0,0 +1,144 @@
#pragma once

#include "Exceptions.h"

#include "miopen-wrapper.h"
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>

namespace at { namespace native {

inline int dataSize(miopenDataType_t dataType)
{
switch (dataType) {
case miopenHalf: return 2;
case miopenFloat: return 4;
default: return 8;
}
}

// For each size-1 dimension, rewrite its stride (in place) to the product of
// the sizes of all later dimensions, i.e. the stride that dimension would have
// if the tensor were contiguous. MIOpen, like cuDNN, expects this canonical
// form; strides of dimensions with size > 1 are left untouched.
static inline void fixSizeOneDimStride(int dim, const int *size, int *stride) {
  int64_t z = 1;
  for(int d = dim-1; d >= 0; d--)
  {
    if (size[d] == 1) {
      stride[d] = static_cast<int>(z);
    } else {
      z *= size[d];
    }
  }
}
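A short trace of the rewrite for a concrete shape:

// size = {2, 1, 3}, incoming stride = {3, 7, 1}
//   d = 2: size[2] == 3, so z becomes 3
//   d = 1: size[1] == 1, so stride[1] is rewritten to z = 3
//   d = 0: size[0] == 2, so z becomes 6 (stride[0] untouched)
// Result: stride = {3, 3, 1}; only size-1 dimensions are modified.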

template <typename T, miopenStatus_t (*dtor)(T*)>
struct DescriptorDeleter {
void operator()(T* x) {
if (x != nullptr) {
MIOPEN_CHECK(dtor(x));
}
}
};

// A generic class for wrapping MIOpen descriptor types. All you need
// is to give the underlying type the Descriptor_t points to (usually,
// if it's miopenTensorDescriptor_t it points to miopenTensorStruct),
// the constructor and the destructor. Subclasses are responsible
// for defining a set() function to actually set the descriptor.
//
// Descriptors default construct to a nullptr, and have a descriptor
// initialized the first time you call set() or any other initializing
// function.
template <typename T, miopenStatus_t (*ctor)(T**), miopenStatus_t (*dtor)(T*)>
class Descriptor
{
public:
// Use desc() to access the underlying descriptor pointer in
// a read-only fashion. Most client code should use this.
// If the descriptor was never initialized, this will return
// nullptr.
T* desc() const { return desc_.get(); }
T* desc() { return desc_.get(); }

  // Use mut_desc() to access the underlying descriptor pointer
// if you intend to modify what it points to (e.g., using
// miopenSetFooDescriptor). This will ensure that the descriptor
// is initialized. Code in this file will use this function.
T* mut_desc() { init(); return desc_.get(); }
protected:
void init() {
if (desc_ == nullptr) {
T* raw_desc;
MIOPEN_CHECK(ctor(&raw_desc));
desc_.reset(raw_desc);
}
}
private:
std::unique_ptr<T, DescriptorDeleter<T, dtor>> desc_;
};
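A usage sketch of the lazy-initialization contract, using the TensorDescriptor subclass defined just below (`input` is a hypothetical tensor):

TensorDescriptor td;    // holds nullptr; no MIOpen object created yet
td.set(input);          // set() -> mut_desc() -> miopenCreateTensorDescriptor
miopenTensorDescriptor_t raw = td.desc();  // read-only handle for MIOpen calls
// miopenDestroyTensorDescriptor runs automatically via DescriptorDeleter.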

class TensorDescriptor
: public Descriptor<miopenTensorDescriptor,
&miopenCreateTensorDescriptor,
&miopenDestroyTensorDescriptor>
{
public:
TensorDescriptor() {}
explicit TensorDescriptor(const at::Tensor &t, size_t pad = 0) {
set(t, pad);
}

void set(const at::Tensor &t, size_t pad = 0);
void set(miopenDataType_t dataType, IntList sizes, IntList strides, size_t pad = 0);

void print();

private:
void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
fixSizeOneDimStride(dim, size, stride);
MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
}
};

std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d);

class FilterDescriptor
: public Descriptor<miopenTensorDescriptor,
&miopenCreateTensorDescriptor,
&miopenDestroyTensorDescriptor>
{
public:
void set(const at::Tensor &t, int64_t pad = 0);

private:
void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
}
};

struct ConvolutionDescriptor
: public Descriptor<miopenConvolutionDescriptor,
&miopenCreateConvolutionDescriptor,
&miopenDestroyConvolutionDescriptor>
{
  void set(miopenDataType_t dataType, int dim, int* pad, int* stride, int* upscale /* aka dilation */, int groups) {
    // MIOpen's descriptor init is 2-D and takes scalar values, so only the
    // leading pad/stride entries are forwarded and dilation is fixed at 1.
    // mathType is computed for parity with the cuDNN descriptor (half
    // accumulates in float) but is currently unused here.
    miopenDataType_t mathType = dataType;
    if (dataType == miopenHalf) mathType = miopenFloat;
    (void)mathType;
    MIOPEN_CHECK(miopenInitConvolutionDescriptor(mut_desc(), miopenConvolution, *pad, *pad, *stride, *stride, 1, 1));
  }
};
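A sketch of describing a symmetric 2-D convolution with this wrapper; only the leading pad/stride entries reach MIOpen, and dilation is pinned to 1:

ConvolutionDescriptor cdesc;
int pad[2] = {1, 1};
int stride[2] = {2, 2};
int dilation[2] = {1, 1};  // accepted for interface parity, not forwarded
cdesc.set(miopenFloat, 2, pad, stride, dilation, /*groups=*/1);
// -> miopenInitConvolutionDescriptor(desc, miopenConvolution, 1, 1, 2, 2, 1, 1)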

union Constant
{
float f;
double d;
Constant(miopenDataType_t dataType, double value) {
if (dataType == miopenHalf || dataType == miopenFloat) {
f = static_cast<float>(value);
} else {
d = value;
}
}
};

}} // namespace
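The Constant union mirrors its cuDNN counterpart: scaling factors are handed to the library through untyped pointers, so the union stores the value at the width matching the descriptor's data type. A sketch:

Constant one(miopenFloat, 1.0);   // stores 1.0f in the float member
Constant zero(miopenFloat, 0.0);
// &one and &zero can then be passed as alpha/beta-style arguments of
// MIOpen calls that take per-datatype scaling factors.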
43 changes: 43 additions & 0 deletions aten/src/ATen/miopen/Exceptions.h
@@ -0,0 +1,43 @@
#pragma once

#include "miopen-wrapper.h"
#include <string>
#include <stdexcept>
#include <sstream>

struct THCState;

namespace at { namespace native {

class miopen_exception : public std::runtime_error {
public:
miopenStatus_t status;
miopen_exception(miopenStatus_t status, const char* msg)
: std::runtime_error(msg)
, status(status) {}
miopen_exception(miopenStatus_t status, const std::string& msg)
: std::runtime_error(msg)
, status(status) {}
};

inline void MIOPEN_CHECK(miopenStatus_t status)
{
if (status != miopenStatusSuccess) {
if (status == miopenStatusNotImplemented) {
throw miopen_exception(status, std::string(miopenGetErrorString(status)) +
". This error may appear if you passed in a non-contiguous input.");
}
throw miopen_exception(status, miopenGetErrorString(status));
}
}

inline void HIP_CHECK(hipError_t error)
{
if (error != hipSuccess) {
std::string msg("HIP error: ");
msg += hipGetErrorString(error);
throw std::runtime_error(msg);
}
}

}} // namespace at::native
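Usage sketch for the two checkers (the descriptor arguments are illustrative): every raw MIOpen or HIP call is wrapped so a failing status code surfaces as a C++ exception rather than being ignored.

MIOPEN_CHECK(miopenSetTensorDescriptor(desc, miopenFloat, 4, size, stride));
HIP_CHECK(hipDeviceSynchronize());
// The typed exception keeps the raw status available to callers:
try {
  MIOPEN_CHECK(st);
} catch (const at::native::miopen_exception& e) {
  // e.status distinguishes, e.g., miopenStatusNotImplemented
}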