-
Notifications
You must be signed in to change notification settings - Fork 68
Add support for MIOpen #22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5ee2a47
7c626af
0899ff0
64c6e0d
cf299b5
ac5d1ff
7b304f2
7afe0d8
2f62182
faa191d
78eea43
3774e54
ed19fc6
1d019fb
1f15fae
e77c732
7e3319f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
#include "Handles.h" | ||
#include "Handle.h" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, ugh, this is my fault. When I made a comment about naming this |
||
|
||
#include "ATen/cuda/Exceptions.h" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#include "Descriptors.h"

#include <ATen/ATen.h>

#include <algorithm>
#include <stdexcept>
#include <string>
|
||
namespace at { namespace native { | ||
|
||
namespace { | ||
|
||
// Map an ATen scalar type to the corresponding MIOpen data type.
// Only fp32 and fp16 tensors are representable; anything else throws.
inline miopenDataType_t getDataType(const at::Type& t) {
  switch (t.scalarType()) {
    case at::kFloat:
      return miopenFloat;
    case at::kHalf:
      return miopenHalf;
    default:
      throw std::runtime_error("TensorDescriptor only supports float and half tensors");
  }
}
|
||
// Convenience overload: derive the MIOpen data type from a tensor's type.
inline miopenDataType_t getDataType(const at::Tensor& t) {
  return getDataType(t.type());
}
|
||
} // anonymous namespace | ||
|
||
|
||
// Configure this descriptor from a tensor: dtype taken from the tensor,
// sizes/strides copied verbatim, shape padded out to at least 'pad' dims.
void TensorDescriptor::set(const at::Tensor &t, size_t pad) {
  set(getDataType(t), t.sizes(), t.strides(), pad);
}
|
||
static int MIOPEN_DIM_MAX = 4; | ||
|
||
void TensorDescriptor::set(miopenDataType_t datatype, IntList t_sizes, IntList t_strides, size_t pad) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All of this padding nonsense is to get around some weirdness in the cuDNN API where you have to expand out the dimension of tensors in some cases; the API won't broadcast them for you. Is this still true for MIOpen? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is almost certainly needed. Leaving it around. |
||
size_t dim = t_sizes.size(); | ||
if (dim > MIOPEN_DIM_MAX || pad > MIOPEN_DIM_MAX) | ||
#define _STR(X) #X | ||
#define STR(X) _STR(X) | ||
throw std::runtime_error("MIOpen supports only up to " STR(MIOPEN_DIM_MAX) " dimensions"); | ||
#undef _STR | ||
#undef STR | ||
int size[MIOPEN_DIM_MAX]; | ||
int stride[MIOPEN_DIM_MAX]; | ||
for (size_t i = 0; i < dim; ++i) { | ||
size[i] = static_cast<int>(t_sizes[i]); | ||
stride[i] = static_cast<int>(t_strides[i]); | ||
} | ||
for (size_t i = dim; i < pad; ++i) { | ||
size[i] = 1; | ||
stride[i] = 1; | ||
} | ||
set(datatype, static_cast<int>(std::max(dim, pad)), size, stride); | ||
} | ||
|
||
// Human-readable name of a MIOpen data type, for diagnostics.
std::string miopenTypeToString(miopenDataType_t dtype) {
  if (dtype == miopenFloat) {
    return "miopenFloat";
  }
  if (dtype == miopenHalf) {
    return "miopenHalf";
  }
  std::ostringstream oss;
  oss << "(unknown data-type " << static_cast<int>(dtype) << ")";
  return oss.str();
}
|
||
// Pretty-printer for debugging; mirrors the cuDNN descriptor dumper.
std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) {
  out << "TensorDescriptor " << static_cast<void*>(d.desc()) << "\n";
  // NOTE(review): the rank is hard-coded to 4 (== MIOPEN_DIM_MAX) because
  // this call to miopenGetTensorDescriptor does not report it; if the
  // descriptor was set with fewer dims, the trailing printed entries are
  // garbage.  Consider querying the real rank (e.g. via
  // miopenGetTensorDescriptorSize) -- TODO confirm against the MIOpen API.
  int nbDims = 4;
  int dimA[MIOPEN_DIM_MAX];
  int strideA[MIOPEN_DIM_MAX];
  miopenDataType_t dtype;
  miopenGetTensorDescriptor(d.desc(), &dtype, dimA, strideA);
  out << "    type = " << miopenTypeToString(dtype) << "\n";
  out << "    nbDims = " << nbDims << "\n";
  // Read out only nbDims of the arrays!
  out << "    dimA = ";
  for (auto i : ArrayRef<int>{dimA, static_cast<size_t>(nbDims)}) {
    out << i << ", ";
  }
  out << "\n";
  out << "    strideA = ";
  for (auto i : ArrayRef<int>{strideA, static_cast<size_t>(nbDims)}) {
    out << i << ", ";
  }
  out << "\n";
  return out;
}
|
||
void TensorDescriptor::print() { std::cout << *this; } | ||
|
||
// Describe a (contiguous) filter/weight tensor, padding the shape with
// trailing singleton dimensions until it has at least 'pad' dims.
// Throws if the rank (or requested padding) exceeds MIOPEN_DIM_MAX or if
// the tensor is not contiguous.
void FilterDescriptor::set(const at::Tensor &t, int64_t pad) {
  auto dim = t.ndimension();
  if (dim > MIOPEN_DIM_MAX || pad > MIOPEN_DIM_MAX) {
    // See TensorDescriptor::set: the old STR() macro printed the literal
    // identifier, not the value.
    throw std::runtime_error("MIOpen supports only up to "
                             + std::to_string(MIOPEN_DIM_MAX) + " dimensions");
  }
  if (!t.is_contiguous()) {
    // Filters must be contiguous so the stride computation below is valid.
    throw std::runtime_error("MIOpen filters (a.k.a. weights) must be contiguous");
  }
  int size[MIOPEN_DIM_MAX];
  int stride[MIOPEN_DIM_MAX];
  for (int i = 0; i < dim; ++i) {
    size[i] = static_cast<int>(t.size(i));
  }
  for (int i = dim; i < pad; ++i) {
    size[i] = 1;
  }
  dim = std::max(dim, pad);
  // Contiguous strides over the (possibly padded) shape.  Computing this
  // AFTER padding fixes a bug in the original code, which derived strides
  // from the un-padded rank and passed uninitialized stride entries for
  // the padded trailing dimensions.
  for (int i = dim - 1; i >= 0; --i) {
    stride[i] = (i == dim - 1) ? 1 : stride[i + 1] * size[i + 1];
  }
  set(getDataType(t), static_cast<int>(dim), size, stride);
}
|
||
}} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
#pragma once | ||
|
||
#include "Exceptions.h" | ||
|
||
#include "miopen-wrapper.h" | ||
#include <ATen/ATen.h> | ||
#include <ATen/TensorUtils.h> | ||
|
||
namespace at { namespace native { | ||
|
||
// Size in bytes of one element of the given MIOpen data type.
// Types other than half/float are assumed to be 8 bytes wide.
inline int dataSize(miopenDataType_t dataType)
{
  if (dataType == miopenHalf) {
    return 2;
  }
  if (dataType == miopenFloat) {
    return 4;
  }
  return 8;
}
|
||
// For every size-1 dimension, overwrite its stride in place with the
// number of elements to its right (the stride it would have in a
// contiguous layout).  Strides of singleton dims are mathematically
// arbitrary, and MIOpen-style descriptor APIs can reject "weird" values;
// strides of non-singleton dims are left untouched.  (The previous
// comment claimed ALL strides were rewritten, which was wrong.)
static inline void fixSizeOneDimStride(int dim, const int *size, int *stride) {
  int64_t z = 1;
  for (int d = dim - 1; d >= 0; d--)
  {
    if (size[d] == 1) {
      // Explicit narrowing: z is kept 64-bit to avoid overflow while
      // accumulating, but descriptor strides are ints.
      stride[d] = static_cast<int>(z);
    } else {
      z *= size[d];
    }
  }
}
|
||
// unique_ptr deleter that releases a MIOpen descriptor through the
// matching miopenDestroyXxx function, checking the returned status.
// Null is a no-op so default-constructed (never-initialized) descriptors
// are safe to destroy.
template <typename T, miopenStatus_t (*dtor)(T*)>
struct DescriptorDeleter {
  void operator()(T* x) {
    if (x != nullptr) {
      MIOPEN_CHECK(dtor(x));
    }
  }
};
|
||
// A generic class for wrapping MIOpen descriptor types.  All you need
// is to give the underlying type the Descriptor_t points to (usually,
// if it's miopenTensorDescriptor_t it points to miopenTensorStruct),
// the constructor and the destructor.  Subclasses are responsible
// for defining a set() function to actually set the descriptor.
//
// Descriptors default construct to a nullptr, and have a descriptor
// initialized the first time you call set() or any other initializing
// function.
template <typename T, miopenStatus_t (*ctor)(T**), miopenStatus_t (*dtor)(T*)>
class Descriptor
{
public:
  // Use desc() to access the underlying descriptor pointer in
  // a read-only fashion.  Most client code should use this.
  // If the descriptor was never initialized, this will return
  // nullptr.
  T* desc() const { return desc_.get(); }
  T* desc() { return desc_.get(); }

  // Use mut_desc() to access the underlying descriptor pointer
  // if you intend to modify what it points to (e.g., using
  // miopenSetFooDescriptor).  This will ensure that the descriptor
  // is initialized.  Code in this file will use this function.
  T* mut_desc() { init(); return desc_.get(); }
protected:
  // Lazily allocate the underlying descriptor on first mutable access.
  void init() {
    if (desc_ == nullptr) {
      T* raw_desc;
      MIOPEN_CHECK(ctor(&raw_desc));
      desc_.reset(raw_desc);
    }
  }
private:
  // Owning pointer; released via the matching miopenDestroyXxx call.
  std::unique_ptr<T, DescriptorDeleter<T, dtor>> desc_;
};
|
||
// Wraps miopenTensorDescriptor_t and knows how to describe an ATen tensor
// (dtype + sizes + strides), optionally padded with trailing singleton
// dimensions.
class TensorDescriptor
  : public Descriptor<miopenTensorDescriptor,
                      &miopenCreateTensorDescriptor,
                      &miopenDestroyTensorDescriptor>
{
public:
  TensorDescriptor() {}
  // Immediately describe tensor 't', padded out to at least 'pad' dims.
  explicit TensorDescriptor(const at::Tensor &t, size_t pad = 0) {
    set(t, pad);
  }

  void set(const at::Tensor &t, size_t pad = 0);
  void set(miopenDataType_t dataType, IntList sizes, IntList strides, size_t pad = 0);

  // Dump the descriptor (via operator<<) to stdout for debugging.
  void print();

private:
  void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
    // Normalize strides of size-1 dims before handing them to MIOpen;
    // descriptor APIs can be picky about strides of singleton dims.
    fixSizeOneDimStride(dim, size, stride);
    MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
  }
};
|
||
std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d); | ||
|
||
// Descriptor for convolution filters (weights).  MIOpen has no separate
// filter-descriptor type, so this reuses miopenTensorDescriptor.  Unlike
// TensorDescriptor, the private set() does NOT rewrite size-1 strides:
// FilterDescriptor::set (in Descriptors.cpp) requires contiguous tensors
// and computes contiguous strides itself.
class FilterDescriptor
  : public Descriptor<miopenTensorDescriptor,
                      &miopenCreateTensorDescriptor,
                      &miopenDestroyTensorDescriptor>
{
public:
  void set(const at::Tensor &t, int64_t pad = 0);

private:
  void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
    MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
  }
};
|
||
struct ConvolutionDescriptor | ||
: public Descriptor<miopenConvolutionDescriptor, | ||
&miopenCreateConvolutionDescriptor, | ||
&miopenDestroyConvolutionDescriptor> | ||
{ | ||
void set(miopenDataType_t dataType, int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups) { | ||
miopenDataType_t mathType = dataType; | ||
if (dataType == miopenHalf) mathType = miopenFloat; | ||
MIOPEN_CHECK(miopenInitConvolutionDescriptor(mut_desc(), miopenConvolution, *pad, *pad, *stride, *stride, 1, 1)); | ||
} | ||
}; | ||
|
||
union Constant | ||
{ | ||
float f; | ||
double d; | ||
Constant(miopenDataType_t dataType, double value) { | ||
if (dataType == miopenHalf || dataType == miopenFloat) { | ||
f = static_cast<float>(value); | ||
} else { | ||
d = value; | ||
} | ||
} | ||
}; | ||
|
||
}} // namespace |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#pragma once | ||
|
||
#include "miopen-wrapper.h" | ||
#include <string> | ||
#include <stdexcept> | ||
#include <sstream> | ||
|
||
struct THCState; | ||
|
||
namespace at { namespace native { | ||
|
||
// runtime_error that also carries the failing miopenStatus_t, so callers
// can inspect the original status code programmatically.
class miopen_exception : public std::runtime_error {
public:
  miopenStatus_t status;
  miopen_exception(miopenStatus_t status, const char* msg)
      : std::runtime_error(msg)
      , status(status) {}
  miopen_exception(miopenStatus_t status, const std::string& msg)
      : std::runtime_error(msg)
      , status(status) {}
};
|
||
// Throw miopen_exception (carrying the status) unless 'status' is success.
// miopenStatusNotImplemented gets an extra hint appended, since it is
// commonly triggered by non-contiguous inputs.
inline void MIOPEN_CHECK(miopenStatus_t status)
{
  if (status == miopenStatusSuccess) {
    return;
  }
  std::string msg(miopenGetErrorString(status));
  if (status == miopenStatusNotImplemented) {
    msg += ". This error may appear if you passed in a non-contiguous input.";
  }
  throw miopen_exception(status, msg);
}
|
||
inline void HIP_CHECK(hipError_t error) | ||
{ | ||
if (error != hipSuccess) { | ||
std::string msg("HIP error: "); | ||
msg += hipGetErrorString(error); | ||
throw std::runtime_error(msg); | ||
} | ||
} | ||
|
||
}} // namespace at::native |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, miopen will be mandatory, and not optional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes - given how MIOpen is part of the regular, free ROCm releases and in the radeon repo, it is easy enough for users to install that we don't think this is a burden while providing better performance on our hardware than the default kernels.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If miopen is mandatory, why do you need an `AT_MIOPEN_ENABLED` macro?