Skip to content

Commit 5c4a6f3

Browse files
authored
Merge pull request #137 from iotamudelta/master
Merge from upstream
2 parents c236adb + 69dfb5e commit 5c4a6f3

File tree

137 files changed

+5470
-789
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

137 files changed

+5470
-789
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ aten/src/ATen/cuda/CUDAConfig.h
2525
build/
2626
dist/
2727
docs/src/**/*
28+
docs/cpp/xml/
29+
docs/cpp/html/
30+
docs/cpp/api/
2831
test/.coverage
2932
test/cpp/api/mnist
3033
test/data/gpu_tensors.pt

aten/src/ATen/Allocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class DataPtr {
2323
public:
2424
// Choice of CPU here is arbitrary; if there's an "undefined" device
2525
// we could use that too
26-
DataPtr() : ptr_(), device_(kCPU) {}
26+
DataPtr() : ptr_(), device_(DeviceType::CPU) {}
2727
DataPtr(void* data, Device device)
2828
: ptr_(data), device_(device) {}
2929
DataPtr(void* data, void* ctx, DeleterFnPtr ctx_deleter, Device device)

aten/src/ATen/Backend.h

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,25 @@
33
#include <ATen/core/TensorTypeId.h>
44
#include <ATen/core/TensorTypeIdRegistration.h>
55
#include <ATen/core/Error.h>
6+
#include <ATen/core/DeviceType.h>
67

78
#include <stdexcept>
89

910
namespace at {
1011

12+
/**
13+
* This legacy enum class defines the set of backends supported by
14+
* old school, code generated Type-based ATen. The reason we are
15+
* sunsetting this enum class is because it doesn't allow for
16+
* open registration of backends. TensorTypeId is the replacement
17+
* for Backend which supports open registration.
18+
*
19+
* ARE YOU SURE YOU WANT TO USE THIS TYPE? Think about if SparseCPU/SparseCUDA
20+
* would make sense in your use case. If it doesn't make sense, maybe
21+
* you want DeviceType.
22+
*/
1123
enum class Backend { CPU, CUDA, SparseCPU, SparseCUDA, Undefined, NumOptions };
1224

13-
constexpr Backend kCPU = Backend::CPU;
14-
constexpr Backend kCUDA = Backend::CUDA;
15-
constexpr Backend kSparseCPU = Backend::SparseCPU;
16-
constexpr Backend kSparseCUDA = Backend::SparseCUDA;
17-
1825
static inline Backend toSparse(Backend b) {
1926
switch (b) {
2027
case Backend::CPU:
@@ -78,6 +85,71 @@ static inline TensorTypeId backendToTensorTypeId(Backend b) {
7885
}
7986
}
8087

88+
static inline DeviceType backendToDeviceType(Backend b) {
89+
switch (b) {
90+
case Backend::CPU:
91+
return DeviceType::CPU;
92+
case Backend::CUDA:
93+
return DeviceType::CUDA;
94+
case Backend::SparseCPU:
95+
return DeviceType::CPU;
96+
case Backend::SparseCUDA:
97+
return DeviceType::CUDA;
98+
case Backend::Undefined:
99+
AT_ERROR("Undefined backend is not a valid device type");
100+
default:
101+
AT_ERROR("Unknown backend");
102+
}
103+
}
104+
105+
static inline Backend deviceTypeToBackend(DeviceType d) {
106+
switch (d) {
107+
case DeviceType::CPU:
108+
return Backend::CPU;
109+
case DeviceType::CUDA:
110+
return Backend::CUDA;
111+
default:
112+
AT_ERROR("Unknown device type ", d);
113+
}
114+
}
115+
116+
static inline Backend backendToCPU(Backend b) {
117+
switch (b) {
118+
case Backend::CPU:
119+
return Backend::CPU;
120+
case Backend::CUDA:
121+
return Backend::CPU;
122+
case Backend::SparseCPU:
123+
return Backend::SparseCPU;
124+
case Backend::SparseCUDA:
125+
return Backend::SparseCPU;
126+
case Backend::Undefined:
127+
return Backend::Undefined;
128+
default:
129+
AT_ERROR("Unknown backend");
130+
}
131+
}
132+
133+
static inline Backend backendToCUDA(Backend b) {
134+
switch (b) {
135+
case Backend::CPU:
136+
return Backend::CUDA;
137+
case Backend::CUDA:
138+
return Backend::CUDA;
139+
case Backend::SparseCPU:
140+
return Backend::SparseCUDA;
141+
case Backend::SparseCUDA:
142+
return Backend::SparseCUDA;
143+
case Backend::Undefined:
144+
return Backend::Undefined;
145+
default:
146+
AT_ERROR("Unknown backend");
147+
}
148+
}
149+
150+
constexpr DeviceType kCPU = DeviceType::CPU;
151+
constexpr DeviceType kCUDA = DeviceType::CUDA;
152+
81153
static inline const char* toString(Backend b) {
82154
switch (b) {
83155
case Backend::CPU:

aten/src/ATen/Context.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ Context::Context()
3232
THSetDefaultErrorHandler(errorHandler,nullptr);
3333
THSetDefaultArgErrorHandler(argErrorHandler,nullptr);
3434

35-
generator_registry[static_cast<int>(Backend::CPU)]
35+
generator_registry[static_cast<int>(DeviceType::CPU)]
3636
.reset(new CPUGenerator(this));
3737
Type::registerCPU(this);
3838
}

aten/src/ATen/Context.h

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class AT_API Context {
2525
return type_registry[static_cast<int>(p)][static_cast<int>(s)].get();
2626
}
2727
Type * getTypeOpt(Backend p, ScalarType s) {
28-
initCUDAIfNeeded(p);
28+
if (p != Backend::Undefined) initCUDAIfNeeded(backendToDeviceType(p));
2929
auto type = getTypeRaw(p, s);
3030

3131
if(!type) {
@@ -42,11 +42,11 @@ class AT_API Context {
4242
if (!type) AT_ERROR(toString(p), toString(s), "Type is not enabled.");
4343
return *type;
4444
}
45-
Generator & defaultGenerator(Backend p) {
46-
initCUDAIfNeeded(p);
47-
auto & generator = generator_registry[static_cast<int>(p)];
45+
Generator & defaultGenerator(DeviceType device_type) {
46+
initCUDAIfNeeded(device_type);
47+
auto & generator = generator_registry[static_cast<int>(device_type)];
4848
if(!generator)
49-
AT_ERROR(toString(p), " backend type not enabled.");
49+
AT_ERROR(DeviceTypeName(device_type), " backend type not enabled.");
5050
return *generator;
5151
}
5252
bool hasMKL() const;
@@ -64,7 +64,7 @@ class AT_API Context {
6464
THCState* lazyInitCUDA() {
6565
std::call_once(thc_init,[&] {
6666
thc_state = detail::getCUDAHooks().initCUDA();
67-
generator_registry[static_cast<int>(Backend::CUDA)] =
67+
generator_registry[static_cast<int>(DeviceType::CUDA)] =
6868
detail::getCUDAHooks().initCUDAGenerator(this);
6969
detail::getCUDAHooks().registerCUDATypes(this);
7070
});
@@ -95,16 +95,17 @@ class AT_API Context {
9595
bool deterministicCuDNN() const;
9696
void setDeterministicCuDNN(bool);
9797
std::unique_ptr<Generator>
98-
generator_registry[static_cast<int>(Backend::NumOptions)];
98+
generator_registry[static_cast<int>(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)];
9999
private:
100100
// NB: type_registry has nullptr for all CUDA backends until
101101
// CUDA initialization has occurred
102102
std::unique_ptr<Type> type_registry
103103
[static_cast<int>(Backend::NumOptions)]
104104
[static_cast<int>(ScalarType::NumOptions)];
105-
void initCUDAIfNeeded(Backend p) {
106-
if(p == Backend::CUDA)
105+
void initCUDAIfNeeded(DeviceType p) {
106+
if (p == DeviceType::CUDA) {
107107
lazyInitCUDA();
108+
}
108109
}
109110
std::once_flag thc_init;
110111
bool enabled_cudnn = true;
@@ -132,6 +133,10 @@ static inline Type& getType(Backend p, ScalarType s) {
132133
return globalContext().getType(p, s);
133134
}
134135

136+
static inline Type& getType(DeviceType p, ScalarType s) {
137+
return globalContext().getType(deviceTypeToBackend(p), s);
138+
}
139+
135140
static inline Type& CPU(ScalarType s) {
136141
return getType(Backend::CPU, s);
137142
}

aten/src/ATen/Device.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
#pragma once
22

3-
#include <ATen/ScalarType.h>
3+
#include <ATen/ATenGeneral.h>
44
#include <ATen/core/Error.h>
55
#include <ATen/core/DeviceType.h>
66
#include <ATen/core/Error.h>
7+
#include <ATen/Backend.h>
78

89
#include <cstddef>
910
#include <iosfwd>
@@ -24,21 +25,6 @@ namespace at {
2425
struct Device {
2526
using Type = at::DeviceType;
2627

27-
/// Converts a `Backend` to a `DeviceType` if possible.
28-
static DeviceType backend_to_type(Backend backend) {
29-
switch (backend) {
30-
case kCPU:
31-
case kSparseCPU:
32-
return DeviceType::CPU;
33-
case kCUDA:
34-
case kSparseCUDA:
35-
return DeviceType::CUDA;
36-
default:
37-
AT_ERROR(
38-
"Invalid backend ", toString(backend), " for Device construction");
39-
}
40-
}
41-
4228
/// Constructs a new `Device` from a `DeviceType` and an optional device
4329
/// index.
4430
/* implicit */ Device(DeviceType type, int32_t index = -1)
@@ -60,11 +46,6 @@ struct Device {
6046
/// `<device-index>` optionally specifies a device index.
6147
/* implicit */ Device(const std::string& device_string);
6248

63-
/// Constructs a new `Device` from a `Backend` (which is converted to a
64-
/// `DeviceType`, if possible) and an optional device index.
65-
/* implicit */ Device(Backend backend, int32_t index = -1)
66-
: Device(backend_to_type(backend), index) {}
67-
6849
/// Returns true if the type and index of this `Device` matches that of
6950
/// `other`.
7051
bool operator==(const Device& other) const noexcept {

aten/src/ATen/Formatting.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi
250250
stream << "size:\n" << tensor_.sizes() << "\n";
251251
stream << "]";
252252
} else {
253-
Type& cpudouble = tensor_.type().toBackend(kCPU).toScalarType(kDouble);
253+
Type& cpudouble = tensor_.type().toBackend(Backend::CPU).toScalarType(kDouble);
254254
Tensor tensor = tensor_.toType(cpudouble).contiguous();
255255
if(tensor.ndimension() == 0) {
256256
stream << defaultfloat << tensor.data<double>()[0] << std::endl;

aten/src/ATen/TensorOptions.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <ATen/Backend.h>
34
#include <ATen/Context.h>
45
#include <ATen/Device.h>
56
#include <ATen/DeviceGuard.h>
@@ -67,7 +68,7 @@ struct AT_API TensorOptions {
6768
type_ = &type;
6869
}
6970
this->dtype(type.scalarType());
70-
this->device({type.backend(), device_index});
71+
this->device({backendToDeviceType(type.backend()), device_index});
7172
this->layout(type.layout());
7273
}
7374

@@ -84,7 +85,12 @@ struct AT_API TensorOptions {
8485
/// Constructs a `TensorOptions` object from a backend, forwarded to the
8586
/// `Device` constructor.
8687
/* implicit */ TensorOptions(Backend backend)
87-
: TensorOptions(Device(backend)) {}
88+
: TensorOptions(Device(backendToDeviceType(backend))) {}
89+
90+
/// Constructs a `TensorOptions` object from a device type, forwarded to the
91+
/// `Device` constructor.
92+
/* implicit */ TensorOptions(DeviceType device_type)
93+
: TensorOptions(Device(device_type)) {}
8894

8995
/// Constructs a `TensorOptions` object with the given dtype.
9096
/* implicit */ TensorOptions(ScalarType dtype) : TensorOptions() {
@@ -190,9 +196,9 @@ struct AT_API TensorOptions {
190196
Backend backend() const noexcept {
191197
Backend backend;
192198
if (device_.type() == Device::Type::CPU) {
193-
backend = (layout_ == kStrided) ? kCPU : kSparseCPU;
199+
backend = (layout_ == kStrided) ? Backend::CPU : Backend::SparseCPU;
194200
} else {
195-
backend = (layout_ == kStrided) ? kCUDA : kSparseCUDA;
201+
backend = (layout_ == kStrided) ? Backend::CUDA : Backend::SparseCUDA;
196202
}
197203
return backend;
198204
}

aten/src/ATen/core/Macros.h

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,30 +7,22 @@
77
// static library (in which case, saying the symbol is coming
88
// from a DLL would be incorrect).
99

10-
#define AT_CORE_EXPORT
11-
#define AT_CORE_IMPORT
12-
1310
#ifdef _WIN32
14-
#ifndef AT_CORE_STATIC_WINDOWS
15-
#undef AT_CORE_EXPORT
16-
#undef AT_CORE_IMPORT
17-
#define AT_CORE_EXPORT __declspec(dllexport)
18-
#define AT_CORE_IMPORT __declspec(dllimport)
19-
#endif // !defined(AT_CORE_STATIC_WINDOWS)
20-
#else // _WIN32
21-
#if defined(__GNUC__) || defined(__llvm__)
22-
#undef AT_CORE_EXPORT
23-
#undef AT_CORE_IMPORT
24-
#define AT_CORE_EXPORT __attribute__((__visibility__("default")))
25-
#define AT_CORE_IMPORT AT_CORE_EXPORT
26-
#endif // defined(__GNUC__) || defined(__llvm__)
27-
#endif // _WIN32
28-
11+
#if !defined(AT_CORE_STATIC_WINDOWS)
12+
// TODO: unify the controlling macros.
2913
#if defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
30-
#define AT_CORE_API AT_CORE_EXPORT
14+
#define AT_CORE_API __declspec(dllexport)
3115
#else // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
32-
#define AT_CORE_API AT_CORE_IMPORT
16+
#define AT_CORE_API __declspec(dllimport)
3317
#endif // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
18+
#else // !defined(AT_CORE_STATIC_WINDOWS)
19+
#define AT_CORE_API
20+
#endif // !defined(AT_CORE_STATIC_WINDOWS)
21+
#else // _WIN32
22+
#if defined(__GNUC__)
23+
#define AT_CORE_API __attribute__((__visibility__("default")))
24+
#endif // defined(__GNUC__)
25+
#endif // _WIN32
3426

3527
// Disable the copy and assignment operator for a class. Note that this will
3628
// disable the usage of the class in std containers.

aten/src/ATen/core/typeid.h

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -391,26 +391,53 @@ inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept {
391391
*
392392
* NOTE: the macro needs to be invoked in ::caffe2 namespace
393393
*/
394-
394+
// Implementation note: in MSVC, we will need to prepend the AT_CORE_API
395+
// keyword in order to get things compiled properly. On Linux, gcc seems to
396+
// create attribute ignored error for explicit template instantiations, see
397+
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0537r0.html
398+
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51930
399+
// and as a result, we define these two macros slightly differently.
400+
// TODO(jiayq): AT_CORE_API below is not correct, because we may use the
401+
// definition in third party dependent libraries. The proper way is to use
402+
// CAFFE2_EXPORT (which explicitly requires dllexport). Marking this as a
403+
// todo item when the unified build is finished.
404+
#ifdef _MSC_VER
395405
#define CAFFE_KNOWN_TYPE(T) \
396406
template <> \
397-
AT_CORE_EXPORT TypeIdentifier TypeMeta::Id<T>() { \
407+
AT_CORE_API TypeIdentifier TypeMeta::Id<T>() { \
398408
static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \
399409
static TypeNameRegisterer<T> registerer(type_id, #T); \
400410
return type_id; \
401411
}
412+
#else // _MSC_VER
413+
#define CAFFE_KNOWN_TYPE(T) \
414+
template <> \
415+
TypeIdentifier TypeMeta::Id<T>() { \
416+
static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \
417+
static TypeNameRegisterer<T> registerer(type_id, #T); \
418+
return type_id; \
419+
}
420+
#endif
402421

403422
/**
404423
* CAFFE_DECLARE_KNOWN_TYPE and CAFFE_DEFINE_KNOWN_TYPE are used
405424
* to preallocate ids for types that are queried very often so that they
406425
* can be resolved at compile time. Please use CAFFE_KNOWN_TYPE() instead
407426
* for your own types to allocate dynamic ids for them.
408427
*/
409-
#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \
410-
template <> \
411-
AT_CORE_EXPORT inline AT_CORE_API TypeIdentifier TypeMeta::Id<T>() { \
412-
return TypeIdentifier(PreallocatedId); \
428+
#ifdef _MSC_VER
429+
#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \
430+
template <> \
431+
inline AT_CORE_API TypeIdentifier TypeMeta::Id<T>() { \
432+
return TypeIdentifier(PreallocatedId); \
413433
}
434+
#else // _MSC_VER
435+
#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \
436+
template <> \
437+
inline TypeIdentifier TypeMeta::Id<T>() { \
438+
return TypeIdentifier(PreallocatedId); \
439+
}
440+
#endif
414441

415442
#define CONCAT_IMPL(x, y) x##y
416443
#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y)

0 commit comments

Comments
 (0)