diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60d69ef2d9bc6..488605d5ea459 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,11 +5,10 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
 # ---[ Project and semantic versioning.
 project(Caffe2 CXX C)
 
-set(CAFFE2_VERSION_MAJOR 0)
-set(CAFFE2_VERSION_MINOR 8)
-set(CAFFE2_VERSION_PATCH 2)
-set(CAFFE2_VERSION
-  "${CAFFE2_VERSION_MAJOR}.${CAFFE2_VERSION_MINOR}.${CAFFE2_VERSION_PATCH}")
+set(CMAKE_CXX_STANDARD 11)
+if (NOT MSVC)
+  set(CMAKE_C_STANDARD 11)
+endif()
 
 # One variable that determines whether the current cmake process is being run
 # with the main Caffe2 library. This is useful for building modules - if
@@ -134,6 +133,22 @@ if (ANDROID OR IOS)
   set(BUILD_ATEN_MOBILE ON)
 endif()
 
+# ---[ Utils
+# TODO: merge the following 3 files into cmake/public/utils.cmake.
+include(cmake/Utils.cmake)
+include(cmake/public/utils.cmake)
+
+# ---[ Version numbers for generated libraries
+set(TORCH_DEFAULT_VERSION "1.0.0")
+set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}" CACHE STRING "Torch build version")
+if (NOT TORCH_BUILD_VERSION)
+  # An empty string was specified so force version to the default
+  set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}"
+    CACHE STRING "Torch build version" FORCE)
+endif()
+caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION})
+caffe2_parse_version_str(CAFFE2 ${TORCH_BUILD_VERSION})
+
 # ---[ CMake scripts + modules
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
 
@@ -160,11 +175,6 @@ include(cmake/MiscCheck.cmake)
 # External projects
 include(ExternalProject)
 
-# ---[ Utils
-# TODO: merge the following 3 files into cmake/public/utils.cmake.
-include(cmake/Utils.cmake)
-include(cmake/public/utils.cmake)
-
 # ---[ Dependencies
 include(cmake/Dependencies.cmake)
 
@@ -294,6 +304,7 @@ include_directories(BEFORE ${PROJECT_BINARY_DIR})
 include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
 
 # ---[ Main build
+add_subdirectory(c10)
 add_subdirectory(caffe2)
 
 # --[ Documentation
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0f46fa1cf62a7..f0be7e770b97e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -262,9 +262,9 @@ than Linux, which are worth keeping in mind when fixing these problems.
 1. Symbols are NOT exported by default on Windows; instead, you have to explicitly
    mark a symbol as exported/imported in a header file with `__declspec(dllexport)` /
    `__declspec(dllimport)`. We have codified this pattern into a set of macros
-   which follow the convention `*_API`, e.g., `AT_API` inside ATen. (Every separate
-   shared library needs a unique macro name, because symbol visibility is on a per
-   shared library basis.)
+   which follow the convention `*_API`, e.g., `CAFFE2_API` inside Caffe2 and ATen.
+   (Every separate shared library needs a unique macro name, because symbol visibility
+   is on a per shared library basis. See c10/macros/Macros.h for more details.)
 
    The upshot is if you see an "unresolved external" error in your Windows build, this
    is probably because you forgot to mark a function with `*_API`.
However, there is diff --git a/README.md b/README.md index e0aa68bf8b3e2..918aac0627cf2 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing conda install -c mingfeima mkldnn # Add LAPACK support for the GPU -conda install -c pytorch magma-cuda80 # or magma-cuda90 if CUDA 9 +conda install -c pytorch magma-cuda92 # or [magma-cuda80 | magma-cuda91] depending on your cuda version ``` On macOS diff --git a/aten/src/ATen/CPUGeneral.h b/aten/src/ATen/CPUGeneral.h index b406669053dd8..04bd0aacb528f 100644 --- a/aten/src/ATen/CPUGeneral.h +++ b/aten/src/ATen/CPUGeneral.h @@ -1,12 +1,12 @@ #pragma once -// Using AT_API is crucial as otherwise you'll see +// Using CAFFE2_API is crucial as otherwise you'll see // linking errors using MSVC // See https://msdn.microsoft.com/en-us/library/a90k134d.aspx -// This header adds this if using AT_API +// This header adds this if using CAFFE2_API #include "ATen/core/ATenGeneral.h" namespace at { -AT_API void set_num_threads(int); -AT_API int get_num_threads(); +CAFFE2_API void set_num_threads(int); +CAFFE2_API int get_num_threads(); } diff --git a/aten/src/ATen/CPUTypeDefault.h b/aten/src/ATen/CPUTypeDefault.h index c9776b7b0a2cc..6a854c98d16e3 100644 --- a/aten/src/ATen/CPUTypeDefault.h +++ b/aten/src/ATen/CPUTypeDefault.h @@ -3,7 +3,7 @@ namespace at { -struct AT_API CPUTypeDefault : public TypeDefault { +struct CAFFE2_API CPUTypeDefault : public TypeDefault { CPUTypeDefault(TensorTypeId type_id, bool is_variable, bool is_undefined) : TypeDefault(type_id, is_variable, is_undefined) {} Allocator* allocator() const override; diff --git a/aten/src/ATen/Context.h b/aten/src/ATen/Context.h index 4e147cffabbe8..1f546f8574a78 100644 --- a/aten/src/ATen/Context.h +++ b/aten/src/ATen/Context.h @@ -22,10 +22,10 @@ namespace at { -struct Tensor; +class Tensor; -class AT_API Context { -public: +class CAFFE2_API Context { + public: Context(); TypeExtendedInterface* getNonVariableTypeRaw(Backend p, ScalarType s) { return static_cast(globalLegacyTypeDispatch().getNonVariableTypeRaw(p, s)); @@ -133,7 +133,7 @@ class AT_API Context { friend struct Type; }; -AT_API Context & globalContext(); +CAFFE2_API Context& globalContext(); static inline void init() { globalContext(); @@ -153,11 +153,11 @@ static inline TypeExtendedInterface& getNonVariableType(DeviceType p, ScalarType return globalContext().getNonVariableType(deviceTypeToBackend(p), s); } -AT_API TypeExtendedInterface& getType(TensorOptions options); -AT_API TypeExtendedInterface& getType(const TensorImpl*); -AT_API TypeExtendedInterface& getType(const Tensor&); +CAFFE2_API TypeExtendedInterface& getType(TensorOptions options); +CAFFE2_API TypeExtendedInterface& getType(const TensorImpl*); +CAFFE2_API TypeExtendedInterface& getType(const Tensor&); -AT_API Allocator* getCPUAllocator(); +CAFFE2_API Allocator* getCPUAllocator(); static inline TypeExtendedInterface& CPU(ScalarType s) { return getNonVariableType(Backend::CPU, s); diff --git a/aten/src/ATen/DLConvertor.h b/aten/src/ATen/DLConvertor.h index 5ed9899fc5500..d254fb568fd09 100644 --- a/aten/src/ATen/DLConvertor.h +++ b/aten/src/ATen/DLConvertor.h @@ -10,8 +10,8 @@ namespace at { -AT_API ScalarType toScalarType(const DLDataType& dtype); -AT_API DLManagedTensor * toDLPack(const Tensor& src); -AT_API Tensor fromDLPack(const DLManagedTensor* src); +CAFFE2_API ScalarType toScalarType(const DLDataType& dtype); +CAFFE2_API DLManagedTensor* toDLPack(const Tensor& src); +CAFFE2_API Tensor 
fromDLPack(const DLManagedTensor* src); } //namespace at diff --git a/aten/src/ATen/ExpandUtils.h b/aten/src/ATen/ExpandUtils.h index 3453155da5b1d..cd95271adf427 100644 --- a/aten/src/ATen/ExpandUtils.h +++ b/aten/src/ATen/ExpandUtils.h @@ -9,9 +9,12 @@ namespace at { -AT_API std::vector infer_size(IntList a, IntList b); -AT_API std::tuple, std::vector > inferExpandGeometry( - IntList tensor_sizes, IntList tensor_strides, IntList sizes); +CAFFE2_API std::vector infer_size(IntList a, IntList b); +CAFFE2_API std::tuple, std::vector> +inferExpandGeometry( + IntList tensor_sizes, + IntList tensor_strides, + IntList sizes); // avoid copy-construction of Tensor by using a reference_wrapper. inline void check_defined(std::initializer_list> tensors, const char *api_name) { diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h index 42b670bea0854..7ffb68a4963c0 100644 --- a/aten/src/ATen/SparseTensorImpl.h +++ b/aten/src/ATen/SparseTensorImpl.h @@ -5,7 +5,7 @@ #include "ATen/core/Error.h" namespace at { -struct AT_API SparseTensorImpl : public TensorImpl { +struct CAFFE2_API SparseTensorImpl : public TensorImpl { // Stored in COO format, indices + values. // INVARIANTS: @@ -157,11 +157,11 @@ struct AT_API SparseTensorImpl : public TensorImpl { sparseDims_ = sparseDims; denseDims_ = denseDims; - auto empty_indices = indices().type().tensor({sparseDims, 0}); + auto empty_indices = at::empty({sparseDims, 0}, indices().options()); std::vector values_size = {0}; auto dense_size = sizes().slice(sparseDims); values_size.insert(values_size.end(), dense_size.begin(), dense_size.end()); - auto empty_values = values().type().tensor(values_size); + auto empty_values = at::empty(values_size, values().options()); set_indices_and_values_unsafe(empty_indices, empty_values); refresh_numel(); } diff --git a/aten/src/ATen/TensorGeometry.cpp b/aten/src/ATen/TensorGeometry.cpp index b11c7bb159900..20ab6bb6690c5 100644 --- a/aten/src/ATen/TensorGeometry.cpp +++ b/aten/src/ATen/TensorGeometry.cpp @@ -12,8 +12,4 @@ bool TensorGeometry::is_contiguous() const { return at::geometry_is_contiguous(sizes_, strides_); } -Tensor TensorGeometry::zeros_with_stride(const Type& type) const { - return type.tensor(sizes_, strides_).zero_(); -} - } // namespace at diff --git a/aten/src/ATen/TensorGeometry.h b/aten/src/ATen/TensorGeometry.h index 5f441ed8fa71c..c989d2ca8f7d0 100644 --- a/aten/src/ATen/TensorGeometry.h +++ b/aten/src/ATen/TensorGeometry.h @@ -5,7 +5,7 @@ namespace at { -struct AT_API TensorGeometry { +struct CAFFE2_API TensorGeometry { TensorGeometry() : storage_offset_(0) {} explicit TensorGeometry(IntList sizes) @@ -30,9 +30,6 @@ struct AT_API TensorGeometry { // true if the tensor is contiguous bool is_contiguous() const; - // creates a new tensor with the sizes and strides of the source - Tensor zeros_with_stride(const Type& type) const; - int64_t dim() const { return sizes_.size(); } int64_t size(int64_t dim) const { dim = maybe_wrap_dim(dim, this->dim()); diff --git a/aten/src/ATen/TensorOperators.h b/aten/src/ATen/TensorOperators.h index 57a986b5d46f7..f4bdab0bf35d7 100644 --- a/aten/src/ATen/TensorOperators.h +++ b/aten/src/ATen/TensorOperators.h @@ -68,9 +68,9 @@ inline Tensor Tensor::operator[](int64_t index) const { #define AT_FORALL_BINARY_OPS(_) \ _(+,x.add(y), y.add(x)) \ _(*,x.mul(y), y.mul(x)) \ -_(-,x.sub(y), y.type().tensor().resize_(y.sizes()).fill_(x).sub_(y)) \ -_(/,x.div(y), y.type().tensor().resize_(y.sizes()).fill_(x).div_(y)) \ -_(%,x.remainder(y), 
y.type().tensor().resize_(y.sizes()).fill_(x).remainder_(y)) \ +_(-,x.sub(y), ::at::empty(y.sizes(), y.options()).fill_(x).sub_(y)) \ +_(/,x.div(y), ::at::empty(y.sizes(), y.options()).fill_(x).div_(y)) \ +_(%,x.remainder(y), ::at::empty(y.sizes(), y.options()).fill_(x).remainder_(y)) \ _(<,x.lt(y), y.gt(x)) \ _(<=,x.le(y), y.ge(x)) \ _(>,x.gt(y),y.lt(x)) \ diff --git a/aten/src/ATen/TensorUtils.h b/aten/src/ATen/TensorUtils.h index 2443bde4b482c..f65093a586004 100644 --- a/aten/src/ATen/TensorUtils.h +++ b/aten/src/ATen/TensorUtils.h @@ -12,7 +12,7 @@ namespace at { // make sense. These are particularly useful for native functions, // which do NO argument checking by default. -struct AT_API TensorArg { +struct CAFFE2_API TensorArg { Tensor tensor; const char* name; int pos; // 1-indexed @@ -22,7 +22,7 @@ struct AT_API TensorArg { const Tensor& operator*() const { return tensor; } }; -struct AT_API TensorGeometryArg { +struct CAFFE2_API TensorGeometryArg { TensorGeometry tensor; const char* name; int pos; // 1-indexed @@ -49,40 +49,80 @@ using CheckedFrom = const char*; // not TensorGeometryArg, because the Tensor to TensorGeometry // conversion will blow up if you have undefined tensors. -AT_API std::ostream& operator<<(std::ostream & out, TensorGeometryArg t); -AT_API void checkDim(CheckedFrom c, const TensorGeometryArg& t, int64_t dim); +CAFFE2_API std::ostream& operator<<(std::ostream& out, TensorGeometryArg t); +CAFFE2_API void checkDim( + CheckedFrom c, + const TensorGeometryArg& t, + int64_t dim); // NB: this is an inclusive-exclusive range -AT_API void checkDimRange(CheckedFrom c, const TensorGeometryArg& t, int64_t dim_start, int64_t dim_end); -AT_API void checkSameDim(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2); -AT_API void checkContiguous(CheckedFrom c, const TensorGeometryArg& t); -AT_API void checkAllContiguous(CheckedFrom c, at::ArrayRef ts); -AT_API void checkSize(CheckedFrom c, const TensorGeometryArg& t, IntList sizes); -AT_API void checkSize(CheckedFrom c, const TensorGeometryArg& t, int64_t dim, int64_t size); -AT_API void checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel); -AT_API void checkSameNumel(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2); -AT_API void checkAllSameNumel(CheckedFrom c, ArrayRef tensors); -AT_API void checkScalarType(CheckedFrom c, const TensorArg& t, ScalarType s); -AT_API void checkScalarTypes(CheckedFrom c, const TensorArg& t, at::ArrayRef l); -AT_API void checkSameGPU(CheckedFrom c, const TensorArg& t1, const TensorArg& t2); -AT_API void checkAllSameGPU(CheckedFrom c, ArrayRef tensors); -AT_API void checkSameType(CheckedFrom c, const TensorArg& t1, const TensorArg& t2); -AT_API void checkAllSameType(CheckedFrom c, ArrayRef tensors); -AT_API void checkSameSize(CheckedFrom c, const TensorArg& t1, const TensorArg& t2); -AT_API void checkDefined(CheckedFrom c, const TensorArg& t); -AT_API void checkAllDefined(CheckedFrom c, at::ArrayRef t); +CAFFE2_API void checkDimRange( + CheckedFrom c, + const TensorGeometryArg& t, + int64_t dim_start, + int64_t dim_end); +CAFFE2_API void checkSameDim( + CheckedFrom c, + const TensorGeometryArg& t1, + const TensorGeometryArg& t2); +CAFFE2_API void checkContiguous(CheckedFrom c, const TensorGeometryArg& t); +CAFFE2_API void checkAllContiguous(CheckedFrom c, at::ArrayRef ts); +CAFFE2_API void checkSize( + CheckedFrom c, + const TensorGeometryArg& t, + IntList sizes); +CAFFE2_API void checkSize( + CheckedFrom c, + const TensorGeometryArg& t, 
+ int64_t dim, + int64_t size); +CAFFE2_API void checkNumel( + CheckedFrom c, + const TensorGeometryArg& t, + int64_t numel); +CAFFE2_API void checkSameNumel( + CheckedFrom c, + const TensorGeometryArg& t1, + const TensorGeometryArg& t2); +CAFFE2_API void checkAllSameNumel(CheckedFrom c, ArrayRef tensors); +CAFFE2_API void checkScalarType( + CheckedFrom c, + const TensorArg& t, + ScalarType s); +CAFFE2_API void checkScalarTypes( + CheckedFrom c, + const TensorArg& t, + at::ArrayRef l); +CAFFE2_API void checkSameGPU( + CheckedFrom c, + const TensorArg& t1, + const TensorArg& t2); +CAFFE2_API void checkAllSameGPU(CheckedFrom c, ArrayRef tensors); +CAFFE2_API void checkSameType( + CheckedFrom c, + const TensorArg& t1, + const TensorArg& t2); +CAFFE2_API void checkAllSameType(CheckedFrom c, ArrayRef tensors); +CAFFE2_API void checkSameSize( + CheckedFrom c, + const TensorArg& t1, + const TensorArg& t2); +CAFFE2_API void checkDefined(CheckedFrom c, const TensorArg& t); +CAFFE2_API void checkAllDefined(CheckedFrom c, at::ArrayRef t); // FixMe: does TensorArg slow things down? -AT_API void checkBackend(CheckedFrom c, at::ArrayRef t, at::Backend backend); +CAFFE2_API void checkBackend( + CheckedFrom c, + at::ArrayRef t, + at::Backend backend); // Methods for getting data_ptr if tensor is defined -AT_API void * maybe_data_ptr(const Tensor& tensor); -AT_API void * maybe_data_ptr(const TensorArg& tensor); +CAFFE2_API void* maybe_data_ptr(const Tensor& tensor); +CAFFE2_API void* maybe_data_ptr(const TensorArg& tensor); // Return if the tensor geometry represented by `sizes` and `strides` is contiguous // Although we cache is_contiguous in tensor now, this is till useful because it // allows checking if a particular geometry is contiguous without explicitly // constructing a tensor, e.g., when you want to choose a kernel strategy based // on whether a subgeometry is contiguous. -AT_API bool geometry_is_contiguous(IntList sizes, IntList strides); - +CAFFE2_API bool geometry_is_contiguous(IntList sizes, IntList strides); } diff --git a/aten/src/ATen/Utils.h b/aten/src/ATen/Utils.h index c4473d1471ab7..21ade98cba79c 100644 --- a/aten/src/ATen/Utils.h +++ b/aten/src/ATen/Utils.h @@ -24,7 +24,7 @@ namespace at { -AT_API int _crash_if_asan(int); +CAFFE2_API int _crash_if_asan(int); static inline const Storage& checked_storage( const Storage& expr, diff --git a/aten/src/ATen/core/ATenCoreTest.h b/aten/src/ATen/core/ATenCoreTest.h index a6769b10b93ee..93f894ea66b97 100644 --- a/aten/src/ATen/core/ATenCoreTest.h +++ b/aten/src/ATen/core/ATenCoreTest.h @@ -4,5 +4,5 @@ namespace at { -AT_CORE_API int CoreTest(); +CAFFE2_API int CoreTest(); } diff --git a/aten/src/ATen/core/ATenGeneral.h b/aten/src/ATen/core/ATenGeneral.h index cbc1d6f13692f..cb946c93c9b96 100644 --- a/aten/src/ATen/core/ATenGeneral.h +++ b/aten/src/ATen/core/ATenGeneral.h @@ -1,8 +1,3 @@ #pragma once #include "ATen/core/Macros.h" - -// TODO: Merge the *_API macros. 
-#define AT_API AT_CORE_API -#define AT_EXPORT AT_CORE_EXPORT -#define AT_IMPORT AT_CORE_IMPORT diff --git a/aten/src/ATen/core/Allocator.h b/aten/src/ATen/core/Allocator.h index dc520008e3bbf..a3bae36efe4a4 100644 --- a/aten/src/ATen/core/Allocator.h +++ b/aten/src/ATen/core/Allocator.h @@ -115,7 +115,7 @@ struct Allocator { } }; -struct AT_CORE_API InefficientStdFunctionContext { +struct CAFFE2_API InefficientStdFunctionContext { std::unique_ptr> ptr_; InefficientStdFunctionContext( std::unique_ptr>&& ptr) diff --git a/aten/src/ATen/core/Backtrace.h b/aten/src/ATen/core/Backtrace.h index 9aa3ac826ce78..7a4e9e6b1dba2 100644 --- a/aten/src/ATen/core/Backtrace.h +++ b/aten/src/ATen/core/Backtrace.h @@ -8,7 +8,7 @@ namespace at { /// Utility to demangle a C++ symbol name. -AT_CORE_API std::string demangle(const char* name); +CAFFE2_API std::string demangle(const char* name); /// Returns the printable name of the type. template @@ -21,7 +21,7 @@ inline const char* demangle_type() { #endif // __GXX_RTTI } -AT_CORE_API std::string get_backtrace( +CAFFE2_API std::string get_backtrace( size_t frames_to_skip = 0, size_t maximum_number_of_frames = 64, bool skip_python_frames = true); diff --git a/aten/src/ATen/core/Device.h b/aten/src/ATen/core/Device.h index cd3efb6734e2d..a06d5f1e0d166 100644 --- a/aten/src/ATen/core/Device.h +++ b/aten/src/ATen/core/Device.h @@ -21,7 +21,7 @@ namespace at { /// 1. A negative index represents the current device, a non-negative index /// represents a specific, concrete device, /// 2. When the device type is CPU, the device index must be zero. -struct AT_CORE_API Device { +struct CAFFE2_API Device { using Type = at::DeviceType; /// Constructs a new `Device` from a `DeviceType` and an optional device @@ -92,7 +92,7 @@ struct AT_CORE_API Device { int32_t index_ = -1; }; -AT_CORE_API std::ostream& operator<<( +CAFFE2_API std::ostream& operator<<( std::ostream& stream, const at::Device& device); diff --git a/aten/src/ATen/core/DeviceType.h b/aten/src/ATen/core/DeviceType.h index 870b1e5bf9e53..a4342eade903a 100644 --- a/aten/src/ATen/core/DeviceType.h +++ b/aten/src/ATen/core/DeviceType.h @@ -26,11 +26,11 @@ enum class DeviceType : int32_t { ONLY_FOR_TEST = 20901701, // This device type is only for test. }; -AT_CORE_API std::string DeviceTypeName( +CAFFE2_API std::string DeviceTypeName( at::DeviceType d, bool lower_case = false); -AT_CORE_API std::ostream& operator<<(std::ostream& stream, at::DeviceType type); +CAFFE2_API std::ostream& operator<<(std::ostream& stream, at::DeviceType type); } // namespace at diff --git a/aten/src/ATen/core/Error.h b/aten/src/ATen/core/Error.h index de3231180f4f7..a36608256ddf0 100644 --- a/aten/src/ATen/core/Error.h +++ b/aten/src/ATen/core/Error.h @@ -19,7 +19,7 @@ namespace at { namespace detail { // Obtains the base name from a full path. -AT_CORE_API std::string StripBasename(const std::string& full_path); +CAFFE2_API std::string StripBasename(const std::string& full_path); inline std::ostream& _str(std::ostream& ss) { return ss; @@ -56,7 +56,7 @@ inline std::string str(const char* c_str) { } /// Represents a location in source code (for debugging). 
-struct AT_CORE_API SourceLocation { +struct CAFFE2_API SourceLocation { const char* function; const char* file; uint32_t line; @@ -71,7 +71,7 @@ std::ostream& operator<<(std::ostream& out, const SourceLocation& loc); /// /// NB: at::Error is handled specially by the default torch to suppress the /// backtrace, see torch/csrc/Exceptions.h -class AT_CORE_API Error : public std::exception { +class CAFFE2_API Error : public std::exception { std::vector msg_stack_; std::string backtrace_; @@ -128,7 +128,7 @@ class AT_CORE_API Error : public std::exception { } }; -class AT_CORE_API Warning { +class CAFFE2_API Warning { using handler_t = void (*)(const SourceLocation& source_location, const char* msg); @@ -152,7 +152,7 @@ class AT_CORE_API Warning { // A utility function to return an exception std::string by prepending its // exception type before its what() content -AT_CORE_API std::string GetExceptionString(const std::exception& e); +CAFFE2_API std::string GetExceptionString(const std::exception& e); } // namespace at diff --git a/aten/src/ATen/core/Formatting.h b/aten/src/ATen/core/Formatting.h index c6ac26b8a9e0e..4906770271f27 100644 --- a/aten/src/ATen/core/Formatting.h +++ b/aten/src/ATen/core/Formatting.h @@ -8,10 +8,13 @@ namespace at { -AT_API std::ostream& operator<<(std::ostream & out, IntList list); -AT_API std::ostream& operator<<(std::ostream & out, Backend b); -AT_API std::ostream& operator<<(std::ostream & out, const Type & t); -AT_API std::ostream& print(std::ostream& stream, const Tensor & tensor, int64_t linesize); +CAFFE2_API std::ostream& operator<<(std::ostream& out, IntList list); +CAFFE2_API std::ostream& operator<<(std::ostream& out, Backend b); +CAFFE2_API std::ostream& operator<<(std::ostream& out, const Type& t); +CAFFE2_API std::ostream& print( + std::ostream& stream, + const Tensor& tensor, + int64_t linesize); static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) { return print(out,t,80); } diff --git a/aten/src/ATen/core/Generator.h b/aten/src/ATen/core/Generator.h index b8894c4307b04..fce3d35636c27 100644 --- a/aten/src/ATen/core/Generator.h +++ b/aten/src/ATen/core/Generator.h @@ -5,7 +5,7 @@ namespace at { -struct AT_API Generator { +struct CAFFE2_API Generator { Generator() {}; Generator(const Generator& other) = delete; Generator(Generator&& other) = delete; diff --git a/aten/src/ATen/core/Half.h b/aten/src/ATen/core/Half.h index 47a8e8e52d2ad..ec72fe0067dcb 100644 --- a/aten/src/ATen/core/Half.h +++ b/aten/src/ATen/core/Half.h @@ -34,8 +34,8 @@ namespace at { namespace detail { -AT_CORE_API float halfbits2float(unsigned short bits); -AT_CORE_API unsigned short float2halfbits(float value); +CAFFE2_API float halfbits2float(unsigned short bits); +CAFFE2_API unsigned short float2halfbits(float value); } // namespace detail @@ -178,7 +178,7 @@ To checked_convert(From f, const char* name) { return convert(f); } -AT_CORE_API std::ostream& operator<<(std::ostream& out, const Half& value); +CAFFE2_API std::ostream& operator<<(std::ostream& out, const Half& value); } // namespace at diff --git a/aten/src/ATen/core/IdWrapper.h b/aten/src/ATen/core/IdWrapper.h index 58632ce111db5..268fe6725356f 100644 --- a/aten/src/ATen/core/IdWrapper.h +++ b/aten/src/ATen/core/IdWrapper.h @@ -22,7 +22,7 @@ namespace at { * for you, given the underlying type supports it. 
*/ template -class AT_CORE_API IdWrapper { +class CAFFE2_API IdWrapper { public: using underlying_type = UnderlyingType; using concrete_type = ConcreteType; diff --git a/aten/src/ATen/core/LegacyTypeDispatch.h b/aten/src/ATen/core/LegacyTypeDispatch.h index 578e02e739d0d..53cedf04e4601 100644 --- a/aten/src/ATen/core/LegacyTypeDispatch.h +++ b/aten/src/ATen/core/LegacyTypeDispatch.h @@ -30,7 +30,7 @@ namespace at { -struct AT_CORE_API LegacyTypeInitInterface { +struct CAFFE2_API LegacyTypeInitInterface { virtual ~LegacyTypeInitInterface() {} virtual void initCPU() const { AT_ERROR("cannot use CPU without ATen library"); @@ -42,15 +42,15 @@ struct AT_CORE_API LegacyTypeInitInterface { AT_ERROR("cannot use complex without ATen Complex library"); } }; -struct AT_CORE_API LegacyTypeInitArgs {}; +struct CAFFE2_API LegacyTypeInitArgs {}; AT_DECLARE_REGISTRY(LegacyTypeInitRegistry, LegacyTypeInitInterface, LegacyTypeInitArgs); #define REGISTER_LEGACY_TYPE_INIT(clsname) AT_REGISTER_CLASS(LegacyTypeInitRegistry, clsname, clsname) -AT_CORE_API const LegacyTypeInitInterface& getLegacyTypeInit(); +CAFFE2_API const LegacyTypeInitInterface& getLegacyTypeInit(); struct Type; -struct AT_CORE_API LegacyTypeDeleter { +struct CAFFE2_API LegacyTypeDeleter { using TypeDeleterFun = void(Type*); TypeDeleterFun *fn_ = nullptr; LegacyTypeDeleter() {} @@ -62,8 +62,8 @@ struct AT_CORE_API LegacyTypeDeleter { } }; -class AT_CORE_API LegacyTypeDispatch { -public: +class CAFFE2_API LegacyTypeDispatch { + public: using TypeUniquePtr = std::unique_ptr; // WARNING: This function has the precondition that you have // initialized the type you want to call. This initialization @@ -150,6 +150,6 @@ class AT_CORE_API LegacyTypeDispatch { [static_cast(ScalarType::NumOptions)]; }; -AT_CORE_API LegacyTypeDispatch & globalLegacyTypeDispatch(); +CAFFE2_API LegacyTypeDispatch& globalLegacyTypeDispatch(); } // namespace at diff --git a/aten/src/ATen/core/Macros.h b/aten/src/ATen/core/Macros.h index 244124475bc08..cb48b68782ab0 100644 --- a/aten/src/ATen/core/Macros.h +++ b/aten/src/ATen/core/Macros.h @@ -3,41 +3,7 @@ #include #include -// You can use the definition AT_CORE_STATIC_WINDOWS to control whether -// or not we apply __declspec. You will want to set this as -// -DAT_CORE_STATIC_WINDOWS=1 when compiling code which links -// against ATen/core on Windows, when ATen/core is built as a -// static library (in which case, saying the symbol is coming -// from a DLL would be incorrect). - -#ifdef _WIN32 -#if !defined(AT_CORE_STATIC_WINDOWS) -#define AT_CORE_EXPORT __declspec(dllexport) -#define AT_CORE_IMPORT __declspec(dllimport) -#else // !defined(AT_CORE_STATIC_WINDOWS) -#define AT_CORE_EXPORT -#define AT_CORE_IMPORT -#endif // !defined(AT_CORE_STATIC_WINDOWS) -#else // _WIN32 -#if defined(__GNUC__) -#define AT_CORE_EXPORT __attribute__((__visibility__("default"))) -#else // defined(__GNUC__) -#define AT_CORE_EXPORT -#endif // defined(__GNUC__) -#define AT_CORE_IMPORT AT_CORE_EXPORT -#endif // _WIN32 - -// AT_CORE_API is a macro that, depends on whether you are building the -// main library or not, resolves to either AT_CORE_EXPORT or -// AT_CORE_IMPORT. -// - -// TODO: unify the controlling macros. 
-#if defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) -#define AT_CORE_API AT_CORE_EXPORT -#else // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) -#define AT_CORE_API AT_CORE_IMPORT -#endif // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) +#include "c10/macros/Macros.h" #ifdef __CUDACC__ // Designates functions callable from the host (CPU) and the device (GPU) @@ -50,13 +16,6 @@ #define AT_DEVICE #endif -// Disable the copy and assignment operator for a class. Note that this will -// disable the usage of the class in std containers. -#define AT_DISABLE_COPY_AND_ASSIGN(classname) \ - classname(const classname&) = delete; \ - classname& operator=(const classname&) = delete - - #if defined(__ANDROID__) #define AT_ANDROID 1 #define AT_MOBILE 1 diff --git a/aten/src/ATen/core/OptionsGuard.h b/aten/src/ATen/core/OptionsGuard.h index b359638d53a61..fc078db6bf90b 100644 --- a/aten/src/ATen/core/OptionsGuard.h +++ b/aten/src/ATen/core/OptionsGuard.h @@ -20,7 +20,7 @@ struct DefaultTensorOptions { /// Defined in OptionsGuard.cpp because we can't use optional in headers, due /// to Windows and other compilers. /// TODO: The inability to use optional in headers is no longer true - AT_API static TensorOptions& get(); + CAFFE2_API static TensorOptions& get(); private: /// This is an optional because of compiler bugs that mis-initialize static @@ -64,8 +64,9 @@ struct OptionsGuard { #else // AT_MOBILE struct DefaultTensorOptions { - AT_API static const TensorOptions& get(); -private: + CAFFE2_API static const TensorOptions& get(); + + private: static TensorOptions options_; }; diff --git a/aten/src/ATen/core/Registry.h b/aten/src/ATen/core/Registry.h index 8f3caffe49154..98a3e4a18c725 100644 --- a/aten/src/ATen/core/Registry.h +++ b/aten/src/ATen/core/Registry.h @@ -44,7 +44,7 @@ inline void PrintOffendingKey(const std::string& key) { * objects. */ template -class AT_API Registry { +class CAFFE2_API Registry { public: typedef std::function Creator; @@ -114,7 +114,7 @@ class AT_API Registry { }; template -class AT_API Registerer { +class CAFFE2_API Registerer { public: Registerer( const SrcType& key, @@ -152,11 +152,12 @@ class AT_API Registerer { * declaration, as well as creating a convenient typename for its corresponding * registerer. */ -#define AT_DECLARE_TYPED_REGISTRY( \ - RegistryName, SrcType, ObjectType, PtrType, ...) \ - AT_API Registry, __VA_ARGS__>* RegistryName(); \ - typedef Registerer, __VA_ARGS__> \ - Registerer##RegistryName; \ +#define AT_DECLARE_TYPED_REGISTRY( \ + RegistryName, SrcType, ObjectType, PtrType, ...) 
\ + CAFFE2_API Registry, __VA_ARGS__>* \ + RegistryName(); \ + typedef Registerer, __VA_ARGS__> \ + Registerer##RegistryName; \ extern template class Registerer, __VA_ARGS__>; #define AT_DEFINE_TYPED_REGISTRY( \ diff --git a/aten/src/ATen/core/Scalar.h b/aten/src/ATen/core/Scalar.h index de01a56ce3374..f1b40d6f8053b 100644 --- a/aten/src/ATen/core/Scalar.h +++ b/aten/src/ATen/core/Scalar.h @@ -12,10 +12,10 @@ namespace at { -struct Tensor; +class Tensor; -class AT_API Scalar { -public: +class CAFFE2_API Scalar { + public: Scalar() : Scalar(int64_t(0)) {} #define DEFINE_IMPLICIT_CTOR(type,name,member) \ diff --git a/aten/src/ATen/core/ScalarType.h b/aten/src/ATen/core/ScalarType.h index 6fe88bfadb05f..fad2f765fe433 100644 --- a/aten/src/ATen/core/ScalarType.h +++ b/aten/src/ATen/core/ScalarType.h @@ -178,17 +178,17 @@ static inline ScalarType promoteTypes(ScalarType a, ScalarType b) { /* u1 i1 i2 i4 i8 f2 f4 f8 */ /* u1 */ { u1, i2, i2, i4, i8, f2, f4, f8 }, /* i1 */ { i2, i1, i2, i4, i8, f2, f4, f8 }, - /* i2 */ { i2, i2, i2, i4, i8, f4, f4, f8 }, - /* i4 */ { i4, i4, i4, i4, i8, f8, f4, f8 }, - /* i8 */ { i8, i8, i8, i8, i8, f8, f4, f8 }, - /* f2 */ { f2, f2, f4, f8, f8, f2, f4, f8 }, + /* i2 */ { i2, i2, i2, i4, i8, f2, f4, f8 }, + /* i4 */ { i4, i4, i4, i4, i8, f2, f4, f8 }, + /* i8 */ { i8, i8, i8, i8, i8, f2, f4, f8 }, + /* f2 */ { f2, f2, f2, f2, f2, f2, f4, f8 }, /* f4 */ { f4, f4, f4, f4, f4, f4, f4, f8 }, /* f8 */ { f8, f8, f8, f8, f8, f8, f8, f8 }, }; return _promoteTypesLookup[static_cast(a)][static_cast(b)]; } -struct Tensor; +class Tensor; typedef ArrayRef IntList; typedef ArrayRef TensorList; diff --git a/aten/src/ATen/core/SmallVector.h b/aten/src/ATen/core/SmallVector.h index 483144794f46e..cd2c2f51f4960 100644 --- a/aten/src/ATen/core/SmallVector.h +++ b/aten/src/ATen/core/SmallVector.h @@ -59,7 +59,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) { } // namespace detail /// This is all the non-templated stuff common to all SmallVectors. 
-class AT_CORE_API SmallVectorBase { +class CAFFE2_API SmallVectorBase { protected: void *BeginX, *EndX, *CapacityX; diff --git a/aten/src/ATen/core/SparseTensorRef.h b/aten/src/ATen/core/SparseTensorRef.h index 9c9fada2dc711..9a5bbddb783c0 100644 --- a/aten/src/ATen/core/SparseTensorRef.h +++ b/aten/src/ATen/core/SparseTensorRef.h @@ -2,7 +2,7 @@ namespace at { -struct Tensor; +class Tensor; struct SparseTensorRef { explicit SparseTensorRef(const Tensor& t): tref(t) {} const Tensor& tref; diff --git a/aten/src/ATen/core/Storage.h b/aten/src/ATen/core/Storage.h index ab201be88d630..cd42b33d12e2b 100644 --- a/aten/src/ATen/core/Storage.h +++ b/aten/src/ATen/core/Storage.h @@ -4,8 +4,8 @@ namespace at { -struct AT_API Storage { -public: +struct CAFFE2_API Storage { + public: Storage() {} Storage(c10::intrusive_ptr ptr) : storage_impl_(std::move(ptr)) {} Storage( diff --git a/aten/src/ATen/core/StorageImpl.h b/aten/src/ATen/core/StorageImpl.h index cc63bd0090666..bba2df4e0d1be 100644 --- a/aten/src/ATen/core/StorageImpl.h +++ b/aten/src/ATen/core/StorageImpl.h @@ -10,7 +10,7 @@ namespace at { struct Type; -struct AT_API StorageImpl : public c10::intrusive_ptr_target { +struct CAFFE2_API StorageImpl : public c10::intrusive_ptr_target { public: StorageImpl( caffe2::TypeMeta data_type, diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h index 7445c332200da..fa31741313db3 100644 --- a/aten/src/ATen/core/Tensor.h +++ b/aten/src/ATen/core/Tensor.h @@ -15,7 +15,7 @@ namespace at { struct Generator; struct Type; -struct Tensor; +class Tensor; struct TensorOptions; } // namespace at @@ -37,11 +37,12 @@ namespace at { // // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and // special care must be taken to handle this. -struct AT_API Tensor { +class CAFFE2_API Tensor { +public: Tensor(){}; Tensor(c10::intrusive_ptr tensor_impl) - : tensor_impl_(std::move(tensor_impl)) { - if (tensor_impl_.get() == nullptr) { + : impl_(std::move(tensor_impl)) { + if (impl_.get() == nullptr) { throw std::runtime_error("TensorBaseImpl with nullptr not supported"); } } @@ -50,25 +51,25 @@ struct AT_API Tensor { Tensor(Tensor&&) = default; int64_t dim() const { - return tensor_impl_->dim(); + return impl_->dim(); } TensorImpl * unsafeGetTensorImpl() const { - return tensor_impl_.get(); + return impl_.get(); } TensorImpl * unsafeReleaseTensorImpl() { - return tensor_impl_.release(); + return impl_.release(); } const c10::intrusive_ptr& getIntrusivePtr() const { - return tensor_impl_; + return impl_; } bool defined() const { - return tensor_impl_; + return impl_; } void reset() { - tensor_impl_.reset(); + impl_.reset(); } // The following overloads are very intruiging. 
Consider the following @@ -102,11 +103,11 @@ struct AT_API Tensor { // Tensor& operator=(const Tensor&) & = default; // Tensor& operator=(Tensor&&) & = default; Tensor& operator=(const Tensor& x) & { - tensor_impl_ = x.tensor_impl_; + impl_ = x.impl_; return *this; } Tensor& operator=(Tensor&& x) & { - tensor_impl_ = std::move(x.tensor_impl_); + impl_ = std::move(x.impl_); return *this; } @@ -115,37 +116,37 @@ struct AT_API Tensor { Tensor& operator=(Tensor&&) &&; bool is_same(const Tensor& other) const noexcept { - return tensor_impl_ == other.tensor_impl_; + return impl_ == other.impl_; } size_t use_count() const noexcept { - return tensor_impl_.use_count(); + return impl_.use_count(); } size_t weak_use_count() const noexcept { - return tensor_impl_.weak_use_count(); + return impl_.weak_use_count(); } const char * toString() const; IntList sizes() const { - return tensor_impl_->sizes(); + return impl_->sizes(); } IntList strides() const { - return tensor_impl_->strides(); + return impl_->strides(); } int64_t ndimension() const { return dim(); } Type & type() const { - return tensor_impl_->type(); + return impl_->type(); } TensorTypeId type_id() const { - return tensor_impl_->type_id(); + return impl_->type_id(); } ScalarType scalar_type() const { - return dataTypeToScalarType(tensor_impl_->dtype().id()); + return dataTypeToScalarType(impl_->dtype().id()); } const Storage& storage() const { - return tensor_impl_->storage(); + return impl_->storage(); } Tensor toType(const Type & t, bool non_blocking=false) const; Tensor & copy_(const Tensor & src, bool non_blocking=false); @@ -172,20 +173,12 @@ struct AT_API Tensor { template T * data() const; + template + T item() const; + // Purposely not defined here to avoid inlining void print() const; - //toLongData(), toFloatData() etc. - #define TO_TYPE_DATA(T,name,_) \ - T * to##name##Data() const; - AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(TO_TYPE_DATA) - #undef TO_TYPE_DATA - - #define TO_C_TYPE(T,name,_) \ - T toC##name () const; - AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(TO_C_TYPE) - #undef TO_C_TYPE - // Return a `TensorAccessor` for CPU `Tensor`s. You have to specify scalar type and // dimension. 
template @@ -230,18 +223,18 @@ struct AT_API Tensor { // ~~~~~ Autograd API ~~~~~ Tensor& set_requires_grad(bool requires_grad) { - tensor_impl_->set_requires_grad(requires_grad); + impl_->set_requires_grad(requires_grad); return *this; } bool requires_grad() const { - return tensor_impl_->requires_grad(); + return impl_->requires_grad(); } Tensor& grad() { - return tensor_impl_->grad(); + return impl_->grad(); } const Tensor& grad() const { - return tensor_impl_->grad(); + return impl_->grad(); } void set_data(Tensor new_data); @@ -653,35 +646,35 @@ struct AT_API Tensor { friend struct WeakTensor; protected: - c10::intrusive_ptr tensor_impl_; + c10::intrusive_ptr impl_; }; -struct AT_API WeakTensor { - WeakTensor(const Tensor& t) : weak_tensor_impl_(t.tensor_impl_) {} +struct CAFFE2_API WeakTensor { + WeakTensor(const Tensor& t) : weak_impl_(t.impl_) {} // XXX: this can return undefined tensors // Ideally it would be at::optional, but MSVC is too cool for that Tensor lock() const { - return Tensor(weak_tensor_impl_.lock()); + return Tensor(weak_impl_.lock()); } bool is_same(const WeakTensor& other) const noexcept { - return weak_tensor_impl_ == other.weak_tensor_impl_; + return weak_impl_ == other.weak_impl_; } size_t use_count() const noexcept { - return weak_tensor_impl_.use_count(); + return weak_impl_.use_count(); } size_t weak_use_count() const noexcept { - return weak_tensor_impl_.weak_use_count(); + return weak_impl_.weak_use_count(); } TensorImpl* unsafeGetTensorImpl() const { - return weak_tensor_impl_._unsafe_get_target(); + return weak_impl_._unsafe_get_target(); } private: - c10::weak_intrusive_ptr weak_tensor_impl_; + c10::weak_intrusive_ptr weak_impl_; }; } // namespace at diff --git a/aten/src/ATen/core/TensorImpl.h b/aten/src/ATen/core/TensorImpl.h index d2f98ff52780f..27232e2a3a8e9 100644 --- a/aten/src/ATen/core/TensorImpl.h +++ b/aten/src/ATen/core/TensorImpl.h @@ -16,11 +16,11 @@ namespace at { class Scalar; struct Type; struct Storage; -struct Tensor; +class Tensor; } // namespace at namespace at { -struct AT_API TensorImpl : public c10::intrusive_ptr_target { +struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { TensorImpl() = delete; TensorImpl(TensorTypeId type_id, const caffe2::TypeMeta& data_type, Allocator *allocator, bool is_variable); TensorImpl(Storage&& storage, TensorTypeId type_id, bool is_variable); @@ -69,9 +69,11 @@ struct AT_API TensorImpl : public c10::intrusive_ptr_target { // numbers. Otherwise, they behave like their non-wrapped equivalents. // See [Result type computation] in TensorIterator.h. 
bool is_wrapped_number() const { + AT_ASSERT(!is_variable()); return is_wrapped_number_; } void set_wrapped_number(bool value) { + AT_ASSERT(!is_variable()); AT_ASSERT(dim() == 0); is_wrapped_number_ = value; } @@ -97,10 +99,12 @@ struct AT_API TensorImpl : public c10::intrusive_ptr_target { template inline T * data() const { + AT_ASSERT(!is_variable()); return storage_.data() + storage_offset_; } inline void* data() const { + AT_ASSERT(!is_variable()); return static_cast( static_cast(storage_.data()) + data_type_.itemsize() * storage_offset_); @@ -108,6 +112,7 @@ struct AT_API TensorImpl : public c10::intrusive_ptr_target { template inline T * unsafe_data() const { + AT_ASSERT(!is_variable()); return storage_.unsafe_data() + storage_offset_; } @@ -155,6 +160,7 @@ struct AT_API TensorImpl : public c10::intrusive_ptr_target { // sizes/strides are in bounds for the storage that is allocated; // this is the responsibility of the caller void set_sizes_and_strides(at::IntList new_size, at::IntList new_stride) { + AT_ASSERT(!is_variable()); AT_CHECK( new_size.size() == new_stride.size(), "dimensionality of sizes (", @@ -174,12 +180,12 @@ struct AT_API TensorImpl : public c10::intrusive_ptr_target { bool is_variable() const { return is_variable_; }; private: - int64_t storage_offset_; + int64_t storage_offset_ = 0; std::vector sizes_; std::vector strides_; - bool is_contiguous_; - int64_t numel_; + bool is_contiguous_ = true; + int64_t numel_ = -1; int64_t compute_numel() const { int64_t n = 1; @@ -192,9 +198,11 @@ struct AT_API TensorImpl : public c10::intrusive_ptr_target { protected: void refresh_numel() { + AT_ASSERT(!is_variable()); numel_ = compute_numel(); } void refresh_contiguous() { + AT_ASSERT(!is_variable()); is_contiguous_ = compute_contiguous(); } TensorTypeId type_id_; diff --git a/aten/src/ATen/core/TensorMethods.h b/aten/src/ATen/core/TensorMethods.h index 789340dc1b91d..c6197b4fc2d08 100644 --- a/aten/src/ATen/core/TensorMethods.h +++ b/aten/src/ATen/core/TensorMethods.h @@ -1241,16 +1241,16 @@ inline Device Tensor::device() const { " but found ", \ at::toString(type().scalarType())); \ return static_cast(this->data_ptr()); \ - } \ - inline T* Tensor::to##name##Data() const { \ - return data(); \ } AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(DEFINE_CAST) #undef DEFINE_CAST -#define DEFINE_TO_C_TYPE(T,name,_) \ -inline T Tensor::toC##name () const { return _local_scalar().to##name (); } +#define DEFINE_TO_C_TYPE(T, name, _) \ + template <> \ + inline T Tensor::item() const { \ + return _local_scalar().to##name(); \ + } AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(DEFINE_TO_C_TYPE) #undef DEFINE_TO_C_TYPE diff --git a/aten/src/ATen/core/TensorOptions.h b/aten/src/ATen/core/TensorOptions.h index 2b589e9b13f48..4ae7b3452bddf 100644 --- a/aten/src/ATen/core/TensorOptions.h +++ b/aten/src/ATen/core/TensorOptions.h @@ -47,7 +47,7 @@ namespace at { /// at::zeros({2,2}, at::device({at::kCUDA, 1})); // place on device 1 /// at::zeros({2,2}, at::requires_grad()); /// -struct AT_API TensorOptions { +struct CAFFE2_API TensorOptions { TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {} /// Constructs the `TensorOptions` with defaults taken from the thread local diff --git a/aten/src/ATen/core/TensorTypeId.h b/aten/src/ATen/core/TensorTypeId.h index d01437bbe9197..ac584263c8018 100644 --- a/aten/src/ATen/core/TensorTypeId.h +++ b/aten/src/ATen/core/TensorTypeId.h @@ -17,7 +17,7 @@ using _tensorTypeId_underlyingType = uint8_t; * Dynamic type ID 
of a Tensor argument. It represents something like * CPUTensor, etc. */ -class AT_CORE_API TensorTypeId final +class CAFFE2_API TensorTypeId final : public at:: IdWrapper { public: @@ -32,10 +32,10 @@ class AT_CORE_API TensorTypeId final : IdWrapper(id) {} friend class TensorTypeIdCreator; - friend AT_CORE_API std::ostream& operator<<(std::ostream&, TensorTypeId); + friend CAFFE2_API std::ostream& operator<<(std::ostream&, TensorTypeId); }; -AT_CORE_API std::ostream& operator<<(std::ostream&, at::TensorTypeId); +CAFFE2_API std::ostream& operator<<(std::ostream&, at::TensorTypeId); } // namespace at diff --git a/aten/src/ATen/core/TensorTypeIdRegistration.h b/aten/src/ATen/core/TensorTypeIdRegistration.h index a7b30932cebe8..c252a6ef6e443 100644 --- a/aten/src/ATen/core/TensorTypeIdRegistration.h +++ b/aten/src/ATen/core/TensorTypeIdRegistration.h @@ -16,7 +16,7 @@ namespace at { -class AT_CORE_API TensorTypeIdCreator final { +class CAFFE2_API TensorTypeIdCreator final { public: TensorTypeIdCreator(); @@ -29,10 +29,10 @@ class AT_CORE_API TensorTypeIdCreator final { private: std::atomic last_id_; - AT_DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator); + C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator); }; -class AT_CORE_API TensorTypeIdRegistry final { +class CAFFE2_API TensorTypeIdRegistry final { public: TensorTypeIdRegistry(); @@ -43,10 +43,10 @@ class AT_CORE_API TensorTypeIdRegistry final { std::unordered_set registeredTypeIds_; std::mutex mutex_; - AT_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry); + C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry); }; -class AT_CORE_API TensorTypeIds final { +class CAFFE2_API TensorTypeIds final { public: static TensorTypeIds& singleton(); @@ -61,14 +61,14 @@ class AT_CORE_API TensorTypeIds final { TensorTypeIdCreator creator_; TensorTypeIdRegistry registry_; - AT_DISABLE_COPY_AND_ASSIGN(TensorTypeIds); + C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIds); }; inline constexpr at::TensorTypeId TensorTypeIds::undefined() noexcept { return TensorTypeIdCreator::undefined(); } -class AT_CORE_API TensorTypeIdRegistrar final { +class CAFFE2_API TensorTypeIdRegistrar final { public: TensorTypeIdRegistrar(); ~TensorTypeIdRegistrar(); @@ -78,14 +78,15 @@ class AT_CORE_API TensorTypeIdRegistrar final { private: at::TensorTypeId id_; - AT_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar); + C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar); }; inline at::TensorTypeId TensorTypeIdRegistrar::id() const noexcept { return id_; } -#define AT_DECLARE_TENSOR_TYPE(TensorName) AT_CORE_API at::TensorTypeId TensorName(); +#define AT_DECLARE_TENSOR_TYPE(TensorName) \ + CAFFE2_API at::TensorTypeId TensorName(); #define AT_DEFINE_TENSOR_TYPE(TensorName) \ at::TensorTypeId TensorName() { \ diff --git a/aten/src/ATen/core/Type.h b/aten/src/ATen/core/Type.h index fdec4c6408d4c..3a2ccbe1e45ed 100644 --- a/aten/src/ATen/core/Type.h +++ b/aten/src/ATen/core/Type.h @@ -33,7 +33,7 @@ class Context; struct Allocator; struct Generator; struct Storage; -struct Tensor; +class Tensor; static inline void noop_deleter(void*) {} @@ -76,7 +76,7 @@ enum class TypeID { NumOptions }; -struct AT_API Type { +struct CAFFE2_API Type { explicit Type(TensorTypeId type_id, bool is_variable, bool is_undefined) : type_id_(type_id), is_variable_(is_variable), is_undefined_(is_undefined) {} @@ -364,8 +364,6 @@ struct AT_API Type { virtual Tensor & log_normal_(Tensor & self, double mean, double std, Generator * generator) const = 0; virtual Tensor & exponential_(Tensor & self, double lambd, Generator * generator) const = 
0; virtual Tensor & geometric_(Tensor & self, double p, Generator * generator) const = 0; - virtual Tensor tensor(Storage storage, int64_t storageOffset, IntList size, IntList stride) const = 0; - virtual Tensor tensor(IntList size, IntList stride) const = 0; virtual Tensor abs(const Tensor & self) const = 0; virtual Tensor & abs_(Tensor & self) const = 0; virtual Tensor acos(const Tensor & self) const = 0; @@ -579,15 +577,6 @@ struct AT_API Type { virtual Tensor & sub_(Tensor & self, Scalar other, Scalar alpha) const = 0; virtual Tensor addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) const = 0; virtual Tensor & addmm_(Tensor & self, const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) const = 0; - virtual Tensor tensor() const = 0; - virtual Tensor tensor(IntList size) const = 0; - virtual Tensor native_sparse_coo_tensor(IntList size) const = 0; - virtual Tensor native_sparse_coo_tensor(const Tensor & indices, const Tensor & values) const = 0; - virtual Tensor native_sparse_coo_tensor(const Tensor & indices, const Tensor & values, IntList size) const = 0; - virtual Tensor sparse_coo_tensor(IntList size) const = 0; - virtual Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values) const = 0; - virtual Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, IntList size) const = 0; - virtual Tensor _native_sparse_coo_tensor_unsafe(const Tensor & indices, const Tensor & values, IntList size) const = 0; virtual Tensor & sparse_resize_(Tensor & self, IntList size, int64_t sparseDims, int64_t denseDims) const = 0; virtual Tensor & sparse_resize_and_clear_(Tensor & self, IntList size, int64_t sparseDims, int64_t denseDims) const = 0; virtual Tensor sparse_mask(const Tensor & self, SparseTensorRef mask) const = 0; @@ -611,7 +600,6 @@ struct AT_API Type { TensorTypeId type_id_; bool is_variable_; bool is_undefined_; - }; } // namespace at diff --git a/aten/src/ATen/core/UndefinedTensorImpl.h b/aten/src/ATen/core/UndefinedTensorImpl.h index 6c734950d90ca..7a6866187c5f2 100644 --- a/aten/src/ATen/core/UndefinedTensorImpl.h +++ b/aten/src/ATen/core/UndefinedTensorImpl.h @@ -4,8 +4,8 @@ namespace at { -struct AT_API UndefinedTensorImpl final : public TensorImpl { -public: +struct CAFFE2_API UndefinedTensorImpl final : public TensorImpl { + public: // Without this, we get: // error: identifier "at::UndefinedTensorImpl::_singleton" is undefined in device code // (ostensibly because the constexpr tricks MSVC into trying to compile this diff --git a/aten/src/ATen/core/UniqueVoidPtr.h b/aten/src/ATen/core/UniqueVoidPtr.h index a7c9d6119bfcd..daa6cdd373578 100644 --- a/aten/src/ATen/core/UniqueVoidPtr.h +++ b/aten/src/ATen/core/UniqueVoidPtr.h @@ -10,7 +10,7 @@ using DeleterFnPtr = void (*)(void*); namespace detail { // Does not delete anything -AT_CORE_API void deleteNothing(void*); +CAFFE2_API void deleteNothing(void*); // A detail::UniqueVoidPtr is an owning smart pointer like unique_ptr, but // with three major differences: diff --git a/aten/src/ATen/core/VariableHooksInterface.h b/aten/src/ATen/core/VariableHooksInterface.h index 09c972255ea6f..e8fd4da9e2753 100644 --- a/aten/src/ATen/core/VariableHooksInterface.h +++ b/aten/src/ATen/core/VariableHooksInterface.h @@ -20,8 +20,7 @@ namespace at { // // We may choose to absorb autograd into ATen, in which case this interface is obsolete. 
// -struct AT_API VariableHooksInterface { - +struct CAFFE2_API VariableHooksInterface { // This should never actually be implemented, but it is used to // squelch -Werror=non-virtual-dtor virtual ~VariableHooksInterface() {} @@ -34,18 +33,17 @@ struct AT_API VariableHooksInterface { // no-op if Variable not available; it'll get handled (if at all) when // libtorch.so gets loaded } - }; // NB: dummy argument to suppress "ISO C++11 requires at least one argument // for the "..." in a variadic macro" -struct AT_API VariableHooksArgs {}; +struct CAFFE2_API VariableHooksArgs {}; AT_DECLARE_REGISTRY(VariableHooksRegistry, VariableHooksInterface, VariableHooksArgs) #define REGISTER_VARIABLE_HOOKS(clsname) AT_REGISTER_CLASS(VariableHooksRegistry, clsname, clsname) namespace detail { - AT_API const VariableHooksInterface& getVariableHooks(); +CAFFE2_API const VariableHooksInterface& getVariableHooks(); } } // namespace at diff --git a/aten/src/ATen/core/blob.cpp b/aten/src/ATen/core/blob.cpp new file mode 100644 index 0000000000000..930255194639b --- /dev/null +++ b/aten/src/ATen/core/blob.cpp @@ -0,0 +1 @@ +#include diff --git a/aten/src/ATen/core/blob.h b/aten/src/ATen/core/blob.h new file mode 100644 index 0000000000000..17a09f33616e7 --- /dev/null +++ b/aten/src/ATen/core/blob.h @@ -0,0 +1,217 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace caffe2 { + +class Tensor; + +/** + * @brief Blob is a general container that hosts a typed pointer. + * + * A Blob hosts a pointer as well as its type, and takes charge of deleting it + * properly when the blob is deallocated or re-allocated with a new type. A blob + * could contain anything, although the most common case is to contain a Tensor. + */ +class CAFFE2_API Blob final : public c10::intrusive_ptr_target { + public: + using DestroyCall = void(void*); + + /** + * Initializes an empty Blob. + */ + Blob() noexcept : meta_(), pointer_(nullptr), destroy_(nullptr) {} + ~Blob() { + Reset(); + } + + Blob(Blob&& other) noexcept : Blob() { + swap(other); + } + + Blob& operator=(Blob&& other) noexcept { + Blob(std::move(other)).swap(*this); + return *this; + } + + /** + * Checks if the content stored in the blob is of type T. + */ + template + bool IsType() const noexcept { + return meta_.Match(); + } + + /** + * Returns the meta info of the blob. + */ + inline const TypeMeta& meta() const noexcept { + return meta_; + } + + /** + * Returns a printable typename of the blob. + */ + inline const char* TypeName() const noexcept { + return meta_.name(); + } + + /** + * @brief Gets the const reference of the stored object. The code checks if + * the stored object is of the desired type. + */ + // TODO(jerryzh): add a Get(DeviceType) function? + template + const T& Get() const { + AT_ASSERTM( + IsType(), + "wrong type for the Blob instance. Blob contains ", + meta_.name(), + " while caller expects ", + TypeMeta::TypeName()); + // TODO: after we add Get(DeviceType) + // and changed all the callsites, we can add + // a static assert here to enforce T != Tensor + return *static_cast(pointer_); + } + + const void* GetRaw() const noexcept { + return pointer_; + } + void* GetRaw() noexcept { + return pointer_; + } + + /** + * @brief Gets a mutable pointer to the stored object. + * + * If the current object is not of the right type, a new object is created + * and the old object is freed. Note that type T should have a default + * constructor. Otherwise, create the object yourself first, and use + * Reset(). 
+ */ + template + T* GetMutable() { + static_assert( + std::is_default_constructible::value, + "GetMutable can't be called with non-default-constructible types. " + "Try using specialized methods"); + if (IsType()) { + return static_cast(pointer_); + } else { + // TODO Re-enable logging + // VLOG(1) << "Create new mutable object " << TypeMeta::TypeName(); + return Reset(new T()); + } + } + + template + T* GetMutableOrNull() { + if (IsType()) { + return static_cast(pointer_); + } else { + return nullptr; + } + } + + /** + * Sets the underlying object to the allocated one. The Blob then takes over + * the ownership of the passed in pointer. If there is already an object in + * the Blob, the old object is freed. + * + * This is used when the underlying class T does not have a default ctor, or + * complex initializations needs to be done outside the blob. + */ + template + T* Reset(T* allocated) { + if (pointer_ && destroy_) { + destroy_(pointer_); + } + meta_ = TypeMeta::Make(); + pointer_ = static_cast(allocated); + destroy_ = &Destroy; + return allocated; + } + + /** + * Sets the underlying object to the allocated one, but does not take over + * the ownership of the passed in pointer. If there is already an object in + * the Blob, the old object is freed. + * + * Unlike Reset, this does not take over the ownership of the pointer and the + * caller is responsible for making sure that the lifetime of the allocated + * blob outlasts the lifetime of any access to this blob, until another Reset + * call is made or the blob is destructed. + */ + template + typename std::remove_const::type* ShareExternal( + typename std::remove_const::type* allocated) { + return static_cast(ShareExternal( + static_cast(allocated), + TypeMeta::Make::type>())); + } + + void* ShareExternal(void* allocated, const TypeMeta& meta) { + if (pointer_ && destroy_) { + destroy_(pointer_); + } + meta_ = meta; + pointer_ = static_cast(allocated); + destroy_ = nullptr; + return allocated; + } + + /** + * Resets the Blob to an empty one. + */ + inline void Reset() { + if (pointer_ && destroy_) { + destroy_(pointer_); + } + pointer_ = nullptr; + meta_ = TypeMeta(); + destroy_ = nullptr; + } + + /** + * @brief Swaps the underlying storage of two blobs. + */ + void swap(Blob& rhs) { + using std::swap; + swap(meta_, rhs.meta_); + swap(pointer_, rhs.pointer_); + swap(destroy_, rhs.destroy_); + } + + private: + /** + * @brief A destroy call that is used to properly deconstruct objects. + */ + template + static void Destroy(void* pointer) { + delete static_cast(pointer); + } + TypeMeta meta_; + void* pointer_ = nullptr; + DestroyCall* destroy_ = nullptr; + + C10_DISABLE_COPY_AND_ASSIGN(Blob); +}; + +inline void swap(Blob& lhs, Blob& rhs) { + lhs.swap(rhs); +} + +inline std::ostream& operator<<(std::ostream& out, const Blob& v) { + return out << "Blob[" << v.TypeName() << "]"; +} + +} // namespace caffe2 diff --git a/aten/src/ATen/core/context_base.h b/aten/src/ATen/core/context_base.h index 0a653ba0a1237..326cae5eb9691 100644 --- a/aten/src/ATen/core/context_base.h +++ b/aten/src/ATen/core/context_base.h @@ -25,7 +25,7 @@ class BaseContext; functions that are invoked statically before in Tensor class, e.g. New, We will merge this with Allocator later. */ -class AT_CORE_API BaseStaticContext { +class CAFFE2_API BaseStaticContext { public: virtual ~BaseStaticContext() noexcept {} @@ -55,7 +55,7 @@ class AT_CORE_API BaseStaticContext { * functions in the BaseContext class. * TODO: add docs after this is finalized. 
*/ -class AT_CORE_API BaseContext { +class CAFFE2_API BaseContext { public: virtual ~BaseContext() noexcept {} @@ -192,9 +192,9 @@ using at::BaseContext; using at::BaseStaticContext; using StaticContextMap = std::unordered_map; -AT_API StaticContextMap& GetStaticContexts(); -AT_API void set_static_context(at::DeviceType t, BaseStaticContext* ptr); -AT_API BaseStaticContext* get_static_context(at::DeviceType t); +CAFFE2_API StaticContextMap& GetStaticContexts(); +CAFFE2_API void set_static_context(at::DeviceType t, BaseStaticContext* ptr); +CAFFE2_API BaseStaticContext* get_static_context(at::DeviceType t); template struct StaticContextFunctionRegisterer { diff --git a/aten/src/ATen/core/intrusive_ptr.h b/aten/src/ATen/core/intrusive_ptr.h index 961915555a375..4dc3c501e9433 100644 --- a/aten/src/ATen/core/intrusive_ptr.h +++ b/aten/src/ATen/core/intrusive_ptr.h @@ -33,7 +33,7 @@ namespace c10 { // tells us if the object was allocated by us. If it wasn't, no // intrusive_ptr for you! -class AT_CORE_API intrusive_ptr_target { +class CAFFE2_API intrusive_ptr_target { // Note [Weak references for intrusive refcounting] // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // Here's the scheme: @@ -114,7 +114,7 @@ class AT_CORE_API intrusive_ptr_target { namespace detail { template -struct AT_CORE_EXPORT intrusive_target_default_null_type final { +struct C10_EXPORT intrusive_target_default_null_type final { static constexpr TTarget* singleton() noexcept { return nullptr; } @@ -136,7 +136,7 @@ class weak_intrusive_ptr; template < class TTarget, class NullType = detail::intrusive_target_default_null_type> -class AT_CORE_EXPORT intrusive_ptr final { +class C10_EXPORT intrusive_ptr final { private: static_assert( std::is_base_of::value, @@ -391,7 +391,7 @@ inline bool operator!=( template < typename TTarget, class NullType = detail::intrusive_target_default_null_type> -class AT_CORE_EXPORT weak_intrusive_ptr final { +class C10_EXPORT weak_intrusive_ptr final { private: static_assert( std::is_base_of::value, @@ -739,13 +739,13 @@ namespace std { // To allow intrusive_ptr and weak_intrusive_ptr inside std::unordered_map or // std::unordered_set, we need std::hash template -struct AT_CORE_EXPORT hash> { +struct C10_EXPORT hash> { size_t operator()(const c10::intrusive_ptr& x) const { return std::hash()(x.get()); } }; template -struct AT_CORE_EXPORT hash> { +struct C10_EXPORT hash> { size_t operator()(const c10::weak_intrusive_ptr& x) const { return std::hash()(x._unsafe_get_target()); } diff --git a/aten/src/ATen/core/ivalue.cpp b/aten/src/ATen/core/ivalue.cpp index 3d2b56893e718..8dfb1e8ebb75b 100644 --- a/aten/src/ATen/core/ivalue.cpp +++ b/aten/src/ATen/core/ivalue.cpp @@ -1,12 +1,15 @@ #include #include -#define TORCH_FORALL_TAGS(_) \ - _(None) _(Tensor) _(Double) _(Int) _(Tuple) _(IntList) _(DoubleList) _(String) _(TensorList) +#define TORCH_FORALL_TAGS(_) \ + _(None) \ + _(Tensor) _(Double) _(Int) _(Tuple) _(IntList) _(DoubleList) _(String) \ + _(TensorList) _(Blob) namespace torch { namespace jit { -AT_API c10::intrusive_ptr ConstantString::create(std::string str_) { +CAFFE2_API c10::intrusive_ptr ConstantString::create( + std::string str_) { return c10::make_intrusive(std::move(str_)); } diff --git a/aten/src/ATen/core/ivalue.h b/aten/src/ATen/core/ivalue.h index 914598f6ceb42..513845d4c12af 100644 --- a/aten/src/ATen/core/ivalue.h +++ b/aten/src/ATen/core/ivalue.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -14,7 +15,7 @@ template using Shared = c10::intrusive_ptr; 
// string -struct AT_API ConstantString final : c10::intrusive_ptr_target { +struct CAFFE2_API ConstantString final : c10::intrusive_ptr_target { private: const std::string str_; public: @@ -27,14 +28,14 @@ struct AT_API ConstantString final : c10::intrusive_ptr_target { operator const std::string & () const { return string(); } - AT_API friend std::ostream& operator<<( + CAFFE2_API friend std::ostream& operator<<( std::ostream& out, const ConstantString& v); }; // non-mutable list template -struct AT_CORE_EXPORT ConstantList final : c10::intrusive_ptr_target { +struct C10_EXPORT ConstantList final : c10::intrusive_ptr_target { private: const std::vector elements_; public: @@ -64,10 +65,12 @@ using DoubleList = ConstantList; // to mark whether that type is a subtype of c10::intrusive_ptr_target and needs // retain/release calls. -#define TORCH_FORALL_TAGS(_) \ - _(None) _(Tensor) _(Double) _(Int) _(Tuple) _(IntList) _(DoubleList) _(String) _(TensorList) +#define TORCH_FORALL_TAGS(_) \ + _(None) \ + _(Tensor) _(Double) _(Int) _(Tuple) _(IntList) _(DoubleList) _(String) \ + _(TensorList) _(Blob) -struct AT_API IValue final { +struct CAFFE2_API IValue final { IValue() : payload{0} , tag(Tag::None) @@ -125,6 +128,25 @@ struct AT_API IValue final { return at::Tensor(toIntrusivePtr()); } + IValue(caffe2::Blob blob) : tag(Tag::Blob), is_intrusive_ptr(true) { + // TODO (after Tensor merge) If we pass in a Blob holding a Tensor, extract + // and + // store it as a Tensor instead. + payload.as_intrusive_ptr = + c10::make_intrusive(std::move(blob)).release(); + } + bool isBlob() const { + return Tag::Blob == tag; + } + caffe2::Blob& toBlob() & { + AT_ASSERT(isBlob()); + return *static_cast(payload.as_intrusive_ptr); + } + const caffe2::Blob& toBlob() const& { + AT_ASSERT(isBlob()); + return *static_cast(payload.as_intrusive_ptr); + } + // Tuple IValue(c10::intrusive_ptr v); bool isTuple() const { return Tag::Tuple == tag; } @@ -277,7 +299,9 @@ struct AT_API IValue final { template T to() const &; - AT_API friend std::ostream& operator<<(std::ostream& out, const IValue& v); + CAFFE2_API friend std::ostream& operator<<( + std::ostream& out, + const IValue& v); private: // NOTE: IValue tags are intentionally private. In the future we may encode diff --git a/aten/src/ATen/core/typeid.h b/aten/src/ATen/core/typeid.h index 2ed81cb1e1c8a..9055746ea377d 100644 --- a/aten/src/ATen/core/typeid.h +++ b/aten/src/ATen/core/typeid.h @@ -47,7 +47,8 @@ class TypeMeta; * use TypeIdentifier with custom types. This is for example used to store the * dtype of tensors. */ -class AT_CORE_API TypeIdentifier final : public at::IdWrapper { +class CAFFE2_API TypeIdentifier final + : public at::IdWrapper { public: static TypeIdentifier createTypeId(); @@ -61,6 +62,8 @@ class AT_CORE_API TypeIdentifier final : public at::IdWrapper& gTypeNames(); -AT_CORE_API std::unordered_set& gRegisteredTypeNames(); +CAFFE2_API std::unordered_map& gTypeNames(); +CAFFE2_API std::unordered_set& gRegisteredTypeNames(); +inline const char* TypeIdentifier::name() const noexcept { + auto it = gTypeNames().find(*this); + assert(it != gTypeNames().end()); + return it->second.c_str(); +} -AT_CORE_API std::mutex& gTypeRegistrationMutex(); +CAFFE2_API std::mutex& gTypeRegistrationMutex(); template struct TypeNameRegisterer { @@ -139,7 +147,7 @@ struct TypeNameRegisterer { * stores some additional data such as the item size and the name of the type * for run-time inspection. 
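The new Blob tag added to IValue follows the same check-then-extract pattern as the existing tags: isBlob() tests the tag, toBlob() asserts it before handing back the payload. A rough usage sketch (the consume function is an illustrative assumption, not code from this patch; the Blob definition is assumed to be visible through the include added to ivalue.h above):

#include <ATen/core/ivalue.h>

void consume(torch::jit::IValue v) {
  if (v.isBlob()) {
    caffe2::Blob& blob = v.toBlob();  // AT_ASSERTs the Blob tag before returning
    (void)blob;                       // hypothetical downstream use of the blob
  }
}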
*/ -class AT_CORE_API TypeMeta { +class CAFFE2_API TypeMeta { public: using PlacementNew = void(void*, size_t); using TypedCopy = void(const void*, void*, size_t); @@ -240,7 +248,7 @@ class AT_CORE_API TypeMeta { * is generated during run-time. Do NOT serialize the id for storage. */ template - AT_CORE_API static TypeIdentifier Id(); + CAFFE2_API static TypeIdentifier Id(); /** * Returns the item size of the type. This is equivalent to sizeof(T). @@ -396,20 +404,16 @@ inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept { * * NOTE: the macro needs to be invoked in ::caffe2 namespace */ -// Implementation note: in MSVC, we will need to prepend the AT_CORE_API +// Implementation note: in MSVC, we will need to prepend the CAFFE2_API // keyword in order to get things compiled properly. in Linux, gcc seems to // create attribute ignored error for explicit template instantiations, see // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0537r0.html // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51930 // and as a result, we define these two macros slightly differently. -// TODO(jiayq): AT_CORE_API below is not correct, because we may use the -// definition in third party dependent libraries. The proper way is to use -// CAFFE2_EXPORT (which explicitly requires dllexport). Marking this as a -// todo item when the unified build is finished. #ifdef _MSC_VER #define CAFFE_KNOWN_TYPE(T) \ template <> \ - AT_CORE_EXPORT TypeIdentifier TypeMeta::Id() { \ + C10_EXPORT TypeIdentifier TypeMeta::Id() { \ static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \ static TypeNameRegisterer registerer(type_id, #T); \ return type_id; \ @@ -431,10 +435,10 @@ inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept { * for your own types to allocate dynamic ids for them. */ #ifdef _MSC_VER -#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \ - template <> \ - inline AT_CORE_API TypeIdentifier TypeMeta::Id() { \ - return TypeIdentifier(PreallocatedId); \ +#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \ + template <> \ + inline CAFFE2_API TypeIdentifier TypeMeta::Id() { \ + return TypeIdentifier(PreallocatedId); \ } #else // _MSC_VER #define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \ diff --git a/aten/src/ATen/cuda/CUDAContext.cpp b/aten/src/ATen/cuda/CUDAContext.cpp index e36178d47439a..0a4649d9c41ad 100644 --- a/aten/src/ATen/cuda/CUDAContext.cpp +++ b/aten/src/ATen/cuda/CUDAContext.cpp @@ -58,6 +58,10 @@ cusparseHandle_t getCurrentCUDASparseHandle() { return THCState_getCurrentSparseHandle(at::globalContext().getTHCState()); } +cublasHandle_t getCurrentCUDABlasHandle() { + return THCState_getCurrentBlasHandle(at::globalContext().getTHCState()); +} + } // namespace cuda } // namespace at diff --git a/aten/src/ATen/cuda/CUDAContext.h b/aten/src/ATen/cuda/CUDAContext.h index 279ac1d9b1e05..3a480d2ca4e4e 100644 --- a/aten/src/ATen/cuda/CUDAContext.h +++ b/aten/src/ATen/cuda/CUDAContext.h @@ -9,6 +9,7 @@ #include "cuda_runtime_api.h" #include "cusparse.h" +#include "cublas_v2.h" namespace at { namespace cuda { @@ -35,31 +36,31 @@ manage their own state. There is only a single CUDA context/state. 
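For reference, the registration macros above are what downstream code uses to teach TypeMeta about a new C++ type; the only change this patch makes is which export macro they expand to on MSVC. A sketch under the usual constraints (MyPayload is a hypothetical type; as the note above says, the macro must be invoked inside the ::caffe2 namespace):

#include <ATen/core/typeid.h>

struct MyPayload { int x; };

namespace caffe2 {
// After this, TypeMeta::Id<MyPayload>() returns a process-wide TypeIdentifier
// and the demangled type name is recorded in gTypeNames().
CAFFE_KNOWN_TYPE(MyPayload);
}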
*/ /* Device info */ -AT_API int64_t getNumGPUs(); +CAFFE2_API int64_t getNumGPUs(); -AT_API int64_t current_device(); +CAFFE2_API int64_t current_device(); -AT_API void set_device(int64_t device); +CAFFE2_API void set_device(int64_t device); -AT_API cudaDeviceProp* getCurrentDeviceProperties(); +CAFFE2_API cudaDeviceProp* getCurrentDeviceProperties(); -AT_API cudaDeviceProp* getDeviceProperties(int64_t device); +CAFFE2_API cudaDeviceProp* getDeviceProperties(int64_t device); /* Streams */ -AT_API CUDAStream createCUDAStream( - const bool isHighPriority = false -, int64_t device = -1); +CAFFE2_API CUDAStream +createCUDAStream(const bool isHighPriority = false, int64_t device = -1); -AT_API CUDAStream getDefaultCUDAStream(int64_t device = -1); -AT_API CUDAStream getCurrentCUDAStream(int64_t device = -1); +CAFFE2_API CUDAStream getDefaultCUDAStream(int64_t device = -1); +CAFFE2_API CUDAStream getCurrentCUDAStream(int64_t device = -1); -AT_API void setCurrentCUDAStream(CUDAStream stream); -AT_API void uncheckedSetCurrentCUDAStream(CUDAStream stream); +CAFFE2_API void setCurrentCUDAStream(CUDAStream stream); +CAFFE2_API void uncheckedSetCurrentCUDAStream(CUDAStream stream); -AT_API Allocator* getCUDADeviceAllocator(); +CAFFE2_API Allocator* getCUDADeviceAllocator(); /* Handles */ -AT_API cusparseHandle_t getCurrentCUDASparseHandle(); +CAFFE2_API cusparseHandle_t getCurrentCUDASparseHandle(); +CAFFE2_API cublasHandle_t getCurrentCUDABlasHandle(); } // namespace cuda diff --git a/aten/src/ATen/detail/CUDAHooksInterface.h b/aten/src/ATen/detail/CUDAHooksInterface.h index 4e60ee1597cc4..69149932ac7b9 100644 --- a/aten/src/ATen/detail/CUDAHooksInterface.h +++ b/aten/src/ATen/detail/CUDAHooksInterface.h @@ -47,7 +47,7 @@ constexpr const char* CUDA_HELP = // TODO: Consider putting the stub definitions in another class, so that one // never forgets to implement each virtual function in the real implementation // in CUDAHooks. This probably doesn't buy us much though. -struct AT_API CUDAHooksInterface { +struct CAFFE2_API CUDAHooksInterface { // This should never actually be implemented, but it is used to // squelch -Werror=non-virtual-dtor virtual ~CUDAHooksInterface() {} @@ -129,14 +129,14 @@ struct AT_API CUDAHooksInterface { // NB: dummy argument to suppress "ISO C++11 requires at least one argument // for the "..." in a variadic macro" -struct AT_API CUDAHooksArgs {}; +struct CAFFE2_API CUDAHooksArgs {}; AT_DECLARE_REGISTRY(CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs) #define REGISTER_CUDA_HOOKS(clsname) \ AT_REGISTER_CLASS(CUDAHooksRegistry, clsname, clsname) namespace detail { -AT_API const CUDAHooksInterface& getCUDAHooks(); +CAFFE2_API const CUDAHooksInterface& getCUDAHooks(); /// This class exists to let us access `cudaSetDevice`, `cudaGetDevice` and CUDA /// error handling functions, when CUDA is available. These functions will first @@ -144,7 +144,7 @@ AT_API const CUDAHooksInterface& getCUDAHooks(); /// the `cudaSetDevice`/`cudaGetDevice` functions. This allows us to access them /// with only a single pointer indirection, while virtual dispatch would require /// two (one for the virtual call, one for `cudaSetDevice`/`cudaGetDevice`). 
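getCurrentCUDABlasHandle() is the one genuinely new entry point in this header (the rest is the AT_API to CAFFE2_API rename); it exposes the THC-owned cuBLAS handle next to the existing cuSPARSE accessor. A small usage sketch, assuming a CUDA build with the usual ATen include paths (query_current_device is an illustrative name):

#include <ATen/cuda/CUDAContext.h>

void query_current_device() {
  cudaDeviceProp* prop     = at::cuda::getCurrentDeviceProperties();
  cublasHandle_t blas      = at::cuda::getCurrentCUDABlasHandle();    // new in this patch
  cusparseHandle_t sparse  = at::cuda::getCurrentCUDASparseHandle();
  (void)prop; (void)blas; (void)sparse;  // e.g. fed into magma_queue_create_from_cuda, see Gesv.cu below
}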
-struct AT_API DynamicCUDAInterface { +struct CAFFE2_API DynamicCUDAInterface { static void (*set_device)(int32_t); static void (*get_device)(int32_t*); static void (*unchecked_set_device)(int32_t); diff --git a/aten/src/ATen/detail/ComplexHooksInterface.h b/aten/src/ATen/detail/ComplexHooksInterface.h index 80ecfb6f26f83..e5d5c3ec2a83f 100644 --- a/aten/src/ATen/detail/ComplexHooksInterface.h +++ b/aten/src/ATen/detail/ComplexHooksInterface.h @@ -7,7 +7,7 @@ namespace at { class Context; -struct AT_API ComplexHooksInterface { +struct CAFFE2_API ComplexHooksInterface { virtual ~ComplexHooksInterface() {} virtual void registerComplexTypes(Context*) const { @@ -15,13 +15,13 @@ struct AT_API ComplexHooksInterface { } }; -struct AT_API ComplexHooksArgs {}; +struct CAFFE2_API ComplexHooksArgs {}; AT_DECLARE_REGISTRY(ComplexHooksRegistry, ComplexHooksInterface, ComplexHooksArgs) #define REGISTER_COMPLEX_HOOKS(clsname) \ AT_REGISTER_CLASS(ComplexHooksRegistry, clsname, clsname) namespace detail { -AT_API const ComplexHooksInterface& getComplexHooks(); +CAFFE2_API const ComplexHooksInterface& getComplexHooks(); } } diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py index 323701d69e837..189cadf0b6d1c 100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -154,7 +154,7 @@ def TypedDict(name, attrs, total=True): # type: ignore """) # add a native declaration for a native function NATIVE_DECLARATION = CodeTemplate("""\ -AT_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults}); +CAFFE2_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults}); """) # special method definition for factory functions in Functions.h diff --git a/aten/src/ATen/native/Activation.cpp b/aten/src/ATen/native/Activation.cpp index 30ccc616c6e7c..84f83946094c8 100644 --- a/aten/src/ATen/native/Activation.cpp +++ b/aten/src/ATen/native/Activation.cpp @@ -37,11 +37,11 @@ Tensor & celu_(Tensor & self, Scalar alpha) { } Tensor rrelu(const Tensor & self, Scalar lower, Scalar upper, bool training, Generator* generator) { - return at::rrelu_with_noise(self, self.type().tensor(), lower, upper, training, generator); + return at::rrelu_with_noise(self, at::empty({0}, self.options()), lower, upper, training, generator); } Tensor & rrelu_(Tensor & self, Scalar lower, Scalar upper, bool training, Generator* generator) { - return at::rrelu_with_noise_(self, self.type().tensor(), lower, upper, training, generator); + return at::rrelu_with_noise_(self, at::empty({0}, self.options()), lower, upper, training, generator); } // ----------------------------------- diff --git a/aten/src/ATen/native/BinaryOps.cpp b/aten/src/ATen/native/BinaryOps.cpp index 517d00164e37f..5ddb36bb5b4dd 100644 --- a/aten/src/ATen/native/BinaryOps.cpp +++ b/aten/src/ATen/native/BinaryOps.cpp @@ -16,7 +16,7 @@ DEFINE_DISPATCH(div_stub); Tensor& add_out(Tensor& result, const Tensor& self, const Tensor& other, Scalar alpha) { if (other.is_sparse()) { if (!result.defined()) { - result = self.type().tensor(); + result = at::empty({0}, self.options()); } if (self.is_sparse()) { at::_sparse_add_out(result, self, other, alpha); @@ -44,7 +44,7 @@ Tensor& add_(Tensor& self, const Tensor& other, Scalar alpha) { Tensor& div_out(Tensor& result, const Tensor& self, const Tensor& other) { if (self.is_sparse()) { if (!result.defined()) { - result = self.type().tensor(); + result = at::empty({0}, self.options()); } if (other.dim() != 0) { AT_ERROR("div(): sparse division only supports 
division by a scalar ", @@ -69,7 +69,7 @@ Tensor& div_(Tensor& self, const Tensor& other) { Tensor& mul_out(Tensor& result, const Tensor& self, const Tensor& other) { if (self.is_sparse() || other.is_sparse()) { if (!result.defined()) { - result = self.type().tensor(); + result = at::empty({0}, self.options()); } return at::_sparse_mul_out(result, self, other); } @@ -90,7 +90,7 @@ Tensor& mul_(Tensor& self, const Tensor& other) { Tensor& sub_out(Tensor& result, const Tensor& self, const Tensor& other, Scalar alpha) { if (other.is_sparse()) { if (!result.defined()) { - result = self.type().tensor(); + result = at::empty({0}, self.options()); } if (!self.sizes().equals(other.sizes())) { AT_ERROR("sizes do not match"); diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp index cee0ccd212f5c..77bc209c7036d 100644 --- a/aten/src/ATen/native/Convolution.cpp +++ b/aten/src/ATen/native/Convolution.cpp @@ -318,7 +318,7 @@ at::Tensor _convolution( weight = view4d(weight); } - auto output = input.type().tensor(); + auto output = at::empty({0}, input.options()); if (params.is_depthwise(input, weight)) { /* output.resize_(output_size(input, weight)); */ diff --git a/aten/src/ATen/native/ConvolutionTBC.cpp b/aten/src/ATen/native/ConvolutionTBC.cpp index 0c2ac96dce806..8b9779313bf89 100644 --- a/aten/src/ATen/native/ConvolutionTBC.cpp +++ b/aten/src/ATen/native/ConvolutionTBC.cpp @@ -33,11 +33,11 @@ Tensor conv_tbc(const Tensor& self, const Tensor& weight, const Tensor& bias, in "the weight tensor (output channels)."); // input * weights + bias -> output_features - Tensor output = self.type().tensor({ + Tensor output = at::empty({ olen, input_size[1], weight_size[2], - }); + }, self.options()); output.copy_(bias.expand(output.sizes())); for (int k = 0; k < kw; k++) { int iShift = std::max(0, static_cast(k - real_pad)); diff --git a/aten/src/ATen/native/DispatchStub.h b/aten/src/ATen/native/DispatchStub.h index 42ef6a4f6bb5f..c803ecd3f353b 100644 --- a/aten/src/ATen/native/DispatchStub.h +++ b/aten/src/ATen/native/DispatchStub.h @@ -49,10 +49,10 @@ enum class CPUCapability { CPUCapability get_cpu_capability(); template -struct AT_API DispatchStub; +struct CAFFE2_API DispatchStub; template -struct AT_API DispatchStub { +struct CAFFE2_API DispatchStub { using FnPtr = rT (*) (Args...); template @@ -114,9 +114,9 @@ struct RegisterDispatch { // adding parentheses and using helper struct to get rid of the parentheses, do // not work with MSVC. So do a `using`-declaration if you need to pass in such // `fn`, e.g., grid_sampler_2d_backward_cpu_kernel in GridSampleKernel.h. 
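The bulk of the native/*.cpp churn in this patch is mechanical: self.type().tensor(...) becomes an at:: factory call taking the sizes plus self.options(), so the result inherits dtype, device, and layout without going through Type. A sketch of the correspondence (make_like and make_index_buffer are hypothetical helpers, shown only to pair the old and new spellings):

#include <ATen/ATen.h>

// was: Tensor out = self.type().tensor(self.sizes());
at::Tensor make_like(const at::Tensor& self) {
  return at::empty(self.sizes(), self.options());
}

// was: Tensor idx = self.type().toScalarType(kLong).tensor();
at::Tensor make_index_buffer(const at::Tensor& self) {
  return at::empty({0}, self.options().dtype(at::kLong));
}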
-#define DECLARE_DISPATCH(fn, name) \ +#define DECLARE_DISPATCH(fn, name) \ struct name : DispatchStub {}; \ - extern AT_API struct name name + extern CAFFE2_API struct name name #define DEFINE_DISPATCH(name) struct name name diff --git a/aten/src/ATen/native/Distance.cpp b/aten/src/ATen/native/Distance.cpp index 08f306869d89f..f075269291d64 100644 --- a/aten/src/ATen/native/Distance.cpp +++ b/aten/src/ATen/native/Distance.cpp @@ -26,7 +26,7 @@ Tensor _pdist_forward(const Tensor& self, const double p) { AT_CHECK(self.is_contiguous(), "_pdist_forward requires contiguous input"); auto device = self.type().device_type(); AT_CHECK(device == kCPU || device == kCUDA, "_pdist_forward only supports CPU and CUDA devices, got: ", device); - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); if (self.size(0) <= 1) { result.resize_({0}); } else { diff --git a/aten/src/ATen/native/Distributions.cpp b/aten/src/ATen/native/Distributions.cpp index 3a2d1da5bd9a5..9810c9128980e 100644 --- a/aten/src/ATen/native/Distributions.cpp +++ b/aten/src/ATen/native/Distributions.cpp @@ -173,7 +173,7 @@ Tensor& bernoulli_scalar_cpu_(Tensor& self, double p, Generator* gen) { Tensor _standard_gamma_grad_cpu(const Tensor& self, const Tensor& output) { - Tensor ret = self.type().tensor(self.sizes()); + Tensor ret = at::empty(self.sizes(), self.options()); AT_DISPATCH_FLOATING_TYPES(self.type(), "_standard_gamma_grad", [&] { CPU_tensor_apply3(ret, self, output, [](scalar_t& ret_val, const scalar_t& self_val, const scalar_t &output_val) { diff --git a/aten/src/ATen/native/Embedding.cpp b/aten/src/ATen/native/Embedding.cpp index 99fa4c701d4bb..72518fbd4a0e8 100644 --- a/aten/src/ATen/native/Embedding.cpp +++ b/aten/src/ATen/native/Embedding.cpp @@ -66,12 +66,12 @@ Tensor embedding_sparse_backward( int64_t num_features = grad_.size(-1); auto weight_size = std::array{{ num_weights, num_features }}; - auto& dense_type = grad.type(); + auto dense_options = grad.options(); // check if all our grad come from padding_idx if (grad.numel() == 0) { - return at::_sparse_coo_tensor_unsafe(indices_.type().tensor({1, 0}), - dense_type.tensor({0, num_features}), + return at::_sparse_coo_tensor_unsafe(at::empty({1, 0}, indices_.options()), + at::empty({0, num_features}, dense_options), weight_size); } @@ -168,7 +168,7 @@ Tensor & embedding_renorm_cpu_( continue; } auto row = self[sorted_indices[i]]; - auto norm = row.norm(norm_type).toCDouble(); + auto norm = row.norm(norm_type).item(); if (norm > max_norm) { auto scale = max_norm / (norm + 1e-7); row *= scale; diff --git a/aten/src/ATen/native/Indexing.cpp b/aten/src/ATen/native/Indexing.cpp index 5566fd397320a..bb06719d85a49 100644 --- a/aten/src/ATen/native/Indexing.cpp +++ b/aten/src/ATen/native/Indexing.cpp @@ -73,7 +73,7 @@ static std::vector expandByteTensors(const Tensor & self, TensorList ind if (special_empty) { // We can't call select on an empty tensor so we just create an empty // tensor. 
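The embedding and indexing hunks above also fold in a second mechanical rename: scalar extraction moves from the toC*() family to the templated item<T>() accessor on 0-dim tensors. A sketch of the correspondence (tensor contents here are illustrative only):

#include <ATen/ATen.h>

void scalar_extraction_examples() {
  at::Tensor t = at::ones({3});                              // float CPU tensor
  double total = t.sum().item<double>();                     // was: t.sum().toCDouble()
  at::Tensor n = at::ones({4}, at::TensorOptions().dtype(at::kLong));
  int64_t count = n.sum().item<int64_t>();                   // was: n.sum().toCLong()
  bool all_pos = (t > 0).all().item<uint8_t>() != 0;         // was: (t > 0).all().toCByte()
  (void)total; (void)count; (void)all_pos;
}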
- result.emplace_back(nonzero.type().tensor()); + result.emplace_back(at::empty({0}, nonzero.options())); } else { result.emplace_back(nonzero.select(1, j)); } @@ -143,8 +143,8 @@ static Tensor unsqueezeN(const Tensor & src, int64_t before, int64_t after) { static Tensor wrapIndexOnce(const Tensor & index, int64_t dim, int64_t dim_size) { if (index.numel() != 0) { - auto max_idx = index.max().toCLong(); - auto min_idx = index.min().toCLong(); + auto max_idx = index.max().item(); + auto min_idx = index.min().item(); AT_CHECK(max_idx < dim_size, "index ", max_idx, " is out of bounds for dimension ", dim, " with size ", dim_size); AT_CHECK(min_idx >= -dim_size, diff --git a/aten/src/ATen/native/LegacyBridge.cpp b/aten/src/ATen/native/LegacyBridge.cpp index 0aaf2149b42a0..5fc554410ac9c 100644 --- a/aten/src/ATen/native/LegacyBridge.cpp +++ b/aten/src/ATen/native/LegacyBridge.cpp @@ -150,10 +150,6 @@ Tensor tensor(const Type& dtype, ArrayRef size) { } } -Tensor sparse_coo_tensor(const Type& dtype, ArrayRef size) { - return at::getType(dtype.options().layout(at::kSparse)).native_sparse_coo_tensor(size); -} - Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values) { return at::getType(values.options().layout(at::kSparse)).native_sparse_coo_tensor(indices, values); } @@ -162,6 +158,21 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, ArrayRef size, const TensorOptions& options) { + TensorOptions toptions = options; + return at::getType(toptions.layout(at::kSparse)).native_sparse_coo_tensor(size); +} + +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, const TensorOptions& options) { + TensorOptions toptions = options; + return at::getType(toptions.layout(at::kSparse)).native_sparse_coo_tensor(indices, values); +} + +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, ArrayRef size, const TensorOptions& options) { + TensorOptions toptions = options; + return at::getType(toptions.layout(at::kSparse)).native_sparse_coo_tensor(indices, values, size); +} + Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values, ArrayRef size) { return at::getType(values.options().layout(at::kSparse))._native_sparse_coo_tensor_unsafe(indices, values, size); } diff --git a/aten/src/ATen/native/Linear.cpp b/aten/src/ATen/native/Linear.cpp index bdf9602fe9ae0..7b0d89d4d5675 100644 --- a/aten/src/ATen/native/Linear.cpp +++ b/aten/src/ATen/native/Linear.cpp @@ -404,7 +404,7 @@ Tensor _trilinear(const Tensor& i1_, const Tensor& i2_, const Tensor& i3_, int64_t slicemul2 = (expand2[unroll_dim] ? 0 : 1); int64_t slicemul3 = (expand3[unroll_dim] ? 0 : 1); - auto output = i1.type().tensor(output_size).zero_(); + auto output = at::zeros(output_size, i1.options()); if (! 
sumdim[unroll_dim]) { for (int64_t k = 0; k < unroll_size; k++) { Tensor buf = at::native::sumproduct_pair(i1.narrow(unroll_dim, k * slicemul1, 1), diff --git a/aten/src/ATen/native/LinearAlgebra.cpp b/aten/src/ATen/native/LinearAlgebra.cpp index 04bf617081387..0cd08c5b2c491 100644 --- a/aten/src/ATen/native/LinearAlgebra.cpp +++ b/aten/src/ATen/native/LinearAlgebra.cpp @@ -22,7 +22,7 @@ static inline std::tuple _lu_det_P_diag_U_info(const Tensor std::tie(lu, p, info) = self.unsqueeze(0).btrifact_with_info(); p.squeeze_(0); lu.squeeze_(0); - int int_info = info.squeeze_().toCInt(); + int int_info = info.squeeze_().item(); AT_CHECK(int_info >= 0, "LU factorization (getrf) failed with info = ", int_info); auto n = self.size(0); auto num_exchanges = (at::arange(1, n + 1, p.type()) != p).nonzero().size(0); @@ -63,7 +63,7 @@ Tensor logdet(const Tensor& self) { } else { det = diag_U.prod().mul_(det_P); } - if (det.sign().toCDouble() <= 0) { + if (det.sign().item() <= 0) { return det.log_(); // in order to get proper -inf (det=0) or nan (det<0) } else { return diag_U.abs().log().sum(); @@ -88,7 +88,7 @@ std::tuple slogdet(const Tensor& self) { } Tensor inverse(const Tensor& self) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::inverse_out(result, self); } @@ -111,7 +111,7 @@ Tensor pinverse(const Tensor& self, double rcond) { "of floating types"); if (self.numel() == 0) { // Match NumPy - return self.type().tensor({self.size(1), self.size(0)}); + return at::empty({self.size(1), self.size(0)}, self.options()); } Tensor U, S, V; std::tie(U, S, V) = self.svd(); @@ -345,7 +345,7 @@ static inline Tensor& bmm_out_or_baddbmm_(Tensor& self_or_result, const Tensor& Tensor baddbmm_cpu(const Tensor& self, const Tensor& batch1, const Tensor& batch2, Scalar beta, Scalar alpha) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::baddbmm_out_cpu(result, self, batch1, batch2, beta, alpha); } @@ -362,7 +362,7 @@ Tensor& baddbmm__cpu(Tensor& self, const Tensor& batch1, const Tensor& batch2, S } Tensor bmm_cpu(const Tensor& self, const Tensor& mat2) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::bmm_out_cpu(result, self, mat2); } diff --git a/aten/src/ATen/native/ReduceOps.cpp b/aten/src/ATen/native/ReduceOps.cpp index c976121e77ae3..9e61db8543fbc 100644 --- a/aten/src/ATen/native/ReduceOps.cpp +++ b/aten/src/ATen/native/ReduceOps.cpp @@ -321,7 +321,7 @@ Tensor sum(const Tensor& self, IntList dim, ScalarType dtype) { Tensor _sum(const Tensor &self, int64_t dim_, bool keepdim) { int64_t dim = maybe_wrap_dim(dim_, self.dim()); - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::_sum_out(result, self, dim, keepdim); } @@ -343,7 +343,7 @@ Tensor prod(const Tensor& self, int64_t dim, ScalarType dtype) { Tensor _prod(const Tensor &self, int64_t dim_, bool keepdim) { int64_t dim = maybe_wrap_dim(dim_, self.dim()); - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::_prod_out(result, self, dim, keepdim); } @@ -365,7 +365,7 @@ Tensor& logsumexp_out(Tensor& result, const Tensor &self, int64_t dim_, bool kee Tensor logsumexp(const Tensor &self, int64_t dim_, bool keepdim) { int64_t dim = maybe_wrap_dim(dim_, self.dim()); - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return 
at::native::logsumexp_out(result, self, dim, keepdim); } @@ -639,7 +639,7 @@ Tensor _norm(const Tensor &self, Scalar p) { } Tensor norm(const Tensor& self, Scalar p, int64_t dim, bool keepdim) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::norm_out(result, self, p, dim, keepdim); } @@ -648,7 +648,7 @@ Tensor norm(const Tensor& self, Scalar p) { } Tensor all(const Tensor& self, int64_t dim, bool keepdim) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::all_out(result, self, dim, keepdim); } @@ -665,7 +665,7 @@ Tensor &all_out(Tensor &result, const Tensor &self, int64_t dim, bool keepdim) { } Tensor any(const Tensor& self, int64_t dim, bool keepdim) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::any_out(result, self, dim, keepdim); } @@ -690,7 +690,7 @@ Tensor var(const Tensor& self, bool unbiased) { } Tensor var(const Tensor& self, int64_t dim, bool unbiased, bool keepdim) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::var_out(result, self, dim, unbiased, keepdim); } @@ -715,7 +715,7 @@ Tensor std(const Tensor& self, bool unbiased) { } Tensor std(const Tensor& self, int64_t dim, bool unbiased, bool keepdim) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return at::native::std_out(result, self, dim, unbiased, keepdim); } diff --git a/aten/src/ATen/native/RoiPooling.cpp b/aten/src/ATen/native/RoiPooling.cpp index 1a089a9f473c1..26aeee9caf719 100644 --- a/aten/src/ATen/native/RoiPooling.cpp +++ b/aten/src/ATen/native/RoiPooling.cpp @@ -28,13 +28,13 @@ std::tuple RoiPooling2d_forward_cpu( auto inputWidth = input.size(3); // Output Tensor is (num_rois, C, pooledHeight, pooledWidth) - auto output = input.type().tensor({proposals, inputChannels, pooledHeight, pooledWidth}); + auto output = at::empty({proposals, inputChannels, pooledHeight, pooledWidth}, input.options()); // TODO: need some mechanism for determining train vs. 
test // During training, we need to store the argmaxes for the pooling operation, so // the argmaxes Tensor should be the same size as the output Tensor - auto argmaxes = input.type().toScalarType(kInt).tensor({proposals, inputChannels, pooledHeight, pooledWidth}); + auto argmaxes = at::empty({proposals, inputChannels, pooledHeight, pooledWidth}, input.options().dtype(kInt)); AT_CHECK(input.is_contiguous(), "input must be contiguous"); AT_CHECK(rois.is_contiguous(), "rois must be contiguous"); diff --git a/aten/src/ATen/native/TensorCompare.cpp b/aten/src/ATen/native/TensorCompare.cpp index 40c4ce39addeb..1cca4191fd079 100644 --- a/aten/src/ATen/native/TensorCompare.cpp +++ b/aten/src/ATen/native/TensorCompare.cpp @@ -34,7 +34,7 @@ DEFINE_DISPATCH(max_kernel); DEFINE_DISPATCH(min_kernel); bool allclose(const Tensor& self, const Tensor& other, double rtol, double atol, bool equal_nan) { - return at::isclose(self, other, rtol, atol, equal_nan).all().toCByte(); + return at::isclose(self, other, rtol, atol, equal_nan).all().item(); } Tensor isclose(const Tensor& self, const Tensor& other, double rtol, double atol, bool equal_nan) { @@ -85,7 +85,7 @@ Tensor where(const Tensor& condition, const Tensor& self, const Tensor& other) { } Tensor _s_where_cpu(const Tensor& condition, const Tensor& self, const Tensor& other) { - Tensor ret = self.type().tensor(self.sizes()); + Tensor ret = at::empty(self.sizes(), self.options()); AT_DISPATCH_ALL_TYPES(ret.type(), "where", [&] { where_cpu(ret, condition, self, other); }); @@ -93,8 +93,8 @@ Tensor _s_where_cpu(const Tensor& condition, const Tensor& self, const Tensor& o } std::tuple kthvalue(const Tensor& self, int64_t k, int64_t dim, bool keepdim) { - Tensor values = self.type().tensor(); - Tensor indices = self.type().toScalarType(kLong).tensor(); + Tensor values = at::empty({0}, self.options()); + Tensor indices = at::empty({0}, self.options().dtype(kLong)); return at::native::kthvalue_out(values, indices, self, k, dim, keepdim); } @@ -113,8 +113,8 @@ std::tuple kthvalue_out(Tensor& values, Tensor& indices, } std::tuple median(const Tensor& self, int64_t dim, bool keepdim) { - Tensor values = self.type().tensor(); - Tensor indices = self.type().toScalarType(kLong).tensor(); + Tensor values = at::empty({0}, self.options()); + Tensor indices = at::empty({0}, self.options().dtype(kLong)); return at::native::median_out(values, indices, self, dim, keepdim); } @@ -133,8 +133,8 @@ std::tuple median_out(Tensor& values, Tensor& indices, } std::tuple mode(const Tensor& self, int64_t dim, bool keepdim) { - Tensor values = self.type().tensor(); - Tensor indices = self.type().toScalarType(kLong).tensor(); + Tensor values = at::empty({0}, self.options()); + Tensor indices = at::empty({0}, self.options().dtype(kLong)); return at::native::mode_out(values, indices, self, dim, keepdim); } @@ -168,8 +168,8 @@ std::tuple _max_out_cpu(Tensor& max, Tensor& max_indices, } std::tuple max(const Tensor& self, int64_t dim, bool keepdim) { - Tensor max = self.type().tensor(); - Tensor max_indices = self.type().toScalarType(kLong).tensor(); + Tensor max = at::empty({0}, self.options()); + Tensor max_indices = at::empty({0}, self.options().dtype(kLong)); return at::native::max_out(max, max_indices, self, dim, keepdim); } @@ -211,8 +211,8 @@ std::tuple _min_out_cpu(Tensor& min, Tensor& min_indices, } std::tuple min(const Tensor& self, int64_t dim, bool keepdim) { - Tensor min = self.type().tensor(); - Tensor min_indices = self.type().toScalarType(kLong).tensor(); + Tensor min = 
at::empty({0}, self.options()); + Tensor min_indices = at::empty({0}, self.options().dtype(kLong)); return at::native::min_out(min, min_indices, self, dim, keepdim); } diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp index 178045d9fd0de..2e37acc951a61 100644 --- a/aten/src/ATen/native/TensorFactories.cpp +++ b/aten/src/ATen/native/TensorFactories.cpp @@ -118,6 +118,12 @@ Tensor& empty_out(Tensor& result, IntList size) { return result; } +Tensor empty_strided(IntList size, IntList stride, const TensorOptions& options) { + // Note [Native bindings for legacy TH factory functions] + return getFactoryType(options).tensor(size, stride); +} + + // Temporary type cast operators. These are needed to trace type-casts now since // Type's are not supported in the IR. Instead, we call down to these // specialized operators for each datatype. diff --git a/aten/src/ATen/native/TensorIterator.cpp b/aten/src/ATen/native/TensorIterator.cpp index c3535a92a0572..97645c0d0256c 100644 --- a/aten/src/ATen/native/TensorIterator.cpp +++ b/aten/src/ATen/native/TensorIterator.cpp @@ -153,7 +153,7 @@ void TensorIterator::allocate_outputs() { for (int dim = 0; dim < ndim(); dim++) { tensor_stride[dim] /= element_size; } - *op.tensor = op.type->tensor(tensor_shape, tensor_stride); + *op.tensor = at::empty_strided(tensor_shape, tensor_stride, op.type->options()); } } } diff --git a/aten/src/ATen/native/TensorIterator.h b/aten/src/ATen/native/TensorIterator.h index 3faedbec6bb32..7d97d7f7f6635 100644 --- a/aten/src/ATen/native/TensorIterator.h +++ b/aten/src/ATen/native/TensorIterator.h @@ -50,7 +50,7 @@ namespace at { -struct AT_API OperandInfo { +struct CAFFE2_API OperandInfo { OperandInfo() {} OperandInfo(const Tensor& t) : tensor(const_cast(&t)) {} @@ -82,7 +82,7 @@ struct AT_API OperandInfo { struct SplitUntil32Bit; -struct AT_API TensorIterator { +struct CAFFE2_API TensorIterator { struct Builder; friend struct Builder; @@ -212,8 +212,8 @@ struct TensorIterator::Builder { /// A container-like struct that acts as if it contains splits of a /// TensorIterator that can use 32-bit indexing. Taken together the splits cover /// the original TensorIterator. 
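at::empty_strided is the one new factory introduced here (TensorFactories.cpp above); TensorIterator::allocate_outputs switches to it so outputs can be allocated with an explicit stride order instead of going through op.type->tensor(shape, stride). A standalone sketch with made-up sizes and strides:

#include <ATen/ATen.h>

// Allocate an uninitialized 2x3 float tensor laid out column-major
// (stride 1 along dim 0, stride 2 along dim 1), without zero-filling it.
at::Tensor column_major_buffer() {
  return at::empty_strided({2, 3}, {1, 2},
                           at::TensorOptions().dtype(at::kFloat));
}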
-struct AT_API SplitUntil32Bit { - struct AT_API iterator { +struct CAFFE2_API SplitUntil32Bit { + struct CAFFE2_API iterator { iterator() {}; iterator(const TensorIterator& iter); iterator(iterator&&) = default; diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index 634e7a443d21f..c470f554c1423 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -133,7 +133,7 @@ Tensor expand_as(const Tensor& self, const Tensor& other) { } Tensor as_strided(const Tensor& self, IntList size, IntList stride, int64_t storage_offset) { - return self.type().tensor().set_(self.storage(), storage_offset, size, stride); + return at::empty({0}, self.options()).set_(self.storage(), storage_offset, size, stride); } Tensor &as_strided_(Tensor& self, IntList size, IntList stride, int64_t storage_offset) { @@ -196,7 +196,7 @@ Tensor repeat(const Tensor& self, IntList repeats) { Tensor xtensor = self.expand(padded_size); - Tensor result = self.type().tensor(target_size); + Tensor result = at::empty(target_size, self.options()); Tensor urtensor = at::alias(result); for (int64_t i = 0; i < xtensor.dim(); ++i) { // can't unfold with step 0, so make sure step is at least 1 diff --git a/aten/src/ATen/native/UnaryOps.cpp b/aten/src/ATen/native/UnaryOps.cpp index 89a13e14b8b2e..f6434b2c957c1 100644 --- a/aten/src/ATen/native/UnaryOps.cpp +++ b/aten/src/ATen/native/UnaryOps.cpp @@ -31,17 +31,17 @@ namespace at { namespace native { Tensor clamp(const Tensor& self, Scalar min, Scalar max) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return clamp_out(result, self, min, max); } Tensor clamp_max(const Tensor& self, Scalar max) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return clamp_max_out(result, self, max); } Tensor clamp_min(const Tensor& self, Scalar min) { - Tensor result = self.type().tensor(); + Tensor result = at::empty({0}, self.options()); return clamp_min_out(result, self, min); } @@ -99,7 +99,7 @@ Tensor& fill_(Tensor& self, const Tensor& value) { Tensor mvlgamma(const Tensor& self, int64_t p) { AT_CHECK(at::isFloatingType(self.type().scalarType()), "mvlgamma is not implemented for ", self.type()); - AT_CHECK((self > 0.5 * (p - 1.)).all().toCByte(), + AT_CHECK((self > 0.5 * (p - 1.)).all().item(), "Condition for computing multivariate log-gamma not met"); AT_CHECK(p >= 1, "p has to be greater than or equal to 1"); Tensor args = native::arange(-p / 2. + 0.5, 0.5, 0.5, self.options()); @@ -110,7 +110,7 @@ Tensor mvlgamma(const Tensor& self, int64_t p) { Tensor& mvlgamma_(Tensor& self, int64_t p) { AT_CHECK(at::isFloatingType(self.type().scalarType()), "mvlgamma is not implemented for ", self.type()); - AT_CHECK((self > 0.5 * (p - 1.)).all().toCByte(), + AT_CHECK((self > 0.5 * (p - 1.)).all().item(), "Condition for computing multivariate log-gamma not met"); AT_CHECK(p >= 1, "p has to be greater than or equal to 1"); Tensor args = native::arange(-p / 2. 
+ 0.5, 0.5, 0.5, self.options()); @@ -123,7 +123,7 @@ Tensor& mvlgamma_(Tensor& self, int64_t p) { #define IMPLEMENT_UNARY_OP_VEC(op) \ Tensor op(const Tensor& self) { \ - Tensor result = self.type().tensor(); \ + Tensor result = at::empty({0}, self.options()); \ return at::op##_out(result, self); \ } \ Tensor& _##op##__cpu(Tensor& self_) { \ @@ -143,7 +143,7 @@ Tensor& mvlgamma_(Tensor& self, int64_t p) { #define IMPLEMENT_UNARY_OP_TH(op) \ Tensor op(const Tensor& self) { \ - Tensor result = self.type().tensor(); \ + Tensor result = at::empty({0}, self.options()); \ return at::op##_out(result, self); \ } \ Tensor& _##op##__cpu(Tensor& self) { \ diff --git a/aten/src/ATen/native/cuda/Activation.cu b/aten/src/ATen/native/cuda/Activation.cu index 1bce68730f0d0..505054b8d431c 100644 --- a/aten/src/ATen/native/cuda/Activation.cu +++ b/aten/src/ATen/native/cuda/Activation.cu @@ -72,11 +72,10 @@ Tensor prelu_cuda(const Tensor& self, const Tensor& weight_) { AT_CHECK(input_ndim > 0, "Not allow zero-dim input tensor."); int64_t channel_size = 1; // channel_size default to 1 - int64_t input_dim0_size = 1, input_stride0 = 1, input_stride1 = 1; + int64_t input_stride0 = 1, input_stride1 = 1; if (input_ndim > 1) { channel_size = input.size(1); // channel is the 2nd dim of input - input_dim0_size = input.size(0); input_stride0 = strides[0]; input_stride1 = strides[1]; } @@ -189,11 +188,10 @@ std::tuple prelu_backward_cuda(const Tensor& grad_out_, const Te AT_CHECK(input_ndim > 0, "Not allow zero-dim input tensor."); int64_t channel_size = 1; // channel_size default to 1 - int64_t input_dim0_size = 1, input_stride0 = 1, input_stride1 = 1; + int64_t input_stride0 = 1, input_stride1 = 1; if (input_ndim > 1) { channel_size = input.size(1); // channel is the 2nd dim of input - input_dim0_size = input.size(0); input_stride0 = strides[0]; input_stride1 = strides[1]; } diff --git a/aten/src/ATen/native/cuda/CuFFTPlanCache.h b/aten/src/ATen/native/cuda/CuFFTPlanCache.h index 8715a9ef460ee..c8ea2c3c69196 100644 --- a/aten/src/ATen/native/cuda/CuFFTPlanCache.h +++ b/aten/src/ATen/native/cuda/CuFFTPlanCache.h @@ -346,6 +346,7 @@ class CuFFTConfig { // be fine for now. // TODO: When CUDA 10 comes out, check if the bug is fixed or if we need another // number for CUDA 10. +// Update: bug related to cuFFT plan cache max size has been fixed in CUDA 10. constexpr int64_t CUFFT_MAX_PLAN_NUM = 1023; static_assert(CUFFT_MAX_PLAN_NUM >= 0 && CUFFT_MAX_PLAN_NUM <= std::numeric_limits::max(), "CUFFT_MAX_PLAN_NUM not in size_t range"); @@ -389,12 +390,17 @@ class CuFFTParamsLRUCache { // Miss // remove if needed + // bug related to cuFFT plan cache max size has been fixed + // in CUDA 10. Hence, when compiling with CUDA 10, just + // don't do the erase. + #if CUDA_VERSION < 10000 if (_usage_list.size() >= _max_size) { auto last = _usage_list.end(); last--; _cache_map.erase(last->first); _usage_list.pop_back(); } + #endif // construct new plan at list front, then insert into _cache_map _usage_list.emplace_front(std::piecewise_construct, @@ -414,7 +420,8 @@ class CuFFTParamsLRUCache { void resize(int64_t new_size) { _set_max_size(new_size); - + // no-op when compiling with CUDA 10. 
+ #if CUDA_VERSION < 10000 auto cur_size = _usage_list.size(); if (cur_size > _max_size) { auto delete_it = _usage_list.end(); @@ -424,17 +431,26 @@ class CuFFTParamsLRUCache { } _usage_list.erase(delete_it, _usage_list.end()); } + #endif } size_t size() const { return _cache_map.size(); } - size_t max_size() const noexcept { return _max_size; } + size_t max_size() const noexcept { + #if CUDA_VERSION < 10000 + return _max_size; + #else + return size(); + #endif + } private: // Only sets size and does value check. Does not resize the data structures. void _set_max_size(int64_t new_size) { + #if CUDA_VERSION < 10000 AT_CHECK(new_size <= CUFFT_MAX_PLAN_NUM, "cuFFT plan cache size can not be larger than ", CUFFT_MAX_PLAN_NUM, ", but got ", new_size); + #endif AT_CHECK(new_size >= 0, "cuFFT plan cache size must be non-negative, but got ", new_size); _max_size = static_cast(new_size); diff --git a/aten/src/ATen/native/cuda/DistanceKernel.cu b/aten/src/ATen/native/cuda/DistanceKernel.cu index 02c143254ced7..f6128389f16f2 100644 --- a/aten/src/ATen/native/cuda/DistanceKernel.cu +++ b/aten/src/ATen/native/cuda/DistanceKernel.cu @@ -192,7 +192,7 @@ void pdist_backward_kernel_impl(Tensor& result, const Tensor& grad, const Tensor const dim3 grid(grid_x, grid_y); const dim3 block(block_x, block_y); - Tensor buffer = result.type().tensor({n - 1, result.size(0), result.size(1)}); + Tensor buffer = at::empty({n - 1, result.size(0), result.size(1)}, result.options()); AT_DISPATCH_FLOATING_TYPES(self.type(), "pdist_cuda_backward", [&] { if (p == 1.0) { pdist_backward_kernel_cuda_impl::one><<>>(buffer.data(), grad.data(), self.data(), dist.data(), grad.stride(0), n, m, dist.numel(), p); diff --git a/aten/src/ATen/native/cuda/Distributions.cu b/aten/src/ATen/native/cuda/Distributions.cu index fc908714f18f2..50ea3a9bf32b2 100644 --- a/aten/src/ATen/native/cuda/Distributions.cu +++ b/aten/src/ATen/native/cuda/Distributions.cu @@ -182,7 +182,7 @@ void bernoulli_scalar_cuda_kernel( namespace at { namespace native { Tensor _s_poisson_cuda(const Tensor& lambda, Generator* gen) { - Tensor ret = lambda.type().tensor(lambda.sizes()); + Tensor ret = at::empty(lambda.sizes(), lambda.options()); AT_DISPATCH_FLOATING_TYPES_AND_HALF(ret.type(), "poisson", [&] { poisson_cuda_kernel(ret, lambda, next_philox_seed(gen, 20)); }); @@ -190,7 +190,7 @@ Tensor _s_poisson_cuda(const Tensor& lambda, Generator* gen) { } Tensor _s_gamma_cuda(const Tensor& alpha, Generator* gen) { - Tensor ret = alpha.type().tensor(alpha.sizes()); + Tensor ret = at::empty(alpha.sizes(), alpha.options()); AT_DISPATCH_FLOATING_TYPES_AND_HALF(ret.type(), "gamma", [&] { gamma_cuda_kernel(ret, alpha, next_philox_seed(gen, 10)); }); @@ -198,7 +198,7 @@ Tensor _s_gamma_cuda(const Tensor& alpha, Generator* gen) { } Tensor _standard_gamma_grad_cuda(const Tensor& self, const Tensor& output) { - Tensor ret = self.type().tensor(self.sizes()); + Tensor ret = at::empty(self.sizes(), self.options()); AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.type(), "_standard_gamma_grad", [&] { gamma_grad_cuda_kernel(ret, self, output); }); diff --git a/aten/src/ATen/native/cuda/Dropout.cu b/aten/src/ATen/native/cuda/Dropout.cu index 2d133a70dc23b..6976565de059a 100644 --- a/aten/src/ATen/native/cuda/Dropout.cu +++ b/aten/src/ATen/native/cuda/Dropout.cu @@ -97,7 +97,7 @@ void masked_scale_kernel(at::Tensor& ret, const at::Tensor src, const at::Tensor std::tuple fused_dropout_cuda(const Tensor& self, double p, Generator * gen){ Tensor ret = at::empty_like(self); - Tensor mask = 
self.type().toScalarType(kByte).tensor(self.sizes()); + Tensor mask = at::empty(self.sizes(), self.options().dtype(kByte)); const int64_t nelem = self.numel(); const int64_t block_size = 256; unsigned int blocks_per_sm = at::cuda::getCurrentDeviceProperties()->maxThreadsPerMultiProcessor/block_size; diff --git a/aten/src/ATen/native/cuda/Embedding.cu b/aten/src/ATen/native/cuda/Embedding.cu index 27b079fe219e2..ddc01923859b1 100644 --- a/aten/src/ATen/native/cuda/Embedding.cu +++ b/aten/src/ATen/native/cuda/Embedding.cu @@ -349,7 +349,7 @@ Tensor & embedding_renorm_cuda_(Tensor & self, const Tensor & indices, // FIXME: thrust::unique only removes consecutive elements that are equal. // We have race conditions when indices contain duplicates which are not // adjacent - auto unique_indices = indices.type().tensor(indices.numel()); + auto unique_indices = at::empty(indices.numel(), indices.options()); auto unique_data = device_ptr(unique_indices.data()); auto end = thrust::unique_copy(policy, indices_data, indices_data + num_indices, unique_data); auto num_unique_indices = static_cast(end - unique_data); diff --git a/aten/src/ATen/native/cuda/Gesv.cu b/aten/src/ATen/native/cuda/Gesv.cu index 80c7aaeb74f6a..a5802192eb77d 100644 --- a/aten/src/ATen/native/cuda/Gesv.cu +++ b/aten/src/ATen/native/cuda/Gesv.cu @@ -53,12 +53,16 @@ static magma_queue_t createMagmaQueue(const Tensor& tensor) { magma_queue_create_from_cuda( tensor.get_device(), at::cuda::getCurrentCUDAStream(), - THCState_getCurrentBlasHandle(context.getTHCState()), - THCState_getCurrentSparseHandle(context.getTHCState()), + at::cuda::getCurrentCUDABlasHandle(), + at::cuda::getCurrentCUDASparseHandle(), &magma_queue); return magma_queue; } +static void destroyMagmaQueue(magma_queue_t& existing_queue) { + magma_queue_destroy(existing_queue); +} + static inline magma_int_t magma_int_cast(int64_t value, const char* varname) { auto result = static_cast(value); if (static_cast(result) != value) { @@ -117,9 +121,11 @@ AT_ERROR("gesv: MAGMA library not found in " ipiv_array[i] = &ipiv_data[i * n]; } + magma_queue_t gesv_queue = createMagmaQueue(b); magmaGesvBatched( n, nrhs, A_array, n, ipiv_array, b_array, n, - info_array, batch_size, createMagmaQueue(b)); + info_array, batch_size, gesv_queue); + destroyMagmaQueue(gesv_queue); for (int64_t i = 0; i < batch_size; i++) { infos[i] = info_array[i]; diff --git a/aten/src/ATen/native/cuda/Loops.cuh b/aten/src/ATen/native/cuda/Loops.cuh index b3435bd0f6bfb..6b5a0e59d08ab 100644 --- a/aten/src/ATen/native/cuda/Loops.cuh +++ b/aten/src/ATen/native/cuda/Loops.cuh @@ -63,6 +63,13 @@ template void gpu_nullary_kernel(TensorIterator& iter, const func_t& f) { ASSERT_HOST_DEVICE_LAMBDA(func_t); + if (!iter.can_use_32bit_indexing()) { + for (auto& sub_iter : iter.with_32bit_indexing()) { + gpu_nullary_kernel(sub_iter, f); + } + return; + } + char* out_data = (char*)iter.data_ptr(0); using traits = function_traits; @@ -93,6 +100,13 @@ template void gpu_unary_kernel(TensorIterator& iter, const func_t& f) { ASSERT_HOST_DEVICE_LAMBDA(func_t); + if (!iter.can_use_32bit_indexing()) { + for (auto& sub_iter : iter.with_32bit_indexing()) { + gpu_unary_kernel(sub_iter, f); + } + return; + } + char* out_data = (char*)iter.data_ptr(0); const char* in1_data = (char*)iter.data_ptr(1); diff --git a/aten/src/ATen/native/cuda/RoiPooling.cu b/aten/src/ATen/native/cuda/RoiPooling.cu index 0fd3f1d6efd15..6c0a90d4c2f48 100644 --- a/aten/src/ATen/native/cuda/RoiPooling.cu +++ b/aten/src/ATen/native/cuda/RoiPooling.cu @@ -122,13 
+122,13 @@ std::tuple RoiPooling2d_forward_cuda( auto inputWidth = input.size(3); // Output Tensor is (num_rois, C, pooledHeight, pooledWidth) - auto output = input.type().tensor({proposals, inputChannels, pooledHeight, pooledWidth}); + auto output = at::empty({proposals, inputChannels, pooledHeight, pooledWidth}, input.options()); // TODO: need some mechanism for determining train vs. test // During training, we need to store the argmaxes for the pooling operation, so // the argmaxes Tensor should be the same size as the output Tensor - auto argmaxes = input.type().toScalarType(kInt).tensor({proposals, inputChannels, pooledHeight, pooledWidth}); + auto argmaxes = at::empty({proposals, inputChannels, pooledHeight, pooledWidth}, input.options().dtype(kInt)); AT_CHECK(input.is_contiguous(), "input must be contiguous"); AT_CHECK(rois.is_contiguous(), "rois must be contiguous"); @@ -198,7 +198,7 @@ Tensor RoiPooling2d_backward_cuda( auto inputHeight = input.size(2); auto inputWidth = input.size(3); - auto gradInput = input.type().tensor(input.sizes()); + auto gradInput = at::empty(input.sizes(), input.options()); dim3 block(512); dim3 grid((gradInput.numel() + 512 - 1) / 512); diff --git a/aten/src/ATen/native/cuda/SpectralOps.cu b/aten/src/ATen/native/cuda/SpectralOps.cu index 38b1dddb49627..51ab68a4f78f1 100644 --- a/aten/src/ATen/native/cuda/SpectralOps.cu +++ b/aten/src/ATen/native/cuda/SpectralOps.cu @@ -184,7 +184,7 @@ static inline Tensor _run_cufft( auto& ctx = at::globalContext(); // set output - auto output = input.type().tensor(output_sizes); + auto output = at::empty(output_sizes, input.options()); // set to current stream CUFFT_CHECK(cufftSetStream(plan, at::cuda::getCurrentCUDAStream())); diff --git a/aten/src/ATen/native/cuda/SummaryOps.cu b/aten/src/ATen/native/cuda/SummaryOps.cu index 2ab983c17721a..0ef8ebabf065a 100644 --- a/aten/src/ATen/native/cuda/SummaryOps.cu +++ b/aten/src/ATen/native/cuda/SummaryOps.cu @@ -258,7 +258,7 @@ Tensor _bincount_cuda_template( AT_ERROR("input and weights should have the same length"); } - auto nbins = self.max().toCLong() + 1L; + auto nbins = self.max().item() + 1L; nbins = std::max(nbins, minlength); // alloc output counter on GPU Tensor output; diff --git a/aten/src/ATen/native/cuda/TensorCompare.cu b/aten/src/ATen/native/cuda/TensorCompare.cu index 8e0cf4e1b76c6..8f99241ca35a9 100644 --- a/aten/src/ATen/native/cuda/TensorCompare.cu +++ b/aten/src/ATen/native/cuda/TensorCompare.cu @@ -32,7 +32,7 @@ Tensor _s_where_cuda( const Tensor& condition, const Tensor& self, const Tensor& other) { - Tensor ret = self.type().tensor(self.sizes()); + Tensor ret = at::empty(self.sizes(), self.options()); AT_DISPATCH_ALL_TYPES_AND_HALF(ret.type(), "where", [&] { where_cuda(ret, condition, self, other); }); diff --git a/aten/src/ATen/native/cuda/TensorFactories.cu b/aten/src/ATen/native/cuda/TensorFactories.cu index 309b54a299caa..cbddd0ae87a13 100644 --- a/aten/src/ATen/native/cuda/TensorFactories.cu +++ b/aten/src/ATen/native/cuda/TensorFactories.cu @@ -49,14 +49,14 @@ Tensor& randperm_out_cuda(Tensor& result, int64_t n, Generator* generator) { result.copy_(randperm_out_cuda(result_float, n, generator)); } else { if (n < 30000) { // For small inputs, we offload it to CPU instead. 
- auto result_cpu = result.type().cpu().tensor({n}); + auto result_cpu = at::empty({n}, result.options().device(kCPU)); randperm_out(result_cpu, n, generator); result.copy_(result_cpu); } else { // Generate random values for the keys array AT_DISPATCH_ALL_TYPES( result.type(), "randperm_out_cuda", [&] { - auto keys = result.type().tensor(result.sizes()).random_(generator); + auto keys = at::empty(result.sizes(), result.options()).random_(generator); auto result_data = thrust::device_ptr(result.data()); auto keys_data = thrust::device_ptr(keys.data()); diff --git a/aten/src/ATen/native/cuda/WeightNorm.cu b/aten/src/ATen/native/cuda/WeightNorm.cu index 67d8f39e2de71..5700ca559f0fe 100644 --- a/aten/src/ATen/native/cuda/WeightNorm.cu +++ b/aten/src/ATen/native/cuda/WeightNorm.cu @@ -329,7 +329,7 @@ std::tuple weight_norm_cuda at::ScalarType::Float : g.type().scalarType(); // Will this create norms on the same device as g, regardless of what the thread's default // current device is? I believe so, because Type::* functions are DeviceGuard()ed. - auto norms = g.type().toScalarType(AccType).tensor(g.sizes(), g.strides()); + auto norms = at::empty_strided(g.sizes(), g.strides(), g.options().dtype(AccType)); const int ndims = v.dim(); diff --git a/aten/src/ATen/native/cudnn/AffineGridGenerator.cpp b/aten/src/ATen/native/cudnn/AffineGridGenerator.cpp index 463d4ffea3cf0..a12df78c767e2 100644 --- a/aten/src/ATen/native/cudnn/AffineGridGenerator.cpp +++ b/aten/src/ATen/native/cudnn/AffineGridGenerator.cpp @@ -59,7 +59,7 @@ Tensor cudnn_affine_grid_generator_forward( checkContiguous(c, theta); checkSize(c, theta, {N, 2, 3}); - auto grid_t = theta->type().tensor(); + auto grid_t = at::empty({0}, theta->options()); grid_t.resize_({N, H, W, 2}); auto dataType = getCudnnDataType(*theta); @@ -82,7 +82,7 @@ Tensor cudnn_affine_grid_generator_backward( checkContiguous(c, grad_grid); checkSize(c, grad_grid, {N, H, W, 2}); - auto grad_theta_t = grad_grid->type().tensor(); + auto grad_theta_t = at::empty({0}, grad_grid->options()); grad_theta_t.resize_({N, 2, 3}); auto dataType = getCudnnDataType(grad_theta_t); diff --git a/aten/src/ATen/native/cudnn/BatchNorm.cpp b/aten/src/ATen/native/cudnn/BatchNorm.cpp index d54fe256b2915..427f7e00d9d90 100644 --- a/aten/src/ATen/native/cudnn/BatchNorm.cpp +++ b/aten/src/ATen/native/cudnn/BatchNorm.cpp @@ -94,7 +94,7 @@ std::tuple cudnn_batch_norm( #endif } - auto output_t = input->type().tensor(input->sizes()); + auto output_t = at::empty(input->sizes(), input->options()); TensorArg output{ output_t, "output", 0 }; auto handle = getCudnnHandle(); @@ -108,8 +108,8 @@ std::tuple cudnn_batch_norm( if (training) { int64_t num_features = input_t.size(1); - save_mean = weight_t.type().tensor({ num_features }); - save_var = weight_t.type().tensor({ num_features }); + save_mean = at::empty({ num_features }, weight_t.options()); + save_var = at::empty({ num_features }, weight_t.options()); AT_CUDNN_CHECK(cudnnBatchNormalizationForwardTraining( handle, mode, &one, &zero, idesc.desc(), input->data_ptr(), @@ -190,9 +190,9 @@ std::tuple cudnn_batch_norm_backward( #endif } - auto grad_input_t = input->type().tensor(input->sizes()); - auto grad_weight_t = weight->type().tensor(weight->sizes()); - auto grad_bias_t = weight->type().tensor(weight->sizes()); + auto grad_input_t = at::empty(input->sizes(), input->options()); + auto grad_weight_t = at::empty(weight->sizes(), weight->options()); + auto grad_bias_t = at::empty(weight->sizes(), weight->options()); auto handle = getCudnnHandle(); 
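The CUDA and cuDNN factory rewrites in the surrounding hunks all lean on TensorOptions being a small value type that can be tweaked before allocation: .dtype(...) for integer argmaxes and byte workspaces, .device(kCPU) for randperm's small-input staging path. A sketch (the helper and the variable named staging are illustrative, not from this patch):

#include <ATen/ATen.h>

void options_tweaks(const at::Tensor& input) {
  // was: input.type().toScalarType(kInt).tensor(input.sizes());
  at::Tensor argmaxes = at::empty(input.sizes(), input.options().dtype(at::kInt));

  // was: result.type().cpu().tensor({n});  (CUDA tensor, staging buffer on CPU)
  at::Tensor staging = at::empty({16}, input.options().device(at::kCPU));

  (void)argmaxes; (void)staging;
}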
auto dataType = getCudnnDataType(*input); diff --git a/aten/src/ATen/native/cudnn/Conv.cpp b/aten/src/ATen/native/cudnn/Conv.cpp index afbd7653aefa6..9638740c24a6a 100644 --- a/aten/src/ATen/native/cudnn/Conv.cpp +++ b/aten/src/ATen/native/cudnn/Conv.cpp @@ -836,9 +836,10 @@ Tensor cudnn_convolution_forward( checkAllSameType(c, {input, weight}); checkAllSameGPU(c, {input, weight}); - auto output_t = input->type().tensor( + auto output_t = at::empty( conv_output_size(input->sizes(), weight->sizes(), - padding, stride, dilation, groups)); + padding, stride, dilation, groups), + input->options()); // Avoid ambiguity of "output" when this is being used as backwards TensorArg output{ output_t, "result", 0 }; @@ -976,7 +977,7 @@ Tensor cudnn_convolution_backward_input( checkAllSameType(c, {grad_output, weight}); checkAllSameGPU(c, {grad_output, weight}); - auto grad_input_t = grad_output->type().tensor(input_size); + auto grad_input_t = at::empty(input_size, grad_output->options()); // Avoid "grad_input" when this is being used as transposed convolution TensorArg grad_input{ grad_input_t, "result", 0 }; @@ -1111,7 +1112,7 @@ Tensor cudnn_convolution_backward_weight( checkAllSameType(c, {grad_output, input}); checkAllSameGPU(c, {grad_output, input}); - auto grad_weight_t = grad_output->type().tensor(weight_size); + auto grad_weight_t = at::empty(weight_size, grad_output->options()); // For uniformity with everything else, although it seems grad_weight // would be unambiguous too. @@ -1179,8 +1180,8 @@ Tensor cudnn_convolution_backward_bias( TensorArg grad_output{ grad_output_t, "grad_output", 1 }; setCuDNNStreamToCurrent(); - auto grad_bias_t = grad_output->type().tensor( - { grad_output->size(output_channels_dim) }); + auto grad_bias_t = at::empty( + { grad_output->size(output_channels_dim) }, grad_output->options()); TensorArg grad_bias{ grad_bias_t, "result", 0 }; diff --git a/aten/src/ATen/native/cudnn/GridSampler.cpp b/aten/src/ATen/native/cudnn/GridSampler.cpp index e859344bcc369..f9b7781036520 100644 --- a/aten/src/ATen/native/cudnn/GridSampler.cpp +++ b/aten/src/ATen/native/cudnn/GridSampler.cpp @@ -75,7 +75,7 @@ Tensor cudnn_grid_sampler_forward( checkGridSize(c, grid, input); checkDim(c, input, 4); - auto output_t = input->type().tensor(); + auto output_t = at::empty({0}, input->options()); output_t.resize_({input->size(0), input->size(1), grid->size(1), grid->size(2)}); TensorDescriptor idesc{ *input }; // input descriptor @@ -114,9 +114,9 @@ std::tuple cudnn_grid_sampler_backward( checkDim(c, input, 4); checkDim(c, grad_output, 4); - auto grad_input_t = input->type().tensor(); + auto grad_input_t = at::empty({0}, input->options()); grad_input_t.resize_(input->sizes()); - auto grad_grid_t = grid->type().tensor(); + auto grad_grid_t = at::empty({0}, grid->options()); grad_grid_t.resize_(grid->sizes()); TensorDescriptor idesc{ *input }; // input descriptor diff --git a/aten/src/ATen/native/cudnn/LossCTC.cpp b/aten/src/ATen/native/cudnn/LossCTC.cpp index 98c0cb7918f02..28fd81f9a9a99 100644 --- a/aten/src/ATen/native/cudnn/LossCTC.cpp +++ b/aten/src/ATen/native/cudnn/LossCTC.cpp @@ -75,7 +75,7 @@ std::tuple _cudnn_ctc_loss(const Tensor& log_probs_t, const Tens algo, ctc_loss_desc.desc(), &workspace_size)); - Tensor workspace = log_probs->type().toScalarType(kByte).tensor(workspace_size); // new way of doing this with empty? 
+ Tensor workspace = at::empty(workspace_size, log_probs->options().dtype(kByte)); Tensor costs = at::empty({log_probs->size(1)}, log_probs->options()); AT_CUDNN_CHECK(cudnnCTCLoss(handle, probs_desc.desc(), probs.data_ptr(), diff --git a/aten/src/ATen/native/cudnn/RNN.cpp b/aten/src/ATen/native/cudnn/RNN.cpp index 09c9365793ec7..35af9919d46d2 100644 --- a/aten/src/ATen/native/cudnn/RNN.cpp +++ b/aten/src/ATen/native/cudnn/RNN.cpp @@ -464,7 +464,7 @@ namespace { mat_numel * num_linear_layers / 2, 1}; // Generate a new parameter tensor which is a view into the // weight_buf. - Tensor param = weight_buf.type().tensor().set_(weight_buf.storage(), offset, size); + Tensor param = at::empty({0}, weight_buf.options()).set_(weight_buf.storage(), offset, size); params.emplace_back(std::move(param)); layer_params_count++; } else { @@ -616,7 +616,7 @@ Tensor _cudnn_rnn_flatten_weight( x_desc.set(getCudnnDataType(any_param), x_geom.sizes(), x_geom.strides(), 5); auto num_weights = get_num_weights(handle, rnn_desc, x_desc, rnn.datatype); - auto weight_buf = any_param.type().tensor(num_weights).zero_(); + auto weight_buf = at::zeros(num_weights, any_param.options()); FilterDescriptor w_desc; w_desc.set(weight_buf, 3); @@ -691,13 +691,13 @@ std::tuple _cudnn_rnn( "rnn: cx is not contiguous"); auto x = input.contiguous(); - auto output = input.type().tensor(output_size); - auto hy = hx.type().tensor(hidden_size); + auto output = at::empty(output_size, input.options()); + auto hy = at::empty(hidden_size, hx.options()); Tensor cy; if (cx.defined()) { - cy = cx.type().tensor(hidden_size); + cy = at::empty(hidden_size, cx.options()); } else { - cy = hx.type().tensor(); // NB: Not allowed to return undefined tensors + cy = at::empty({0}, hx.options()); // NB: Not allowed to return undefined tensors } auto y = output; @@ -709,7 +709,7 @@ std::tuple _cudnn_rnn( FilterDescriptor w_desc; if (!weight_buf.defined()) { auto num_weights = get_num_weights(handle, descs.rnn_desc, descs.x_descs[0], fn.rnn.datatype); - weight_buf = x.type().tensor(num_weights); + weight_buf = at::empty(num_weights, x.options()); w_desc.set(weight_buf, 3); weight_buf.zero_(); std::vector params; @@ -734,7 +734,7 @@ std::tuple _cudnn_rnn( x_descs_arr.data(), &workspace_size )); - Tensor workspace = input.type().toScalarType(kByte).tensor(workspace_size); + Tensor workspace = at::empty(workspace_size, input.options().dtype(kByte)); Tensor reserve; // NB: Previously, the test was for fn.requires_grad, but we don't have @@ -748,7 +748,7 @@ std::tuple _cudnn_rnn( x_descs_arr.data(), &reserve_size )); - reserve = input.type().toScalarType(kByte).tensor(reserve_size); + reserve = at::empty(reserve_size, input.options().dtype(kByte)); AT_CUDNN_CHECK(cudnnRNNForwardTraining( handle, descs.rnn_desc.desc(), @@ -764,7 +764,7 @@ std::tuple _cudnn_rnn( reserve.data_ptr(), reserve.size(0) )); } else { // inference - reserve = input.type().toScalarType(kByte).tensor(); + reserve = at::empty({0}, input.options().dtype(kByte)); AT_CUDNN_CHECK(cudnnRNNForwardInference( handle, descs.rnn_desc.desc(), @@ -836,12 +836,12 @@ std::tuple _cudnn_rnn_backward_input( auto dy = grad_output.contiguous(); auto y = output; auto w = weight_buf; - auto dx = input.type().tensor(input.sizes()); // TODO: more compact way of saying this + auto dx = at::empty(input.sizes(), input.options()); // TODO: more compact way of saying this auto dhy = grad_hy.contiguous().view(hidden_size); auto dcy = grad_cy.defined() ? 
grad_cy.contiguous().view(hidden_size) : Tensor(); - auto dhx = hx.type().tensor(hidden_size); + auto dhx = at::empty(hidden_size, hx.options()); AT_ASSERTM(cx.defined() || !output_mask[2], "illegally required grad of cx for non-LSTM RNN"); - auto dcx = cx.defined() ? cx.type().tensor(hidden_size) : Tensor(); + auto dcx = cx.defined() ? at::empty(hidden_size, cx.options()) : Tensor(); AT_CHECK(fn_train, "cudnn RNN backward can only be called in training mode"); @@ -881,7 +881,7 @@ std::tuple _cudnn_rnn_backward_input( &workspace_size )); // TODO: put this in the correct device??? - Tensor workspace = input.type().toScalarType(kByte).tensor(workspace_size); + Tensor workspace = at::empty(workspace_size, input.options().dtype(kByte)); AT_CUDNN_CHECK(cudnnRNNBackwardData( handle, @@ -965,7 +965,7 @@ std::vector _cudnn_rnn_backward_weight( auto x = input.contiguous(); const auto& y = output; - auto dw = weight_buf.type().tensor(weight_buf.sizes()).zero_(); + auto dw = at::zeros(weight_buf.sizes(), weight_buf.options()); cudnnRNNAlgo_t algo = get_algo(fn.rnn, fn.tensors); fn.rnn.set_algo(algo); @@ -984,7 +984,7 @@ std::vector _cudnn_rnn_backward_weight( x_descs_arr.data(), &workspace_size )); - Tensor workspace = input.type().toScalarType(kByte).tensor(workspace_size); + Tensor workspace = at::empty(workspace_size, input.options().dtype(kByte)); AT_CUDNN_CHECK(cudnnRNNBackwardWeights( handle, @@ -1001,7 +1001,7 @@ std::vector _cudnn_rnn_backward_weight( std::vector grad_weight_arr; grad_weight_arr.reserve( weight.numel() ); for (const auto& w : weight_arr) { - grad_weight_arr.emplace_back(w.type().tensor(w.sizes()).zero_()); + grad_weight_arr.emplace_back(at::zeros(w.sizes(), w.options())); } std::vector grad_params_arr; @@ -1125,7 +1125,7 @@ DropoutState& get_dropout_state(const Type& tp, double dropout_p, bool train) { : ten_dropout_state_cache.at(device); if (train && dropout_p > 0 && !state.buffer.defined()) { std::unique_lock lock {state.mutex}; - int64_t seed = at::empty({}, at::kLong).random_().toCLong(); + int64_t seed = at::empty({}, at::kLong).random_().item(); state.buffer = at::_cudnn_init_dropout_state( tp.toScalarType(at::kByte), dropout_p, train, seed); // NB: CUDA binds the event to a device at creation time, so we can initialize it @@ -1155,7 +1155,7 @@ Tensor try_get_weight_buf( // Try to get parameter storage auto & any_param = parameters.at(0); auto param_storage = any_param.storage(); - auto weight_buf = any_param.type().tensor().set_(param_storage); + auto weight_buf = at::empty({0}, any_param.options()).set_(param_storage); if (weight_buf.size(0) < num_params) { return {}; } else if (weight_buf.size(0) > num_params) { diff --git a/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp b/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp index c9d25780bd65d..f7d163bee732e 100644 --- a/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp +++ b/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp @@ -89,7 +89,7 @@ std::tuple miopen_batch_norm( mode = miopenBNSpatial; } - auto output_t = input->type().tensor(input->sizes()); + auto output_t = at::empty(input->sizes(), input->options()); TensorArg output{ output_t, "output", 0 }; auto handle = getMiopenHandle(); @@ -103,8 +103,8 @@ std::tuple miopen_batch_norm( if (training) { int64_t num_features = input_t.size(1); - save_mean = weight_t.type().tensor({ num_features }); - save_var = weight_t.type().tensor({ num_features }); + save_mean = at::empty({ num_features }, weight_t.options()); + save_var = at::empty({ num_features }, 
weight_t.options()); MIOPEN_CHECK(miopenBatchNormalizationForwardTraining( handle, mode, &one, &zero, idesc.desc(), input->data_ptr(), @@ -177,9 +177,9 @@ std::tuple miopen_batch_norm_backward( mode = miopenBNSpatial; } - auto grad_input_t = input->type().tensor(input->sizes()); - auto grad_weight_t = weight->type().tensor(weight->sizes()); - auto grad_bias_t = weight->type().tensor(weight->sizes()); + auto grad_input_t = at::empty(input->sizes(), input->options()); + auto grad_weight_t = at::empty(weight->sizes(), weight->options()); + auto grad_bias_t = at::empty(weight->sizes(), weight->options()); auto handle = getMiopenHandle(); auto dataType = getMiopenDataType(*input); diff --git a/aten/src/ATen/native/miopen/Conv_miopen.cpp b/aten/src/ATen/native/miopen/Conv_miopen.cpp index 9aeaad7355861..6515574a299c6 100644 --- a/aten/src/ATen/native/miopen/Conv_miopen.cpp +++ b/aten/src/ATen/native/miopen/Conv_miopen.cpp @@ -616,9 +616,10 @@ Tensor miopen_convolution_forward( checkAllSameType(c, {input, weight}); checkAllSameGPU(c, {input, weight}); - auto output_t = input->type().tensor( + auto output_t = at::empty( conv_output_size(input->sizes(), weight->sizes(), - padding, stride, dilation, groups)); + padding, stride, dilation, groups), + input->options()); // Avoid ambiguity of "output" when this is being used as backwards TensorArg output{ output_t, "result", 0 }; @@ -734,7 +735,7 @@ Tensor miopen_convolution_backward_input( checkAllSameType(c, {grad_output, weight}); checkAllSameGPU(c, {grad_output, weight}); - auto grad_input_t = grad_output->type().tensor(input_size); + auto grad_input_t = at::empty(input_size, grad_output->options()); // Avoid "grad_input" when this is being used as transposed convolution TensorArg grad_input{ grad_input_t, "result", 0 }; @@ -859,7 +860,7 @@ Tensor miopen_convolution_backward_weight( checkAllSameType(c, {grad_output, input}); checkAllSameGPU(c, {grad_output, input}); - auto grad_weight_t = grad_output->type().tensor(weight_size); + auto grad_weight_t = at::empty(weight_size, grad_output->options()); // For uniformity with everything else, although it seems grad_weight // would be unambiguous too. 
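The hunks above and below all apply the same mechanical migration: tensor allocation moves off the deprecated Type-based factories (input->type().tensor(...), type().toScalarType(kByte).tensor(...)) and onto the free factory functions that take a TensorOptions. A minimal sketch of the pattern, with illustrative names (input, sizes and n are placeholders, not identifiers from this patch):

    // Old style: allocate through the Type object attached to a tensor.
    //   auto out = input.type().tensor(sizes);
    //   auto buf = input.type().toScalarType(at::kByte).tensor(n);
    // New style: free factory function plus TensorOptions, which carries the
    // dtype, device and layout of the source tensor.
    auto out  = at::empty(sizes, input.options());               // uninitialized
    auto zero = at::zeros(sizes, input.options());               // zero-filled
    auto buf  = at::empty(n, input.options().dtype(at::kByte));  // same device, byte dtype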
@@ -917,8 +918,7 @@ Tensor miopen_convolution_backward_bias( TensorArg grad_output{ grad_output_t, "grad_output", 1 }; setMIOpenStreamToCurrent(); - auto grad_bias_t = grad_output->type().tensor( - { grad_output->size(output_channels_dim) }); + auto grad_bias_t = at::empty( { grad_output->size(output_channels_dim) }, grad_output->options()); TensorArg grad_bias{ grad_bias_t, "result", 0 }; diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp index 2c81d69d3b843..1d92de58bb7ec 100644 --- a/aten/src/ATen/native/mkl/SpectralOps.cpp +++ b/aten/src/ATen/native/mkl/SpectralOps.cpp @@ -207,7 +207,7 @@ Tensor _fft_mkl(const Tensor& self, int64_t signal_ndim, onumel *= osize; } } - Tensor output = input.type().tensor(output_sizes); + Tensor output = at::empty(output_sizes, input.options()); // precision DFTI_CONFIG_VALUE prec; diff --git a/aten/src/ATen/native/mkldnn/Conv.cpp b/aten/src/ATen/native/mkldnn/Conv.cpp index ddbd6977645e7..adfe15decbc9b 100644 --- a/aten/src/ATen/native/mkldnn/Conv.cpp +++ b/aten/src/ATen/native/mkldnn/Conv.cpp @@ -70,8 +70,8 @@ at::Tensor mkldnn_convolution( const at::Tensor& input, const at::Tensor& weight, const at::Tensor& bias, IntList padding, IntList stride, IntList dilation, int64_t groups) { - auto output = input.type().tensor(conv_output_size( - input.sizes(), weight.sizes(), padding, stride, dilation, groups)); + auto output = at::empty(conv_output_size( + input.sizes(), weight.sizes(), padding, stride, dilation, groups), input.options()); auto cpu_engine = CpuEngine::Instance().get_engine(); @@ -182,7 +182,7 @@ Tensor mkldnn_convolution_backward_input( IntList input_size, const at::Tensor& grad_output, const at::Tensor& weight, IntList padding, IntList stride, IntList dilation, int64_t groups, bool bias_defined) { - auto grad_input = grad_output.type().tensor(input_size); + auto grad_input = at::empty(input_size, grad_output.options()); auto cpu_engine = CpuEngine::Instance().get_engine(); @@ -294,11 +294,11 @@ std::tuple mkldnn_convolution_backward_weights( IntList weight_size, const at::Tensor& grad_output, const at::Tensor& input, IntList padding, IntList stride, IntList dilation, int64_t groups, bool bias_defined) { - auto grad_weight = grad_output.type().tensor(weight_size); + auto grad_weight = at::empty(weight_size, grad_output.options()); Tensor grad_bias; if (bias_defined) { - grad_bias = grad_output.type().tensor({grad_output.size(1)}); + grad_bias = at::empty({grad_output.size(1)}, grad_output.options()); } auto cpu_engine = CpuEngine::Instance().get_engine(); diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index f54c9110c21f2..2cc0995dabada 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -647,6 +647,8 @@ - func: empty_like(Tensor self, *, TensorOptions options) -> Tensor +- func: empty_strided(IntList size, IntList stride, *, TensorOptions options={}) -> Tensor + - func: erf(Tensor self) -> Tensor variants: function, method @@ -1887,11 +1889,13 @@ - func: native_tensor(Type self_ty) -> Tensor + variants: [] dispatch: SparseCPU: new_sparse SparseCUDA: new_sparse - func: native_tensor(Type self_ty, IntList size) -> Tensor + variants: [] dispatch: SparseCPU: new_with_size_sparse SparseCUDA: new_with_size_sparse @@ -1932,15 +1936,17 @@ SparseCPU: new_with_tensor_and_size_sparse SparseCUDA: new_with_tensor_and_size_sparse -- func: sparse_coo_tensor(Type dtype, IntList size) -> Tensor - variants: 
[] - - func: sparse_coo_tensor(IndexTensor indices, Tensor values) -> Tensor - variants: [] - func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size) -> Tensor - variants: [] +# FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given +# the default would never make sense. +- func: sparse_coo_tensor(IntList size, *, TensorOptions options) -> Tensor + +- func: sparse_coo_tensor(IndexTensor indices, Tensor values, *, TensorOptions options) -> Tensor + +- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size, *, TensorOptions options) -> Tensor - func: _native_sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size) -> Tensor variants: [] diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 49efed2a1e066..83aee52cf8102 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -286,8 +286,8 @@ SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { SparseTensor dst = new_sparse(self.type()); _get_sparse_impl(dst)->resize_(sparseDims, denseDims, self.sizes()); // TODO: is there a more idiomatic way to do this? - LongTensor newIndices = indices.type().tensor(indices.sizes()); - Tensor newValues = values.type().tensor(values.sizes()); + LongTensor newIndices = at::empty(indices.sizes(), indices.options()); + Tensor newValues = at::empty(values.sizes(), values.options()); _alias_into_sparse(dst, newIndices, newValues); LongTensor indicesBuffer; @@ -348,7 +348,7 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse int64_t sparseDims = mask._sparseDims(); LongTensor mask_indices = mask._indices(); Tensor mask_values = mask._values(); - Tensor r_values = r._values().type().tensor(mask_values.sizes()); + Tensor r_values = at::empty(mask_values.sizes(), r._values().options()); _alias_into_sparse(r, mask_indices.clone(), r_values); _get_sparse_impl(r)->set_coalesced(mask.is_coalesced()); int64_t r_nnz = mask._nnz(); @@ -392,7 +392,7 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse } SparseTensor sparse_mask_cpu(const Tensor& t, SparseTensorRef mask) { - SparseTensor r = t.type().toSparse().tensor(); + SparseTensor r = at::empty({0}, t.options().layout(kSparse)); sparse_mask_out_cpu(r, t, mask.tref); return r; } diff --git a/aten/src/ATen/native/sparse/SparseTensorMath.cpp b/aten/src/ATen/native/sparse/SparseTensorMath.cpp index ec074b5a6c8a8..8a8668fc48b8a 100644 --- a/aten/src/ATen/native/sparse/SparseTensorMath.cpp +++ b/aten/src/ATen/native/sparse/SparseTensorMath.cpp @@ -135,7 +135,7 @@ SparseTensor& pow_out_sparse_scalar(SparseTensor& r, const SparseTensor& t_, Sca } SparseTensor pow_sparse_scalar(const SparseTensor& t, Scalar value) { - SparseTensor r = t.type().tensor(); + SparseTensor r = at::empty({0}, t.options()); pow_out_sparse_scalar(r, t, value); return r; } @@ -208,7 +208,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S Tensor t_values = t._values(); LongTensor src_indices = src._indices(); Tensor s_values = src._values(); - LongTensor r_indices = t_indices.type().tensor({sparseDims, max_nnz}); + LongTensor r_indices = at::empty({sparseDims, max_nnz}, t_indices.options()); Tensor r_values = _new_values_with_size_of(s_values, max_nnz).zero_(); r.resize_as_(src); _get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); @@ -387,7 +387,7 @@ SparseTensor& 
mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor Tensor t_values = t._values(); LongTensor src_indices = src._indices(); Tensor s_values = src._values(); - LongTensor r_indices = t_indices.type().tensor({sparseDims, max_nnz}); + LongTensor r_indices = at::empty({sparseDims, max_nnz}, t_indices.options()); Tensor r_values = _new_values_with_size_of(t_values, max_nnz).zero_(); r.resize_as_(src); _get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); @@ -570,7 +570,7 @@ Tensor s_addmm_sparse_dense_cpu( Scalar beta, Scalar alpha ) { - Tensor r = t.type().tensor(); + Tensor r = at::empty({0}, t.options()); s_addmm_out_sparse_dense_cpu(r, t, sparse, dense, beta, alpha); return r; } @@ -646,7 +646,7 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, } int64_t outNnz = i + 1; indices.resize_({1, outNnz}); - Tensor values = dense.type().tensor({outNnz, n}); + Tensor values = at::empty({outNnz, n}, dense.options()); std::vector new_size = _get_sparse_impl(newSparse)->sizes().vec(); new_size[0] = outNnz; @@ -660,7 +660,7 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, } SparseTensor hspmm_sparse_cpu(const SparseTensor& sparse, const Tensor& dense) { - SparseTensor r = sparse.type().tensor(); + SparseTensor r = at::empty({0}, sparse.options()); hspmm_out_sparse_cpu(r, sparse, dense); return r; } @@ -787,7 +787,7 @@ Tensor& _sspaddmm_out_only_sparse(Tensor& result, const Tensor& self, // sparse, dense -> sparse Tensor smm(const Tensor& self, const Tensor& mat2) { - auto result = self.type().tensor(); + auto result = at::empty({0}, self.options()); at::sspaddmm_out(result, result, self, mat2, 0.0, 1.0); return result; } @@ -795,7 +795,7 @@ Tensor smm(const Tensor& self, const Tensor& mat2) { // sparse, sparse, dense, real, real -> sparse Tensor sspaddmm(const Tensor& self, const Tensor& mat1, const Tensor& mat2, Scalar beta, Scalar alpha) { - auto result = self.type().tensor(); + auto result = at::empty({0}, self.options()); at::sspaddmm_out(result, self, mat1, mat2, beta, alpha); return result; } diff --git a/aten/src/ATen/native/sparse/SparseUtils.h b/aten/src/ATen/native/sparse/SparseUtils.h index 3ce0eee53353e..2626eedebaf5e 100644 --- a/aten/src/ATen/native/sparse/SparseUtils.h +++ b/aten/src/ATen/native/sparse/SparseUtils.h @@ -110,7 +110,7 @@ inline LongTensor _newFlattenedIndices(const SparseTensor& self, bool forceClone inline Tensor _new_values_with_size_of(const Tensor& values, int64_t nnz) { std::vector size = values.sizes().vec(); size[0] = nnz; - return values.type().tensor(size); + return at::empty(size, values.options()); } diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp index 107a30f51c2a9..ab9fb15c62873 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp @@ -21,7 +21,7 @@ SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const Spars } LongTensor mask_indices = mask._indices(); Tensor mask_values = mask._values(); - Tensor r_values = r._values().type().tensor(mask_values.sizes()); + Tensor r_values = at::empty(mask_values.sizes(), r._values().options()); _alias_into_sparse(r, mask_indices.clone(), r_values); _get_sparse_impl(r)->set_coalesced(mask.is_coalesced()); _get_sparse_impl(r)->set_nnz_and_narrow(mask._nnz()); @@ -51,7 +51,7 @@ SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const Spars } 
SparseTensor sparse_mask_cuda(const Tensor& t, SparseTensorRef mask) { - SparseTensor r = t.type().toSparse().tensor(); + SparseTensor r = at::empty({0}, t.options().layout(kSparse)); sparse_mask_out_cuda(r, t, mask.tref); return r; } diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu index 15d9afc04307a..7579b90b70e07 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu @@ -228,7 +228,7 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse } SparseTensor hspmm_sparse_cuda(const SparseTensor& sparse, const Tensor& dense) { - SparseTensor r = sparse.type().tensor(); + SparseTensor r = at::empty({0}, sparse.options()); hspmm_out_sparse_cuda(r, sparse, dense); return r; } diff --git a/aten/src/ATen/preprocess_declarations.py b/aten/src/ATen/preprocess_declarations.py index d27d0da7240fc..68bb4ecc531ca 100644 --- a/aten/src/ATen/preprocess_declarations.py +++ b/aten/src/ATen/preprocess_declarations.py @@ -220,8 +220,6 @@ def signature(option, i=None, value=None): def is_extended_method(option): if 'method' in option['variants']: return False - elif not option['variants']: - return False else: return True diff --git a/aten/src/ATen/templates/NativeFunctions.h b/aten/src/ATen/templates/NativeFunctions.h index c6355127734b1..1ca3e495358cb 100644 --- a/aten/src/ATen/templates/NativeFunctions.h +++ b/aten/src/ATen/templates/NativeFunctions.h @@ -4,7 +4,6 @@ #include #include -#include #include #include @@ -17,7 +16,7 @@ namespace at { struct Generator; class Scalar; -struct Tensor; +class Tensor; struct Type; } // namespace at @@ -49,23 +48,23 @@ inline Tensor from_blob( } // These functions are defined in native/TensorFactories.cpp. 
-#define TENSOR(T, S, _1) \ - AT_API Tensor tensor(ArrayRef values, const TensorOptions& options); \ - inline Tensor tensor( \ - std::initializer_list values, const TensorOptions& options) { \ - return native::tensor(ArrayRef(values), options); \ - } \ - inline Tensor tensor(T value, const TensorOptions& options) { \ - return native::tensor(ArrayRef(value), options); \ - } \ - inline Tensor tensor(ArrayRef values) { \ - return native::tensor(std::move(values), at::dtype(k##S)); \ - } \ - inline Tensor tensor(std::initializer_list values) { \ - return native::tensor(ArrayRef(values)); \ - } \ - inline Tensor tensor(T value) { \ - return native::tensor(ArrayRef(value)); \ +#define TENSOR(T, S, _1) \ + CAFFE2_API Tensor tensor(ArrayRef values, const TensorOptions& options); \ + inline Tensor tensor( \ + std::initializer_list values, const TensorOptions& options) { \ + return native::tensor(ArrayRef(values), options); \ + } \ + inline Tensor tensor(T value, const TensorOptions& options) { \ + return native::tensor(ArrayRef(value), options); \ + } \ + inline Tensor tensor(ArrayRef values) { \ + return native::tensor(std::move(values), at::dtype(k##S)); \ + } \ + inline Tensor tensor(std::initializer_list values) { \ + return native::tensor(ArrayRef(values)); \ + } \ + inline Tensor tensor(T value) { \ + return native::tensor(ArrayRef(value)); \ } AT_FORALL_SCALAR_TYPES_EXCEPT_HALF(TENSOR) #undef TENSOR diff --git a/aten/src/ATen/templates/Tensor.h b/aten/src/ATen/templates/Tensor.h index 85e7c84961d6e..1d5ac020f231e 100644 --- a/aten/src/ATen/templates/Tensor.h +++ b/aten/src/ATen/templates/Tensor.h @@ -15,7 +15,7 @@ namespace at { struct Generator; struct Type; -struct Tensor; +class Tensor; struct TensorOptions; } // namespace at @@ -37,11 +37,12 @@ namespace at { // // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and // special care must be taken to handle this. -struct AT_API Tensor { +class CAFFE2_API Tensor { +public: Tensor(){}; Tensor(c10::intrusive_ptr tensor_impl) - : tensor_impl_(std::move(tensor_impl)) { - if (tensor_impl_.get() == nullptr) { + : impl_(std::move(tensor_impl)) { + if (impl_.get() == nullptr) { throw std::runtime_error("TensorBaseImpl with nullptr not supported"); } } @@ -50,25 +51,25 @@ struct AT_API Tensor { Tensor(Tensor&&) = default; int64_t dim() const { - return tensor_impl_->dim(); + return impl_->dim(); } TensorImpl * unsafeGetTensorImpl() const { - return tensor_impl_.get(); + return impl_.get(); } TensorImpl * unsafeReleaseTensorImpl() { - return tensor_impl_.release(); + return impl_.release(); } const c10::intrusive_ptr& getIntrusivePtr() const { - return tensor_impl_; + return impl_; } bool defined() const { - return tensor_impl_; + return impl_; } void reset() { - tensor_impl_.reset(); + impl_.reset(); } // The following overloads are very intruiging. 
Consider the following @@ -102,11 +103,11 @@ struct AT_API Tensor { // Tensor& operator=(const Tensor&) & = default; // Tensor& operator=(Tensor&&) & = default; Tensor& operator=(const Tensor& x) & { - tensor_impl_ = x.tensor_impl_; + impl_ = x.impl_; return *this; } Tensor& operator=(Tensor&& x) & { - tensor_impl_ = std::move(x.tensor_impl_); + impl_ = std::move(x.impl_); return *this; } @@ -115,37 +116,37 @@ struct AT_API Tensor { Tensor& operator=(Tensor&&) &&; bool is_same(const Tensor& other) const noexcept { - return tensor_impl_ == other.tensor_impl_; + return impl_ == other.impl_; } size_t use_count() const noexcept { - return tensor_impl_.use_count(); + return impl_.use_count(); } size_t weak_use_count() const noexcept { - return tensor_impl_.weak_use_count(); + return impl_.weak_use_count(); } const char * toString() const; IntList sizes() const { - return tensor_impl_->sizes(); + return impl_->sizes(); } IntList strides() const { - return tensor_impl_->strides(); + return impl_->strides(); } int64_t ndimension() const { return dim(); } Type & type() const { - return tensor_impl_->type(); + return impl_->type(); } TensorTypeId type_id() const { - return tensor_impl_->type_id(); + return impl_->type_id(); } ScalarType scalar_type() const { - return dataTypeToScalarType(tensor_impl_->dtype().id()); + return dataTypeToScalarType(impl_->dtype().id()); } const Storage& storage() const { - return tensor_impl_->storage(); + return impl_->storage(); } Tensor toType(const Type & t, bool non_blocking=false) const; Tensor & copy_(const Tensor & src, bool non_blocking=false); @@ -172,20 +173,12 @@ struct AT_API Tensor { template T * data() const; + template + T item() const; + // Purposely not defined here to avoid inlining void print() const; - //toLongData(), toFloatData() etc. - #define TO_TYPE_DATA(T,name,_) \ - T * to##name##Data() const; - AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(TO_TYPE_DATA) - #undef TO_TYPE_DATA - - #define TO_C_TYPE(T,name,_) \ - T toC##name () const; - AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(TO_C_TYPE) - #undef TO_C_TYPE - // Return a `TensorAccessor` for CPU `Tensor`s. You have to specify scalar type and // dimension. 
template @@ -230,18 +223,18 @@ struct AT_API Tensor { // ~~~~~ Autograd API ~~~~~ Tensor& set_requires_grad(bool requires_grad) { - tensor_impl_->set_requires_grad(requires_grad); + impl_->set_requires_grad(requires_grad); return *this; } bool requires_grad() const { - return tensor_impl_->requires_grad(); + return impl_->requires_grad(); } Tensor& grad() { - return tensor_impl_->grad(); + return impl_->grad(); } const Tensor& grad() const { - return tensor_impl_->grad(); + return impl_->grad(); } void set_data(Tensor new_data); @@ -267,35 +260,35 @@ struct AT_API Tensor { friend struct WeakTensor; protected: - c10::intrusive_ptr tensor_impl_; + c10::intrusive_ptr impl_; }; -struct AT_API WeakTensor { - WeakTensor(const Tensor& t) : weak_tensor_impl_(t.tensor_impl_) {} +struct CAFFE2_API WeakTensor { + WeakTensor(const Tensor& t) : weak_impl_(t.impl_) {} // XXX: this can return undefined tensors // Ideally it would be at::optional, but MSVC is too cool for that Tensor lock() const { - return Tensor(weak_tensor_impl_.lock()); + return Tensor(weak_impl_.lock()); } bool is_same(const WeakTensor& other) const noexcept { - return weak_tensor_impl_ == other.weak_tensor_impl_; + return weak_impl_ == other.weak_impl_; } size_t use_count() const noexcept { - return weak_tensor_impl_.use_count(); + return weak_impl_.use_count(); } size_t weak_use_count() const noexcept { - return weak_tensor_impl_.weak_use_count(); + return weak_impl_.weak_use_count(); } TensorImpl* unsafeGetTensorImpl() const { - return weak_tensor_impl_._unsafe_get_target(); + return weak_impl_._unsafe_get_target(); } private: - c10::weak_intrusive_ptr weak_tensor_impl_; + c10::weak_intrusive_ptr weak_impl_; }; } // namespace at diff --git a/aten/src/ATen/templates/TensorMethods.h b/aten/src/ATen/templates/TensorMethods.h index 8283bea01f6be..70f56bd37697d 100644 --- a/aten/src/ATen/templates/TensorMethods.h +++ b/aten/src/ATen/templates/TensorMethods.h @@ -81,16 +81,16 @@ inline Device Tensor::device() const { " but found ", \ at::toString(type().scalarType())); \ return static_cast(this->data_ptr()); \ - } \ - inline T* Tensor::to##name##Data() const { \ - return data(); \ } AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(DEFINE_CAST) #undef DEFINE_CAST -#define DEFINE_TO_C_TYPE(T,name,_) \ -inline T Tensor::toC##name () const { return _local_scalar().to##name (); } +#define DEFINE_TO_C_TYPE(T, name, _) \ + template <> \ + inline T Tensor::item() const { \ + return _local_scalar().to##name(); \ + } AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(DEFINE_TO_C_TYPE) #undef DEFINE_TO_C_TYPE diff --git a/aten/src/ATen/templates/Type.h b/aten/src/ATen/templates/Type.h index 0e00a5d3499fc..fbbf88823ea24 100644 --- a/aten/src/ATen/templates/Type.h +++ b/aten/src/ATen/templates/Type.h @@ -33,7 +33,7 @@ class Context; struct Allocator; struct Generator; struct Storage; -struct Tensor; +class Tensor; static inline void noop_deleter(void*) {} @@ -47,7 +47,7 @@ enum class TypeID { NumOptions }; -struct AT_API Type { +struct CAFFE2_API Type { explicit Type(TensorTypeId type_id, bool is_variable, bool is_undefined) : type_id_(type_id), is_variable_(is_variable), is_undefined_(is_undefined) {} @@ -140,7 +140,6 @@ struct AT_API Type { TensorTypeId type_id_; bool is_variable_; bool is_undefined_; - }; } // namespace at diff --git a/aten/src/ATen/templates/TypeDefault.h b/aten/src/ATen/templates/TypeDefault.h index e4a75abb48993..73c1f0f1d27cd 100644 --- a/aten/src/ATen/templates/TypeDefault.h +++ 
b/aten/src/ATen/templates/TypeDefault.h @@ -6,7 +6,7 @@ namespace at { -struct AT_API TypeDefault : public TypeExtendedInterface { +struct CAFFE2_API TypeDefault : public TypeExtendedInterface { explicit TypeDefault(TensorTypeId type_id, bool is_variable, bool is_undefined) : TypeExtendedInterface(type_id, is_variable, is_undefined) {} diff --git a/aten/src/ATen/templates/TypeExtendedInterface.h b/aten/src/ATen/templates/TypeExtendedInterface.h index 82cb658c9eeea..03af27f146b66 100644 --- a/aten/src/ATen/templates/TypeExtendedInterface.h +++ b/aten/src/ATen/templates/TypeExtendedInterface.h @@ -3,7 +3,7 @@ namespace at { -struct AT_API TypeExtendedInterface : public Type { +struct CAFFE2_API TypeExtendedInterface : public Type { explicit TypeExtendedInterface(TensorTypeId type_id, bool is_variable, bool is_undefined) : Type(type_id, is_variable, is_undefined) {} ${pure_virtual_extended_type_method_declarations} diff --git a/aten/src/ATen/test/apply_utils_test.cpp b/aten/src/ATen/test/apply_utils_test.cpp index 22be6de7acbc0..ab7e3522bbeda 100644 --- a/aten/src/ATen/test/apply_utils_test.cpp +++ b/aten/src/ATen/test/apply_utils_test.cpp @@ -37,10 +37,10 @@ void test(Type& type, IntList shape, int64_t a = 0, int64_t b = 1) { empty_t.fill_(3); empty_t.exp_(); - auto a0 = type.tensor(); - auto a1 = type.tensor(); - auto a2 = type.tensor(); - auto a3 = type.tensor(); + auto a0 = at::empty({0}, type.options()); + auto a1 = at::empty({0}, type.options()); + auto a2 = at::empty({0}, type.options()); + auto a3 = at::empty({0}, type.options()); auto a4 = CPU(kDouble).tensor(); std::vector tensors({a0, a1, a2, a3, a4}); diff --git a/aten/src/ATen/test/atest.cpp b/aten/src/ATen/test/atest.cpp index 8dffa3d7c02c7..edb3f79fd2d55 100644 --- a/aten/src/ATen/test/atest.cpp +++ b/aten/src/ATen/test/atest.cpp @@ -18,7 +18,7 @@ void trace() { trace += foo_a[i][i]; } - EXPECT_FLOAT_EQ(foo.trace().toCFloat(), trace); + EXPECT_FLOAT_EQ(foo.trace().item(), trace); } // TEST_CASE( "atest", "[]" ) { @@ -27,7 +27,6 @@ TEST(atest, atest) { manual_seed(123, at::kCUDA); auto foo = rand({12,6}); - EXPECT_EQ(foo.data(), foo.toFloatData()); EXPECT_EQ(foo.size(0), 12); EXPECT_EQ(foo.size(1), 6); diff --git a/aten/src/ATen/test/basic.cpp b/aten/src/ATen/test/basic.cpp index c64fdec0089df..361d24b5a6b76 100644 --- a/aten/src/ATen/test/basic.cpp +++ b/aten/src/ATen/test/basic.cpp @@ -21,7 +21,7 @@ using Catch::Matchers::StartsWith; static void test(Type & type) { CATCH_SECTION( "resize" ) { - auto a = type.tensor(); + auto a = at::empty({0}, type.options()); a.resize_({3,4}); CATCH_REQUIRE(a.numel() == 12); a.resize_({5, 7}); @@ -31,15 +31,15 @@ static void test(Type & type) { CATCH_SECTION( "ones and dot" ) { Tensor b0 = ones({1, 1}, type); - CATCH_REQUIRE(2 == (b0+b0).sum().toCDouble()); + CATCH_REQUIRE(2 == (b0+b0).sum().item()); Tensor b1 = ones({1, 2}, type); - CATCH_REQUIRE(4 == (b1+b1).sum().toCDouble()); + CATCH_REQUIRE(4 == (b1+b1).sum().item()); Tensor b = ones({3, 4}, type); - CATCH_REQUIRE(24 == (b+b).sum().toCDouble()); + CATCH_REQUIRE(24 == (b+b).sum().item()); CATCH_REQUIRE(12 == b.numel()); - CATCH_REQUIRE(b.view(-1).dot(b.view(-1)).toCDouble() == 12); + CATCH_REQUIRE(b.view(-1).dot(b.view(-1)).item() == 12); } CATCH_SECTION( "rand" ) { @@ -54,7 +54,7 @@ static void test(Type & type) { auto z = b.sort(1); auto z_sorted = std::get<0>(z); - CATCH_REQUIRE(z_sorted[0][0].toCFloat() < z_sorted[0][1].toCFloat()); + CATCH_REQUIRE(z_sorted[0][0].item() < z_sorted[0][1].item()); } if(type.backend() != Backend::CUDA) 
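The test updates in this file and the ones that follow replace the per-type scalar accessors (toCFloat, toCDouble, toCInt, toCLong, toCHalf) with the templated Tensor::item() introduced in templates/Tensor.h and TensorMethods.h above. A short sketch of the new call pattern (the tensor t is illustrative):

    at::Tensor t = at::ones({});       // zero-dimensional tensor
    float   f = t.item<float>();       // was t.toCFloat()
    double  d = t.item<double>();      // was t.toCDouble()
    int64_t n = t.item<int64_t>();     // was t.toCLong()
    // As the scalar_test change exercises, item() on a tensor with more than
    // one element throws instead of silently picking a value:
    //   at::ones({1, 2}).item<float>();   // error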
@@ -62,7 +62,7 @@ static void test(Type & type) { Tensor b = randperm(15, type); Tensor rv, ri; std::tie(rv, ri) = sort(b, 0); - CATCH_REQUIRE(rv[0].toCFloat() <= rv[1].toCFloat()); + CATCH_REQUIRE(rv[0].item() <= rv[1].item()); } CATCH_SECTION( "context" ) { @@ -89,7 +89,7 @@ static void test(Type & type) { auto end = std::chrono::high_resolution_clock::now(); //TODO TEST PERF? std::cout << std::dec << " " << std::chrono::duration_cast(end-begin).count() << " ms" << std::endl; - CATCH_REQUIRE(norm(100000*d).toCDouble() == norm(r).toCDouble()); + CATCH_REQUIRE(norm(100000*d).item() == norm(r).item()); } CATCH_SECTION( "loads of adds (with copy)" ) { @@ -102,7 +102,7 @@ static void test(Type & type) { auto end = std::chrono::high_resolution_clock::now(); //TODO TEST PERF? std::cout << std::dec << " " << std::chrono::duration_cast(end-begin).count() << " ms" << std::endl; - CATCH_REQUIRE(norm(100000*d).toCDouble() == norm(r).toCDouble()); + CATCH_REQUIRE(norm(100000*d).item() == norm(r).item()); } CATCH_SECTION( "isContiguous" ) { @@ -154,7 +154,7 @@ static void test(Type & type) { CATCH_SECTION( "abs(value)" ) { Tensor r = at::abs(type.scalarTensor(-3)); - CATCH_REQUIRE(r.toCInt() == 3); + CATCH_REQUIRE(r.item() == 3); } //TODO(zach): operator overloads @@ -195,7 +195,7 @@ static void test(Type & type) { auto f = rand({3,4}, type); f[2] = zeros({4}, type); f[1][0] = -1; - CATCH_REQUIRE(f[2][0].toCDouble() == 0); + CATCH_REQUIRE(f[2][0].item() == 0); } CATCH_SECTION( "tensor from TH" ) { @@ -206,14 +206,14 @@ static void test(Type & type) { CATCH_REQUIRE_NOTHROW(tt); } - CATCH_SECTION( "toCFloat" ) { + CATCH_SECTION( "item" ) { Tensor a = zeros({3,4}); Tensor b = ones({3,7}); Tensor c = cat({a,b},1); CATCH_REQUIRE(c.size(1) == 11); Tensor e = rand({}); - CATCH_REQUIRE(*e.data() == e.sum().toCFloat()); + CATCH_REQUIRE(*e.data() == e.sum().item()); } CATCH_SECTION( "to string" ) { diff --git a/aten/src/ATen/test/scalar_tensor_test.cpp b/aten/src/ATen/test/scalar_tensor_test.cpp index 964f6260e7d9f..a89ca81da017f 100644 --- a/aten/src/ATen/test/scalar_tensor_test.cpp +++ b/aten/src/ATen/test/scalar_tensor_test.cpp @@ -234,7 +234,7 @@ void test(Type &T) { [&]() { int64_t dim0 = lhs.dim() == 0 ? 1 : lhs.size(0); int64_t dim1 = rhs.dim() == 0 ? 
1 : rhs.size(0); - require_equal_size_dim(result, result.type().tensor({dim0, dim1})); + require_equal_size_dim(result, at::empty({dim0, dim1}, result.options())); }();); } diff --git a/aten/src/ATen/test/scalar_test.cpp b/aten/src/ATen/test/scalar_test.cpp index 247830c3cc839..10ffa9afc326f 100644 --- a/aten/src/ATen/test/scalar_test.cpp +++ b/aten/src/ATen/test/scalar_test.cpp @@ -71,7 +71,7 @@ CATCH_TEST_CASE( "scalar test", "[]" ) { auto t = ones({4,4}); auto wha2 = zeros({4,4}).add(t).sum(); - CATCH_REQUIRE( wha2.toCDouble() == 16.0 ); + CATCH_REQUIRE( wha2.item() == 16.0 ); CATCH_REQUIRE( t.sizes()[0] == 4 ); CATCH_REQUIRE( t.sizes()[1] == 4 ); @@ -116,10 +116,10 @@ CATCH_TEST_CASE( "scalar test", "[]" ) { // test direct C-scalar type conversions { auto x = ones({1,2}, T); - _CATCH_REQUIRE_THROWS(x.toCFloat()); + _CATCH_REQUIRE_THROWS(x.item()); } auto float_one = ones({}, T); - CATCH_REQUIRE(float_one.toCFloat() == 1); - CATCH_REQUIRE(float_one.toCInt() == 1); - CATCH_REQUIRE((float_one.toCHalf() == 1)); + CATCH_REQUIRE(float_one.item() == 1); + CATCH_REQUIRE(float_one.item() == 1); + CATCH_REQUIRE((float_one.item() == 1)); } diff --git a/aten/src/TH/THAllocator.h b/aten/src/TH/THAllocator.h index 578d689400baf..4cb316adc0174 100644 --- a/aten/src/TH/THAllocator.h +++ b/aten/src/TH/THAllocator.h @@ -32,8 +32,8 @@ TH_API THAllocator* getTHDefaultAllocator(void); // the non-file descriptor constructor enum WithFd { WITH_FD }; -class AT_API THMapAllocator { -public: +class CAFFE2_API THMapAllocator { + public: THMapAllocator(const char *filename, int flags, size_t size); THMapAllocator(WithFd, const char *filename, int fd, int flags, size_t size); THMapAllocator(const THMapAllocator&) = delete; @@ -82,12 +82,14 @@ class AT_API THMapAllocator { }; // Base-from-member idiom -struct AT_API THRefcountedMapAllocatorArgCheck { +struct CAFFE2_API THRefcountedMapAllocatorArgCheck { THRefcountedMapAllocatorArgCheck(int flags); }; -class AT_API THRefcountedMapAllocator : private THRefcountedMapAllocatorArgCheck, public THMapAllocator { -public: +class CAFFE2_API THRefcountedMapAllocator + : private THRefcountedMapAllocatorArgCheck, + public THMapAllocator { + public: THRefcountedMapAllocator(const char *filename, int flags, size_t size); THRefcountedMapAllocator(WithFd, const char *filename, int fd, int flags, size_t size); diff --git a/aten/src/TH/generic/THTensorFastGetSet.hpp b/aten/src/TH/generic/THTensorFastGetSet.hpp index f3c202fd4234b..f430839565471 100644 --- a/aten/src/TH/generic/THTensorFastGetSet.hpp +++ b/aten/src/TH/generic/THTensorFastGetSet.hpp @@ -3,47 +3,47 @@ #else static inline scalar_t THTensor_(fastGetLegacy1dNoScalars)(THTensor *self, int64_t x0) { - return (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*THTensor_strideLegacyNoScalars(self, 0)]; + return self->unsafe_data()[x0*THTensor_strideLegacyNoScalars(self, 0)]; } static inline scalar_t THTensor_(fastGet1d)(THTensor *self, int64_t x0) { - return (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)]; + return self->unsafe_data()[x0*self->stride(0)]; } static inline scalar_t THTensor_(fastGet2d)(THTensor *self, int64_t x0, int64_t x1) { - return (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)]; + return self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)]; } static inline scalar_t THTensor_(fastGet3d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2) { - return 
(THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)+(x2)*self->stride(2)]; + return self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)]; } static inline scalar_t THTensor_(fastGet4d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3) { - return (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)+(x2)*self->stride(2)+(x3)*self->stride(3)]; + return self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)]; } static inline scalar_t THTensor_(fastGet5d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, int64_t x4) { - return (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)+(x2)*self->stride(2)+(x3)*self->stride(3)+(x4)*self->stride(4)]; + return self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)+(x4)*self->stride(4)]; } static inline void THTensor_(fastSet1d)(THTensor *self, int64_t x0, scalar_t value) { - (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)] = value; + self->unsafe_data()[x0*self->stride(0)] = value; } static inline void THTensor_(fastSet2d)(THTensor *self, int64_t x0, int64_t x1, scalar_t value) { - (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)] = value; + self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)] = value; } static inline void THTensor_(fastSet3d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, scalar_t value) { - (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)+(x2)*self->stride(2)] = value; + self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)] = value; } static inline void THTensor_(fastSet4d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, scalar_t value) { - (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)+(x2)*self->stride(2)+(x3)*self->stride(3)] = value; + self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)] = value; } static inline void THTensor_(fastSet5d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, int64_t x4, scalar_t value) { - (THStorage_(data)(THTensor_getStoragePtr(self))+self->storage_offset())[(x0)*self->stride(0)+(x1)*self->stride(1)+(x2)*self->stride(2)+(x3)*self->stride(3)+(x4)*self->stride(4)] = value; + self->unsafe_data()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)+(x4)*self->stride(4)] = value; } #endif diff --git a/aten/src/THC/THCAllocator.h b/aten/src/THC/THCAllocator.h index 8dadcc034b2c2..323f745a4ac30 100644 --- a/aten/src/THC/THCAllocator.h +++ b/aten/src/THC/THCAllocator.h @@ -7,8 +7,8 @@ THC_API THAllocator* getTHCudaHostAllocator(void); // IPC doesn't support (re)allocation #ifdef __cplusplus -class AT_API THCIpcDeleter { -public: +class CAFFE2_API THCIpcDeleter { + public: THCIpcDeleter(void* data, int device) : data_(data), device_(device) {}; ~THCIpcDeleter(); static at::DataPtr makeDataPtr(void* data, int device); diff --git a/binaries/benchmark_helper.cc b/binaries/benchmark_helper.cc index 001c8e965f6a6..f481a6292c7f5 100644 --- a/binaries/benchmark_helper.cc +++ b/binaries/benchmark_helper.cc @@ -163,7 +163,7 @@ void 
loadInput( CAFFE_THROW("Not support GPU on mobile."); #endif } else { - caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU); + caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU); CHECK_NOTNULL(tensor); tensor->Resize(input_dims); if (input_type_list[i] == "uint8_t") { @@ -200,7 +200,7 @@ void fillInputBlob( int protos_size = tensor_kv.second.protos_size(); caffe2::TensorProto* tensor_proto = tensor_kv.second.mutable_protos(iteration % protos_size); - caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU); + caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU); if (tensor_proto->data_type() == caffe2::TensorProto::STRING) { int total_size = tensor_proto->string_data_size(); for (size_t i = 0; i < total_size; i++) { @@ -298,7 +298,7 @@ void writeOutput( #endif } else { writeTextOutput( - workspace->GetBlob(name)->GetMutableTensor(caffe2::CPU), + BlobGetMutableTensor(workspace->GetBlob(name), caffe2::CPU), output_prefix, name); } diff --git a/binaries/speed_benchmark.cc b/binaries/speed_benchmark.cc index 5914e3f58b44b..fd502cf3c078a 100644 --- a/binaries/speed_benchmark.cc +++ b/binaries/speed_benchmark.cc @@ -137,7 +137,7 @@ int main(int argc, char** argv) { if (blob == nullptr) { blob = workspace->CreateBlob(input_names[i]); } - caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU); + caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU); CHECK_NOTNULL(tensor); tensor->Resize(input_dims); if (input_type_list[i] == "uint8_t") { diff --git a/binaries/tutorial_blob.cc b/binaries/tutorial_blob.cc index f379eac663cbe..ac74ebb5ffb78 100644 --- a/binaries/tutorial_blob.cc +++ b/binaries/tutorial_blob.cc @@ -47,7 +47,7 @@ int main(int argc, char** argv) { LOG(INFO) << "Is the blob type float? " << myblob.IsType(); - + const int& myint_const = myblob.Get(); LOG(INFO) << "The value of the int number stored in the blob is: " @@ -80,7 +80,7 @@ int main(int argc, char** argv) { std::string* pvec = new std::string(); myblob.Reset(pvec); // no need to release pvec, myblob takes ownership. - + LOG(INFO) << "Is the blob now of type string? " << myblob.IsType(); diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt new file mode 100644 index 0000000000000..4b7bab4f42eeb --- /dev/null +++ b/c10/CMakeLists.txt @@ -0,0 +1,43 @@ +# Main build file for the C10 library. +# +# Note that the C10 library should maintain minimal dependencies - especially, +# it should not depend on any library that is implementation specific or +# backend specific. It should in particular NOT be dependent on any generated +# protobuf header files, because protobuf header files will transitively force +# one to link against a specific protobuf version. + +# ---[ Configure macro file. +set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in +configure_file( + ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in + ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h) + +# Note: if you want to add ANY dependency to the c10 library, make sure you +# check with the core PyTorch developers as the dependendency will be +# transitively passed on to all libraries dependent on PyTorch. +file(GLOB_RECURSE C10_SRCS *.cpp) +file(GLOB_RECURSE C10_HEADERS *.h) +add_library(c10 ${C10_SRCS} ${C10_HEADERS}) +# If building shared library, set dllimport/dllexport proper. +target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB") +# Enable hidden visibility if compiler supports it. 
+if (${COMPILER_SUPPORTS_HIDDEN_VISIBILITY}) + target_compile_options(c10 PRIVATE "-fvisibility=hidden") +endif() + +target_include_directories( + c10 PUBLIC + $ + $ + $) + +# ---[ Installation +# Note: for now, we will put all export path into one single Caffe2Targets group +# to deal with the cmake deployment need. Inside the Caffe2Targets set, the +# individual libraries like libc10.so and libcaffe2.so are still self-contained. +install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib) +install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + DESTINATION include + FILES_MATCHING PATTERN "*.h") +install(FILES ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h + DESTINATION include/c10/macros) diff --git a/c10/c10_dummy.cpp b/c10/c10_dummy.cpp new file mode 100644 index 0000000000000..df4e73171da3f --- /dev/null +++ b/c10/c10_dummy.cpp @@ -0,0 +1,7 @@ +#include "c10/c10_dummy.h" + +namespace c10 { +bool HasC10() { + return true; +} +} // namespace c10 diff --git a/c10/c10_dummy.h b/c10/c10_dummy.h new file mode 100644 index 0000000000000..cf6c6b30c14bb --- /dev/null +++ b/c10/c10_dummy.h @@ -0,0 +1,7 @@ +#pragma once + +#include "c10/macros/Macros.h" + +namespace c10 { +C10_API bool HasC10(); +} diff --git a/c10/macros/Export.h b/c10/macros/Export.h new file mode 100644 index 0000000000000..8e593e0100bbf --- /dev/null +++ b/c10/macros/Export.h @@ -0,0 +1,76 @@ +/* Header file to define the common scaffolding for exported symbols. + * + * Export is by itself a quite tricky situation to deal with, and if you are + * hitting this file, make sure you start with the background here: + * - Linux: https://gcc.gnu.org/wiki/Visibility + * - Windows: + * https://docs.microsoft.com/en-us/cpp/cpp/dllexport-dllimport?view=vs-2017 + * + * Do NOT include this file directly. Instead, use c10/macros/Macros.h + */ + +#pragma once + +// You do not need to edit this part of file unless you are changing the core +// pytorch export abstractions. +// +// This part defines the C10 core export and import macros. This is controlled +// by whether we are building shared libraries or not, which is determined +// during build time and codified in c10/core/cmake_macros.h. +// When the library is built as a shared lib, EXPORT and IMPORT will contain +// visibility attributes. If it is being built as a static lib, then EXPORT +// and IMPORT basically have no effect. + +// As a rule of thumb, you should almost NEVER mix static and shared builds for +// libraries that depend on c10. AKA, if c10 is built as a static library, we +// recommend everything dependent on c10 to be built statically. If c10 is built +// as a shared library, everything dependent on it should be built as shared. In +// the PyTorch project, all native libraries shall use the macro +// C10_BUILD_SHARED_LIB to check whether pytorch is building shared or static +// libraries. + +#ifdef _WIN32 +#if defined(C10_BUILD_SHARED_LIBS) +#define C10_EXPORT __declspec(dllexport) +#define C10_IMPORT __declspec(dllimport) +#else +#define C10_EXPORT +#define C10_IMPORT +#endif +#else // _WIN32 +#if defined(__GNUC__) +#define C10_EXPORT __attribute__((__visibility__("default"))) +#else // defined(__GNUC__) +#define C10_EXPORT +#endif // defined(__GNUC__) +#define C10_IMPORT C10_EXPORT +#endif // _WIN32 + +// Definition of an adaptive XX_API macro, that depends on whether you are +// building the library itself or not, routes to XX_EXPORT and XX_IMPORT. 
+// Basically, you will need to do this for each shared library that you are +// building, and the instructions are as follows. Assuming that you are building +// a library called libawesome.so, you should: +// (1) for your cmake target (usually done by "add_library(awesome, ...)"), +// define a macro called AWESOME_BUILD_MAIN_DLL using +// target_compile_options. +// (2) define the AWESOME_API macro similar to the one below. +// And in the source file of your awesome library, use AWESOME_API to +// annotate public symbols. + +// Here, for the C10 library, we will define the macro C10_API for both import +// and export. + +// This one is being used by libc10.so +#ifdef C10_BUILD_MAIN_DLL +#define C10_API C10_EXPORT +#else +#define C10_API C10_IMPORT +#endif + +// This one is being used by libcaffe2.so +#ifdef CAFFE2_BUILD_MAIN_LIB +#define CAFFE2_API C10_EXPORT +#else +#define CAFFE2_API C10_IMPORT +#endif diff --git a/c10/macros/Legacy.h b/c10/macros/Legacy.h new file mode 100644 index 0000000000000..86752a838acd3 --- /dev/null +++ b/c10/macros/Legacy.h @@ -0,0 +1,7 @@ +/* A centralized location to provide legacy macro support, and a warning about + * when this legacy compatibility symbol is going to be removed in the future. + * + * Do NOT include this file directly. Instead, use c10/macros/Macros.h + */ + +#pragma once diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h new file mode 100644 index 0000000000000..2b438d670f00d --- /dev/null +++ b/c10/macros/Macros.h @@ -0,0 +1,32 @@ +/* Main entry for c10/macros. + * + * In your code, include c10/macros/Macros.h directly, instead of individual + * files in this folder. + */ + +#pragma once + +// For build systems that do not directly depend on CMake and directly build +// from the source directory (such as Buck), one may not have a cmake_macros.h +// file at all. In this case, the build system is responsible for providing +// correct macro definitions corresponding to the cmake_macros.h.in file. +// +// In such scenarios, one should define the macro +// C10_USING_CUSTOM_GENERATED_MACROS +// to inform this header that it does not need to include the cmake_macros.h +// file. + +#ifndef C10_USING_CUSTOM_GENERATED_MACROS +#include "c10/macros/cmake_macros.h" +#endif // C10_USING_CUSTOM_GENERATED_MACROS + +#include "c10/macros/Export.h" + +// Disable the copy and assignment operator for a class. Note that this will +// disable the usage of the class in std containers. +#define C10_DISABLE_COPY_AND_ASSIGN(classname) \ + classname(const classname&) = delete; \ + classname& operator=(const classname&) = delete + +// Finally, file that provides legacy support for macros +#include "c10/macros/Legacy.h" diff --git a/c10/macros/cmake_macros.h.in b/c10/macros/cmake_macros.h.in new file mode 100644 index 0000000000000..73bc803f06355 --- /dev/null +++ b/c10/macros/cmake_macros.h.in @@ -0,0 +1,6 @@ +// Automatically generated header file for the C10 library. +// Do not include this file directly. Instead, include c10/macros/Macros.h. + +#pragma once + +#cmakedefine C10_BUILD_SHARED_LIBS diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index d7490686ab757..3dda37c4c5b1f 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -194,7 +194,6 @@ target_include_directories(caffe2_protos INTERFACE $) target_link_libraries(caffe2_protos PUBLIC protobuf::libprotobuf) # Compile exposed libraries.
-list(APPEND Caffe2_CPU_SRCs $) add_library(caffe2 ${Caffe2_CPU_SRCS}) if (NOT WIN32) target_compile_options(caffe2 PRIVATE "-fvisibility=hidden") @@ -206,6 +205,7 @@ if (${CAFFE2_LINK_LOCAL_PROTOBUF}) else() target_link_libraries(caffe2 PUBLIC protobuf::libprotobuf) endif() +target_link_libraries(caffe2 PUBLIC c10) target_link_libraries(caffe2 PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS}) target_link_libraries(caffe2 PRIVATE ${Caffe2_DEPENDENCY_LIBS}) target_link_libraries(caffe2 PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}) @@ -333,7 +333,7 @@ if(USE_CUDA) # NB: This must be target_compile_definitions, not target_compile_options, # as the latter is not respected by nvcc if (MSVC) - target_compile_definitions(caffe2_gpu PRIVATE "-DCAFFE2_CUDA_BUILD_MAIN_LIB") + target_compile_definitions(caffe2_gpu PRIVATE "-DCAFFE2_CUDA_BUILD_MAIN_LIB") endif() # Set standard properties on the target diff --git a/caffe2/contrib/gloo/allgather_ops.h b/caffe2/contrib/gloo/allgather_ops.h index 1f55233a095c8..f97a00f8956ee 100644 --- a/caffe2/contrib/gloo/allgather_ops.h +++ b/caffe2/contrib/gloo/allgather_ops.h @@ -114,7 +114,7 @@ class AllgatherOp final : public Operator { params.size = Input(1).size(); params.meta = Input(1).meta(); for (auto i = 0; i < params.inputs.size(); i++) { - params.inputs[i] = Input(i + 1).template raw_data(); + params.inputs[i] = Input(i + 1).raw_data(); } params.outputs.resize(OutputSize()); params.outputs[0] = Output(0)->raw_mutable_data(params.meta); diff --git a/caffe2/contrib/gloo/allreduce_ops.h b/caffe2/contrib/gloo/allreduce_ops.h index 85d10c313085f..f3b1bd3560b3d 100644 --- a/caffe2/contrib/gloo/allreduce_ops.h +++ b/caffe2/contrib/gloo/allreduce_ops.h @@ -117,8 +117,8 @@ class AllreduceOp final : public Operator { params.inputs.resize(InputSize() - 1); params.outputs.resize(OutputSize()); for (auto i = 0; i < params.inputs.size(); i++) { - params.inputs[i] = Input(i + 1).template raw_data(); - params.outputs[i] = Output(i)->template raw_mutable_data(); + params.inputs[i] = Input(i + 1).raw_data(); + params.outputs[i] = Output(i)->raw_mutable_data(); } params.size = Output(0)->size(); params.meta = Output(0)->meta(); diff --git a/caffe2/contrib/gloo/broadcast_ops.h b/caffe2/contrib/gloo/broadcast_ops.h index e525b8e158f4c..171dbbd8c97a1 100644 --- a/caffe2/contrib/gloo/broadcast_ops.h +++ b/caffe2/contrib/gloo/broadcast_ops.h @@ -95,8 +95,8 @@ class BroadcastOp final : public Operator { params.inputs.resize(InputSize() - 1); params.outputs.resize(OutputSize()); for (auto i = 0; i < params.inputs.size(); i++) { - params.inputs[i] = Input(i + 1).template raw_data(); - params.outputs[i] = Output(i)->template raw_mutable_data(); + params.inputs[i] = Input(i + 1).raw_data(); + params.outputs[i] = Output(i)->raw_mutable_data(); } params.size = Output(0)->size(); params.meta = Output(0)->meta(); diff --git a/caffe2/contrib/gloo/common.cc b/caffe2/contrib/gloo/common.cc index 21ce0343d8181..d4929938f1917 100644 --- a/caffe2/contrib/gloo/common.cc +++ b/caffe2/contrib/gloo/common.cc @@ -12,7 +12,7 @@ namespace caffe2 { namespace gloo { void signalFailure(Blob* status_blob, std::exception& /* unused */) { - auto* res = status_blob->GetMutableTensor(CPU); + auto* res = BlobGetMutableTensor(status_blob, CPU); res->Resize(1); res->template mutable_data()[0] = 1; } diff --git a/caffe2/contrib/gloo/reduce_scatter_ops.h b/caffe2/contrib/gloo/reduce_scatter_ops.h index 069c523869493..559b35618a108 100644 --- a/caffe2/contrib/gloo/reduce_scatter_ops.h +++ 
b/caffe2/contrib/gloo/reduce_scatter_ops.h @@ -108,15 +108,15 @@ class ReduceScatterOp final : public Operator { params.inputs.resize(InputSize() - 2); params.outputs.resize(OutputSize() - 1); for (auto i = 0; i < params.inputs.size(); i++) { - params.inputs[i] = Input(i + 1).template raw_data(); - params.outputs[i] = Output(i)->template raw_mutable_data(); + params.inputs[i] = Input(i + 1).raw_data(); + params.outputs[i] = Output(i)->raw_mutable_data(); } params.size = Output(0)->size(); params.meta = Output(0)->meta(); // Verify recvCountsSize == comm_size CAFFE_ENFORCE_EQ(Input(InputSize() - 1).size(), params.context->size); - int* recvCounts = (int*)Input(InputSize() - 1).template raw_data(); + int* recvCounts = (int*)Input(InputSize() - 1).raw_data(); recvCounts_.assign(recvCounts, recvCounts + Input(InputSize() - 1).size()); } diff --git a/caffe2/contrib/nccl/cuda_nccl_gpu.cc b/caffe2/contrib/nccl/cuda_nccl_gpu.cc index b544445a26873..490a69b91abf5 100644 --- a/caffe2/contrib/nccl/cuda_nccl_gpu.cc +++ b/caffe2/contrib/nccl/cuda_nccl_gpu.cc @@ -72,7 +72,7 @@ class NCCLContext { cudaEvent_t master_event_; std::vector events_; - AT_DISABLE_COPY_AND_ASSIGN(NCCLContext); + C10_DISABLE_COPY_AND_ASSIGN(NCCLContext); }; // We share the contexts across multiple operators, hence the diff --git a/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc b/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc index 972d9231dcf9c..9eee8973142ed 100644 --- a/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc +++ b/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc @@ -22,7 +22,7 @@ static void AddConstInput(const std::vector& shape, const float value, option.set_device_type(PROTO_CUDA); CUDAContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->Resize(shape); math::Set(tensor->size(), value, tensor->mutable_data(), diff --git a/caffe2/contrib/tensorrt/tensorrt_tranformer.cc b/caffe2/contrib/tensorrt/tensorrt_tranformer.cc index 3612d8b46f1f8..2dd17e0016990 100644 --- a/caffe2/contrib/tensorrt/tensorrt_tranformer.cc +++ b/caffe2/contrib/tensorrt/tensorrt_tranformer.cc @@ -95,10 +95,10 @@ void BlobToTensorProto( } // Set values - if (blob->IsTensorType(CPU)) { + if (BlobIsTensorType(*blob, CPU)) { const auto& cpu_tensor = blob->template Get(); CPUTensorToTensorProto(cpu_tensor, t); - } else if (blob->IsTensorType(CUDA)) { + } else if (BlobIsTensorType(*blob, CUDA)) { const auto& cuda_tensor = blob->template Get(); const auto cpu_tensor = TensorCPU(cuda_tensor, context); context->FinishDeviceComputation(); diff --git a/caffe2/core/blob.h b/caffe2/core/blob.h index 870fc88322b15..e09a54cbd2df5 100644 --- a/caffe2/core/blob.h +++ b/caffe2/core/blob.h @@ -6,246 +6,37 @@ #include #include #include - -#include "caffe2/core/blob_serializer_base.h" #include "caffe2/core/common.h" + +#include +#include #include "caffe2/core/logging.h" #include "caffe2/core/tensor.h" -#include "caffe2/core/typeid.h" -#include "caffe2/proto/caffe2_pb.h" namespace caffe2 { -/** - * @brief Blob is a general container that hosts a typed pointer. - * - * A Blob hosts a pointer as well as its type, and takes charge of deleting it - * properly when the blob is deallocated or re-allocated with a new type. A blob - * could contain anything, although the most common case is to contain a Tensor. - */ -class CAFFE2_API Blob final { - public: - using DestroyCall = void(void*); - - /** - * Initializes an empty Blob. 
- */ - Blob() noexcept : meta_(), pointer_(nullptr), destroy_(nullptr) {} - ~Blob() { Reset(); } - - Blob(Blob&& other) noexcept : Blob() { - swap(other); - } - - Blob& operator=(Blob&& other) noexcept { - Blob(std::move(other)).swap(*this); - return *this; - } - - /** - * Checks if the content stored in the blob is of type T. - */ - template - bool IsType() const noexcept { - return meta_.Match(); - } - - bool IsTensorType(DeviceType device_type) const { - bool is_match = meta_.Match(); - auto* tensor = static_cast(pointer_); - if (is_match && tensor && tensor->GetDeviceType() == device_type) { - return true; - } +inline bool BlobIsTensorType(const Blob& blob, DeviceType device_type) { + bool is_match = blob.meta().Match(); + if (!is_match) { return false; } + const Tensor* tensor = &blob.Get(); + return tensor && tensor->GetDeviceType() == device_type; +} - /** - * Returns the meta info of the blob. - */ - inline const TypeMeta& meta() const noexcept { return meta_; } - - /** - * Returns a printable typename of the blob. - */ - inline const char* TypeName() const noexcept { return meta_.name(); } - - /** - * @brief Gets the const reference of the stored object. The code checks if - * the stored object is of the desired type. - */ - // TODO(jerryzh): add a Get(DeviceType) function? - template - const T& Get() const { - CAFFE_ENFORCE( - IsType(), - "wrong type for the Blob instance. Blob contains ", - meta_.name(), - " while caller expects ", - TypeMeta::TypeName()); - // TODO: after we add Get(DeviceType) - // and changed all the callsites, we can add - // a static assert here to enforce T != Tensor - return *static_cast(pointer_); - } - - const void* GetRaw() const noexcept { - return pointer_; - } - void* GetRaw() noexcept { - return pointer_; - } - - /** - * @brief Gets a mutable pointer to the stored object. - * - * If the current object is not of the right type, a new object is created - * and the old object is freed. Note that type T should have a default - * constructor. Otherwise, create the object yourself first, and use - * Reset(). - */ - template - T* GetMutable() { - static_assert( - std::is_default_constructible::value, - "GetMutable can't be called with non-default-constructible types. " - "Try using specialized methods"); - static_assert( - !std::is_same::value, - "Use GetMutableTensor(DeviceType) instead"); - if (IsType()) { - return static_cast(pointer_); - } else { - VLOG(1) << "Create new mutable object " << TypeMeta::TypeName(); - return Reset(new T()); - } - } - - template - T* GetMutableOrNull() { - if (IsType()) { - return static_cast(pointer_); - } else { - return nullptr; - } - } - - inline Tensor* GetMutableTensor(DeviceType device_type) { - if (IsTensorType(device_type)) { - return static_cast(pointer_); - } else { - VLOG(1) << "Create new mutable object " << TypeMeta::TypeName() - << " DeviceType:" << device_type; - return Reset(new Tensor(device_type)); - } - } - - /** - * Sets the underlying object to the allocated one. The Blob then takes over - * the ownership of the passed in pointer. If there is already an object in - * the Blob, the old object is freed. - * - * This is used when the underlying class T does not have a default ctor, or - * complex initializations needs to be done outside the blob. 
- */ - template - T* Reset(T* allocated) { - if (pointer_ && destroy_) { - destroy_(pointer_); - } - meta_ = TypeMeta::Make(); - pointer_ = static_cast(allocated); - destroy_ = &Destroy; - return allocated; - } - - inline void* - Reset(void* allocated, const TypeMeta& meta, DestroyCall* destroy) { - if (pointer_ && destroy_) { - destroy_(pointer_); - } - meta_ = meta; - pointer_ = static_cast(allocated); - destroy_ = destroy; - return allocated; - } - - /** - * Releases the ownership, if any, this Blob has on the underlying pointer. - * The user is then responsible for freeing the data if needed - */ - inline DestroyCall* Release() { - DestroyCall* d = destroy_; - destroy_ = nullptr; - return d; - } - - /** - * Sets the underlying object to the allocated one, but does not take over - * the ownership of the passed in pointer. If there is already an object in - * the Blob, the old object is freed. - * - * Unlike Reset, this does not take over the ownership of the pointer and the - * caller is responsible for making sure that the lifetime of the allocated - * blob outlasts the lifetime of any access to this blob, until another Reset - * call is made or the blob is destructed. - */ - template - typename std::remove_const::type* ShareExternal( - typename std::remove_const::type* allocated) { - return static_cast(ShareExternal( - static_cast(allocated), - TypeMeta::Make::type>())); - } - - void* ShareExternal(void* allocated, const TypeMeta& meta) { - if (pointer_ && destroy_) { - destroy_(pointer_); - } - meta_ = meta; - pointer_ = static_cast(allocated); - destroy_ = nullptr; - return allocated; - } - - /** - * Resets the Blob to an empty one. - */ - inline void Reset() { - if (pointer_ && destroy_) { - destroy_(pointer_); +inline Tensor* BlobGetMutableTensor(Blob* blob, DeviceType device_type) { + if (blob->IsType()) { + Tensor* tensor = blob->GetMutable(); + if (tensor->GetDeviceType() == device_type) { + return tensor; } - pointer_ = nullptr; - meta_ = TypeMeta(); - destroy_ = nullptr; } - /** - * @brief Swaps the underlying storage of two blobs. - */ - void swap(Blob& rhs) { - using std::swap; - swap(meta_, rhs.meta_); - swap(pointer_, rhs.pointer_); - swap(destroy_, rhs.destroy_); - } - - private: - /** - * @brief A destroy call that is used to properly deconstruct objects. - */ - template - static void Destroy(void* pointer) { - delete static_cast(pointer); - } - TypeMeta meta_; - void* pointer_ = nullptr; - DestroyCall* destroy_ = nullptr; - - AT_DISABLE_COPY_AND_ASSIGN(Blob); -}; - -inline void swap(Blob& lhs, Blob& rhs) { - lhs.swap(rhs); + // if we're here, then either Blob didn't hold a Tensor + // or that Tensor had the wrong DeviceType. 
+ VLOG(1) << "Create new mutable object " << TypeMeta::TypeName() + << " DeviceType:" << device_type; + return blob->Reset(new Tensor(device_type)); } } // namespace caffe2 diff --git a/caffe2/core/blob_gpu_test.cc b/caffe2/core/blob_gpu_test.cc index e8fdf47f69ddb..55eafdede7269 100644 --- a/caffe2/core/blob_gpu_test.cc +++ b/caffe2/core/blob_gpu_test.cc @@ -132,7 +132,7 @@ TYPED_TEST(TensorGPUDeathTest, CannotAccessDataWhenEmpty) { for (int i = 0; i < 6; ++i) { \ cpu_tensor.mutable_data()[i] = static_cast(i); \ } \ - blob.GetMutableTensor(CUDA)->CopyFrom(cpu_tensor); \ + BlobGetMutableTensor(&blob, CUDA)->CopyFrom(cpu_tensor); \ string serialized = SerializeBlob(blob, "test"); \ BlobProto proto; \ CAFFE_ENFORCE(proto.ParseFromString(serialized)); \ @@ -149,7 +149,7 @@ TYPED_TEST(TensorGPUDeathTest, CannotAccessDataWhenEmpty) { } \ Blob new_blob; \ EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob)); \ - EXPECT_TRUE(new_blob.IsTensorType(CUDA)); \ + EXPECT_TRUE(BlobIsTensorType(new_blob, CUDA)); \ Tensor new_cpu_tensor(blob.Get(), CPU); \ EXPECT_EQ(new_cpu_tensor.ndim(), 2); \ EXPECT_EQ(new_cpu_tensor.dim(0), 2); \ @@ -199,7 +199,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) { // Test if the restored blob is still of the same device. blob.Reset(); EXPECT_NO_THROW(DeserializeBlob(serialized, &blob)); - EXPECT_TRUE(blob.IsTensorType(CUDA)); + EXPECT_TRUE(BlobIsTensorType(blob, CUDA)); EXPECT_EQ(GetGPUIDForPointer(blob.Get().data()), gpu_id); // Test if we force the restored blob on a different device, we @@ -207,7 +207,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) { blob.Reset(); proto.mutable_tensor()->mutable_device_detail()->set_cuda_gpu_id(0); EXPECT_NO_THROW(DeserializeBlob(proto.SerializeAsString(), &blob)); - EXPECT_TRUE(blob.IsTensorType(CUDA)); + EXPECT_TRUE(BlobIsTensorType(blob, CUDA)); EXPECT_EQ(GetGPUIDForPointer(blob.Get().data()), 0); } } diff --git a/caffe2/core/blob_serialization.cc b/caffe2/core/blob_serialization.cc index 7ff5a2b25eacc..d4ef19db69ce4 100644 --- a/caffe2/core/blob_serialization.cc +++ b/caffe2/core/blob_serialization.cc @@ -363,7 +363,8 @@ void TensorDeserializer::Deserialize(const BlobProto& blob_proto, Blob* blob) { auto tensor_proto = blob_proto.tensor(); Deserialize( tensor_proto, - blob->GetMutableTensor( + BlobGetMutableTensor( + blob, static_cast(tensor_proto.device_detail().device_type()))); } diff --git a/caffe2/core/blob_test.cc b/caffe2/core/blob_test.cc index 24b2a2d0593d3..bb2f4ba6a9181 100644 --- a/caffe2/core/blob_test.cc +++ b/caffe2/core/blob_test.cc @@ -86,15 +86,15 @@ TEST(BlobTest, Blob) { int* int_unused CAFFE2_UNUSED = blob.GetMutable(); EXPECT_TRUE(blob.IsType()); EXPECT_FALSE(blob.IsType()); - EXPECT_FALSE(blob.IsTensorType(CPU)); + EXPECT_FALSE(BlobIsTensorType(blob, CPU)); BlobTestFoo* foo_unused CAFFE2_UNUSED = blob.GetMutable(); EXPECT_TRUE(blob.IsType()); EXPECT_FALSE(blob.IsType()); - EXPECT_FALSE(blob.IsTensorType(CPU)); + EXPECT_FALSE(BlobIsTensorType(blob, CPU)); - Tensor* tensor_unused CAFFE2_UNUSED = blob.GetMutableTensor(CPU); - EXPECT_TRUE(blob.IsTensorType(CPU)); + Tensor* tensor_unused CAFFE2_UNUSED = BlobGetMutableTensor(&blob, CPU); + EXPECT_TRUE(BlobIsTensorType(blob, CPU)); EXPECT_FALSE(blob.IsType()); EXPECT_FALSE(blob.IsType()); } @@ -600,7 +600,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) { #define TEST_SERIALIZATION_WITH_TYPE(TypeParam, field_name) \ TEST(TensorTest, TensorSerialization_##TypeParam) { \ Blob blob; \ - Tensor* tensor = blob.GetMutableTensor(CPU); \ + Tensor* tensor 
= BlobGetMutableTensor(&blob, CPU); \ tensor->Resize(2, 3); \ for (int i = 0; i < 6; ++i) { \ tensor->mutable_data()[i] = static_cast(i); \ @@ -621,7 +621,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) { } \ Blob new_blob; \ EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob)); \ - EXPECT_TRUE(new_blob.IsTensorType(CPU)); \ + EXPECT_TRUE(BlobIsTensorType(new_blob, CPU)); \ const TensorCPU& new_tensor = blob.Get(); \ EXPECT_EQ(new_tensor.ndim(), 2); \ EXPECT_EQ(new_tensor.dim(0), 2); \ @@ -634,7 +634,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) { \ TEST(EmptyTensorTest, TensorSerialization_##TypeParam) { \ Blob blob; \ - TensorCPU* tensor = blob.GetMutableTensor(CPU); \ + TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU); \ tensor->Resize(0, 3); \ tensor->mutable_data(); \ string serialized = SerializeBlob(blob, "test"); \ @@ -650,7 +650,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) { EXPECT_EQ(tensor_proto.field_name##_size(), 0); \ Blob new_blob; \ EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob)); \ - EXPECT_TRUE(new_blob.IsTensorType(CPU)); \ + EXPECT_TRUE(BlobIsTensorType(new_blob, CPU)); \ const TensorCPU& new_tensor = blob.Get(); \ EXPECT_EQ(new_tensor.ndim(), 2); \ EXPECT_EQ(new_tensor.dim(0), 0); \ @@ -669,7 +669,7 @@ TEST_SERIALIZATION_WITH_TYPE(int64_t, int64_data) TEST(TensorTest, TensorSerialization_CustomType) { Blob blob; - TensorCPU* tensor = blob.GetMutableTensor(CPU); + TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU); tensor->Resize(2, 3); for (int i = 0; i < 6; ++i) { tensor->mutable_data()[i].val = i; @@ -681,7 +681,7 @@ TEST(TensorTest, TensorSerialization_CustomType) { EXPECT_EQ(proto.type(), "Tensor"); Blob new_blob; EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob)); - EXPECT_TRUE(new_blob.IsTensorType(CPU)); + EXPECT_TRUE(BlobIsTensorType(new_blob, CPU)); const TensorCPU& new_tensor = blob.Get(); EXPECT_EQ(new_tensor.ndim(), 2); EXPECT_EQ(new_tensor.dim(0), 2); @@ -696,7 +696,7 @@ TEST(TensorTest, TensorSerialization_CustomType) { TEST(TensorTest, Half) { const int64_t kSize = 3000000; Blob blob; - TensorCPU* tensor = blob.GetMutableTensor(CPU); + TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU); tensor->Resize(kSize); for (int i = 0; i < tensor->size(); ++i) { tensor->mutable_data()[i].x = i % 10000; @@ -724,7 +724,7 @@ TEST(TensorTest, Half) { } Blob new_blob; EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob)); - EXPECT_TRUE(new_blob.IsTensorType(CPU)); + EXPECT_TRUE(BlobIsTensorType(new_blob, CPU)); const TensorCPU& new_tensor = blob.Get(); EXPECT_EQ(new_tensor.ndim(), 1); EXPECT_EQ(new_tensor.dim(0), kSize); @@ -860,7 +860,7 @@ TYPED_TEST(TypedTensorTest, BigTensorSerialization) { { VLOG(1) << "Test begin"; Blob blob; - Tensor* tensor = blob.GetMutableTensor(CPU); + Tensor* tensor = BlobGetMutableTensor(&blob, CPU); VLOG(1) << "Allocating blob"; tensor->Resize(d1, d2); auto mutableData = tensor->mutable_data(); @@ -903,7 +903,7 @@ TYPED_TEST(TypedTensorTest, BigTensorSerialization) { load_op->Run(); VLOG(1) << "Reading blob from workspace"; auto new_blob = ws.GetBlob("test"); - EXPECT_TRUE(new_blob->IsTensorType(CPU)); + EXPECT_TRUE(BlobIsTensorType(*new_blob, CPU)); const auto& new_tensor = new_blob->Get(); EXPECT_EQ(new_tensor.ndim(), d1); @@ -1030,7 +1030,7 @@ TEST(CustomChunkSize, BigTensorSerialization) { int64_t size = d1 * d2; Blob blob; - TensorCPU* tensor = blob.GetMutableTensor(CPU); + TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU); tensor->Resize(d1, d2); tensor->mutable_data(); std::mutex mutex; 
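The blob.h, blob_test.cc, and blob_gpu_test.cc hunks above all apply the same call-site rewrite: the former Blob member functions IsTensorType(DeviceType) and GetMutableTensor(DeviceType) become the free functions BlobIsTensorType(const Blob&, DeviceType) and BlobGetMutableTensor(Blob*, DeviceType) declared in caffe2/core/blob.h. A minimal sketch of the migrated pattern, assuming only the Caffe2 headers touched in this patch (the function name, blob contents, and shape below are illustrative, not taken from any test):

#include "caffe2/core/blob.h"
#include "caffe2/core/tensor.h"

namespace caffe2 {
void FillExampleBlob(Blob* blob) {
  // Old form: blob->GetMutableTensor(CPU); new form: free function taking Blob*.
  Tensor* tensor = BlobGetMutableTensor(blob, CPU);
  tensor->Resize(2, 3);
  for (int i = 0; i < tensor->size(); ++i) {
    tensor->mutable_data<float>()[i] = static_cast<float>(i);
  }
  // Old form: blob->IsTensorType(CPU); new form: free function taking const Blob&.
  bool is_cpu_tensor = BlobIsTensorType(*blob, CPU);
  (void)is_cpu_tensor;
}
} // namespace caffe2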
diff --git a/caffe2/core/common.h b/caffe2/core/common.h index 048d634df80df..93bbf341b5061 100644 --- a/caffe2/core/common.h +++ b/caffe2/core/common.h @@ -26,7 +26,7 @@ // is automatically generated by the cmake script during build. #include "caffe2/core/macros.h" -#include "ATen/core/Macros.h" +#include "c10/macros/Macros.h" namespace caffe2 { @@ -94,48 +94,6 @@ using std::vector; #define CAFFE2_NORETURN __attribute__((noreturn)) #endif -// Defines CAFFE2_EXPORT and CAFFE2_IMPORT. On Windows, this corresponds to -// different declarations (dllexport and dllimport). On Linux/Mac, it just -// resolves to the same "default visibility" setting. -#if defined(_MSC_VER) -#if defined(CAFFE2_BUILD_SHARED_LIBS) -#define CAFFE2_EXPORT __declspec(dllexport) -#define CAFFE2_IMPORT __declspec(dllimport) -#else -#define CAFFE2_EXPORT -#define CAFFE2_IMPORT -#endif -#else -#if defined(__GNUC__) -#define CAFFE2_EXPORT __attribute__((__visibility__("default"))) -#else -#define CAFFE2_EXPORT -#endif -#define CAFFE2_IMPORT CAFFE2_EXPORT -#endif - -// CAFFE2_API is a macro that, depends on whether you are building the -// main caffe2 library or not, resolves to either CAFFE2_EXPORT or -// CAFFE2_IMPORT. -// -// This is used in e.g. Caffe2's protobuf files: when building the main library, -// it is defined as CAFFE2_EXPORT to fix a Windows global-variable-in-dll -// issue, and for anyone dependent on Caffe2 it will be defined as -// CAFFE2_IMPORT. - -#ifdef CAFFE2_BUILD_MAIN_LIB -#define CAFFE2_API CAFFE2_EXPORT -#else -#define CAFFE2_API CAFFE2_IMPORT -#endif - -#ifdef CAFFE2_BUILD_OBSERVER_LIB -#define CAFFE2_OBSERVER_API CAFFE2_EXPORT -#else -#define CAFFE2_OBSERVER_API CAFFE2_IMPORT -#endif - - #if defined(_MSC_VER) #define NOMINMAX #endif diff --git a/caffe2/core/common_cudnn.h b/caffe2/core/common_cudnn.h index 5332026eedb0c..c0961c4c6411a 100644 --- a/caffe2/core/common_cudnn.h +++ b/caffe2/core/common_cudnn.h @@ -258,7 +258,7 @@ class cudnnTensorDescWrapper { cudnnTensorFormat_t format_; cudnnDataType_t type_; vector dims_; - AT_DISABLE_COPY_AND_ASSIGN(cudnnTensorDescWrapper); + C10_DISABLE_COPY_AND_ASSIGN(cudnnTensorDescWrapper); }; class cudnnFilterDescWrapper { @@ -312,7 +312,7 @@ class cudnnFilterDescWrapper { StorageOrder order_; cudnnDataType_t type_; vector dims_; - AT_DISABLE_COPY_AND_ASSIGN(cudnnFilterDescWrapper); + C10_DISABLE_COPY_AND_ASSIGN(cudnnFilterDescWrapper); }; diff --git a/caffe2/core/cudnn_wrappers.h b/caffe2/core/cudnn_wrappers.h index b518914e50402..1bd39fa62a399 100644 --- a/caffe2/core/cudnn_wrappers.h +++ b/caffe2/core/cudnn_wrappers.h @@ -89,7 +89,7 @@ class CuDNNState { cudaStream_t stream_{nullptr}; CuDNNWorkspace workspace_; size_t gpu_id_{0}; - AT_DISABLE_COPY_AND_ASSIGN(CuDNNState); + C10_DISABLE_COPY_AND_ASSIGN(CuDNNState); }; /** @@ -153,7 +153,7 @@ class CuDNNWrapper { CAFFE2_COMPILE_TIME_MAX_GPUS>; static PerGPUCuDNNStates& cudnn_states(); - AT_DISABLE_COPY_AND_ASSIGN(CuDNNWrapper); + C10_DISABLE_COPY_AND_ASSIGN(CuDNNWrapper); }; }; // namespace caffe2 diff --git a/caffe2/core/db.cc b/caffe2/core/db.cc index 386787b51c353..720c2dcaa46de 100644 --- a/caffe2/core/db.cc +++ b/caffe2/core/db.cc @@ -119,7 +119,7 @@ class MiniDBTransaction : public Transaction { FILE* file_; std::lock_guard lock_; - AT_DISABLE_COPY_AND_ASSIGN(MiniDBTransaction); + C10_DISABLE_COPY_AND_ASSIGN(MiniDBTransaction); }; class MiniDB : public DB { diff --git a/caffe2/core/db.h b/caffe2/core/db.h index 06b74d11bd585..39f8b6f3f02b0 100644 --- a/caffe2/core/db.h +++ b/caffe2/core/db.h @@ -52,7 
+52,7 @@ class CAFFE2_API Cursor { */ virtual bool Valid() = 0; - AT_DISABLE_COPY_AND_ASSIGN(Cursor); + C10_DISABLE_COPY_AND_ASSIGN(Cursor); }; /** @@ -71,7 +71,7 @@ class CAFFE2_API Transaction { */ virtual void Commit() = 0; - AT_DISABLE_COPY_AND_ASSIGN(Transaction); + C10_DISABLE_COPY_AND_ASSIGN(Transaction); }; /** @@ -99,7 +99,7 @@ class CAFFE2_API DB { protected: Mode mode_; - AT_DISABLE_COPY_AND_ASSIGN(DB); + C10_DISABLE_COPY_AND_ASSIGN(DB); }; // Database classes are registered by their names so we can do optional @@ -285,7 +285,7 @@ class CAFFE2_API DBReader { uint32_t num_shards_; uint32_t shard_id_; - AT_DISABLE_COPY_AND_ASSIGN(DBReader); + C10_DISABLE_COPY_AND_ASSIGN(DBReader); }; class CAFFE2_API DBReaderSerializer : public BlobSerializerBase { diff --git a/caffe2/core/dispatch/KernelRegistration.h b/caffe2/core/dispatch/KernelRegistration.h index 9ebc20b7ab0a6..619cef616222b 100644 --- a/caffe2/core/dispatch/KernelRegistration.h +++ b/caffe2/core/dispatch/KernelRegistration.h @@ -57,7 +57,7 @@ class KernelRegistrar final { const typename Schema::dispatch::dispatch_key_type dispatch_key_; bool owns_registration_; - AT_DISABLE_COPY_AND_ASSIGN(KernelRegistrar); + C10_DISABLE_COPY_AND_ASSIGN(KernelRegistrar); }; /** diff --git a/caffe2/core/flags.cc b/caffe2/core/flags.cc index e7c19efde21b3..a84d298466dc0 100644 --- a/caffe2/core/flags.cc +++ b/caffe2/core/flags.cc @@ -9,7 +9,7 @@ namespace caffe2 { #ifdef CAFFE2_USE_GFLAGS -CAFFE2_EXPORT void SetUsageMessage(const string& str) { +C10_EXPORT void SetUsageMessage(const string& str) { if (UsageMessage() != nullptr) { // Usage message has already been set, so we will simply return. return; @@ -17,16 +17,16 @@ CAFFE2_EXPORT void SetUsageMessage(const string& str) { gflags::SetUsageMessage(str); } -CAFFE2_EXPORT const char* UsageMessage() { +C10_EXPORT const char* UsageMessage() { return gflags::ProgramUsage(); } -CAFFE2_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) { +C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) { if (*pargc == 0) return true; return gflags::ParseCommandLineFlags(pargc, pargv, true); } -CAFFE2_EXPORT bool CommandLineFlagsHasBeenParsed() { +C10_EXPORT bool CommandLineFlagsHasBeenParsed() { // There is no way we query gflags right now, so we will simply return true. 
return true; } @@ -48,11 +48,14 @@ std::stringstream& GlobalInitStream() { static string gUsageMessage = "(Usage message not set.)"; } +C10_EXPORT void SetUsageMessage(const string& str) { + gUsageMessage = str; +} +C10_EXPORT const char* UsageMessage() { + return gUsageMessage.c_str(); +} -CAFFE2_EXPORT void SetUsageMessage(const string& str) { gUsageMessage = str; } -CAFFE2_EXPORT const char* UsageMessage() { return gUsageMessage.c_str(); } - -CAFFE2_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) { +C10_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) { if (*pargc == 0) return true; char** argv = *pargv; bool success = true; @@ -136,18 +139,22 @@ CAFFE2_EXPORT bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv) { return success; } -CAFFE2_EXPORT bool CommandLineFlagsHasBeenParsed() { +C10_EXPORT bool CommandLineFlagsHasBeenParsed() { return gCommandLineFlagsParsed; } template <> -CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, string* value) { +C10_EXPORT bool Caffe2FlagParser::Parse( + const string& content, + string* value) { *value = content; return true; } template <> -CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, int* value) { +C10_EXPORT bool Caffe2FlagParser::Parse( + const string& content, + int* value) { try { *value = std::atoi(content.c_str()); return true; @@ -159,7 +166,9 @@ CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, int* valu } template <> -CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, int64_t* value) { +C10_EXPORT bool Caffe2FlagParser::Parse( + const string& content, + int64_t* value) { try { static_assert(sizeof(long long) == sizeof(int64_t), ""); #ifdef __ANDROID__ @@ -177,7 +186,9 @@ CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, int64 } template <> -CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, double* value) { +C10_EXPORT bool Caffe2FlagParser::Parse( + const string& content, + double* value) { try { *value = std::atof(content.c_str()); return true; @@ -190,7 +201,9 @@ CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, double } template <> -CAFFE2_EXPORT bool Caffe2FlagParser::Parse(const string& content, bool* value) { +C10_EXPORT bool Caffe2FlagParser::Parse( + const string& content, + bool* value) { if (content == "false" || content == "False" || content == "FALSE" || content == "0") { *value = false; diff --git a/caffe2/core/flags.h b/caffe2/core/flags.h index 2226b66af56fd..4e39c7bdebf13 100644 --- a/caffe2/core/flags.h +++ b/caffe2/core/flags.h @@ -79,14 +79,14 @@ namespace gflags = google; // (3) Gflags has a design issue that does not properly expose the global flags, // if one builds the library with -fvisibility=hidden. The current gflags (as of // Aug 2018) only deals with the Windows case using dllexport, and not the Linux -// counterparts. As a result, we will explciitly use CAFFE2_EXPORT to export the +// counterparts. As a result, we will explciitly use C10_EXPORT to export the // flags defined in Caffe2. This is done via a global reference, so the flag // itself is not duplicated - under the hood it is the same global gflags flag. 
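// For orientation, the wrapper reformatted just below only swaps the export
// macro; the expansion is otherwise unchanged. A hypothetical flag such as
//   CAFFE2_DEFINE_int(caffe2_example_flag, 0, "example help")
// would, in a gflags build, expand to roughly:
//   DEFINE_int32(caffe2_example_flag, 0, "example help");
//   namespace caffe2 {
//   C10_EXPORT gflags::int32& FLAGS_caffe2_example_flag = ::FLAGS_caffe2_example_flag;
//   }
// so caffe2::FLAGS_* is just an exported reference to the underlying gflags flag.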
-#define CAFFE2_GFLAGS_DEF_WRAPPER( \ - type, real_type, name, default_value, help_str) \ - DEFINE_##type(name, default_value, help_str); \ - namespace caffe2 { \ - CAFFE2_EXPORT real_type& FLAGS_##name = ::FLAGS_##name; \ +#define CAFFE2_GFLAGS_DEF_WRAPPER( \ + type, real_type, name, default_value, help_str) \ + DEFINE_##type(name, default_value, help_str); \ + namespace caffe2 { \ + C10_EXPORT real_type& FLAGS_##name = ::FLAGS_##name; \ } #define CAFFE2_DEFINE_int(name, default_value, help_str) \ @@ -102,11 +102,11 @@ namespace gflags = google; string, ::fLS::clstring, name, default_value, help_str) // DECLARE_typed_var should be used in header files and in the global namespace. -#define CAFFE2_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \ - DECLARE_##type(name); \ - namespace caffe2 { \ - CAFFE2_IMPORT extern real_type& FLAGS_##name; \ - } // namespace caffe2 +#define CAFFE2_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \ + DECLARE_##type(name); \ + namespace caffe2 { \ + C10_IMPORT extern real_type& FLAGS_##name; \ + } // namespace caffe2 #define CAFFE2_DECLARE_int(name) \ CAFFE2_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name) @@ -150,22 +150,22 @@ CAFFE_DECLARE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&); // write the CAFFE2_DEFINE_* and CAFFE2_DECLARE_* macros outside any namespace // as well. -#define CAFFE2_DEFINE_typed_var(type, name, default_value, help_str) \ - namespace caffe2 { \ - CAFFE2_EXPORT type FLAGS_##name = default_value; \ - namespace { \ - class Caffe2FlagParser_##name : public Caffe2FlagParser { \ - public: \ - explicit Caffe2FlagParser_##name(const string& content) { \ - success_ = Caffe2FlagParser::Parse(content, &FLAGS_##name); \ - } \ - }; \ - } \ - RegistererCaffe2FlagsRegistry g_Caffe2FlagsRegistry_##name( \ - #name, \ - Caffe2FlagsRegistry(), \ - RegistererCaffe2FlagsRegistry::DefaultCreator, \ - "(" #type ", default " #default_value ") " help_str); \ +#define CAFFE2_DEFINE_typed_var(type, name, default_value, help_str) \ + namespace caffe2 { \ + C10_EXPORT type FLAGS_##name = default_value; \ + namespace { \ + class Caffe2FlagParser_##name : public Caffe2FlagParser { \ + public: \ + explicit Caffe2FlagParser_##name(const string& content) { \ + success_ = Caffe2FlagParser::Parse(content, &FLAGS_##name); \ + } \ + }; \ + } \ + RegistererCaffe2FlagsRegistry g_Caffe2FlagsRegistry_##name( \ + #name, \ + Caffe2FlagsRegistry(), \ + RegistererCaffe2FlagsRegistry::DefaultCreator, \ + "(" #type ", default " #default_value ") " help_str); \ } #define CAFFE2_DEFINE_int(name, default_value, help_str) \ @@ -180,9 +180,9 @@ CAFFE_DECLARE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&); CAFFE2_DEFINE_typed_var(string, name, default_value, help_str) // DECLARE_typed_var should be used in header files and in the global namespace. 
-#define CAFFE2_DECLARE_typed_var(type, name) \ - namespace caffe2 { \ - CAFFE2_IMPORT extern type FLAGS_##name; \ +#define CAFFE2_DECLARE_typed_var(type, name) \ + namespace caffe2 { \ + C10_IMPORT extern type FLAGS_##name; \ } // namespace caffe2 #define CAFFE2_DECLARE_int(name) CAFFE2_DECLARE_typed_var(int, name) diff --git a/caffe2/core/hip/common_miopen.h b/caffe2/core/hip/common_miopen.h index 59fa0f429f8ac..ecdf376e47490 100644 --- a/caffe2/core/hip/common_miopen.h +++ b/caffe2/core/hip/common_miopen.h @@ -164,7 +164,7 @@ class miopenTensorDescWrapper miopenTensorDescriptor_t desc_; miopenDataType_t type_; vector dims_; - AT_DISABLE_COPY_AND_ASSIGN(miopenTensorDescWrapper); + C10_DISABLE_COPY_AND_ASSIGN(miopenTensorDescWrapper); }; } // namespace caffe2 diff --git a/caffe2/core/hip/miopen_wrapper.h b/caffe2/core/hip/miopen_wrapper.h index 910db8b79d788..328c7522258d9 100644 --- a/caffe2/core/hip/miopen_wrapper.h +++ b/caffe2/core/hip/miopen_wrapper.h @@ -92,7 +92,7 @@ class MIOPENState hipStream_t stream_{nullptr}; MIOPENWorkspace workspace_; size_t gpu_id_{0}; - AT_DISABLE_COPY_AND_ASSIGN(MIOPENState); + C10_DISABLE_COPY_AND_ASSIGN(MIOPENState); }; /** @@ -157,7 +157,7 @@ class MIOPENWrapper CAFFE2_COMPILE_TIME_MAX_HIP_GPUS>; static PerGPUMIOPENStates& miopen_states(); - AT_DISABLE_COPY_AND_ASSIGN(MIOPENWrapper); + C10_DISABLE_COPY_AND_ASSIGN(MIOPENWrapper); }; }; // namespace caffe2 diff --git a/caffe2/core/hip/net_async_dag_hip.cc b/caffe2/core/hip/net_async_dag_hip.cc index fa35b2a8c2161..faac5b119f576 100644 --- a/caffe2/core/hip/net_async_dag_hip.cc +++ b/caffe2/core/hip/net_async_dag_hip.cc @@ -58,7 +58,7 @@ class ProfiledRange ProfiledRange(const OperatorDef& def, Color color) {} private: - AT_DISABLE_COPY_AND_ASSIGN(ProfiledRange); + C10_DISABLE_COPY_AND_ASSIGN(ProfiledRange); }; } // namespace diff --git a/caffe2/core/logging.h b/caffe2/core/logging.h index 37fcd939c4d61..288c34afd5dbe 100644 --- a/caffe2/core/logging.h +++ b/caffe2/core/logging.h @@ -8,6 +8,7 @@ #include #include +#include "caffe2/core/common.h" #include "caffe2/core/flags.h" // CAFFE2_LOG_THRESHOLD is a compile time flag that would allow us to turn off diff --git a/caffe2/core/macros.h.in b/caffe2/core/macros.h.in index a055de0fdc254..188853296f816 100644 --- a/caffe2/core/macros.h.in +++ b/caffe2/core/macros.h.in @@ -11,7 +11,6 @@ #define CAFFE2_VERSION_MAJOR @CAFFE2_VERSION_MAJOR@ #define CAFFE2_VERSION_MINOR @CAFFE2_VERSION_MINOR@ #define CAFFE2_VERSION_PATCH @CAFFE2_VERSION_PATCH@ -#define CAFFE2_GIT_VERSION "@CAFFE2_GIT_VERSION@" static_assert( CAFFE2_VERSION_MINOR < 100, @@ -54,7 +53,6 @@ static_assert( // Useful build settings that are recorded in the compiled binary #define CAFFE2_BUILD_STRINGS { \ - {"GIT_VERSION", "${CAFFE2_GIT_VERSION}"}, \ {"CXX_FLAGS", "${CMAKE_CXX_FLAGS}"}, \ {"BUILD_TYPE", "${CMAKE_BUILD_TYPE}"}, \ {"BLAS", "${BLAS}"}, \ diff --git a/caffe2/core/net.h b/caffe2/core/net.h index 962363ad0270e..57fd53f1de4f1 100644 --- a/caffe2/core/net.h +++ b/caffe2/core/net.h @@ -124,7 +124,7 @@ class CAFFE2_API NetBase : public Observable { string name_; vector events_; std::shared_ptr net_def_; - AT_DISABLE_COPY_AND_ASSIGN(NetBase); + C10_DISABLE_COPY_AND_ASSIGN(NetBase); }; class CAFFE2_API ExecutorHelper { diff --git a/caffe2/core/net_async_base.h b/caffe2/core/net_async_base.h index 7edec76c439a9..502233e7f045b 100644 --- a/caffe2/core/net_async_base.h +++ b/caffe2/core/net_async_base.h @@ -125,7 +125,7 @@ class CAFFE2_API AsyncNetBase : public NetBase { bool use_per_net_pools_; bool 
is_blocking_; - AT_DISABLE_COPY_AND_ASSIGN(AsyncNetBase); + C10_DISABLE_COPY_AND_ASSIGN(AsyncNetBase); private: void storeExceptionPtr(); diff --git a/caffe2/core/net_async_dag_gpu.cc b/caffe2/core/net_async_dag_gpu.cc index 225337d1452b9..550a760826edd 100644 --- a/caffe2/core/net_async_dag_gpu.cc +++ b/caffe2/core/net_async_dag_gpu.cc @@ -71,7 +71,7 @@ class ProfiledRange { private: nvtxRangeId_t range_ = 0; - AT_DISABLE_COPY_AND_ASSIGN(ProfiledRange); + C10_DISABLE_COPY_AND_ASSIGN(ProfiledRange); }; #else @@ -81,7 +81,7 @@ class ProfiledRange { ProfiledRange(const OperatorDef& def, Color color) {} private: - AT_DISABLE_COPY_AND_ASSIGN(ProfiledRange); + C10_DISABLE_COPY_AND_ASSIGN(ProfiledRange); }; #endif // ifdef CAFFE2_USE_NVTX diff --git a/caffe2/core/net_async_dag_gpu.h b/caffe2/core/net_async_dag_gpu.h index 62ae301e4cbf2..845e5160d27b9 100644 --- a/caffe2/core/net_async_dag_gpu.h +++ b/caffe2/core/net_async_dag_gpu.h @@ -32,7 +32,7 @@ class AsyncDAGNet : public DAGNetBase { int stream(const DeviceOption& device_option); static thread_local std::vector stream_counters_; - AT_DISABLE_COPY_AND_ASSIGN(AsyncDAGNet); + C10_DISABLE_COPY_AND_ASSIGN(AsyncDAGNet); }; } // namespace caffe2 diff --git a/caffe2/core/net_async_polling.h b/caffe2/core/net_async_polling.h index 8b3d6db8d695e..9c4a284f0d13a 100644 --- a/caffe2/core/net_async_polling.h +++ b/caffe2/core/net_async_polling.h @@ -40,7 +40,7 @@ class AsyncPollingNet : public AsyncNetBase { void reset() override; std::atomic has_chain_failed_; - AT_DISABLE_COPY_AND_ASSIGN(AsyncPollingNet); + C10_DISABLE_COPY_AND_ASSIGN(AsyncPollingNet); }; } // namespace caffe2 diff --git a/caffe2/core/net_async_scheduling.h b/caffe2/core/net_async_scheduling.h index 8576fca1bb07f..4fcdf4b731681 100644 --- a/caffe2/core/net_async_scheduling.h +++ b/caffe2/core/net_async_scheduling.h @@ -30,7 +30,7 @@ class CAFFE2_API AsyncSchedulingNet : public AsyncNetBase { std::atomic processed_tasks_num_; - AT_DISABLE_COPY_AND_ASSIGN(AsyncSchedulingNet); + C10_DISABLE_COPY_AND_ASSIGN(AsyncSchedulingNet); }; } // namespace caffe2 diff --git a/caffe2/core/net_dag.h b/caffe2/core/net_dag.h index 078fa63a4238a..ab3ce0f6f3fa1 100644 --- a/caffe2/core/net_dag.h +++ b/caffe2/core/net_dag.h @@ -84,7 +84,7 @@ class CAFFE2_API DAGNetBase : public NetBase { mutable std::vector stats_; std::unordered_map> task_timers_; - AT_DISABLE_COPY_AND_ASSIGN(DAGNetBase); + C10_DISABLE_COPY_AND_ASSIGN(DAGNetBase); }; class CAFFE2_API DAGNet : public DAGNetBase { diff --git a/caffe2/core/net_simple.h b/caffe2/core/net_simple.h index a8ac751dbb5ed..c114fd8d224f2 100644 --- a/caffe2/core/net_simple.h +++ b/caffe2/core/net_simple.h @@ -48,7 +48,7 @@ class CAFFE2_API SimpleNet : public NetBase { vector> operators_; - AT_DISABLE_COPY_AND_ASSIGN(SimpleNet); + C10_DISABLE_COPY_AND_ASSIGN(SimpleNet); }; } // namespace caffe2 diff --git a/caffe2/core/net_simple_async.h b/caffe2/core/net_simple_async.h index 38c3255bf4df3..ea5aae959870f 100644 --- a/caffe2/core/net_simple_async.h +++ b/caffe2/core/net_simple_async.h @@ -43,7 +43,7 @@ class AsyncSimpleNet : public NetBase { vector> operators_; - AT_DISABLE_COPY_AND_ASSIGN(AsyncSimpleNet); + C10_DISABLE_COPY_AND_ASSIGN(AsyncSimpleNet); }; } // namespace caffe2 diff --git a/caffe2/core/nomnigraph/include/nomnigraph/Representations/NeuralNet.h b/caffe2/core/nomnigraph/include/nomnigraph/Representations/NeuralNet.h index 2a03e428619b3..e7a889980365c 100644 --- a/caffe2/core/nomnigraph/include/nomnigraph/Representations/NeuralNet.h +++ 
b/caffe2/core/nomnigraph/include/nomnigraph/Representations/NeuralNet.h @@ -259,7 +259,7 @@ template using enable_if_t = typename std::enable_if::type; template -struct CAFFE2_EXPORT inheritedFrom { +struct C10_EXPORT inheritedFrom { static constexpr bool value = std::is_base_of::value && !std::is_same::value; }; @@ -267,14 +267,15 @@ struct CAFFE2_EXPORT inheritedFrom { // This is just a way to fix issues when the isa<> implementation // can't automatically downcast. template -struct CAFFE2_EXPORT is_impl { +struct C10_EXPORT is_impl { inline static bool impl(N n) { return isa(n->data()); } }; template -struct CAFFE2_EXPORT is_impl::value>> { +struct C10_EXPORT + is_impl::value>> { inline static bool impl(N n) { if (!isa(n->data().get())) { return false; @@ -285,7 +286,8 @@ struct CAFFE2_EXPORT is_impl -struct CAFFE2_EXPORT is_impl::value>> { +struct C10_EXPORT + is_impl::value>> { inline static bool impl(N n) { if (!isa(n->data().get())) { return false; @@ -303,14 +305,15 @@ inline bool is(N n) { // This is just a way to fix issues when the dyn_cast<> implementation // can't automatically downcast. template -struct CAFFE2_EXPORT get_impl { +struct C10_EXPORT get_impl { inline static T* impl(N n) { return dyn_cast(n->data().get()); } }; template -struct CAFFE2_EXPORT get_impl::value>> { +struct C10_EXPORT + get_impl::value>> { inline static T* impl(N n) { if (!is(n)) { assert(0 && "Cannot get type from node"); @@ -322,7 +325,8 @@ struct CAFFE2_EXPORT get_impl -struct CAFFE2_EXPORT get_impl::value>> { +struct C10_EXPORT + get_impl::value>> { inline static T* impl(N n) { if (!is(n)) { assert(0 && "Cannot get type from node"); @@ -422,7 +426,7 @@ CAFFE2_API std::vector getOutputs(NNGraph::NodeRef n); CAFFE2_API void coalesceInsertedDataDependencies(repr::NNModule* m); template -struct CAFFE2_EXPORT NodeHelper {}; +struct C10_EXPORT NodeHelper {}; struct NNNodeMatchCriteria { std::function predicate; diff --git a/caffe2/core/observer.h b/caffe2/core/observer.h index e10ab0bb7eac6..378a7569d37bb 100644 --- a/caffe2/core/observer.h +++ b/caffe2/core/observer.h @@ -51,7 +51,7 @@ class Observable { virtual ~Observable() = default; - AT_DISABLE_COPY_AND_ASSIGN(Observable); + C10_DISABLE_COPY_AND_ASSIGN(Observable); using Observer = ObserverBase; diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h index 25aa801d265db..1a968c4c3755f 100644 --- a/caffe2/core/operator.h +++ b/caffe2/core/operator.h @@ -122,7 +122,7 @@ class CAFFE2_API OperatorBase : public Observable { static_assert( std::is_same::value, "Output(int, DeviceType) is only available for Tensor"); - return outputs_.at(idx)->GetMutableTensor(type); + return BlobGetMutableTensor(outputs_.at(idx), type); } template @@ -149,7 +149,7 @@ class CAFFE2_API OperatorBase : public Observable { } inline bool InputIsTensorType(int idx, DeviceType device_type) { - return inputs_.at(idx)->IsTensorType(device_type); + return BlobIsTensorType(*inputs_.at(idx), device_type); } template @@ -162,7 +162,7 @@ class CAFFE2_API OperatorBase : public Observable { } inline bool OutputIsTensorType(int idx, DeviceType type) { - return outputs_.at(idx)->IsTensorType(type); + return BlobIsTensorType(*outputs_.at(idx), type); } inline int InputSize() const { @@ -397,7 +397,7 @@ class CAFFE2_API OperatorBase : public Observable { // An event used by asynchronous execution. 
std::unique_ptr event_; - AT_DISABLE_COPY_AND_ASSIGN(OperatorBase); + C10_DISABLE_COPY_AND_ASSIGN(OperatorBase); }; // If your operator does not need any specialized contructor or destructor, @@ -825,7 +825,7 @@ CAFFE_DECLARE_REGISTRY( #define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \ CAFFE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__) #define REGISTER_CPU_OPERATOR(name, ...) \ - CAFFE2_IMPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name();\ + C10_IMPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ static void CAFFE2_UNUSED CAFFE_ANONYMOUS_VARIABLE_CPU##name() { \ CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ } \ @@ -844,7 +844,7 @@ CAFFE_DECLARE_REGISTRY( #define REGISTER_CUDA_OPERATOR_CREATOR(key, ...) \ CAFFE_REGISTER_CREATOR(CUDAOperatorRegistry, key, __VA_ARGS__) #define REGISTER_CUDA_OPERATOR(name, ...) \ - CAFFE2_IMPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ + C10_IMPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ static void CAFFE2_UNUSED CAFFE_ANONYMOUS_VARIABLE_CUDA##name() { \ CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ } \ @@ -869,10 +869,10 @@ CAFFE_DECLARE_REGISTRY( #define REGISTER_HIP_OPERATOR_CREATOR(key, ...) \ CAFFE_REGISTER_CREATOR(HIPOperatorRegistry, key, __VA_ARGS__) #define REGISTER_HIP_OPERATOR(name, ...) \ - CAFFE2_IMPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ + C10_IMPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ static void CAFFE2_UNUSED CAFFE_ANONYMOUS_VARIABLE_HIP##name() { \ - CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ - } \ + CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ + } \ CAFFE_REGISTER_CLASS(HIPOperatorRegistry, name, __VA_ARGS__) #define REGISTER_HIP_OPERATOR_STR(str_name, ...) \ CAFFE_REGISTER_TYPED_CLASS(HIPOperatorRegistry, str_name, __VA_ARGS__) diff --git a/caffe2/core/operator_schema.cc b/caffe2/core/operator_schema.cc index a76a0df9bd004..3082810b85cde 100644 --- a/caffe2/core/operator_schema.cc +++ b/caffe2/core/operator_schema.cc @@ -415,7 +415,7 @@ std::vector OpSchema::SupplyDenseFillers( return fillers; } -CAFFE2_EXPORT std::ostream& operator<<(std::ostream& out, const OpSchema& schema) { +C10_EXPORT std::ostream& operator<<(std::ostream& out, const OpSchema& schema) { if (!schema.args().empty()) { out << "Arguments:" << std::endl; for (const auto& arg : schema.args()) { diff --git a/caffe2/core/operator_schema.h b/caffe2/core/operator_schema.h index e0b6495647ebd..54a6a17b8a0d2 100644 --- a/caffe2/core/operator_schema.h +++ b/caffe2/core/operator_schema.h @@ -576,16 +576,16 @@ OpSchema::Cost PointwiseCostInference( #ifndef CAFFE2_NO_OPERATOR_SCHEMA -#define OPERATOR_SCHEMA(name) \ - CAFFE2_EXPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(){}; \ - static OpSchema* CAFFE_ANONYMOUS_VARIABLE(name) CAFFE2_UNUSED = \ +#define OPERATOR_SCHEMA(name) \ + C10_EXPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(){}; \ + static OpSchema* CAFFE_ANONYMOUS_VARIABLE(name) CAFFE2_UNUSED = \ &OpSchemaRegistry::NewSchema(#name, __FILE__, __LINE__) #else // CAFFE2_NO_OPERATOR_SCHEMA -#define OPERATOR_SCHEMA(name) \ - CAFFE2_EXPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(){}; \ - static OpSchema* CAFFE_ANONYMOUS_VARIABLE(name) CAFFE2_UNUSED = \ +#define OPERATOR_SCHEMA(name) \ + C10_EXPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(){}; \ + static OpSchema* CAFFE_ANONYMOUS_VARIABLE(name) CAFFE2_UNUSED = \ 1 ? 
nullptr : &OpSchemaRegistry::NewSchema(#name, __FILE__, __LINE__) #endif // CAFFE2_NO_OPERATOR_SCHEMA diff --git a/caffe2/core/plan_executor.cc b/caffe2/core/plan_executor.cc index 2c0ad9e7a8127..8e48b6b7beabc 100644 --- a/caffe2/core/plan_executor.cc +++ b/caffe2/core/plan_executor.cc @@ -131,7 +131,8 @@ struct WorkspaceIdInjector { "Integer overflow while calculating GLOBAL_WORKSPACE_ID blob"); int32_t global_ws_id = (seq_++) + (static_cast(node_id) << 16); Blob* global_ws_id_blob = workspace->CreateLocalBlob(GLOBAL_WORKSPACE_ID); - TensorCPU* global_ws_id_tensor = global_ws_id_blob->GetMutableTensor(CPU); + TensorCPU* global_ws_id_tensor = + BlobGetMutableTensor(global_ws_id_blob, CPU); global_ws_id_tensor->Resize(); global_ws_id_tensor->template mutable_data()[0] = global_ws_id; VLOG(1) << "Adding " << GLOBAL_WORKSPACE_ID << " = " << global_ws_id; diff --git a/caffe2/core/qtensor.h b/caffe2/core/qtensor.h index f277ffdbdd0a6..385ebf1d5f9f8 100644 --- a/caffe2/core/qtensor.h +++ b/caffe2/core/qtensor.h @@ -14,7 +14,7 @@ namespace caffe2 { template -class CAFFE2_EXPORT QTensor { +class C10_EXPORT QTensor { public: QTensor() {} virtual ~QTensor() {} diff --git a/caffe2/core/registry.h b/caffe2/core/registry.h index 7db975077ea8b..f026795b23c3e 100644 --- a/caffe2/core/registry.h +++ b/caffe2/core/registry.h @@ -100,7 +100,7 @@ class Registry { CaffeMap help_message_; std::mutex register_mutex_; - AT_DISABLE_COPY_AND_ASSIGN(Registry); + C10_DISABLE_COPY_AND_ASSIGN(Registry); }; template @@ -142,16 +142,16 @@ class Registerer { * declaration, as well as creating a convenient typename for its corresponding * registerer. */ -#define CAFFE_DECLARE_TYPED_REGISTRY( \ - RegistryName, SrcType, ObjectType, PtrType, ...) \ - CAFFE2_EXPORT Registry, ##__VA_ARGS__>* \ - RegistryName(); \ - typedef Registerer, ##__VA_ARGS__> \ +#define CAFFE_DECLARE_TYPED_REGISTRY( \ + RegistryName, SrcType, ObjectType, PtrType, ...) \ + C10_EXPORT Registry, ##__VA_ARGS__>* \ + RegistryName(); \ + typedef Registerer, ##__VA_ARGS__> \ Registerer##RegistryName; #define CAFFE_DEFINE_TYPED_REGISTRY( \ RegistryName, SrcType, ObjectType, PtrType, ...) 
\ - CAFFE2_EXPORT Registry, ##__VA_ARGS__>* \ + C10_EXPORT Registry, ##__VA_ARGS__>* \ RegistryName() { \ static Registry, ##__VA_ARGS__>* registry = \ new Registry, ##__VA_ARGS__>(); \ diff --git a/caffe2/core/tensor.cc b/caffe2/core/tensor.cc index e142e1a6b6a90..caa0ba9ea55f4 100644 --- a/caffe2/core/tensor.cc +++ b/caffe2/core/tensor.cc @@ -86,7 +86,7 @@ vector GetTensorInfo( CHECK(tc); CHECK(tc->unsafeGetTensorImpl()); CHECK(tc->unsafeGetTensorImpl()->storage().unsafeGetStorageImpl()); - *capacity = tc->capacity_nbytes(); + *capacity = tc->storage().capacity(); tc->ExtractDeviceOption(device); return tc->dims(); } diff --git a/caffe2/core/tensor.h b/caffe2/core/tensor.h index 286718d4268ca..1e4cac2788b56 100644 --- a/caffe2/core/tensor.h +++ b/caffe2/core/tensor.h @@ -262,10 +262,6 @@ class CAFFE2_API Tensor final { return impl_.get()->nbytes(); } - inline size_t capacity_nbytes() const { - return impl_.get()->capacity_nbytes(); - } - inline const vector& dims() const { return impl_.get()->dims(); } @@ -322,6 +318,10 @@ class CAFFE2_API Tensor final { const Storage& storage() { return impl_->storage(); } + + const Storage& storage() const { + return impl_->storage(); + } }; using TensorCPU = Tensor; diff --git a/caffe2/core/tensor_impl.h b/caffe2/core/tensor_impl.h index 20c398f7e4c82..53c812f55e297 100644 --- a/caffe2/core/tensor_impl.h +++ b/caffe2/core/tensor_impl.h @@ -693,11 +693,6 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { ; } - // NB: This capacity may also include available space - // in the storage BEFORE the tensor data, if storage_offset != 0 - inline size_t capacity_nbytes() const { - return storage_.capacity(); - } /** * Returns the dimensions of the tensor as a vector. */ diff --git a/caffe2/core/timer.h b/caffe2/core/timer.h index a290ffc4aadc1..a0384b0dbdbd0 100644 --- a/caffe2/core/timer.h +++ b/caffe2/core/timer.h @@ -41,7 +41,7 @@ class Timer { protected: std::chrono::time_point start_time_; - AT_DISABLE_COPY_AND_ASSIGN(Timer); + C10_DISABLE_COPY_AND_ASSIGN(Timer); }; } diff --git a/caffe2/core/workspace.h b/caffe2/core/workspace.h index 11bf9c413c596..2ad486c328f56 100644 --- a/caffe2/core/workspace.h +++ b/caffe2/core/workspace.h @@ -151,7 +151,7 @@ class CAFFE2_API Workspace { auto* to_blob = CreateBlob(blob); CAFFE_ENFORCE(to_blob); const auto& from_tensor = from_blob->template Get(); - auto* to_tensor = to_blob->GetMutableTensor(Context::GetDeviceType()); + auto* to_tensor = BlobGetMutableTensor(to_blob, Context::GetDeviceType()); to_tensor->CopyFrom(from_tensor); } } @@ -328,7 +328,7 @@ class CAFFE2_API Workspace { std::mutex thread_pool_creation_mutex_; std::shared_ptr bookkeeper_; - AT_DISABLE_COPY_AND_ASSIGN(Workspace); + C10_DISABLE_COPY_AND_ASSIGN(Workspace); }; } // namespace caffe2 diff --git a/caffe2/db/create_db_op.h b/caffe2/db/create_db_op.h index ac7c137cea9aa..6a964f86d1b43 100644 --- a/caffe2/db/create_db_op.h +++ b/caffe2/db/create_db_op.h @@ -34,7 +34,7 @@ class CreateDBOp final : public Operator { string db_name_; uint32_t num_shards_; uint32_t shard_id_; - AT_DISABLE_COPY_AND_ASSIGN(CreateDBOp); + C10_DISABLE_COPY_AND_ASSIGN(CreateDBOp); }; } // namespace caffe2 diff --git a/caffe2/db/leveldb.cc b/caffe2/db/leveldb.cc index 23a188027ece7..fe2665f3a6f0e 100644 --- a/caffe2/db/leveldb.cc +++ b/caffe2/db/leveldb.cc @@ -51,7 +51,7 @@ class LevelDBTransaction : public Transaction { leveldb::DB* db_; std::unique_ptr batch_; - AT_DISABLE_COPY_AND_ASSIGN(LevelDBTransaction); + C10_DISABLE_COPY_AND_ASSIGN(LevelDBTransaction); 
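  // C10_DISABLE_COPY_AND_ASSIGN, like the AT_DISABLE_COPY_AND_ASSIGN macro it
  // replaces throughout this patch, deletes the copy operations of the class it
  // appears in; for this class that amounts to roughly:
  //   LevelDBTransaction(const LevelDBTransaction&) = delete;
  //   LevelDBTransaction& operator=(const LevelDBTransaction&) = delete;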
}; class LevelDB : public DB { diff --git a/caffe2/db/lmdb.cc b/caffe2/db/lmdb.cc index 2eb65bb7aa738..a2eee9910655a 100644 --- a/caffe2/db/lmdb.cc +++ b/caffe2/db/lmdb.cc @@ -114,7 +114,7 @@ class LMDBTransaction final : public Transaction { MDB_dbi mdb_dbi_; MDB_txn* mdb_txn_; - AT_DISABLE_COPY_AND_ASSIGN(LMDBTransaction); + C10_DISABLE_COPY_AND_ASSIGN(LMDBTransaction); }; class LMDB : public DB { diff --git a/caffe2/db/protodb.cc b/caffe2/db/protodb.cc index 2473ad23b6c45..fdaaaf57f1716 100644 --- a/caffe2/db/protodb.cc +++ b/caffe2/db/protodb.cc @@ -60,7 +60,7 @@ class ProtoDBTransaction : public Transaction { TensorProtos* proto_; std::unordered_set existing_names_; - AT_DISABLE_COPY_AND_ASSIGN(ProtoDBTransaction); + C10_DISABLE_COPY_AND_ASSIGN(ProtoDBTransaction); }; class ProtoDB : public DB { diff --git a/caffe2/ideep/operators/concat_split_op.cc b/caffe2/ideep/operators/concat_split_op.cc index 8d011cd3be8bf..38ffdc9942645 100644 --- a/caffe2/ideep/operators/concat_split_op.cc +++ b/caffe2/ideep/operators/concat_split_op.cc @@ -33,8 +33,9 @@ class IDEEPConcatOp final : public IDEEPOperator { if (OperatorBase::InputBlob(i).template IsType()) { inputs.emplace_back(Input(i)); } else { - CAFFE_ENFORCE(OperatorBase::InputBlob(i).IsTensorType(CPU), - "Expect cpu tensor if not itensor"); + CAFFE_ENFORCE( + BlobIsTensorType(OperatorBase::InputBlob(i), CPU), + "Expect cpu tensor if not itensor"); auto& tensor_cpu = OperatorBase::Input(i, CPU); CAFFE_ENFORCE(tensor_cpu.dims().size() == 0 || tensor_cpu.size_from_dim(0) == 0, diff --git a/caffe2/ideep/operators/operator_fallback_ideep.h b/caffe2/ideep/operators/operator_fallback_ideep.h index 08e6de2ae3f0d..3226a08c4af9c 100644 --- a/caffe2/ideep/operators/operator_fallback_ideep.h +++ b/caffe2/ideep/operators/operator_fallback_ideep.h @@ -89,7 +89,7 @@ class IDEEPFallbackOp final : public IDEEPOperator { local_input_blobs_[i]->Reset(); } input_share_[i] = false; - auto dtensor = local_input_blobs_[i]->GetMutableTensor(CPU); + auto dtensor = BlobGetMutableTensor(local_input_blobs_[i], CPU); dtensor->Resize(input.get_dims()); if (input.is_public_format()) { dtensor->ShareExternalPointer( @@ -121,7 +121,7 @@ class IDEEPFallbackOp final : public IDEEPOperator { continue; } CAFFE_ENFORCE( - local_output_blobs_[i]->IsTensorType(CPU), + BlobIsTensorType(*local_output_blobs_[i], CPU), "IDEEP fallback op currently does not support non-TensorCPU " "output type who needs copying."); const auto& src = local_output_blobs_[i]->template Get(); @@ -153,7 +153,7 @@ class IDEEPFallbackOp final : public IDEEPOperator { VLOG(2) << "Output " << base_def_.output(i) << " as CPUTensor"; Blob* dst = OperatorBase::OutputBlob(i); dst->Reset(new Tensor(CPU)); - auto dtensor = dst->GetMutableTensor(CPU); + auto dtensor = BlobGetMutableTensor(dst, CPU); dtensor->Resize(src_dims); dtensor->ShareData(src); } diff --git a/caffe2/ideep/operators/utility_ops.cc b/caffe2/ideep/operators/utility_ops.cc index 626568a989b93..468a42df1a923 100644 --- a/caffe2/ideep/operators/utility_ops.cc +++ b/caffe2/ideep/operators/utility_ops.cc @@ -31,7 +31,7 @@ class CopyIDEEPToCPUOp final : public IDEEPOperator { USE_IDEEP_DEF_ALIASES(); bool RunOnDevice() override { const auto& input_blob = OperatorBase::InputBlob(0); - if (input_blob.IsTensorType(CPU)) { + if (BlobIsTensorType(input_blob, CPU)) { VLOG(2) << "Directing sharing of TensorCPU"; const auto& X = OperatorBase::Input(0, CPU); auto* Y = OperatorBase::Output(0, CPU); diff --git a/caffe2/mkl/operators/operator_fallback_mkl.h 
b/caffe2/mkl/operators/operator_fallback_mkl.h index 6d9713b74612d..a3135758813ec 100644 --- a/caffe2/mkl/operators/operator_fallback_mkl.h +++ b/caffe2/mkl/operators/operator_fallback_mkl.h @@ -66,10 +66,10 @@ class MKLFallbackOp final : public Operator { for (int i = 0; i < InputSize(); ++i) { if (OperatorBase::InputIsType>(i)) { OperatorBase::Input>(i).CopyTo( - local_input_blobs_[i]->GetMutableTensor(CPU)); + BlobGetMutableTensor(local_input_blobs_[i], CPU)); } else if (OperatorBase::InputIsType>(i)) { OperatorBase::Input>(i).CopyTo( - local_input_blobs_[i]->GetMutableTensor(CPU)); + BlobGetMutableTensor(local_input_blobs_[i], CPU)); } else { VLOG(1) << "Input " << i << " is not MKLMemory. Skipping copy."; // Note(jiayq): This removes a const but conceptually @@ -93,7 +93,7 @@ class MKLFallbackOp final : public Operator { continue; } CAFFE_ENFORCE( - local_output_blobs_[i]->IsTensorType(CPU), + BlobIsTensorType(*local_output_blobs_[i], CPU), "MKL fallback op currently does not support non-TensorCPU " "output type who needs copying."); const auto& src = local_output_blobs_[i]->template Get(); diff --git a/caffe2/mkl/utils/mkl_memory.h b/caffe2/mkl/utils/mkl_memory.h index bd0ad40422079..736d8ede8cf53 100644 --- a/caffe2/mkl/utils/mkl_memory.h +++ b/caffe2/mkl/utils/mkl_memory.h @@ -58,7 +58,7 @@ class PrimitiveWrapper { private: dnnPrimitive_t primitive_ = 0; - AT_DISABLE_COPY_AND_ASSIGN(PrimitiveWrapper); + C10_DISABLE_COPY_AND_ASSIGN(PrimitiveWrapper); }; template @@ -138,7 +138,7 @@ class LayoutWrapper { private: dnnLayout_t layout_ = 0; - AT_DISABLE_COPY_AND_ASSIGN(LayoutWrapper); + C10_DISABLE_COPY_AND_ASSIGN(LayoutWrapper); }; /** @@ -557,7 +557,7 @@ class MKLMemory { // The primitive to use to convert from internal layout to user layout PrimitiveWrapper convert_out_; - AT_DISABLE_COPY_AND_ASSIGN(MKLMemory); + C10_DISABLE_COPY_AND_ASSIGN(MKLMemory); }; template @@ -575,7 +575,7 @@ class MKLWorkspace { private: void* buffer_; - AT_DISABLE_COPY_AND_ASSIGN(MKLWorkspace); + C10_DISABLE_COPY_AND_ASSIGN(MKLWorkspace); }; } // namespace mkl diff --git a/caffe2/mobile/contrib/arm-compute/core/net_gl.h b/caffe2/mobile/contrib/arm-compute/core/net_gl.h index 029d888b1ebf9..1dc93dedc3fff 100644 --- a/caffe2/mobile/contrib/arm-compute/core/net_gl.h +++ b/caffe2/mobile/contrib/arm-compute/core/net_gl.h @@ -57,7 +57,7 @@ class GLNet : public NetBase { vector> operators_; - AT_DISABLE_COPY_AND_ASSIGN(GLNet); + C10_DISABLE_COPY_AND_ASSIGN(GLNet); }; } // namespace caffe2 diff --git a/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc b/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc index 111af03f8602b..06ec2b50acc17 100644 --- a/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc +++ b/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc @@ -43,7 +43,7 @@ bool CopyFromGLOp::RunOnDevice() { if (first_run_) { first_run_ = false; for (int i = 0; i < Inputs().size(); ++i) { - auto* Y = OperatorBase::Outputs()[i]->GetMutableTensor(CPU); + auto* Y = BlobGetMutableTensor(OperatorBase::Outputs()[i], CPU); Y->Resize(inputs_[i]->dims()); Y->template mutable_data(); } @@ -54,7 +54,7 @@ bool CopyFromGLOp::RunOnDevice() { // GLTensor auto* X = inputs_[i].get(); X->lazy_allocate(Xblob, second_run_, true); - auto* Y = OperatorBase::Outputs()[i]->GetMutableTensor(CPU); + auto* Y = BlobGetMutableTensor(OperatorBase::Outputs()[i], CPU); Timer timer; timer.Start(); getTensorCPU(*X, *Y); diff --git a/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h 
b/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h index daa7ef008fc7b..68f79e84a89f8 100644 --- a/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h +++ b/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h @@ -27,7 +27,7 @@ template void PopulateCPUBlob(Workspace *ws, bool random, std::string name, std::vector dims, int val = 1, int dist_shift = 0, float variance = 1) { Blob *blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(dims); T *t_data = tensor->mutable_data(); std::random_device rd; diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm index 52f746f63f317..742f8e48f4e9e 100644 --- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm +++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm @@ -489,13 +489,13 @@ bool RunOnDevice() override { "noise_size", 491 /* prime to avoid artifacts */); // Treaded as half4 in the kernel, so need half4 here. noiseSize = divRoundUp(noiseSize, 4) * 4; - if (!noiseBlob->IsTensorType(CPU) || + if (!BlobIsTensorType(*noiseBlob, CPU) || noiseBlob->Get().size() != noiseSize) { VLOG(2) << "Initializing stylizer with noise: " << noiseSize; caffe2::Timer rt; // Initialize random noise on first use. // Cache it to maintain temporal consistency. - auto* t = noiseBlob->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(noiseBlob, CPU); t->Resize(noiseSize); math::RandGaussian( t->size(), diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm index 7216b16611aa2..7ac629019c58c 100644 --- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm +++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm @@ -94,7 +94,7 @@ void testMPSCNN() { Workspace ws; for (auto i = 0; i < N; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); t->Resize(BS, C, H, W); CPUContext ctx; math::RandGaussian( @@ -152,7 +152,7 @@ void testMPSCNN() { Workspace ws; for (auto i = 0; i < N; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); switch (ndim) { case 1: t->Resize(5); @@ -210,7 +210,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNNormalizePlanarYUV Test: "; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batch_size, channels, 8, 13); CPUContext ctx; math::RandGaussian( @@ -218,14 +218,14 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(1, channels); CPUContext ctx; math::RandGaussian( t->size(), 0, 1, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("stddev")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("stddev"), CPU); t->Resize(1, channels); CPUContext ctx; math::RandUniform( @@ -290,7 +290,7 @@ void testMPSCNN() { for (const auto dim : {10, 40}) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, channels, dim, dim); CPUContext ctx; // Too noisy. 
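The mpscnn_test.mm hunks above and below repeat one setup idiom: create a named blob in a Workspace, obtain its CPU tensor through the new free function, resize it, and fill it with random values. A condensed sketch of that idiom, assuming the Caffe2 headers this test already uses (the helper name, blob name, and shape are illustrative):

#include "caffe2/core/blob.h"
#include "caffe2/core/context.h"
#include "caffe2/core/tensor.h"
#include "caffe2/core/workspace.h"
#include "caffe2/utils/math.h"

namespace caffe2 {
void CreateRandomCPUInput(Workspace* ws) {
  // Formerly ws->CreateBlob("X_cpu")->GetMutableTensor(CPU).
  auto* t = BlobGetMutableTensor(ws->CreateBlob("X_cpu"), CPU);
  t->Resize(1, 12, 57, 72);
  CPUContext ctx;
  math::RandGaussian<float, CPUContext>(
      t->size(), 0, 1, t->mutable_data<float>(), &ctx);
}
} // namespace caffe2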
@@ -299,7 +299,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(channels); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -310,7 +310,7 @@ void testMPSCNN() { // t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(channels); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -321,7 +321,7 @@ void testMPSCNN() { // t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("pw")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("pw"), CPU); t->Resize(prelu == PreluTy::SHARED ? 1 : channels); CPUContext ctx; // Too noisy. @@ -409,7 +409,7 @@ void testMPSCNN() { Workspace ws; const auto channels = array ? 12 : 3; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batch_size, channels, 8, 13); CPUContext ctx; math::RandGaussian( @@ -417,7 +417,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(shared ? channels : 1); CPUContext ctx; math::RandGaussian( @@ -480,7 +480,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNSpatialBN Test: " << channels; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batch_size, channels, 8, 13); CPUContext ctx; math::RandGaussian( @@ -488,7 +488,7 @@ void testMPSCNN() { } for (const std::string name : {"scale", "bias", "mean", "var"}) { - auto* t = ws.CreateBlob(name)->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(name), CPU); t->Resize(channels); CPUContext ctx; // High mean to avoid var division by zero. 
@@ -575,7 +575,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNFC Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, CIn, H, W); CPUContext ctx; math::RandGaussian( @@ -583,7 +583,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(COut, CIn * H * W); CPUContext ctx; math::RandGaussian( @@ -591,7 +591,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(COut); CPUContext ctx; math::RandGaussian( @@ -682,8 +682,8 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNPool Test: " << pool; Workspace ws; { - auto* t = - ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, 8, 8, 13); CPUContext ctx; math::RandGaussian( @@ -784,7 +784,7 @@ void testMPSCNN() { std::vector>{{1, 3, 50, 80}, {1, 12, 50, 80}}) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(dims); CPUContext ctx; math::RandGaussian( @@ -860,7 +860,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNPreprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 8, 13, 4); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -869,7 +869,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 100; @@ -940,7 +940,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNDeprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 3, 8, 24); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -949,7 +949,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 100; @@ -999,7 +999,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNDeprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 3, 1280, 720); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1008,7 +1008,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 30; @@ -1072,7 +1072,8 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNConv Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = + BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1080,7 +1081,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(8, 12, kernel_h, kernel_w); CPUContext ctx; math::RandGaussian( @@ -1092,7 +1093,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(8); CPUContext ctx; 
math::RandGaussian( @@ -1188,7 +1189,7 @@ void testMPSCNN() { Workspace ws; int output_channels = input_channels * channel_multiplier; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, input_channels, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1196,7 +1197,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(output_channels, 1, 3, 3); CPUContext ctx; math::RandGaussian( @@ -1204,7 +1205,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(output_channels); CPUContext ctx; math::RandGaussian( @@ -1275,7 +1276,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNConvRelu Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1283,7 +1284,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(8, 12, 3, 3); CPUContext ctx; math::RandGaussian( @@ -1291,7 +1292,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(8); CPUContext ctx; math::RandGaussian( @@ -1385,7 +1386,7 @@ void testMPSCNN() { LOG(INFO) << "MPSConv Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1393,7 +1394,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(8, 12, 3, 3); CPUContext ctx; math::RandGaussian( @@ -1401,7 +1402,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(8); CPUContext ctx; math::RandGaussian( @@ -1493,7 +1494,7 @@ void testMPSCNN() { LOG(INFO) << "MPSConv Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, C, 12, 16); CPUContext ctx; math::RandGaussian( @@ -1501,7 +1502,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(M, C, K, K); CPUContext ctx; math::RandGaussian( @@ -1509,7 +1510,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(M); CPUContext ctx; math::RandGaussian( @@ -1607,7 +1608,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNConv Test - group"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, C, 12, 16); CPUContext ctx; math::RandGaussian( @@ -1615,7 +1616,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(M, C / group, K, K); CPUContext ctx; math::RandGaussian( @@ -1623,7 +1624,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = 
BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(M); CPUContext ctx; math::RandGaussian( @@ -1726,7 +1727,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNMul Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1734,7 +1735,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(72); CPUContext ctx; math::RandGaussian( @@ -1791,7 +1792,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNSub Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1799,7 +1800,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(72); CPUContext ctx; math::RandGaussian( @@ -1856,7 +1857,7 @@ void testMPSCNN() { LOG(INFO) << "MPSAdd Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1864,7 +1865,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1921,7 +1922,7 @@ void testMPSCNN() { LOG(INFO) << "MPSAdd Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1929,7 +1930,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -2011,7 +2012,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNNeuron Test: " << n; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 4, 12, 12); CPUContext ctx; math::RandGaussian( @@ -2065,7 +2066,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNDropout Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -2136,7 +2137,7 @@ void testMPSCNN() { << " - scale: " << scale; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, channels, 40, 40); CPUContext ctx; math::RandGaussian( @@ -2144,7 +2145,7 @@ void testMPSCNN() { } { // Use the batch-first encoding (n, [bbox]) - auto* t = ws.CreateBlob("R")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("R"), CPU); t->Resize(6, 5); for (auto i = 0; i < t->dim32(0); ++i) { t->mutable_data()[5 * i + 0] = 0; // batch @@ -2250,14 +2251,14 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNRoIWarp Test 2"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 8, 40, 40); CPUContext ctx; math::RandGaussian( t->size(), 4, 2, t->mutable_data(), 
&ctx); } { - auto* t = ws.CreateBlob("R")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("R"), CPU); t->Resize(6, 4); for (auto i = 0; i < t->dim32(0); ++i) { t->mutable_data()[4 * i + 0] = (i % 4 + 1) * 1.0 / scale; @@ -2362,7 +2363,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNResizeNearestOp Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, 37, 89); CPUContext ctx; math::RandGaussian( @@ -2497,7 +2498,7 @@ void testMPSCNN() { vector im_info{60, 80, 0.166667}; vector anchors{-38, -16, 53, 31, -120, -120, 135, 135}; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(num_images, A, H, W); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = scores[i]; @@ -2505,7 +2506,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("bbox_delta_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("bbox_delta_cpu"), CPU); t->Resize(num_images, 4 * A, H, W); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = bbx[i]; @@ -2513,7 +2514,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("im_info")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("im_info"), CPU); t->Resize(num_images, 3); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = im_info[i]; @@ -2521,7 +2522,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("anchors")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("anchors"), CPU); t->Resize(A, 4); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = anchors[i]; @@ -2587,7 +2588,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNSoftmax Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); // Only works for spatial dimension of (1, 1) - weird. t->Resize(batchSize, 12, 1, 1); CPUContext ctx; @@ -2661,8 +2662,8 @@ void testMPSCNN() { LOG(INFO) << "MPSConvTranspose Test"; Workspace ws; { - auto* t = - ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, inputChannels, 8, 12); CPUContext ctx; math::RandGaussian( @@ -2675,7 +2676,7 @@ void testMPSCNN() { { auto* t = - ws.CreateBlob("W")->GetMutableTensor(CPU); + BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize( inputChannels, outputChannels, @@ -2692,7 +2693,7 @@ void testMPSCNN() { { auto* t = - ws.CreateBlob("b")->GetMutableTensor(CPU); + BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(outputChannels); CPUContext ctx; math::RandGaussian( @@ -2809,7 +2810,7 @@ void testMPSCNN() { << batchSize; Workspace ws; for (auto i = 0; i < numInputs; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); t->Resize(batchSize, array ? 
(i + 1) * 4 : 4, 10, 10); CPUContext ctx; math::RandGaussian( @@ -2891,7 +2892,7 @@ void testMPSCNN() { } Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, inputChannels, 53, 47); CPUContext ctx; math::RandGaussian( @@ -2964,7 +2965,7 @@ void testMPSCNN() { << numInputs << ", " << batchSize; Workspace ws; for (auto i = 0; i < numInputs; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); t->Resize(batchSize, channelCount, 9, 17); CPUContext ctx; math::RandGaussian( @@ -3336,8 +3337,8 @@ void compareModels(const NetDef& initNet, NetDef predictNet) { Workspace cws; cws.RunNetOnce(initNet); { - auto* t = - cws.CreateBlob(predictNet.external_input(0))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + cws.CreateBlob(predictNet.external_input(0)), CPU); t->Resize(1, 224, 224, 4); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = i % 225; @@ -3348,8 +3349,8 @@ void compareModels(const NetDef& initNet, NetDef predictNet) { Workspace mws; mws.RunNetOnce(initNet); { - auto* t = - mws.CreateBlob(predictNet.external_input(0))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + mws.CreateBlob(predictNet.external_input(0)), CPU); t->Resize(1, 224, 224, 4); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = i % 225; @@ -3397,16 +3398,16 @@ void verifyRewrite( dumpDef(predictNet); dumpDef(metalPredictNet); -#define RUN_NET(ws, predictNet) \ - ws.RunNetOnce(initNet); \ - { \ - auto* t = \ - ws.CreateBlob(predictNet.external_input(0))->GetMutableTensor(CPU); \ - t->Resize(inputDims); \ - CPUContext ctx; \ - math::RandGaussian( \ - t->size(), 0, 1, t->mutable_data(), &ctx); \ - } \ +#define RUN_NET(ws, predictNet) \ + ws.RunNetOnce(initNet); \ + { \ + auto* t = BlobGetMutableTensor( \ + ws.CreateBlob(predictNet.external_input(0)), CPU); \ + t->Resize(inputDims); \ + CPUContext ctx; \ + math::RandGaussian( \ + t->size(), 0, 1, t->mutable_data(), &ctx); \ + } \ ws.RunNetOnce(predictNet); // initialize diff --git a/caffe2/mobile/contrib/ios/pool_test.cc b/caffe2/mobile/contrib/ios/pool_test.cc index 47fd405eef01e..3f78c5d1fcd6a 100644 --- a/caffe2/mobile/contrib/ios/pool_test.cc +++ b/caffe2/mobile/contrib/ios/pool_test.cc @@ -16,7 +16,7 @@ void AddNoiseInput(const vector& shape, const string& name, Workspace* DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/mobile/contrib/ios/resize_test.cc b/caffe2/mobile/contrib/ios/resize_test.cc index 1c08df0f32a1c..428c395fe442d 100644 --- a/caffe2/mobile/contrib/ios/resize_test.cc +++ b/caffe2/mobile/contrib/ios/resize_test.cc @@ -16,7 +16,7 @@ void AddNoiseInput(const vector& shape, const string& name, Workspace* DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/mobile/contrib/nnapi/nnapi.cc b/caffe2/mobile/contrib/nnapi/nnapi.cc index 45ea26c44cc96..56f1fc28986a7 100644 --- a/caffe2/mobile/contrib/nnapi/nnapi.cc +++ b/caffe2/mobile/contrib/nnapi/nnapi.cc @@ -679,7 +679,7 @@ void NNApi::init(const TensorVector& inputs, TensorVector* outputs) { 
output_dims.push_back(dim); } - auto* tensor = ws_.CreateBlob(blob)->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(ws_.CreateBlob(blob), CPU); tensor->Resize(output_dims); outputs->push_back(tensor); diff --git a/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc b/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc index 359e7767746b6..c14e9ed26376e 100644 --- a/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc +++ b/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + #include "caffe2/core/init.h" #include "caffe2/core/operator.h" #include "caffe2/core/tensor.h" @@ -43,14 +43,14 @@ static double benchmark_conv_caffe2( ws = &localWs; } { - auto* t = ws->CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("W"), CPU); if (group == 1) { t->Resize(K, C, kernel, kernel); } else { @@ -61,7 +61,7 @@ static double benchmark_conv_caffe2( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("B"), CPU); t->Resize(K); CPUContext ctx; math::RandGaussian( @@ -129,14 +129,14 @@ static double benchmark_conv_nnapi( ws = &localWs; } { - auto* t = ws->CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("W"), CPU); if (group > 1) { CAFFE_ENFORCE_EQ(C, group); t->Resize(1, kernel, kernel, C); @@ -148,7 +148,7 @@ static double benchmark_conv_nnapi( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("B"), CPU); t->Resize(K); CPUContext ctx; math::RandGaussian( @@ -190,7 +190,7 @@ static double benchmark_conv_nnapi( NetDef initNet; NNApi model(initNet, netdef, ws); std::vector inputs, outputs; - inputs.push_back(ws->GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws->GetBlob("X_cpu"), CPU)); CAFFE_ENFORCE(model.run(inputs, &outputs)); for (int i = 0; i < warmup; i++) { @@ -220,14 +220,14 @@ static double benchmark_conv_nnapi_int8( ws = &localWs; } { - auto* t = ws->CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); for (int i = 0; i < t->size(); i++) { t->mutable_data()[i] = rand() % 10; } } { - auto* t = ws->CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("W"), CPU); if (group > 1) { CAFFE_ENFORCE_EQ(C, group); t->Resize(1, kernel, kernel, C); @@ -243,7 +243,7 @@ static double benchmark_conv_nnapi_int8( // should be of ANEURALNETWORKS_TENSOR_INT32, with zeroPoint of 0 and // bias_scale == input_scale * filter_scale. 
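Editorial note: the comment just above states the NNAPI constraint on a quantized conv bias. As a quick worked example with invented numbers: if the input tensor is quantized with scale 0.5 and the filter with scale 0.02, the bias must be an int32 tensor with zeroPoint 0 and scale 0.5 × 0.02 = 0.01, so a real-valued bias of 1.23 would be stored as round(1.23 / 0.01) = 123.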
{ - auto* t = ws->CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("B"), CPU); t->Resize(K); for (int i = 0; i < t->size(); i++) { t->mutable_data()[i] = rand() % 10; @@ -322,7 +322,7 @@ static double benchmark_conv_nnapi_int8( NetDef initNet; NNApi model(initNet, netdef, ws); std::vector inputs, outputs; - inputs.push_back(ws->GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws->GetBlob("X_cpu"), CPU)); CAFFE_ENFORCE(model.run(inputs, &outputs)); for (int i = 0; i < warmup; i++) { diff --git a/caffe2/mobile/contrib/nnapi/nnapi_test.cc b/caffe2/mobile/contrib/nnapi/nnapi_test.cc index deab1ca7b43f7..9b4608dc07aee 100644 --- a/caffe2/mobile/contrib/nnapi/nnapi_test.cc +++ b/caffe2/mobile/contrib/nnapi/nnapi_test.cc @@ -55,7 +55,7 @@ static void test_relu(int N, int C, int H, int W) { // CPU reference Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( @@ -81,7 +81,7 @@ static void test_relu(int N, int C, int H, int W) { NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -103,21 +103,21 @@ static void test_conv_NHWC( int stride_w) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(K, kernel, kernel, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU); t->Resize(K); CPUContext ctx; math::RandGaussian( @@ -189,7 +189,7 @@ static void test_conv_NHWC( NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -211,21 +211,21 @@ static void test_depthwise_conv_NHWC( int stride_w) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(1, kernel, kernel, D); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU); t->Resize(D); CPUContext ctx; math::RandGaussian( @@ -406,7 +406,7 @@ static void test_depthwise_conv_NHWC( NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -428,7 +428,7 
@@ static void test_pooling( int stride_w) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( @@ -496,7 +496,7 @@ static void test_pooling( NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -506,7 +506,7 @@ static void test_pooling( static void test_softmax(int N, int C, int H = 1, int W = 1) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); if (H == 1 && W == 1) { t->Resize(N, C); } else { @@ -538,7 +538,7 @@ static void test_softmax(int N, int C, int H = 1, int W = 1) { NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; diff --git a/caffe2/mobile/contrib/opengl/test/opengl_test.cc b/caffe2/mobile/contrib/opengl/test/opengl_test.cc index 49a875184c10d..690a33cb854f1 100644 --- a/caffe2/mobile/contrib/opengl/test/opengl_test.cc +++ b/caffe2/mobile/contrib/opengl/test/opengl_test.cc @@ -178,7 +178,7 @@ void testOpenGLCopyOps(int N, int C, int H, int W, float error, int tile_x = 1, LOG(INFO) << "OPENGLCopyFrom/To Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -275,7 +275,7 @@ void testOpenGLConv(int N, << " Op: " << glPoolOperationName[poolOp]; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; if (random_input) { @@ -301,7 +301,7 @@ void testOpenGLConv(int N, } if (poolOp != AveragePool && poolOp != MaxPool) { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); if (poolOp != ConvTranspose && poolOp != ConvTransposePRelu && poolOp != ConvTransposeRelu) { t->Resize(K, C, kernel_h, kernel_w); } else { @@ -343,7 +343,7 @@ void testOpenGLConv(int N, // bias { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -367,7 +367,7 @@ void testOpenGLConv(int N, } if (poolOp == ConvPRelu || poolOp == ConvTransposePRelu) { - auto* t = ws.CreateBlob("p")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("p"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -532,7 +532,7 @@ void testOpenGLPRelu( << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. 
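Editorial note: several of the mobile test files touched here (pool_test.cc, resize_test.cc, and the SNPE benchmark further down) factor the recurring fill-a-blob-with-noise setup into an AddNoiseInput helper, which this patch also ports to the free-function accessor. A reconstruction of its shape follows; the template arguments and Gaussian parameters were lost in this extract, so the ones shown are assumptions to be checked against the sources.

#include "caffe2/core/context.h"
#include "caffe2/core/workspace.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

// Reconstructed sketch of the AddNoiseInput test helper after this patch.
void AddNoiseInput(
    const std::vector<int64_t>& shape,
    const std::string& name,
    Workspace* ws) {
  DeviceOption option;
  CPUContext context(option);
  Blob* blob = ws->CreateBlob(name);
  auto* tensor = BlobGetMutableTensor(blob, CPU);
  tensor->Resize(shape);
  math::RandGaussian<float, CPUContext>(
      tensor->size(), 0.0f, 1.0f, tensor->mutable_data<float>(), &context);
}

} // namespace caffe2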
@@ -541,7 +541,7 @@ void testOpenGLPRelu( // prelu scale { - auto* t = ws.CreateBlob("p")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("p"), CPU); t->Resize(prelu_size); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -603,7 +603,7 @@ void testOpenGLRelu(int N, int C, int H, int W, int input_tile_x, int input_tile << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. @@ -664,13 +664,13 @@ void testOpenGLAdd(int N, int C, int H, int W, float error = 0.1, int input_tile << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t0 = ws.CreateBlob("X_cpu0")->GetMutableTensor(CPU); + auto* t0 = BlobGetMutableTensor(ws.CreateBlob("X_cpu0"), CPU); t0->Resize(N, C, H, W); CPUContext ctx0; // Too noisy. math::RandGaussian(t0->size(), 0, 30, t0->mutable_data(), &ctx0); - auto* t1 = ws.CreateBlob("X_cpu1")->GetMutableTensor(CPU); + auto* t1 = BlobGetMutableTensor(ws.CreateBlob("X_cpu1"), CPU); t1->Resize(N, C, H, W); CPUContext ctx1; // Too noisy. @@ -750,13 +750,13 @@ void testOpenGLSub(int N, int C, int H, int W, float error = 0.1) { Workspace ws; { - auto* t0 = ws.CreateBlob("X_cpu0")->GetMutableTensor(CPU); + auto* t0 = BlobGetMutableTensor(ws.CreateBlob("X_cpu0"), CPU); t0->Resize(N, C, H, W); CPUContext ctx0; // Too noisy. math::RandGaussian(t0->size(), 0, 30, t0->mutable_data(), &ctx0); - auto* t1 = ws.CreateBlob("X_cpu1")->GetMutableTensor(CPU); + auto* t1 = BlobGetMutableTensor(ws.CreateBlob("X_cpu1"), CPU); t1->Resize(N, C, H, W); CPUContext ctx1; // Too noisy. @@ -814,8 +814,8 @@ void testOpenGLConcat(int N, std::vector Cs, int H, int W, bool tiling = fa << "H: " << H << ", W: " << W; Workspace ws; for (int i = 0; i < Cs.size(); i++) { - auto* t = - ws.CreateBlob("X_cpu" + caffe2::to_string(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + ws.CreateBlob("X_cpu" + caffe2::to_string(i)), CPU); t->Resize(N, Cs[i], H, W); CPUContext ctx0; // Too noisy. @@ -891,7 +891,7 @@ void testOpenGLSigmoid(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. 
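Editorial note: the element-wise OpenGL tests in this area (PRelu, Relu, Add, Sub, Concat, Sigmoid) share one structure: fill CPU input blobs, compute a CPU reference, run the OpenGL net, and compare within the given error tolerance. A compressed sketch of the reference half only, with placeholder blob and op names that are not taken from the patch:

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

namespace caffe2 {

// Placeholder names throughout; the real tests also build and run the
// OpenGL net and then compare element-wise against this reference.
void RunAddReference(Workspace& ws) {
  OperatorDef def;
  def.set_type("Add");
  def.add_input("X_cpu0");
  def.add_input("X_cpu1");
  def.add_output("Y_ref");
  CAFFE_ENFORCE(ws.RunOperatorOnce(def));
  const auto& y_ref = ws.GetBlob("Y_ref")->Get<TensorCPU>();
  LOG(INFO) << "reference elements: " << y_ref.size();
}

} // namespace caffe2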
@@ -942,7 +942,7 @@ void testOpenGLTanh(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 2, t->mutable_data(), &ctx); @@ -992,14 +992,14 @@ void testOpenGLMul(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), -10, 10, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU); t->Resize(1); CPUContext ctx; math::RandGaussian(t->size(), -10, 10, t->mutable_data(), &ctx); @@ -1060,7 +1060,7 @@ void testOpenGLSoftmax(int N, int D, float error, bool tiled = false) { LOG(INFO) << "OpenGL Softmax Test " << "N: " << N << " D: " << D << " Tiled:" << tiled; Workspace ws; - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); { t->Resize(N, D); CPUContext ctx; @@ -1151,7 +1151,7 @@ void testOpenGLInstanceNorm(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. @@ -1163,7 +1163,7 @@ void testOpenGLInstanceNorm(int N, int C, int H, int W, float error) { // scale { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1172,7 +1172,7 @@ void testOpenGLInstanceNorm(int N, int C, int H, int W, float error) { } // bias { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1254,7 +1254,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. 
@@ -1266,7 +1266,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { // scale { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1275,7 +1275,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { } // bias { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1284,7 +1284,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { } // prelu scale { - auto* t = ws.CreateBlob("p")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("p"), CPU); t->Resize(C); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -1385,7 +1385,7 @@ void OpenGL_speedtest(int N, << " C: " << C << " H: " << H << " W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; if (random_input) { @@ -1399,7 +1399,7 @@ void OpenGL_speedtest(int N, } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(K, C, kernel_h, kernel_w); CPUContext ctx; if (random_input) { @@ -1413,7 +1413,7 @@ void OpenGL_speedtest(int N, } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -1479,7 +1479,7 @@ void testOpenGLPadImage( { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -1593,7 +1593,7 @@ void testOpenGLResize(int N, { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -1675,7 +1675,7 @@ void testOpenGLPreprocess(int N, int C, int H, int W, float error) { LOG(INFO) << "OpenGL Preprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1684,7 +1684,7 @@ void testOpenGLPreprocess(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 100; @@ -1748,7 +1748,7 @@ void testOpenGLDeprocess(int N, int C, int H, int W, float error) { LOG(INFO) << "OpenGLDeprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1757,7 +1757,7 @@ void testOpenGLDeprocess(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 30; @@ -1800,7 +1800,7 @@ void testOpenGLNormPlanarYUV(int N, int C, int H, int W, 
float error) { LOG(INFO) << "OpenGLNormPlanarYUV Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, 3, H, W); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1809,7 +1809,7 @@ void testOpenGLNormPlanarYUV(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(1, 3); CPUContext ctx; t->mutable_data()[0] = 30; @@ -1818,7 +1818,7 @@ void testOpenGLNormPlanarYUV(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("stdev")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("stdev"), CPU); t->Resize(1, 3); CPUContext ctx; t->mutable_data()[0] = 6; @@ -1879,7 +1879,7 @@ void OpenGL_copyops_speedtest(int N, LOG(INFO) << "OpenGL CopyOps Speed Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; if (random_input) { @@ -1893,7 +1893,7 @@ void OpenGL_copyops_speedtest(int N, } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(K, C, kernel_h, kernel_w); CPUContext ctx; if (random_input) { @@ -1907,7 +1907,7 @@ void OpenGL_copyops_speedtest(int N, } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -1990,8 +1990,8 @@ void compareModelsForOpenGL(std::string name, Workspace cws; cws.RunNetOnce(initNet); - auto* t_cpu = cws.CreateBlob(truncatedPredictNet.external_input(0)) - ->GetMutableTensor(CPU); + auto* t_cpu = BlobGetMutableTensor( + cws.CreateBlob(truncatedPredictNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); @@ -2032,26 +2032,26 @@ void compareModelsForOpenGL(std::string name, Workspace mws; mws.RunNetOnce(initNet); - auto* t_gl = mws.CreateBlob(truncatedOpenGLPredictNet.external_input(0)) - ->GetMutableTensor(CPU); + auto* t_gl = BlobGetMutableTensor( + mws.CreateBlob(truncatedOpenGLPredictNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); t_gl->Resize(1, height, width, channel); uint8_t* input = t_gl->mutable_data(); - memcpy(input, t_cpu->mutable_data(), t_cpu->capacity_nbytes()); + memcpy(input, t_cpu->mutable_data(), t_cpu->storage().capacity()); } else if (name == "segmentation") { CAFFE_ENFORCE_EQ(input_order, "NCHW"); CAFFE_ENFORCE_EQ(input_type, "float"); t_gl->Resize(1, channel, height, width); float* input = t_gl->mutable_data(); - memcpy(input, t_cpu->mutable_data(), t_cpu->capacity_nbytes()); + memcpy(input, t_cpu->mutable_data(), t_cpu->storage().capacity()); } else if (name == "denoiser") { CAFFE_ENFORCE_EQ(input_order, "NCHW"); CAFFE_ENFORCE_EQ(input_type, "float"); t_gl->Resize(1, channel, height, width); float* input = t_gl->mutable_data(); - memcpy(input, t_cpu->mutable_data(), t_cpu->capacity_nbytes()); + memcpy(input, t_cpu->mutable_data(), t_cpu->storage().capacity()); } cws.RunNetOnce(truncatedPredictNet); @@ -2116,7 +2116,7 @@ void compareBatchedToTiledModels(std::string name, tws.RunNetOnce(initNet); auto* t_batch = - tws.CreateBlob(bachedNet.external_input(0))->GetMutableTensor(CPU); + 
BlobGetMutableTensor(tws.CreateBlob(bachedNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); @@ -2143,20 +2143,20 @@ void compareBatchedToTiledModels(std::string name, bws.RunNetOnce(initNet); auto* t_tiling = - bws.CreateBlob(tiledNet.external_input(0))->GetMutableTensor(CPU); + BlobGetMutableTensor(bws.CreateBlob(tiledNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); t_tiling->Resize(1, height, width, channel); uint8_t* input = t_tiling->mutable_data(); - memcpy(input, t_batch->mutable_data(), t_batch->capacity_nbytes()); + memcpy(input, t_batch->mutable_data(), t_batch->storage().capacity()); } else if (name == "segmentation") { CAFFE_ENFORCE_EQ(input_order, "NCHW"); CAFFE_ENFORCE_EQ(input_type, "float"); t_tiling->Resize(1, channel, height, width); float* input = t_tiling->mutable_data(); - memcpy(input, t_batch->mutable_data(), t_batch->capacity_nbytes()); + memcpy(input, t_batch->mutable_data(), t_batch->storage().capacity()); } bws.RunNetOnce(bachedNet); diff --git a/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc b/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc index deced71964496..cfeed00e8b973 100644 --- a/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc +++ b/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc @@ -14,7 +14,7 @@ #define POPULATE_DATA(_n, _s, _l) \ do { \ Blob* _blob = ws.CreateBlob((_n)); \ - auto* _tensor = _blob->GetMutableTensor(CPU); \ + auto* _tensor = BlobGetMutableTensor(_blob, CPU); \ _tensor->Resize((_s)); \ memcpy(_tensor->mutable_data(), data_##_l, _tensor->nbytes()); \ } while (0) @@ -23,7 +23,7 @@ #define POPULATE_DATA(_n, _s, _l) \ do { \ Blob* _blob = ws.CreateBlob((_n)); \ - auto* _tensor = _blob->GetMutableTensor(CPU); \ + auto* _tensor = BlobGetMutableTensor(_blob, CPU); \ _tensor->Resize((_s)); \ memset(_tensor->mutable_data(), 1, _tensor->nbytes()); \ } while (0) @@ -43,7 +43,7 @@ void AddConstInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set(tensor->size(), value, tensor->mutable_data(), @@ -56,7 +56,7 @@ void AddNoiseInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/mobile/contrib/ulp2/ulp_test.cc b/caffe2/mobile/contrib/ulp2/ulp_test.cc index a1c1af0f6dfb8..6316b05284fba 100644 --- a/caffe2/mobile/contrib/ulp2/ulp_test.cc +++ b/caffe2/mobile/contrib/ulp2/ulp_test.cc @@ -289,13 +289,13 @@ void ConvTest2b1b(int IC, int KH, int KW, int H, int W, int OC, int N, ConvArgs def.add_arg()->CopyFrom(MakeArgument("pad_r", args.pad_r)); def.add_arg()->CopyFrom(MakeArgument("pad_t", args.pad_t)); def.add_arg()->CopyFrom(MakeArgument("pad_b", args.pad_b)); - auto* Xws = ws.CreateBlob("X")->GetMutableTensor(CPU); + auto* Xws = BlobGetMutableTensor(ws.CreateBlob("X"), CPU); Xws->ResizeLike(X); Xws->ShareExternalPointer(X.mutable_data(), X.size()); - auto* Wws = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* Wws = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); Wws->ResizeLike(W_); Wws->ShareExternalPointer(W_.mutable_data(), W_.size()); - auto* bws = 
ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* bws = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); bws->ResizeLike(bias); bws->ShareExternalPointer(bias.mutable_data(), bias.size()); ws.RunOperatorOnce(def); diff --git a/caffe2/operators/atomic_ops.cc b/caffe2/operators/atomic_ops.cc index 2ce97b0d58c5f..2c8f17649f516 100644 --- a/caffe2/operators/atomic_ops.cc +++ b/caffe2/operators/atomic_ops.cc @@ -2,6 +2,11 @@ #include "caffe2/core/context.h" #include "caffe2/core/operator.h" +#ifdef CAFFE2_USE_IDEEP +#include +#include +#endif + namespace caffe2 { namespace fb { namespace { @@ -85,6 +90,10 @@ class CheckAtomicBoolOp final : public Operator { REGISTER_CPU_OPERATOR(CreateMutex, CreateMutexOp); REGISTER_CPU_OPERATOR(AtomicFetchAdd, AtomicFetchAddOp); +#ifdef CAFFE2_USE_IDEEP +REGISTER_IDEEP_OPERATOR(CreateMutex, IDEEPFallbackOp>); +#endif + REGISTER_CPU_OPERATOR(CreateAtomicBool, CreateAtomicBoolOp); REGISTER_CPU_OPERATOR(ConditionalSetAtomicBool, ConditionalSetAtomicBoolOp); REGISTER_CPU_OPERATOR(CheckAtomicBool, CheckAtomicBoolOp); diff --git a/caffe2/operators/batch_matmul_op_gpu_test.cc b/caffe2/operators/batch_matmul_op_gpu_test.cc index 804296307d6ef..31e179b3e41f8 100644 --- a/caffe2/operators/batch_matmul_op_gpu_test.cc +++ b/caffe2/operators/batch_matmul_op_gpu_test.cc @@ -30,7 +30,7 @@ class BatchMatMulOpGPUTest : public testing::Test { const float value, const string& name) { Blob* blob = ws_.CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->Resize(dims); math::Set( tensor->size(), diff --git a/caffe2/operators/batch_matmul_op_test.cc b/caffe2/operators/batch_matmul_op_test.cc index 45db7dd5b8484..c74829b4f8f9c 100644 --- a/caffe2/operators/batch_matmul_op_test.cc +++ b/caffe2/operators/batch_matmul_op_test.cc @@ -24,7 +24,7 @@ class BatchMatMulOpTest : public testing::Test { const float value, const string& name) { Blob* blob = ws_.CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(dims); math::Set( tensor->size(), diff --git a/caffe2/operators/boolean_unmask_ops_test.cc b/caffe2/operators/boolean_unmask_ops_test.cc index 8814be17153d4..b0c5f7dcdfff0 100644 --- a/caffe2/operators/boolean_unmask_ops_test.cc +++ b/caffe2/operators/boolean_unmask_ops_test.cc @@ -16,7 +16,7 @@ static void AddScalarInput( Workspace* ws, bool isEmpty = false) { Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); if (!isEmpty) { tensor->Resize(vector{1}); *(tensor->template mutable_data()) = value; diff --git a/caffe2/operators/conv_op_shared.cc b/caffe2/operators/conv_op_shared.cc index b9f54b6d55be7..155b6f0cd2456 100644 --- a/caffe2/operators/conv_op_shared.cc +++ b/caffe2/operators/conv_op_shared.cc @@ -27,8 +27,8 @@ void runWithSharedBuffer( auto* mutexPtr = mutexBlob->GetMutable>(); std::lock_guard g(**mutexPtr); - auto* buffer = - ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CPU__")->GetMutableTensor(CPU); + auto* buffer = BlobGetMutableTensor( + ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CPU__"), CPU); f(buffer); } } diff --git a/caffe2/operators/conv_op_shared_gpu.cc b/caffe2/operators/conv_op_shared_gpu.cc index f80d15a5d9054..c1f37c7f1362f 100644 --- a/caffe2/operators/conv_op_shared_gpu.cc +++ b/caffe2/operators/conv_op_shared_gpu.cc @@ -20,8 +20,8 @@ void runWithSharedBuffer( auto* mutexPtr = mutexBlob->GetMutable>(); std::lock_guard g(**mutexPtr); - auto* 
buffer = - ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA__")->GetMutableTensor(CUDA); + auto* buffer = BlobGetMutableTensor( + ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA__"), CUDA); f(buffer); } } diff --git a/caffe2/operators/conv_transpose_op_mobile_test.cc b/caffe2/operators/conv_transpose_op_mobile_test.cc index 6eb45eb5f8d17..3bc2951664353 100644 --- a/caffe2/operators/conv_transpose_op_mobile_test.cc +++ b/caffe2/operators/conv_transpose_op_mobile_test.cc @@ -17,7 +17,7 @@ void AddConstInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); @@ -29,7 +29,7 @@ void AddNoiseInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/operators/dataset_ops.cc b/caffe2/operators/dataset_ops.cc index 8329422428083..e3c0abe83d8b4 100644 --- a/caffe2/operators/dataset_ops.cc +++ b/caffe2/operators/dataset_ops.cc @@ -1428,7 +1428,7 @@ class TreeCursorSerializer : public BlobSerializerBase { // serialize offsets as a tensor if (cursor->offsets.size() > 0) { Blob offsets_blob; - auto* offsets = offsets_blob.GetMutableTensor(CPU); + auto* offsets = BlobGetMutableTensor(&offsets_blob, CPU); offsets->Resize(cursor->offsets.size()); std::copy( cursor->offsets.begin(), diff --git a/caffe2/operators/dropout_op_cudnn.cc b/caffe2/operators/dropout_op_cudnn.cc index a68a1263f6f45..8a40c731143f4 100644 --- a/caffe2/operators/dropout_op_cudnn.cc +++ b/caffe2/operators/dropout_op_cudnn.cc @@ -150,7 +150,7 @@ bool CuDNNDropoutOp::DoRunWithType() { // Reshape tensor descriptors if necessary if (X.dims() != cudnn_input_dims_ && !is_test_) { CAFFE_ENFORCE(scratch_blob_); - Tensor* states = scratch_blob_->GetMutableTensor(CUDA); + Tensor* states = BlobGetMutableTensor(scratch_blob_, CUDA); cudnn_input_dims_ = X.dims(); CUDNN_ENFORCE(cudnnSetTensor4dDescriptor( data_desc_, diff --git a/caffe2/operators/elementwise_op_test.h b/caffe2/operators/elementwise_op_test.h index bcd547e28f098..b785d040c8f1a 100644 --- a/caffe2/operators/elementwise_op_test.h +++ b/caffe2/operators/elementwise_op_test.h @@ -19,7 +19,7 @@ void FillTensor( const std::vector& shape, const std::vector& values) { auto* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(Context::GetDeviceType()); + auto* tensor = BlobGetMutableTensor(blob, Context::GetDeviceType()); tensor->Resize(shape); auto* mutable_data = tensor->template mutable_data(); const O_Type* data = reinterpret_cast(values.data()); diff --git a/caffe2/operators/expand_squeeze_dims_op.h b/caffe2/operators/expand_squeeze_dims_op.h index 505b1ec7d6909..37a3b5716127d 100644 --- a/caffe2/operators/expand_squeeze_dims_op.h +++ b/caffe2/operators/expand_squeeze_dims_op.h @@ -112,7 +112,7 @@ class SqueezeOp : public Operator { vector dims_; public: - AT_DISABLE_COPY_AND_ASSIGN(SqueezeOp); + C10_DISABLE_COPY_AND_ASSIGN(SqueezeOp); }; } // namespace caffe2 #endif // CAFFE2_OPERATORS_EXPAND_SQUEEZE_DIMS_OP_H_ diff --git a/caffe2/operators/generate_proposals_op_test.cc b/caffe2/operators/generate_proposals_op_test.cc index 2b3a033a665df..da7fdc650879c 100644 --- a/caffe2/operators/generate_proposals_op_test.cc +++ 
b/caffe2/operators/generate_proposals_op_test.cc @@ -18,7 +18,7 @@ static void AddConstInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); @@ -34,7 +34,7 @@ static void AddLinSpacedInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); EigenVectorMap tensor_vec( tensor->template mutable_data(), tensor->size()); @@ -51,7 +51,7 @@ static void AddInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); EigenVectorMap tensor_vec( tensor->template mutable_data(), tensor->size()); diff --git a/caffe2/operators/index_ops.cc b/caffe2/operators/index_ops.cc index 241b0ff97c607..2fb8f3b338dc6 100644 --- a/caffe2/operators/index_ops.cc +++ b/caffe2/operators/index_ops.cc @@ -353,7 +353,7 @@ class IndexSerializer : public BlobSerializerBase { SerializationAcceptor acceptor) override { auto& base = blob.template Get>(); Blob tensor_blob; - auto* tensor_out = tensor_blob.GetMutableTensor(CPU); + auto* tensor_out = BlobGetMutableTensor(&tensor_blob, CPU); if (base->Type().Match()) { doStore(base, tensor_out); diff --git a/caffe2/operators/onnx_while_op.h b/caffe2/operators/onnx_while_op.h index dbd5103952469..7a3c34cfbf7cc 100644 --- a/caffe2/operators/onnx_while_op.h +++ b/caffe2/operators/onnx_while_op.h @@ -213,23 +213,23 @@ class ONNXWhileOp final : public Operator { lcd_tensors_.clear(); for (int i = 2; i < body_net_def.external_input_size(); ++i) { Blob* b = loop_ws_->CreateBlob(body_net_def.external_input(i)); - Tensor* t = b->GetMutableTensor(Context::GetDeviceType()); + Tensor* t = BlobGetMutableTensor(b, Context::GetDeviceType()); lcd_tensors_.push_back(t); } // First output is the iteration variable auto* iteration_var_blob = loop_ws_->CreateBlob( body_net_def.external_input(0)); iteration_var_ = - iteration_var_blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(iteration_var_blob, Context::GetDeviceType()); - input_condition_var_ = - loop_ws_->CreateBlob(body_net_def.external_input(1)) - ->GetMutableTensor(Context::GetDeviceType()); + input_condition_var_ = BlobGetMutableTensor( + loop_ws_->CreateBlob(body_net_def.external_input(1)), + Context::GetDeviceType()); auto* condition_var_blob = loop_ws_->CreateBlob(body_net_def.external_output(0)); condition_var_ = - condition_var_blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(condition_var_blob, Context::GetDeviceType()); condition_var_->Resize(1); condition_var_->template mutable_data(); diff --git a/caffe2/operators/onnxifi_op.cc b/caffe2/operators/onnxifi_op.cc index d1b0824f1b319..767a37d5fc792 100644 --- a/caffe2/operators/onnxifi_op.cc +++ b/caffe2/operators/onnxifi_op.cc @@ -15,7 +15,7 @@ void BlobToTensorDescriptor( // Memory type // We only allow weights to be CPU tensor for now CAFFE_ENFORCE( - blob->IsTensorType(CPU), + BlobIsTensorType(*blob, CPU), "Initialization blob ", name, " needs to be TensorCPU"); diff --git a/caffe2/operators/operator_fallback_gpu.h b/caffe2/operators/operator_fallback_gpu.h index 8ef39e7c0e78d..5b3a38dbfbd13 100644 --- 
a/caffe2/operators/operator_fallback_gpu.h +++ b/caffe2/operators/operator_fallback_gpu.h @@ -65,8 +65,8 @@ class GPUFallbackOpEx final : public Operator { bool need_sync = false; for (int i = 0; i < InputSize(); ++i) { if (this->InputIsTensorType(i, CUDA)) { - local_input_blobs_[i]->GetMutableTensor(CPU)->CopyFrom( - Input(i), &context_); + BlobGetMutableTensor(local_input_blobs_[i], CPU) + ->CopyFrom(Input(i), &context_); need_sync = true; } else { VLOG(1) << "Input " << i << " is not TensorCUDA. Skipping copy."; @@ -95,7 +95,7 @@ class GPUFallbackOpEx final : public Operator { continue; } CAFFE_ENFORCE( - local_output_blobs_[i]->IsTensorType(CPU), + BlobIsTensorType(*local_output_blobs_[i], CPU), "GPU fallback op currently does not support non-TensorCPU " "output type who needs copying."); Output(i)->CopyFrom(local_output_blobs_[i]->template Get()); diff --git a/caffe2/operators/operator_fallback_gpu_test.cc b/caffe2/operators/operator_fallback_gpu_test.cc index 964708bc10906..0870a4be2dd7b 100644 --- a/caffe2/operators/operator_fallback_gpu_test.cc +++ b/caffe2/operators/operator_fallback_gpu_test.cc @@ -40,7 +40,7 @@ TEST(OperatorFallbackTest, IncrementByOneOp) { for (int i = 0; i < 6; ++i) { source_tensor.mutable_data()[i] = i; } - ws.CreateBlob("X")->GetMutableTensor(CPU)->CopyFrom(source_tensor); + BlobGetMutableTensor(ws.CreateBlob("X"), CPU)->CopyFrom(source_tensor); unique_ptr op(CreateOperator(op_def, &ws)); EXPECT_TRUE(op.get() != nullptr); EXPECT_TRUE(op->Run()); @@ -64,7 +64,7 @@ TEST(OperatorFallbackTest, GPUIncrementByOneOp) { for (int i = 0; i < 6; ++i) { source_tensor.mutable_data()[i] = i; } - ws.CreateBlob("X")->GetMutableTensor(CUDA)->CopyFrom(source_tensor); + BlobGetMutableTensor(ws.CreateBlob("X"), CUDA)->CopyFrom(source_tensor); unique_ptr op(CreateOperator(op_def, &ws)); EXPECT_TRUE(op.get() != nullptr); EXPECT_TRUE(op->Run()); diff --git a/caffe2/operators/partition_ops.h b/caffe2/operators/partition_ops.h index 94bd1e6150cef..32f31f97d878c 100644 --- a/caffe2/operators/partition_ops.h +++ b/caffe2/operators/partition_ops.h @@ -221,7 +221,7 @@ class PartitionOp : public PartitionOpBase { return true; } - AT_DISABLE_COPY_AND_ASSIGN(PartitionOp); + C10_DISABLE_COPY_AND_ASSIGN(PartitionOp); }; class LengthsPartitionOp : public PartitionOpBase { @@ -302,7 +302,7 @@ class LengthsPartitionOp : public PartitionOpBase { return true; } - AT_DISABLE_COPY_AND_ASSIGN(LengthsPartitionOp); + C10_DISABLE_COPY_AND_ASSIGN(LengthsPartitionOp); vector out_length_; }; diff --git a/caffe2/operators/reshape_op_gpu_test.cc b/caffe2/operators/reshape_op_gpu_test.cc index 3537ab69d058f..d4ac325a78b80 100644 --- a/caffe2/operators/reshape_op_gpu_test.cc +++ b/caffe2/operators/reshape_op_gpu_test.cc @@ -20,7 +20,7 @@ static void AddConstInput( option.set_device_type(PROTO_CUDA); CUDAContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); diff --git a/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h b/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h index 98675cea858d5..63d58f3ccd8f6 100644 --- a/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h +++ b/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h @@ -43,11 +43,10 @@ class RecurrentNetworkBlobFetcherOp final : public Operator { prefix_ + std::string("_") + blob_name + caffe2::to_string(i); 
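Editorial note: a second mechanical rename running through the operator headers in this patch (expand_squeeze_dims_op.h, partition_ops.h, slice_op.h/.cu) is AT_DISABLE_COPY_AND_ASSIGN → C10_DISABLE_COPY_AND_ASSIGN, matching the move of these macros under the c10 prefix. Usage is unchanged; the macro conventionally deletes the copy constructor and copy assignment. A hypothetical operator showing the placement, assuming the macro is visible through the usual core headers:

#include "caffe2/core/operator.h"

namespace caffe2 {

// Hypothetical operator, only to show where the renamed macro sits.
class ExampleNoCopyOp final : public Operator<CPUContext> {
 public:
  using Operator<CPUContext>::Operator;
  bool RunOnDevice() override {
    return true;
  }

  C10_DISABLE_COPY_AND_ASSIGN(ExampleNoCopyOp);
};

} // namespace caffe2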
blob_names_vector.push_back(newBlobName); - ws_->CreateBlob(newBlobName) - ->GetMutableTensor(CPU) + BlobGetMutableTensor(ws_->CreateBlob(newBlobName), CPU) ->ResizeLike(currentTensor); auto type = Context::GetDeviceType(); - auto* newTensor = ws_->GetBlob(newBlobName)->GetMutableTensor(type); + auto* newTensor = BlobGetMutableTensor(ws_->GetBlob(newBlobName), type); newTensor->CopyFrom(currentTensor); } } diff --git a/caffe2/operators/rnn/recurrent_network_executor.h b/caffe2/operators/rnn/recurrent_network_executor.h index 7e37e562e77a5..4cb53a6d7d330 100644 --- a/caffe2/operators/rnn/recurrent_network_executor.h +++ b/caffe2/operators/rnn/recurrent_network_executor.h @@ -111,10 +111,10 @@ class RecurrentNetworkExecutorBase { // the forward-only mode. std::string this_timestep_blob = timestep_blob_ + "_rnnexec_t" + caffe2::to_string(t); - ws->CreateBlob(this_timestep_blob)->GetMutableTensor(CPU)->Resize(1); + BlobGetMutableTensor(ws->CreateBlob(this_timestep_blob), CPU)->Resize(1); auto b = ws->GetBlob(this_timestep_blob); CAFFE_ENFORCE(b); - b->GetMutableTensor(CPU)->template mutable_data()[0] = t; + BlobGetMutableTensor(b, CPU)->template mutable_data()[0] = t; // Copy the operators from template for (auto& template_rnn_op : timestep_ops_template_) { diff --git a/caffe2/operators/rnn/recurrent_network_op.h b/caffe2/operators/rnn/recurrent_network_op.h index 2421bc44263af..21b3064a6fac3 100644 --- a/caffe2/operators/rnn/recurrent_network_op.h +++ b/caffe2/operators/rnn/recurrent_network_op.h @@ -52,10 +52,11 @@ struct CAFFE2_API ScratchWorkspaces { }; inline void UpdateTimestepBlob(Workspace* ws, std::string blob_name, int t) { - ws->CreateBlob(blob_name)->GetMutableTensor(CPU)->Resize(1); + BlobGetMutableTensor(ws->CreateBlob(blob_name), CPU)->Resize(1); auto timestepBlob = ws->GetBlob(blob_name); CAFFE_ENFORCE(timestepBlob); - timestepBlob->GetMutableTensor(CPU)->template mutable_data()[0] = t; + BlobGetMutableTensor(timestepBlob, CPU)->template mutable_data()[0] = + t; } CAFFE2_API std::map GetRecurrentMapping( @@ -71,8 +72,9 @@ void applyOffsetAlias( << " at offset: " << oc.offset; auto srcBlob = ws->GetBlob(oc.src); CAFFE_ENFORCE(srcBlob); - auto* src = srcBlob->GetMutableTensor(Context::GetDeviceType()); - auto* dst = ws->GetBlob(oc.dst)->GetMutableTensor(Context::GetDeviceType()); + auto* src = BlobGetMutableTensor(srcBlob, Context::GetDeviceType()); + auto* dst = + BlobGetMutableTensor(ws->GetBlob(oc.dst), Context::GetDeviceType()); auto timestep = src->size() / src->dim(0); auto dims = src->dims(); const int32_t startDstTimestep = @@ -113,7 +115,7 @@ void initializeRecurrentInput( Context* context) { auto stateBlob = ws->GetBlob(rc.state); CAFFE_ENFORCE(stateBlob); - auto* state = stateBlob->GetMutableTensor(Context::GetDeviceType()); + auto* state = BlobGetMutableTensor(stateBlob, Context::GetDeviceType()); auto inputBlob = ws->GetBlob(rc.input); CAFFE_ENFORCE(inputBlob); @@ -660,7 +662,7 @@ class RecurrentNetworkGradientOp final : public Operator { auto gBlob = sharedWs_->GetBlob(param.grad); CAFFE_ENFORCE(gBlob); - auto* g = gBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(gBlob, Context::GetDeviceType()); g->ResizeLike(p); math::Set( g->size(), @@ -676,7 +678,7 @@ class RecurrentNetworkGradientOp final : public Operator { auto gBlob = sharedWs_->CreateBlob(rg.grad); CAFFE_ENFORCE(gBlob); - auto* g = gBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(gBlob, Context::GetDeviceType()); g->ResizeLike(p); 
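Editorial note: the recurrent_network_op.h hunk above rewrites UpdateTimestepBlob with the free-function accessor. The template argument on mutable_data was lost in this extract; the reconstruction below assumes it is int32_t and should be checked against the source.

#include "caffe2/core/tensor.h"
#include "caffe2/core/workspace.h"

namespace caffe2 {

// Reconstructed sketch of UpdateTimestepBlob after this patch.
inline void UpdateTimestepBlob(Workspace* ws, std::string blob_name, int t) {
  BlobGetMutableTensor(ws->CreateBlob(blob_name), CPU)->Resize(1);
  auto timestepBlob = ws->GetBlob(blob_name);
  CAFFE_ENFORCE(timestepBlob);
  BlobGetMutableTensor(timestepBlob, CPU)->template mutable_data<int32_t>()[0] =
      t;
}

} // namespace caffe2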
CAFFE_ENFORCE_EQ(g->ndim(), 3); const auto timestep = g->size() / g->dim(0); @@ -703,7 +705,7 @@ class RecurrentNetworkGradientOp final : public Operator { << ". Size: " << Input(gradientInputIndex).size(); auto pGradientBlob = sharedWs_->GetBlob(gradientName); CAFFE_ENFORCE(pGradientBlob); - auto* g = pGradientBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(pGradientBlob, Context::GetDeviceType()); g->ResizeLike(Input(gradientInputIndex)); g->template mutable_data(); } @@ -717,7 +719,7 @@ class RecurrentNetworkGradientOp final : public Operator { << rg.lastExternalGrad << " for final time step (sep. blob)"; auto gBlob = sharedWs_->GetBlob(rg.grad); CAFFE_ENFORCE(gBlob); - auto* g = gBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(gBlob, Context::GetDeviceType()); auto oglastBlob = sharedWs_->GetBlob(rg.lastExternalGrad); CAFFE_ENFORCE(oglastBlob); @@ -779,7 +781,7 @@ class RecurrentNetworkGradientOp final : public Operator { T* output_data = Output(outputIdx)->template mutable_data(); auto pBlob = sharedWs_->GetBlob(recurrentGradients_[i].grad); CAFFE_ENFORCE(pBlob); - auto* p = pBlob->GetMutableTensor(Context::GetDeviceType()); + auto* p = BlobGetMutableTensor(pBlob, Context::GetDeviceType()); if (Input(inputId).ndim() >= 2) { // Gradient states blob should live. And if it gets changed by the diff --git a/caffe2/operators/roi_align_op_gpu_test.cc b/caffe2/operators/roi_align_op_gpu_test.cc index 2647a97d6f0b9..7257ec44c2598 100644 --- a/caffe2/operators/roi_align_op_gpu_test.cc +++ b/caffe2/operators/roi_align_op_gpu_test.cc @@ -18,7 +18,7 @@ void AddConstInput( Context* context, Workspace* ws) { Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(Context::GetDeviceType()); + auto* tensor = BlobGetMutableTensor(blob, Context::GetDeviceType()); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), context); @@ -39,7 +39,7 @@ void AddInput( const string& name, Workspace* ws) { Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); EigenVectorMap tensor_vec( tensor->template mutable_data(), tensor->size()); @@ -57,7 +57,7 @@ void AddInput( tmp_vec.array() = utils::AsEArrXt(values); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->CopyFrom(tmp); } diff --git a/caffe2/operators/slice_op.cu b/caffe2/operators/slice_op.cu index 475d8329c9249..8ddb204ebd5b4 100644 --- a/caffe2/operators/slice_op.cu +++ b/caffe2/operators/slice_op.cu @@ -302,7 +302,7 @@ class SliceGradientOp : public Operator { ends_(this->template GetRepeatedArgument("ends")), statically_inited_(false) {} - AT_DISABLE_COPY_AND_ASSIGN(SliceGradientOp); + C10_DISABLE_COPY_AND_ASSIGN(SliceGradientOp); bool RunOnDevice() override { if (InputSize() == 4) { diff --git a/caffe2/operators/slice_op.h b/caffe2/operators/slice_op.h index e7f8919bb81c8..6149f077669d7 100644 --- a/caffe2/operators/slice_op.h +++ b/caffe2/operators/slice_op.h @@ -249,7 +249,7 @@ class SliceOp : public Operator { output, data, starts_host_, ends_host_, &context_); } - AT_DISABLE_COPY_AND_ASSIGN(SliceOp); + C10_DISABLE_COPY_AND_ASSIGN(SliceOp); protected: std::vector starts_; @@ -269,7 +269,7 @@ class SliceGradientOp : public Operator { ends_(this->template GetRepeatedArgument("ends")), statically_inited_(false) {} - 
AT_DISABLE_COPY_AND_ASSIGN(SliceGradientOp); + C10_DISABLE_COPY_AND_ASSIGN(SliceGradientOp); bool RunOnDevice() override { if (InputSize() == 4) { diff --git a/caffe2/operators/string_ops_test.cc b/caffe2/operators/string_ops_test.cc index c9ba13efb5025..2092ae804f2c3 100644 --- a/caffe2/operators/string_ops_test.cc +++ b/caffe2/operators/string_ops_test.cc @@ -9,7 +9,7 @@ class StringJoinOpTest : public testing::Test { public: bool runOp(const TensorCPU& input) { auto* blob = ws_.CreateBlob("X"); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->ResizeLike(input); tensor->ShareData(input); @@ -26,7 +26,7 @@ class StringJoinOpTest : public testing::Test { const std::string* checkAndGetOutput(int outputSize) { const auto* output = ws_.GetBlob("Y"); EXPECT_NE(output, nullptr); - EXPECT_TRUE(output->IsTensorType(CPU)); + EXPECT_TRUE(BlobIsTensorType(*output, CPU)); const auto& outputTensor = output->Get(); EXPECT_EQ(outputTensor.ndim(), 1); EXPECT_EQ(outputTensor.dim(0), outputSize); @@ -42,7 +42,7 @@ TEST_F(StringJoinOpTest, testString1DJoin) { std::vector input = {"a", "xx", "c"}; auto blob = caffe2::make_unique(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size()); auto* data = tensor->template mutable_data(); for (int i = 0; i < input.size(); ++i) { @@ -62,7 +62,7 @@ TEST_F(StringJoinOpTest, testString2DJoin) { {"dd", "ee", "ff"}}; auto blob = caffe2::make_unique(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size(), input[0].size()); auto* data = tensor->template mutable_data(); for (int i = 0; i < input.size(); ++i) { @@ -82,7 +82,7 @@ TEST_F(StringJoinOpTest, testFloat1DJoin) { std::vector input = {3.90f, 5.234f, 8.12f}; auto blob = caffe2::make_unique(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size()); auto* data = tensor->template mutable_data(); for (int i = 0; i < input.size(); ++i) { @@ -102,7 +102,7 @@ TEST_F(StringJoinOpTest, testFloat2DJoin) { {4.67f, 5.90f, 6.32f}}; auto blob = caffe2::make_unique(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size(), input[0].size()); auto* data = tensor->template mutable_data(); for (int i = 0; i < input.size(); ++i) { @@ -122,7 +122,7 @@ TEST_F(StringJoinOpTest, testLong2DJoin) { std::vector> input = {{100, 200}, {1000, 2000}}; auto blob = caffe2::make_unique(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size(), input[0].size()); auto* data = tensor->template mutable_data(); for (int i = 0; i < input.size(); ++i) { diff --git a/caffe2/operators/stylizer_ops.cc b/caffe2/operators/stylizer_ops.cc index a6d395fe9ba64..bfc41a462999b 100644 --- a/caffe2/operators/stylizer_ops.cc +++ b/caffe2/operators/stylizer_ops.cc @@ -82,10 +82,10 @@ class PackedInt8BGRANHWCToNCHWCStylizerPreprocessOp auto defaultNoiseSize = OperatorBase::GetSingleArgument( "noise_size", 491 /* prime to avoid artifacts */); - if (!noiseBlob->IsTensorType(CPU)) { + if (!BlobIsTensorType(*noiseBlob, CPU)) { // Initialize random noise on first use. // Cache it to maintain temporal consistency. 
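The AT_DISABLE_COPY_AND_ASSIGN occurrences in these operator classes all become C10_DISABLE_COPY_AND_ASSIGN, used inside the class body exactly as before. A small sketch with a hypothetical class (assuming the macro is available through the c10 macros header referenced elsewhere in this patch):

#include "c10/macros/Macros.h"

class ExampleHolder {
 public:
  ExampleHolder() = default;

 private:
  // Same effect as the old AT_ spelling: copy construction and copy
  // assignment are disabled for this type.
  C10_DISABLE_COPY_AND_ASSIGN(ExampleHolder);
};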
- auto* t = noiseBlob->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(noiseBlob, CPU); #if defined(__ARM_NEON__) || defined(__ARM_NEON) // Noise space is larger for vectorized code due to the diff --git a/caffe2/operators/tensor_protos_db_input.h b/caffe2/operators/tensor_protos_db_input.h index cd081bf959e39..e9f5b1a8f8455 100644 --- a/caffe2/operators/tensor_protos_db_input.h +++ b/caffe2/operators/tensor_protos_db_input.h @@ -56,7 +56,7 @@ bool TensorProtosDBInput::Prefetch() { protos.mutable_protos(i)->clear_device_detail(); } deserializer.Deserialize( - protos.protos(i), prefetched_blobs_[i].GetMutableTensor(CPU)); + protos.protos(i), BlobGetMutableTensor(&prefetched_blobs_[i], CPU)); } } else { vector temp_tensors; @@ -74,11 +74,11 @@ bool TensorProtosDBInput::Prefetch() { vector dims( protos.protos(i).dims().begin(), protos.protos(i).dims().end()); dims.insert(dims.begin(), batch_size_); - prefetched_blobs_[i].GetMutableTensor(CPU)->Resize(dims); + BlobGetMutableTensor(&prefetched_blobs_[i], CPU)->Resize(dims); } } for (int i = 0; i < protos.protos_size(); ++i) { - TensorCPU* dst = prefetched_blobs_[i].GetMutableTensor(CPU); + TensorCPU* dst = BlobGetMutableTensor(&prefetched_blobs_[i], CPU); TensorCPU& src = temp_tensors[i]; if (protos.protos(i).has_device_detail()) { protos.mutable_protos(i)->clear_device_detail(); diff --git a/caffe2/operators/tt_linear_op.h b/caffe2/operators/tt_linear_op.h index 421c26e318b6e..1a5cdc344ce4a 100644 --- a/caffe2/operators/tt_linear_op.h +++ b/caffe2/operators/tt_linear_op.h @@ -52,7 +52,7 @@ class TTLinearOp final : public Operator { int cores_idx = 0; // Temporary buffer to facilitate multiplication of TT-cores with input - auto Y_buf = Y_temp_->GetMutableTensor(Context::GetDeviceType()); + auto Y_buf = BlobGetMutableTensor(Y_temp_.get(), Context::GetDeviceType()); Y_buf->ResizeLike(X); Y_buf->CopyFrom(X); diff --git a/caffe2/operators/utility_ops_gpu_test.cc b/caffe2/operators/utility_ops_gpu_test.cc index f500afaf9ed24..1099d900cbefd 100644 --- a/caffe2/operators/utility_ops_gpu_test.cc +++ b/caffe2/operators/utility_ops_gpu_test.cc @@ -19,7 +19,7 @@ static void AddConstInput( option.set_device_type(PROTO_CUDA); CUDAContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); diff --git a/caffe2/operators/utility_ops_test.cc b/caffe2/operators/utility_ops_test.cc index 379dd52655c4f..a3a2a409674ed 100644 --- a/caffe2/operators/utility_ops_test.cc +++ b/caffe2/operators/utility_ops_test.cc @@ -16,7 +16,7 @@ static void AddConstInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); diff --git a/caffe2/opt/fusion.cc b/caffe2/opt/fusion.cc index fdf5fdc31e104..8c324a97c5093 100644 --- a/caffe2/opt/fusion.cc +++ b/caffe2/opt/fusion.cc @@ -44,10 +44,10 @@ bool fuseConvBNHelper(repr::NNModule* nn, caffe2::Workspace* ws) { CAFFE_ENFORCE( bnInputs.size() >= 5, "Invalid batch normalization input size"); -#define EXPOSE_TENSOR_DATA(name, index, inputs) \ - auto name = repr::nn::get(inputs[index]); \ - assert(ws->HasBlob(name->getName()) && "Blob not in workspace"); \ - auto name##Tensor = 
ws->GetBlob(name->getName())->GetMutableTensor(CPU); \ +#define EXPOSE_TENSOR_DATA(name, index, inputs) \ + auto name = repr::nn::get(inputs[index]); \ + assert(ws->HasBlob(name->getName()) && "Blob not in workspace"); \ + auto name##Tensor = BlobGetMutableTensor(ws->GetBlob(name->getName()), CPU); \ auto name##Data = name##Tensor->mutable_data(); EXPOSE_TENSOR_DATA(filter, 1, convInputs); @@ -76,7 +76,7 @@ bool fuseConvBNHelper(repr::NNModule* nn, caffe2::Workspace* ws) { nn->dataFlow.createEdge(convBiasNode, convNode); auto* blob = ws->CreateBlob(convBiasName); - caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU); + caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU); CHECK_NOTNULL(tensor); // Get output channel size_t c = filterTensor->dim32(0); diff --git a/caffe2/opt/fusion.h b/caffe2/opt/fusion.h index 33dc2e4c54b1a..0973ade54b383 100644 --- a/caffe2/opt/fusion.h +++ b/caffe2/opt/fusion.h @@ -37,7 +37,7 @@ CAFFE2_API void fuseConvBN(repr::NNModule* nn, caffe2::Workspace* ws); // \param postprocess Functor to postprocess the conv node, // attaching additional attributes if necessary template -CAFFE2_EXPORT void fuseActivation( +C10_EXPORT void fuseActivation( repr::NNModule* nn, std::function should_fuse, std::function postprocess) { diff --git a/caffe2/opt/onnxifi_transformer.cc b/caffe2/opt/onnxifi_transformer.cc index ce79df56ecb72..a048503fea99c 100644 --- a/caffe2/opt/onnxifi_transformer.cc +++ b/caffe2/opt/onnxifi_transformer.cc @@ -173,7 +173,7 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOp( // Feed into workspace as CPU Tensors auto* blob = ws->CreateBlob(t.name()); - auto* cpu_tensor = blob->GetMutableTensor(CPU); + auto* cpu_tensor = BlobGetMutableTensor(blob, CPU); std::vector dims; for(const auto& d : t.dims()) { dims.push_back(d); diff --git a/caffe2/opt/sink.cc b/caffe2/opt/sink.cc index c4d73d7abb12d..ed4cd8a372537 100644 --- a/caffe2/opt/sink.cc +++ b/caffe2/opt/sink.cc @@ -8,7 +8,7 @@ namespace opt { using namespace nom; -CAFFE2_EXPORT void sinkMaxPool(nom::repr::NNModule* nn) { +C10_EXPORT void sinkMaxPool(nom::repr::NNModule* nn) { for (auto max_pool_node : repr::nn::nodeIterator(nn->dataFlow)) { if (repr::nn::getInputs(max_pool_node).size() != 1) { diff --git a/caffe2/perfkernels/CMakeLists.txt b/caffe2/perfkernels/CMakeLists.txt index 3781bbb6afb6b..a5701da807f4f 100644 --- a/caffe2/perfkernels/CMakeLists.txt +++ b/caffe2/perfkernels/CMakeLists.txt @@ -17,8 +17,8 @@ set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${common_srcs}) if (NOT MSVC AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS) add_library(Caffe2_perfkernels_avx OBJECT ${avx_srcs}) add_library(Caffe2_perfkernels_avx2 OBJECT ${avx2_srcs}) - add_dependencies(Caffe2_perfkernels_avx Caffe2_PROTO) - add_dependencies(Caffe2_perfkernels_avx2 Caffe2_PROTO) + add_dependencies(Caffe2_perfkernels_avx Caffe2_PROTO c10) + add_dependencies(Caffe2_perfkernels_avx2 Caffe2_PROTO c10) if (MSVC) set_target_properties( Caffe2_perfkernels_avx PROPERTIES COMPILE_FLAGS "/arch:AVX") diff --git a/caffe2/predictor/predictor.cc b/caffe2/predictor/predictor.cc index 84dac93753d37..7775e69776450 100644 --- a/caffe2/predictor/predictor.cc +++ b/caffe2/predictor/predictor.cc @@ -10,14 +10,14 @@ void enforceIsTensor(Workspace* ws, const std::string& name) { auto blob = ws->GetBlob(name); CAFFE_ENFORCE(blob, "Blob does not exist: ", name); CAFFE_ENFORCE( - blob->IsTensorType(CPU), "Blob is not a CPU Tensor: ", name); + BlobIsTensorType(*blob, CPU), "Blob is not a CPU Tensor: ", name); } TensorCPU* 
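Alongside the Blob changes, declarations that were annotated CAFFE2_EXPORT are re-annotated C10_EXPORT, with the macro in the same position. A hedged sketch with hypothetical names (assuming C10_EXPORT comes from the c10 macros header, as in the headers touched above):

#include "c10/macros/Macros.h"

// Exported free function: the annotation precedes the return type,
// as in fuseActivation and sinkMaxPool above.
C10_EXPORT void exampleTransform(int n);

// Exported class: the annotation sits between the class keyword and the name,
// as in the pybind fetcher base class above.
class C10_EXPORT ExampleFetcherBase {
 public:
  virtual ~ExampleFetcherBase() = default;
};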
getTensor(Workspace* ws, const std::string& name) { enforceIsTensor(ws, name); auto* blob = ws->GetBlob(name); CAFFE_ENFORCE(blob, "Blob: ", name, " does not exist"); - return blob->GetMutableTensor(CPU); + return BlobGetMutableTensor(blob, CPU); } void shareInputTensor( @@ -60,7 +60,7 @@ Predictor::Predictor(PredictorConfig config) : config_(std::move(config)) { for (const auto& name : config_.predict_net->external_input()) { if (!initialized.count(name)) { auto* blob = config_.ws->CreateBlob(name); - blob->GetMutableTensor(CPU); + BlobGetMutableTensor(blob, CPU); } } CAFFE_ENFORCE(config_.ws->CreateNet(config_.predict_net)); diff --git a/caffe2/predictor/predictor_test.cc b/caffe2/predictor/predictor_test.cc index ae4f73e9da0ad..a0245cd7a86d6 100644 --- a/caffe2/predictor/predictor_test.cc +++ b/caffe2/predictor/predictor_test.cc @@ -135,7 +135,7 @@ std::unique_ptr randomTensor( const std::vector& dims, CPUContext* ctx) { auto blob = make_unique(); - auto* t = blob->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(blob.get(), CPU); t->Resize(dims); math::RandUniform( t->size(), -1.0, 1.0, t->template mutable_data(), ctx); @@ -180,7 +180,7 @@ TEST_F(PredictorTest, SimpleBatchSized) { auto inputData = randomTensor({1, 4}, ctx_.get()); Predictor::TensorList input; input.emplace_back(CPU); - auto tensor = inputData->GetMutableTensor(CPU); + auto tensor = BlobGetMutableTensor(inputData.get(), CPU); input.back().ResizeLike(*tensor); input.back().ShareData(*tensor); Predictor::TensorList output; @@ -196,7 +196,7 @@ TEST_F(PredictorTest, SimpleBatchSizedMapInput) { auto inputData = randomTensor({1, 4}, ctx_.get()); Predictor::TensorMap input; auto iter = input.emplace("data", Tensor(CPU)); - auto tensor = inputData->GetMutableTensor(CPU); + auto tensor = BlobGetMutableTensor(inputData.get(), CPU); iter.first->second.ResizeLike(*tensor); iter.first->second.ShareData(*tensor); diff --git a/caffe2/python/data_parallel_model_test.py b/caffe2/python/data_parallel_model_test.py index ebf3c3b8cd44a..1b9b4929bb0f9 100644 --- a/caffe2/python/data_parallel_model_test.py +++ b/caffe2/python/data_parallel_model_test.py @@ -831,6 +831,7 @@ def param_update_fun(model): return workspace.FetchBlob("{}_0/partest/i2h_w".format(model._device_prefix)) + @unittest.skipIf("IN_CIRCLECI" in os.environ, "FIXME: flaky test in CircleCI") def test_equiv_recurrent(self): ''' Test that the model produces exactly same results given diff --git a/caffe2/python/onnx/tests/onnx_backend_test.py b/caffe2/python/onnx/tests/onnx_backend_test.py index 8ffaef3004d9a..ad229a97f807d 100644 --- a/caffe2/python/onnx/tests/onnx_backend_test.py +++ b/caffe2/python/onnx/tests/onnx_backend_test.py @@ -59,6 +59,10 @@ if 'JENKINS_URL' in os.environ: backend_test.exclude(r'(test_vgg19|test_vgg)') +# FIXME: flaky test in CircleCI +if "IN_CIRCLECI" in os.environ: + backend_test.exclude(r'(test_dynamic_slice_cpu)') + # import all test cases at global scope to make them visible to python.unittest globals().update(backend_test .enable_report() diff --git a/caffe2/python/operator_test/cross_entropy_ops_test.py b/caffe2/python/operator_test/cross_entropy_ops_test.py index 5ee60d877c33b..f97b0c5809d5f 100644 --- a/caffe2/python/operator_test/cross_entropy_ops_test.py +++ b/caffe2/python/operator_test/cross_entropy_ops_test.py @@ -8,6 +8,8 @@ import hypothesis.strategies as st import numpy as np +import unittest +import os def sigmoid(x): return 1.0 / (1.0 + np.exp(-x)) @@ -248,6 +250,7 @@ def weighted_sigmoid_xentr_logit_grad_ref(g_out, outputs, 
fwd_inputs): output_to_grad='xentropy', grad_reference=weighted_sigmoid_xentr_logit_grad_ref) + @unittest.skipIf("IN_CIRCLECI" in os.environ, "FIXME: flaky test in CircleCI") @given(n=st.integers(2, 10), b=st.integers(1, 5), **hu.gcs_cpu_only) diff --git a/caffe2/python/operator_test/im2col_col2im_test.py b/caffe2/python/operator_test/im2col_col2im_test.py index 6db6cae47ad1c..46b16f4356ff5 100644 --- a/caffe2/python/operator_test/im2col_col2im_test.py +++ b/caffe2/python/operator_test/im2col_col2im_test.py @@ -10,6 +10,9 @@ import hypothesis.strategies as st import numpy as np +import unittest +import os + class TestReduceFrontSum(hu.HypothesisTestCase): @given(batch_size=st.integers(1, 3), @@ -111,6 +114,7 @@ def test_im2col_layout(self, batch_size, stride, pad, kernel, dilation, atol=1e-4, rtol=1e-4) + @unittest.skipIf("IN_CIRCLECI" in os.environ, "FIXME: flaky test in CircleCI") @given(batch_size=st.integers(1, 3), stride=st.integers(1, 3), pad=st.integers(0, 3), diff --git a/caffe2/python/pybind_state.cc b/caffe2/python/pybind_state.cc index 81197047102ff..9a1d715bfdf22 100644 --- a/caffe2/python/pybind_state.cc +++ b/caffe2/python/pybind_state.cc @@ -328,7 +328,7 @@ void addObjectMethods(py::module& m) { }) .def( "tensor", - [](Blob* blob) { return py::cast(blob->GetMutableTensor(CPU)); }, + [](Blob* blob) { return py::cast(BlobGetMutableTensor(blob, CPU)); }, py::return_value_policy::reference_internal) .def( "_feed", diff --git a/caffe2/python/pybind_state.h b/caffe2/python/pybind_state.h index 59f39dd313032..4f81569e42936 100644 --- a/caffe2/python/pybind_state.h +++ b/caffe2/python/pybind_state.h @@ -43,7 +43,7 @@ void addObjectMethods(pybind11::module& m); // Get current workspace Workspace* GetCurrentWorkspace(); -class CAFFE2_EXPORT BlobFetcherBase { +class C10_EXPORT BlobFetcherBase { public: struct FetchedBlob { pybind11::object obj; @@ -60,7 +60,7 @@ class BlobFeederBase { Feed(const DeviceOption& option, PyArrayObject* array, Blob* blob) = 0; }; -CAFFE2_EXPORT CAFFE_DECLARE_TYPED_REGISTRY( +C10_EXPORT CAFFE_DECLARE_TYPED_REGISTRY( BlobFetcherRegistry, TypeIdentifier, BlobFetcherBase, @@ -234,7 +234,7 @@ class TensorFeeder : public BlobFeederBase { FeedTensor( option, original_array, - blob->GetMutableTensor(Context::GetDeviceType())); + BlobGetMutableTensor(blob, Context::GetDeviceType())); } }; @@ -366,31 +366,32 @@ class PythonOpBase : public Operator { // make sure output blob is initialized before creating the binding if (forced_cpu_outputs_.count(i)) { - blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(blob, Context::GetDeviceType()); } else { - blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(blob, Context::GetDeviceType()); } py::object py_obj; if (blob->template IsType()) { if (use_dlpack) { DLPackWrapper wrapper( - blob->GetMutableTensor(Context::GetDeviceType()), cpu_option); + BlobGetMutableTensor(blob, Context::GetDeviceType()), + cpu_option); py_obj = py::cast(wrapper, py::return_value_policy::copy); } else { py_obj = py::cast( - blob->GetMutableTensor(Context::GetDeviceType()), + BlobGetMutableTensor(blob, Context::GetDeviceType()), py::return_value_policy::reference); } } else { if (use_dlpack) { DLPackWrapper wrapper( - blob->GetMutableTensor(Context::GetDeviceType()), + BlobGetMutableTensor(blob, Context::GetDeviceType()), this->device_option()); py_obj = py::cast(wrapper, py::return_value_policy::copy); } else { py_obj = py::cast( - blob->GetMutableTensor(Context::GetDeviceType()), + 
BlobGetMutableTensor(blob, Context::GetDeviceType()), py::return_value_policy::reference); } } diff --git a/caffe2/python/pybind_state_ideep.cc b/caffe2/python/pybind_state_ideep.cc index ebad6cf8d9683..f0307f7b6485d 100644 --- a/caffe2/python/pybind_state_ideep.cc +++ b/caffe2/python/pybind_state_ideep.cc @@ -163,8 +163,8 @@ class IDeepFeeder : public BlobFeederBase { DeviceOption cpu_option(option); cpu_option.set_device_type(DeviceTypeProto::PROTO_CPU); TensorFeeder cpu_tensor_feeder; - cpu_tensor_feeder.FeedTensor(cpu_option, original_array, - blob->GetMutableTensor(CPU)); + cpu_tensor_feeder.FeedTensor( + cpu_option, original_array, BlobGetMutableTensor(blob, CPU)); } } catch (ideep::error &e) { LOG(ERROR) << "IDEEP error: " << e.message; diff --git a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix.zip b/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix.zip deleted file mode 100644 index e4019f68dfd0e..0000000000000 Binary files a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix.zip and /dev/null differ diff --git a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix/inout.npz b/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix/inout.npz deleted file mode 100644 index 0dfa5f9790c01..0000000000000 Binary files a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix/inout.npz and /dev/null differ diff --git a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix/op.pb b/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix/op.pb deleted file mode 100644 index b1f14dad9aefd..0000000000000 Binary files a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_prefix/op.pb and /dev/null differ diff --git a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix.zip b/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix.zip deleted file mode 100644 index cc60f7242ee69..0000000000000 Binary files a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix.zip and /dev/null differ diff --git a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix/inout.npz b/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix/inout.npz deleted file mode 100644 index 0dfa5f9790c01..0000000000000 Binary files a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix/inout.npz and /dev/null differ diff --git a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix/op.pb b/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix/op.pb deleted file mode 100644 index d59c513004803..0000000000000 Binary files a/caffe2/python/serialized_test/data/operator_test/string_ops_test.test_string_suffix/op.pb and /dev/null differ diff --git a/caffe2/python/serialized_test/serialized_test_util.py b/caffe2/python/serialized_test/serialized_test_util.py index feb5d8e127cb8..67081fa77d025 100644 --- a/caffe2/python/serialized_test/serialized_test_util.py +++ b/caffe2/python/serialized_test/serialized_test_util.py @@ -11,9 +11,9 @@ import inspect import numpy as np import os -import re import shutil import sys +import tempfile import threading from zipfile import ZipFile @@ -140,16 +140,15 @@ def parse_proto(x): source_dir = 
self.get_output_dir() test_name = self.get_output_filename() - full_dir = os.path.join(source_dir, test_name) - _prepare_dir(full_dir) + temp_dir = tempfile.mkdtemp() with ZipFile(os.path.join(source_dir, test_name + '.zip')) as z: - loaded = z.extractall(full_dir) + z.extractall(temp_dir) - op_path = os.path.join(full_dir, 'op.pb') - inout_path = os.path.join(full_dir, 'inout.npz') - loaded = np.load(inout_path, encoding='bytes') + op_path = os.path.join(temp_dir, 'op.pb') + inout_path = os.path.join(temp_dir, 'inout.npz') # load serialized input and output + loaded = np.load(inout_path, encoding='bytes') loaded_inputs = loaded['inputs'].tolist() inputs_equal = True for (x, y) in zip(inputs, loaded_inputs): @@ -157,16 +156,16 @@ def parse_proto(x): inputs_equal = False loaded_outputs = loaded['outputs'].tolist() - # load operator - with open(op_path, 'rb') as f: - loaded_op = f.read() - - op_proto = parse_proto(loaded_op) - device_type = loaded['device_type'] - device_option = caffe2_pb2.DeviceOption(device_type=int(device_type)) - # if inputs are not the same, run serialized input through serialized op if not inputs_equal: + # load operator + with open(op_path, 'rb') as f: + loaded_op = f.read() + + op_proto = parse_proto(loaded_op) + device_type = loaded['device_type'] + device_option = caffe2_pb2.DeviceOption(device_type=int(device_type)) + outputs = hu.runOpOnInput(device_option, op_proto, loaded_inputs) grad_ops = _getGradientOrNone(op_proto) @@ -176,12 +175,13 @@ def parse_proto(x): # assert gradient op is equal for i in range(len(grad_ops)): - with open(os.path.join(full_dir, 'grad_{}.pb'.format(i)), 'rb') as f: + grad_path = os.path.join(temp_dir, 'grad_{}.pb'.format(i)) + with open(grad_path, 'rb') as f: loaded_grad = f.read() grad_proto = parse_proto(loaded_grad) self.assertTrue(grad_proto == grad_ops[i]) - shutil.rmtree(full_dir) + shutil.rmtree(temp_dir) def assertSerializedOperatorChecks( self, diff --git a/caffe2/python/test_util.py b/caffe2/python/test_util.py index 769679e46f2b7..dc1f737013223 100644 --- a/caffe2/python/test_util.py +++ b/caffe2/python/test_util.py @@ -16,6 +16,28 @@ def rand_array(*dims): return np.array(np.random.rand(*dims) - 0.5).astype(np.float32) +def randBlob(name, type, *dims, **kwargs): + offset = kwargs['offset'] if 'offset' in kwargs else 0.0 + workspace.FeedBlob(name, np.random.rand(*dims).astype(type) + offset) + + +def randBlobFloat32(name, *dims, **kwargs): + randBlob(name, np.float32, *dims, **kwargs) + + +def randBlobsFloat32(names, *dims, **kwargs): + for name in names: + randBlobFloat32(name, *dims, **kwargs) + + +def str_compare(a, b, encoding="utf8"): + if isinstance(a, bytes): + a = a.decode(encoding) + if isinstance(b, bytes): + b = b.decode(encoding) + return a == b + + class TestCase(unittest.TestCase): @classmethod def setUpClass(cls): diff --git a/caffe2/python/transformations_test.py b/caffe2/python/transformations_test.py index 383b8410ea6ae..26f5450605a1c 100644 --- a/caffe2/python/transformations_test.py +++ b/caffe2/python/transformations_test.py @@ -23,39 +23,32 @@ import numpy as np from caffe2.python.transformations import Transformer -from caffe2.python import core, workspace, test_util +from caffe2.python import core, workspace +from caffe2.python import test_util as tu transformer = Transformer() -def str_compare(a, b, encoding="utf8"): - if isinstance(a, bytes): - a = a.decode(encoding) - if isinstance(b, bytes): - b = b.decode(encoding) - return a == b - - -class TestTransformations(test_util.TestCase): +class 
TestTransformations(tu.TestCase): def test_transformer_AddNNPACK(self): net = core.Net("net") net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW") net.Relu(["Y"], ["Y2"]) transformer.AddNNPACK(net) - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") def test_transformer_FuseNNPACKConvRelu(self): net = core.Net("net") net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW") net.Relu(["Y"], ["Y2"]) transformer.AddNNPACK(net) # get the NNPACK engine - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") transformer.FuseNNPACKConvRelu(net) assert len(net.Proto().op) == 1 has_activation_arg = False for arg in net.Proto().op[0].arg: - if str_compare(arg.name, "activation"): - assert str_compare(arg.s, "Relu") + if tu.str_compare(arg.name, "activation"): + assert tu.str_compare(arg.s, "Relu") has_activation_arg = True assert has_activation_arg @@ -65,12 +58,12 @@ def test_noFuseNNPACKConvRelu(self): net.Relu(["Y"], ["Y2"]) net.Relu(["Y"], ["Y3"]) transformer.AddNNPACK(net) # get the NNPACK engine - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") transformer.FuseNNPACKConvRelu(net) assert len(net.Proto().op) == 3 has_activation_arg = False for arg in net.Proto().op[0].arg: - if str_compare(arg.name, "activation") and str_compare(arg.s, "Relu"): + if tu.str_compare(arg.name, "activation") and tu.str_compare(arg.s, "Relu"): has_activation_arg = True assert not has_activation_arg @@ -79,13 +72,13 @@ def test_transformer_FuseNNPACKConvReluNoInplace(self): net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW") net.Relu(["Y"], ["X"]) transformer.AddNNPACK(net) # get the NNPACK engine - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") transformer.FuseNNPACKConvRelu(net) assert len(net.Proto().op) == 1 has_activation_arg = False for arg in net.Proto().op[0].arg: - if str_compare(arg.name, "activation"): - assert str_compare(arg.s, "Relu") + if tu.str_compare(arg.name, "activation"): + assert tu.str_compare(arg.s, "Relu") has_activation_arg = True assert has_activation_arg assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0] @@ -95,13 +88,13 @@ def test_transformer_FuseNNPACKConvReluInplaceRelu(self): net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW") net.Relu(["Y"], ["Y"]) transformer.AddNNPACK(net) # get the NNPACK engine - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") transformer.FuseNNPACKConvRelu(net) assert len(net.Proto().op) == 1 has_activation_arg = False for arg in net.Proto().op[0].arg: - if str_compare(arg.name, "activation"): - assert str_compare(arg.s, "Relu") + if tu.str_compare(arg.name, "activation"): + assert tu.str_compare(arg.s, "Relu") has_activation_arg = True assert has_activation_arg assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0] @@ -112,13 +105,13 @@ def test_transformer_FuseNNPACKConvReluPingPongNaming(self): net.Relu(["Y"], ["X"]) net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW") transformer.AddNNPACK(net) # get the NNPACK engine - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") transformer.FuseNNPACKConvRelu(net) assert len(net.Proto().op) == 2 
has_activation_arg = False for arg in net.Proto().op[0].arg: - if str_compare(arg.name, "activation"): - assert str_compare(arg.s, "Relu") + if tu.str_compare(arg.name, "activation"): + assert tu.str_compare(arg.s, "Relu") has_activation_arg = True assert has_activation_arg assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0] @@ -131,13 +124,13 @@ def test_transformer_FuseNNPACKConvReluFollowedByMultipleInputOp(self): net.Conv(["Y2", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW") net.Relu(["Y"], ["Y2"]) transformer.AddNNPACK(net) # get the NNPACK engine - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") transformer.FuseNNPACKConvRelu(net) assert len(net.Proto().op) == 2 has_activation_arg = False for arg in net.Proto().op[0].arg: - if str_compare(arg.name, "activation"): - assert str_compare(arg.s, "Relu") + if tu.str_compare(arg.name, "activation"): + assert tu.str_compare(arg.s, "Relu") has_activation_arg = True assert has_activation_arg assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0] @@ -150,13 +143,13 @@ def test_transformer_FuseNNPACKConvReluInplaceFollowedByMultipleInputOp(self): net.Conv(["Y", "w", "b"], ["Y2"], stride=1, pad=0, kernel=3, order="NCHW") net.Relu(["Y2"], ["Y2"]) transformer.AddNNPACK(net) # get the NNPACK engine - assert str_compare(net.Proto().op[0].engine, "NNPACK") + assert tu.str_compare(net.Proto().op[0].engine, "NNPACK") transformer.FuseNNPACKConvRelu(net) assert len(net.Proto().op) == 2 has_activation_arg = False for arg in net.Proto().op[0].arg: - if str_compare(arg.name, "activation"): - assert str_compare(arg.s, "Relu") + if tu.str_compare(arg.name, "activation"): + assert tu.str_compare(arg.s, "Relu") has_activation_arg = True assert has_activation_arg assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0] @@ -168,8 +161,8 @@ def test_transformer_SinkMaxPool(self): net.MaxPool(["Y"], ["Y1"], kernel=3) net.Relu(["Y1"], ["Y1"]) transformer.SinkMaxPool(net) - assert str_compare(net.Proto().op[1].type, "Relu") - assert str_compare(net.Proto().op[2].type, "MaxPool") + assert tu.str_compare(net.Proto().op[1].type, "Relu") + assert tu.str_compare(net.Proto().op[2].type, "MaxPool") @given( size=st.integers(7, 10), @@ -196,18 +189,16 @@ def test_transformer_FuseConvBN(self, size, input_channels, seed, order, epsilon np.random.seed(seed) if order == "NCHW": - workspace.FeedBlob("X", np.random.rand(1, c, h, w).astype(np.float32)) - workspace.FeedBlob("w", np.random.rand(c, c, k, k).astype(np.float32)) + tu.randBlobFloat32("X", 1, c, h, w) + tu.randBlobFloat32("w", c, c, k, k) else: - workspace.FeedBlob("X", np.random.rand(1, h, w, c).astype(np.float32)) - workspace.FeedBlob("w", np.random.rand(c, k, k, c).astype(np.float32)) - workspace.FeedBlob("b", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("scale", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("bias", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("mean", np.random.rand(c).astype(np.float32)) + tu.randBlobFloat32("X", 1, h, w, c) + tu.randBlobFloat32("w", c, k, k, c) + tu.randBlobsFloat32(["b", "scale", "bias", "mean"], c) + # This is necessary because 1/sqrt(var) is used and if var is too small # we get floating point artifacts that cause test failures - workspace.FeedBlob("var", np.random.rand(c).astype(np.float32) + 0.5) + tu.randBlobFloat32("var", c, offset=0.5) workspace.RunNetOnce(net) preTransformOutput = workspace.FetchBlob("Y2").flatten() 
workspace.FeedBlob("Y2", np.zeros((1, 1))) @@ -250,17 +241,15 @@ def test_transformer_FuseConvBNNoConvBias(self, size, input_channels, seed, orde np.random.seed(seed) if order == "NCHW": - workspace.FeedBlob("X", np.random.rand(1, c, h, w).astype(np.float32)) - workspace.FeedBlob("w", np.random.rand(c, c, k, k).astype(np.float32)) + tu.randBlobFloat32("X", 1, c, h, w) + tu.randBlobFloat32("w", c, c, k, k) else: - workspace.FeedBlob("X", np.random.rand(1, h, w, c).astype(np.float32)) - workspace.FeedBlob("w", np.random.rand(c, k, k, c).astype(np.float32)) - workspace.FeedBlob("scale", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("bias", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("mean", np.random.rand(c).astype(np.float32)) + tu.randBlobFloat32("X", 1, h, w, c) + tu.randBlobFloat32("w", c, k, k, c) + tu.randBlobsFloat32(["scale", "bias", "mean"], c) # This is necessary because 1/sqrt(var) is used and if var is too small # we get floating point artifacts that cause test failures - workspace.FeedBlob("var", np.random.rand(c).astype(np.float32) + 0.5) + tu.randBlobFloat32("var", c, offset=0.5) workspace.RunNetOnce(net) preTransformOutput = workspace.FetchBlob("Y2").flatten() workspace.FeedBlob("Y2", np.zeros((1, 1))) @@ -303,17 +292,15 @@ def test_transformer_FuseConvBNNoConvBiasDuplicatedName(self, size, input_channe np.random.seed(seed) if order == "NCHW": - workspace.FeedBlob("X", np.random.rand(1, c, h, w).astype(np.float32)) - workspace.FeedBlob("w", np.random.rand(c, c, k, k).astype(np.float32)) + tu.randBlobFloat32("X", 1, c, h, w) + tu.randBlobFloat32("w", c, c, k, k) else: - workspace.FeedBlob("X", np.random.rand(1, h, w, c).astype(np.float32)) - workspace.FeedBlob("w", np.random.rand(c, k, k, c).astype(np.float32)) - workspace.FeedBlob("scale", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("_bias0", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("mean", np.random.rand(c).astype(np.float32)) + tu.randBlobFloat32("X", 1, h, w, c) + tu.randBlobFloat32("w", c, k, k, c) + tu.randBlobsFloat32(["scale", "_bias0", "mean"], c) # This is necessary because 1/sqrt(var) is used and if var is too small # we get floating point artifacts that cause test failures - workspace.FeedBlob("var", np.random.rand(c).astype(np.float32) + 0.5) + tu.randBlobFloat32("var", c, offset=0.5) workspace.RunNetOnce(net) preTransformOutput = workspace.FetchBlob("Y2").flatten() workspace.FeedBlob("Y2", np.zeros((1, 1))) @@ -366,15 +353,12 @@ def test_transformer_FuseConv3DBN( ) np.random.seed(seed) - workspace.FeedBlob("X", np.random.rand(1, c, t, h, w).astype(np.float32)) - workspace.FeedBlob("w", np.random.rand(c, c, kt, kh, kw).astype(np.float32)) - workspace.FeedBlob("b", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("scale", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("bias", np.random.rand(c).astype(np.float32)) - workspace.FeedBlob("mean", np.random.rand(c).astype(np.float32)) + tu.randBlobFloat32("X", 1, c, t, h, w) + tu.randBlobFloat32("w", c, c, kt, kh, kw) + tu.randBlobsFloat32(["b", "scale", "bias", "mean"], c) # This is necessary because 1/sqrt(var) is used and if var is too small # we get floating point artifacts that cause test failures - workspace.FeedBlob("var", np.random.rand(c).astype(np.float32) + 0.5) + tu.randBlobFloat32("var", c, offset=0.5) workspace.RunNetOnce(net) preTransformOutput = workspace.FetchBlob("Y2").flatten() workspace.FeedBlob("Y2", np.zeros((1, 1))) diff --git a/caffe2/queue/blobs_queue_db.cc 
b/caffe2/queue/blobs_queue_db.cc index 06a6985848ce2..bd7795c94ad2e 100644 --- a/caffe2/queue/blobs_queue_db.cc +++ b/caffe2/queue/blobs_queue_db.cc @@ -32,7 +32,7 @@ class CreateBlobsQueueDBOp : public Operator { } private: - AT_DISABLE_COPY_AND_ASSIGN(CreateBlobsQueueDBOp); + C10_DISABLE_COPY_AND_ASSIGN(CreateBlobsQueueDBOp); }; REGISTER_CPU_OPERATOR(CreateBlobsQueueDB, CreateBlobsQueueDBOp); diff --git a/caffe2/sgd/iter_op.cc b/caffe2/sgd/iter_op.cc index df9e261f2ea7f..ac964018b99e7 100644 --- a/caffe2/sgd/iter_op.cc +++ b/caffe2/sgd/iter_op.cc @@ -1,5 +1,10 @@ #include "caffe2/sgd/iter_op.h" +#ifdef CAFFE2_USE_IDEEP +#include +#include +#endif + namespace caffe2 { void MutexSerializer::Serialize( @@ -22,6 +27,10 @@ void MutexDeserializer::Deserialize(const BlobProto& /* unused */, Blob* blob) { REGISTER_CPU_OPERATOR(Iter, IterOp); REGISTER_CPU_OPERATOR(AtomicIter, AtomicIterOp); +#ifdef CAFFE2_USE_IDEEP +REGISTER_IDEEP_OPERATOR(AtomicIter, IDEEPFallbackOp>); +#endif + REGISTER_BLOB_SERIALIZER( (TypeMeta::Id>()), MutexSerializer); diff --git a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc index 4ac3524d49d8a..d102985e2fd7a 100644 --- a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc +++ b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc @@ -19,7 +19,7 @@ void AddNoiseInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/share/contrib/nnpack/conv_op.cc b/caffe2/share/contrib/nnpack/conv_op.cc index 05c945106c52d..f11e05b67392c 100644 --- a/caffe2/share/contrib/nnpack/conv_op.cc +++ b/caffe2/share/contrib/nnpack/conv_op.cc @@ -231,11 +231,12 @@ bool NNPACKConvOp::RunOnDeviceWithOrderNCHW() { (transformedFilterSize + sizeof(float) - 1) / sizeof(float); for (auto g = 0; g < group_; g++) { - transformedFilters_[g] = ws_->CreateBlob( - "__transformed_kernel_" + - to_string(__sync_fetch_and_add( - &precomputed_transform_id, 1))) - ->GetMutableTensor(CPU); + transformedFilters_[g] = BlobGetMutableTensor( + ws_->CreateBlob( + "__transformed_kernel_" + + to_string( + __sync_fetch_and_add(&precomputed_transform_id, 1))), + CPU); transformedFilters_[g]->Resize(transformedFilterElements); status = nnp_convolution_inference( diff --git a/caffe2/share/contrib/nnpack/nnpack_test.cc b/caffe2/share/contrib/nnpack/nnpack_test.cc index 2f892118982da..10eb6348becc0 100644 --- a/caffe2/share/contrib/nnpack/nnpack_test.cc +++ b/caffe2/share/contrib/nnpack/nnpack_test.cc @@ -19,7 +19,7 @@ void AddNoiseInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/utils/GpuDefs.cuh b/caffe2/utils/GpuDefs.cuh index cf54f9e851bfa..0f94ae9e018ba 100644 --- a/caffe2/utils/GpuDefs.cuh +++ b/caffe2/utils/GpuDefs.cuh @@ -8,7 +8,7 @@ namespace caffe2 { // Static definition of GPU warp size for unrolling and code generation #ifdef __CUDA_ARCH__ -#if __CUDA_ARCH__ <= 700 +#if __CUDA_ARCH__ <= 750 constexpr int kWarpSize = 32; #else #error Unknown __CUDA_ARCH__; please define parameters for compute capability diff --git a/caffe2/utils/hip/math_blas_hip_test.cc b/caffe2/utils/hip/math_blas_hip_test.cc index 911c2b09868fc..a5df5900ee23a 100644 --- 
a/caffe2/utils/hip/math_blas_hip_test.cc +++ b/caffe2/utils/hip/math_blas_hip_test.cc @@ -26,13 +26,13 @@ TEST(MathROCBLASTest, GemmNoTransNoTrans) { vector shapeX{5, 10}; vector shapeW{10, 6}; vector shapeY{5, 6}; - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorW = blobW->GetMutableTensor(HIP); + auto* tensorW = BlobGetMutableTensor(blobW, HIP); tensorW->Resize(shapeW); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorX->size(), 50); @@ -126,13 +126,13 @@ TEST(MathROCBLASTest, GemmNoTransTrans) { vector shapeX{5, 10}; vector shapeW{6, 10}; vector shapeY{5, 6}; - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorW = blobW->GetMutableTensor(HIP); + auto* tensorW = BlobGetMutableTensor(blobW, HIP); tensorW->Resize(shapeW); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorX->size(), 50); @@ -225,13 +225,13 @@ TEST(MathROCBLASTest, GemvNoTrans) { vector shapeA{5, 10}; vector shapeX{10}; vector shapeY{5}; - auto* tensorA = blobA->GetMutableTensor(HIP); + auto* tensorA = BlobGetMutableTensor(blobA, HIP); tensorA->Resize(shapeA); - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorA->size(), 50); @@ -315,13 +315,13 @@ TEST(MathROCBLASTest, GemvTrans) { vector shapeA{6, 10}; vector shapeX{6}; vector shapeY{10}; - auto* tensorA = blobA->GetMutableTensor(HIP); + auto* tensorA = BlobGetMutableTensor(blobA, HIP); tensorA->Resize(shapeA); - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorA->size(), 60); diff --git a/caffe2/utils/math_cpu.cc b/caffe2/utils/math_cpu.cc index 18e20e4fa4141..e770bcfd9afae 100644 --- a/caffe2/utils/math_cpu.cc +++ b/caffe2/utils/math_cpu.cc @@ -76,7 +76,7 @@ namespace math { // (transpose) if the argument TransA or TransB is set to CblasNoTrans or // CblasTrans, respectively, for each of A and B. 
template <> -CAFFE2_EXPORT void Gemm( +C10_EXPORT void Gemm( const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int M, @@ -134,7 +134,7 @@ CAFFE2_EXPORT void Gemm( } template <> -CAFFE2_EXPORT void GemmEx( +C10_EXPORT void GemmEx( const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int M, @@ -206,7 +206,7 @@ CAFFE2_EXPORT void GemmEx( } template <> -CAFFE2_EXPORT void Gemv( +C10_EXPORT void Gemv( const CBLAS_TRANSPOSE trans_A, const int M, const int N, @@ -245,7 +245,7 @@ CAFFE2_EXPORT void Gemv( #define CAFFE2_SPECIALIZED_DOT(T) \ template <> \ - CAFFE2_EXPORT void Dot( \ + C10_EXPORT void Dot( \ const int N, const T* a, const T* b, T* y, CPUContext* context) { \ *y = ConstEigenVectorMap(a, N).dot(ConstEigenVectorMap(b, N)); \ } @@ -254,12 +254,12 @@ CAFFE2_SPECIALIZED_DOT(float) #define CAFFE2_SPECIALIZED_AXPY(T) \ template <> \ - CAFFE2_EXPORT void Axpy( \ + C10_EXPORT void Axpy( \ const int N, const T alpha, const T* x, T* Y, CPUContext* context) { \ EigenVectorMap(Y, N) += ConstEigenVectorMap(x, N) * alpha; \ } \ template <> \ - CAFFE2_EXPORT void Axpy( \ + C10_EXPORT void Axpy( \ const int N, const T* alpha, const T* x, T* Y, CPUContext* context) { \ EigenVectorMap(Y, N) += ConstEigenVectorMap(x, N) * (*alpha); \ } @@ -268,7 +268,7 @@ CAFFE2_SPECIALIZED_AXPY(float) #define CAFFE2_SPECIALIZED_AXPBY(T) \ template <> \ - CAFFE2_EXPORT void Axpby( \ + C10_EXPORT void Axpby( \ const int N, \ const T alpha, \ const T* x, \ @@ -279,7 +279,7 @@ CAFFE2_SPECIALIZED_AXPY(float) y_arr = y_arr * beta + ConstEigenVectorArrayMap(x, N) * alpha; \ } \ template <> \ - CAFFE2_EXPORT void Axpby( \ + C10_EXPORT void Axpby( \ const int N, \ const T* alpha, \ const T* x, \ @@ -295,7 +295,7 @@ CAFFE2_SPECIALIZED_AXPBY(float) #else // CAFFE2_USE_EIGEN_FOR_BLAS template <> -CAFFE2_EXPORT void Gemm( +C10_EXPORT void Gemm( const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int M, @@ -328,7 +328,7 @@ CAFFE2_EXPORT void Gemm( } template <> -CAFFE2_EXPORT void GemmEx( +C10_EXPORT void GemmEx( const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int M, @@ -361,7 +361,7 @@ CAFFE2_EXPORT void GemmEx( } template <> -CAFFE2_EXPORT void Gemv( +C10_EXPORT void Gemv( const CBLAS_TRANSPOSE trans_A, const int M, const int N, @@ -377,7 +377,7 @@ CAFFE2_EXPORT void Gemv( #define CAFFE2_SPECIALIZED_SCALE(TAlpha, TData, prefix) \ template <> \ - CAFFE2_EXPORT void Scale( \ + C10_EXPORT void Scale( \ const int n, \ const TAlpha alpha, \ const TData* x, \ @@ -391,7 +391,7 @@ CAFFE2_EXPORT void Gemv( } \ } \ template <> \ - CAFFE2_EXPORT void Scale( \ + C10_EXPORT void Scale( \ const int n, \ const TAlpha* alpha, \ const TData* x, \ @@ -411,7 +411,7 @@ CAFFE2_SPECIALIZED_SCALE(float, double, d) #define CAFFE2_SPECIALIZED_DOT(T, prefix) \ template <> \ - CAFFE2_EXPORT void Dot( \ + C10_EXPORT void Dot( \ const int N, const T* a, const T* b, T* y, CPUContext*) { \ *y = cblas_##prefix##dot(N, a, 1, b, 1); \ } @@ -420,12 +420,12 @@ CAFFE2_SPECIALIZED_DOT(float, s) #define CAFFE2_SPECIALIZED_AXPY(T, prefix) \ template <> \ - CAFFE2_EXPORT void Axpy( \ + C10_EXPORT void Axpy( \ const int N, const T alpha, const T* x, T* y, CPUContext*) { \ cblas_##prefix##axpy(N, alpha, x, 1, y, 1); \ } \ template <> \ - CAFFE2_EXPORT void Axpy( \ + C10_EXPORT void Axpy( \ const int N, const T* alpha, const T* x, T* y, CPUContext*) { \ cblas_##prefix##axpy(N, *alpha, x, 1, y, 1); \ } @@ -437,7 +437,7 @@ CAFFE2_SPECIALIZED_AXPY(float, s) #ifdef CAFFE2_USE_MKL #define 
CAFFE2_SPECIALIZED_AXPBY(T, prefix) \ template <> \ - CAFFE2_EXPORT void Axpby( \ + C10_EXPORT void Axpby( \ const int N, \ const T alpha, \ const T* x, \ @@ -447,7 +447,7 @@ CAFFE2_SPECIALIZED_AXPY(float, s) cblas_##prefix##axpby(N, alpha, x, 1, beta, y, 1); \ } \ template <> \ - CAFFE2_EXPORT void Axpby( \ + C10_EXPORT void Axpby( \ const int N, \ const T* alpha, \ const T* x, \ @@ -459,7 +459,7 @@ CAFFE2_SPECIALIZED_AXPY(float, s) #else // CAFFE2_USE_MKL #define CAFFE2_SPECIALIZED_AXPBY(T, prefix) \ template <> \ - CAFFE2_EXPORT void Axpby( \ + C10_EXPORT void Axpby( \ const int N, \ const T alpha, \ const T* x, \ @@ -470,7 +470,7 @@ CAFFE2_SPECIALIZED_AXPY(float, s) cblas_##prefix##axpy(N, alpha, x, 1, y, 1); \ } \ template <> \ - CAFFE2_EXPORT void Axpby( \ + C10_EXPORT void Axpby( \ const int N, \ const T* alpha, \ const T* x, \ @@ -488,7 +488,7 @@ CAFFE2_SPECIALIZED_AXPBY(float, s) #define CAFFE2_SPECIALIZED_SCALE(TAlpha, TData) \ template <> \ - CAFFE2_EXPORT void Scale( \ + C10_EXPORT void Scale( \ const int n, \ const TAlpha alpha, \ const TData* x, \ @@ -498,7 +498,7 @@ CAFFE2_SPECIALIZED_AXPBY(float, s) ConstEigenVectorMap(x, n) * static_cast(alpha); \ } \ template <> \ - CAFFE2_EXPORT void Scale( \ + C10_EXPORT void Scale( \ const int n, \ const TAlpha* alpha, \ const TData* x, \ @@ -517,7 +517,7 @@ CAFFE2_SPECIALIZED_SCALE(std::int64_t, std::int64_t) #undef CAFFE2_SPECIALIZED_SCALE template <> -CAFFE2_EXPORT void GemmBatched( +C10_EXPORT void GemmBatched( const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int batch_size, @@ -563,7 +563,7 @@ CAFFE2_EXPORT void GemmBatched( } template <> -CAFFE2_EXPORT void GemmStridedBatched( +C10_EXPORT void GemmStridedBatched( const CBLAS_TRANSPOSE trans_A, const CBLAS_TRANSPOSE trans_B, const int batch_size, @@ -632,10 +632,11 @@ CAFFE2_EXPORT void GemmStridedBatched( //////////////////////////////////////////////////////////////////////////////// #ifdef CAFFE2_USE_MKL -#define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Funcname, OriginalFunc, ...) \ - template <> \ - CAFFE2_EXPORT void Funcname(const int N, const T* x, T* y, CPUContext*) { \ - OriginalFunc(N, x, y, ##__VA_ARGS__); \ +#define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Funcname, OriginalFunc, ...) 
\ + template <> \ + C10_EXPORT void Funcname( \ + const int N, const T* x, T* y, CPUContext*) { \ + OriginalFunc(N, x, y, ##__VA_ARGS__); \ } DELEGATE_SIMPLE_UNARY_FUNCTION( float, @@ -683,7 +684,7 @@ DELEGATE_SIMPLE_UNARY_FUNCTION(double, Inv, vdInv) #define DELEGATE_SINCOS_FUNCTION(T, OriginalFunc) \ template <> \ - CAFFE2_EXPORT void SinCos( \ + C10_EXPORT void SinCos( \ const int N, const T* a, T* ys, T* yc, CPUContext*) { \ OriginalFunc(N, a, ys, yc); \ } @@ -691,10 +692,11 @@ DELEGATE_SINCOS_FUNCTION(float, vsSinCos) DELEGATE_SINCOS_FUNCTION(double, vdSinCos) #undef DELEGATE_SINCOS_FUNCTION -#define DELEGATE_POWX_FUNCTION(T, OriginalFunc) \ - template <> \ - CAFFE2_EXPORT void Powx(const int N, const T* a, T b, T* y, CPUContext*) { \ - OriginalFunc(N, a, b, y); \ +#define DELEGATE_POWX_FUNCTION(T, OriginalFunc) \ + template <> \ + C10_EXPORT void Powx( \ + const int N, const T* a, T b, T* y, CPUContext*) { \ + OriginalFunc(N, a, b, y); \ } DELEGATE_POWX_FUNCTION(float, vsPowx) DELEGATE_POWX_FUNCTION(double, vdPowx) @@ -702,7 +704,7 @@ DELEGATE_POWX_FUNCTION(double, vdPowx) #define DELEGATE_SIMPLE_BINARY_FUNCTION(T, Func, FuncImpl) \ template <> \ - CAFFE2_EXPORT void Func( \ + C10_EXPORT void Func( \ const int N, const T* A, const T* B, T* C, CPUContext*) { \ FuncImpl(N, A, B, C); \ } @@ -718,10 +720,11 @@ DELEGATE_SIMPLE_BINARY_FUNCTION(double, Div, vdDiv) #else // CAFFE2_USE_MKL -#define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Funcname, expr) \ - template <> \ - CAFFE2_EXPORT void Funcname(const int N, const T* x, T* y, CPUContext*) { \ - EigenVectorMap(y, N) = ConstEigenVectorArrayMap(x, N).expr(); \ +#define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Funcname, expr) \ + template <> \ + C10_EXPORT void Funcname( \ + const int N, const T* x, T* y, CPUContext*) { \ + EigenVectorMap(y, N) = ConstEigenVectorArrayMap(x, N).expr(); \ } DELEGATE_SIMPLE_UNARY_FUNCTION(float, Exp, exp) DELEGATE_SIMPLE_UNARY_FUNCTION(double, Exp, exp) @@ -750,7 +753,7 @@ DELEGATE_SIMPLE_UNARY_FUNCTION(double, Rsqrt, rsqrt) #define DELEGATE_SINCOS_FUNCTION(T) \ template <> \ - CAFFE2_EXPORT void SinCos( \ + C10_EXPORT void SinCos( \ const int N, const T* x, T* ys, T* yc, CPUContext*) { \ EigenVectorMap(ys, N) = ConstEigenVectorArrayMap(x, N).sin(); \ EigenVectorMap(yc, N) = ConstEigenVectorArrayMap(x, N).cos(); \ @@ -761,7 +764,8 @@ DELEGATE_SINCOS_FUNCTION(double) #define DELEGATE_TANH_FUNCTION(T) \ template <> \ - CAFFE2_EXPORT void Tanh(const int N, const T* X, T* Y, CPUContext*) { \ + C10_EXPORT void Tanh( \ + const int N, const T* X, T* Y, CPUContext*) { \ EigenVectorMap(Y, N) = T(1) - \ ((ConstEigenVectorArrayMap(X, N) * T(2)).exp() + T(1)).inverse() * \ T(2); \ @@ -770,10 +774,11 @@ DELEGATE_TANH_FUNCTION(float) DELEGATE_TANH_FUNCTION(double) #undef DELEGATE_TANH_FUNCTION -#define DELEGATE_CBRT_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void Cbrt(const int N, const T* X, T* Y, CPUContext*) { \ - std::transform(X, X + N, Y, [](const T x) { return cbrt(x); }); \ +#define DELEGATE_CBRT_FUNCTION(T) \ + template <> \ + C10_EXPORT void Cbrt( \ + const int N, const T* X, T* Y, CPUContext*) { \ + std::transform(X, X + N, Y, [](const T x) { return cbrt(x); }); \ } DELEGATE_CBRT_FUNCTION(float) DELEGATE_CBRT_FUNCTION(double) @@ -781,28 +786,30 @@ DELEGATE_CBRT_FUNCTION(double) #define DELEGATE_POWX_FUNCTION(T) \ template <> \ - CAFFE2_EXPORT void Powx( \ + C10_EXPORT void Powx( \ const int N, const T* a, const T b, T* y, CPUContext*) { \ EigenVectorMap(y, N) = ConstEigenVectorArrayMap(a, N).pow(b); \ } 
DELEGATE_POWX_FUNCTION(float) #undef DELEGATE_POWX_FUNCTION -#define DELEGATE_SINH_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void Sinh(const int N, const T* X, T* Y, CPUContext*) { \ - ConstEigenVectorArrayMap X_arr(X, N); \ - EigenVectorMap(Y, N) = (X_arr.exp() - (-X_arr).exp()) / 2; \ +#define DELEGATE_SINH_FUNCTION(T) \ + template <> \ + C10_EXPORT void Sinh( \ + const int N, const T* X, T* Y, CPUContext*) { \ + ConstEigenVectorArrayMap X_arr(X, N); \ + EigenVectorMap(Y, N) = (X_arr.exp() - (-X_arr).exp()) / 2; \ } DELEGATE_SINH_FUNCTION(float) DELEGATE_SINH_FUNCTION(double) #undef DELEGATE_SINH_FUNCTION -#define DELEGATE_COSH_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void Cosh(const int N, const T* X, T* Y, CPUContext*) { \ - ConstEigenVectorArrayMap X_arr(X, N); \ - EigenVectorMap(Y, N) = (X_arr.exp() + (-X_arr).exp()) / 2; \ +#define DELEGATE_COSH_FUNCTION(T) \ + template <> \ + C10_EXPORT void Cosh( \ + const int N, const T* X, T* Y, CPUContext*) { \ + ConstEigenVectorArrayMap X_arr(X, N); \ + EigenVectorMap(Y, N) = (X_arr.exp() + (-X_arr).exp()) / 2; \ } DELEGATE_COSH_FUNCTION(float) DELEGATE_COSH_FUNCTION(double) @@ -810,7 +817,8 @@ DELEGATE_COSH_FUNCTION(double) #define DELEGATE_INV_FUNCTION(T) \ template <> \ - CAFFE2_EXPORT void Inv(const int N, const T* x, T* y, CPUContext*) { \ + C10_EXPORT void Inv( \ + const int N, const T* x, T* y, CPUContext*) { \ EigenVectorMap(y, N) = ConstEigenVectorArrayMap(x, N).inverse(); \ } DELEGATE_INV_FUNCTION(float) @@ -819,10 +827,11 @@ DELEGATE_INV_FUNCTION(double) #endif // CAFFE2_USE_MKL -#define DELEGATE_NEG_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void Neg(const int N, const T* x, T* y, CPUContext*) { \ - EigenVectorMap(y, N) = -ConstEigenVectorMap(x, N); \ +#define DELEGATE_NEG_FUNCTION(T) \ + template <> \ + C10_EXPORT void Neg( \ + const int N, const T* x, T* y, CPUContext*) { \ + EigenVectorMap(y, N) = -ConstEigenVectorMap(x, N); \ } DELEGATE_NEG_FUNCTION(float) DELEGATE_NEG_FUNCTION(double) @@ -830,10 +839,11 @@ DELEGATE_NEG_FUNCTION(std::int32_t) DELEGATE_NEG_FUNCTION(std::int64_t) #undef DELEGATE_NEG_FUNCTION -#define DELEGATE_SIGN_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void Sign(const int N, const T* x, T* y, CPUContext*) { \ - EigenVectorMap(y, N) = ConstEigenVectorArrayMap(x, N).sign(); \ +#define DELEGATE_SIGN_FUNCTION(T) \ + template <> \ + C10_EXPORT void Sign( \ + const int N, const T* x, T* y, CPUContext*) { \ + EigenVectorMap(y, N) = ConstEigenVectorArrayMap(x, N).sign(); \ } DELEGATE_SIGN_FUNCTION(float) DELEGATE_SIGN_FUNCTION(double) @@ -841,10 +851,11 @@ DELEGATE_SIGN_FUNCTION(std::int32_t) DELEGATE_SIGN_FUNCTION(std::int64_t) #undef DELEGATE_SIGN_FUNCTION -#define DELEGATE_ABS_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void Abs(const int N, const T* x, T* y, CPUContext*) { \ - EigenVectorMap(y, N) = ConstEigenVectorArrayMap(x, N).abs(); \ +#define DELEGATE_ABS_FUNCTION(T) \ + template <> \ + C10_EXPORT void Abs( \ + const int N, const T* x, T* y, CPUContext*) { \ + EigenVectorMap(y, N) = ConstEigenVectorArrayMap(x, N).abs(); \ } #ifndef CAFFE2_USE_MKL DELEGATE_ABS_FUNCTION(float) @@ -854,10 +865,11 @@ DELEGATE_ABS_FUNCTION(std::int32_t) DELEGATE_ABS_FUNCTION(std::int64_t) #undef DELEGATE_ABS_FUNCTION -#define DELEGATE_CUBE_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void Cube(const int N, const T* X, T* Y, CPUContext*) { \ - EigenVectorMap(Y, N) = ConstEigenVectorArrayMap(X, N).cube(); \ +#define DELEGATE_CUBE_FUNCTION(T) \ + template <> \ + C10_EXPORT void Cube( \ + const int N, const T* X, T* 
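In math_cpu.cc the same rename is applied inside the specialization-generating macros, so every explicit specialization they stamp out carries C10_EXPORT instead of CAFFE2_EXPORT. A compressed, self-contained sketch of that pattern (ExampleFill and its signature are hypothetical; only the placement of the annotation mirrors the CAFFE2_SPECIALIZED_* / DELEGATE_* helpers above):

#include "c10/macros/Macros.h"

// Primary template declared once; each macro invocation then emits one
// exported explicit specialization.
template <typename T>
void ExampleFill(const int n, T value, T* y);

#define EXAMPLE_SPECIALIZED_FILL(T)                              \
  template <>                                                    \
  C10_EXPORT void ExampleFill<T>(const int n, T value, T* y) {  \
    for (int i = 0; i < n; ++i) {                                \
      y[i] = value;                                              \
    }                                                            \
  }
EXAMPLE_SPECIALIZED_FILL(float)
EXAMPLE_SPECIALIZED_FILL(double)
#undef EXAMPLE_SPECIALIZED_FILL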
Y, CPUContext*) { \ + EigenVectorMap(Y, N) = ConstEigenVectorArrayMap(X, N).cube(); \ } DELEGATE_CUBE_FUNCTION(float) DELEGATE_CUBE_FUNCTION(double) @@ -867,7 +879,7 @@ DELEGATE_CUBE_FUNCTION(std::int64_t) #define EIGEN_SIMPLE_BINARY_FUNCTION(T, Func, expr) \ template <> \ - CAFFE2_EXPORT void Func( \ + C10_EXPORT void Func( \ const int N, const T* A, const T* B, T* C, CPUContext*) { \ EigenVectorMap(C, N) = ConstEigenVectorArrayMap(A, N) \ expr ConstEigenVectorArrayMap(B, N); \ @@ -903,19 +915,20 @@ DEFINE_SIMPLE_BINARY_FUNCTION(Div, /) // Eigen or via custom code. //////////////////////////////////////////////////////////////////////////////// -#define CAFFE2_SPECIALIZED_SET(T) \ - template <> \ - CAFFE2_EXPORT void Set(const size_t N, const T alpha, T* Y, CPUContext*) { \ - if (N == 0) { \ - return; \ - } \ - if (alpha == (T)0) { \ - if (Y != nullptr) { \ - std::memset(Y, 0, N * sizeof(T)); \ - } \ - } else { \ - EigenVectorMap(Y, N).setConstant(alpha); \ - } \ +#define CAFFE2_SPECIALIZED_SET(T) \ + template <> \ + C10_EXPORT void Set( \ + const size_t N, const T alpha, T* Y, CPUContext*) { \ + if (N == 0) { \ + return; \ + } \ + if (alpha == (T)0) { \ + if (Y != nullptr) { \ + std::memset(Y, 0, N * sizeof(T)); \ + } \ + } else { \ + EigenVectorMap(Y, N).setConstant(alpha); \ + } \ } CAFFE2_SPECIALIZED_SET(float); @@ -932,7 +945,7 @@ CAFFE2_SPECIALIZED_SET(uint16_t); #define CAFFE2_SPECIALIZED_REDUCEMIN(T) \ template <> \ - CAFFE2_EXPORT void ReduceMin( \ + C10_EXPORT void ReduceMin( \ const int N, \ const T* x, \ T* y, \ @@ -945,7 +958,7 @@ CAFFE2_SPECIALIZED_REDUCEMIN(float) #define CAFFE2_SPECIALIZED_REDUCEMAX(T) \ template <> \ - CAFFE2_EXPORT void ReduceMax( \ + C10_EXPORT void ReduceMax( \ const int N, \ const T* x, \ T* y, \ @@ -991,7 +1004,7 @@ struct SquaredL2NormFunctor { #define DELEGATE_ROWWISE_REDUCE_FUNCTION(Func, EigenOp) \ template \ - CAFFE2_EXPORT void Rowwise##Func( \ + C10_EXPORT void Rowwise##Func( \ const int rows, const int cols, const T alpha, const T* X, T* Y) { \ EigenVectorMap(Y, rows) = \ ConstEigenMatrixMap(X, cols, rows).colwise().EigenOp() * alpha; \ @@ -1006,7 +1019,7 @@ DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceL2, norm) #define DELEGATE_COLWISE_REDUCE_FUNCTION(Func, EigenOp) \ template \ - CAFFE2_EXPORT void Colwise##Func( \ + C10_EXPORT void Colwise##Func( \ const int rows, const int cols, const T alpha, const T* X, T* Y) { \ EigenVectorMap(Y, cols) = \ ConstEigenMatrixMap(X, cols, rows).rowwise().EigenOp() * alpha; \ @@ -1020,7 +1033,7 @@ DELEGATE_COLWISE_REDUCE_FUNCTION(ReduceL2, norm) #undef DELEGATE_COLWISE_REDUCE_FUNCTION template -CAFFE2_EXPORT void BothEndsReduceMin( +C10_EXPORT void BothEndsReduceMin( const int pre, const int mid, const int nxt, @@ -1044,7 +1057,7 @@ CAFFE2_EXPORT void BothEndsReduceMin( } template -CAFFE2_EXPORT void BothEndsReduceMax( +C10_EXPORT void BothEndsReduceMax( const int pre, const int mid, const int nxt, @@ -1066,7 +1079,7 @@ CAFFE2_EXPORT void BothEndsReduceMax( } template -CAFFE2_EXPORT void BothEndsReduceSum( +C10_EXPORT void BothEndsReduceSum( const int pre, const int mid, const int nxt, @@ -1087,7 +1100,7 @@ CAFFE2_EXPORT void BothEndsReduceSum( } template -CAFFE2_EXPORT void BothEndsReduceMean( +C10_EXPORT void BothEndsReduceMean( const int pre, const int mid, const int nxt, @@ -1108,7 +1121,7 @@ CAFFE2_EXPORT void BothEndsReduceMean( } template -CAFFE2_EXPORT void BothEndsReduceL1( +C10_EXPORT void BothEndsReduceL1( const int pre, const int mid, const int nxt, @@ -1135,7 +1148,7 @@ CAFFE2_EXPORT void 
BothEndsReduceL1( } template -CAFFE2_EXPORT void BothEndsReduceL2( +C10_EXPORT void BothEndsReduceL2( const int pre, const int mid, const int nxt, @@ -1155,7 +1168,7 @@ CAFFE2_EXPORT void BothEndsReduceL2( } template -CAFFE2_EXPORT void ReduceTensor( +C10_EXPORT void ReduceTensor( const int ndim, const int* X_dims, const int* Y_dims, @@ -1183,7 +1196,7 @@ CAFFE2_EXPORT void ReduceTensor( #define DELEGATE_REDUCE_FUNCTION(T, Func, reducer, init, is_norm) \ template <> \ - CAFFE2_EXPORT void Func( \ + C10_EXPORT void Func( \ const int num_dims, \ const int* dims, \ const int num_axes, \ @@ -1325,7 +1338,7 @@ DELEGATE_REDUCE_FUNCTION( #define CAFFE2_SPECIALIZED_REDUCE_MEAN(T) \ template <> \ - CAFFE2_EXPORT void ReduceMean( \ + C10_EXPORT void ReduceMean( \ const int num_dims, \ const int* dims, \ const int num_axes, \ @@ -1392,7 +1405,7 @@ CAFFE2_SPECIALIZED_REDUCE_MEAN(double) #define CAFFE2_SPECIALIZED_REDUCE_L2(T) \ template <> \ - CAFFE2_EXPORT void ReduceL2( \ + C10_EXPORT void ReduceL2( \ const int num_dims, \ const int* dims, \ const int num_axes, \ @@ -1462,7 +1475,7 @@ CAFFE2_SPECIALIZED_REDUCE_L2(double) namespace { template -CAFFE2_EXPORT void BroadcastImpl( +C10_EXPORT void BroadcastImpl( const int X_ndim, const int* X_dims, const int Y_ndim, @@ -1495,7 +1508,7 @@ CAFFE2_EXPORT void BroadcastImpl( #define CAFFE2_SPECIALIZED_BROADCAST(T) \ template <> \ - CAFFE2_EXPORT void Broadcast( \ + C10_EXPORT void Broadcast( \ const int X_ndim, \ const int* X_dims, \ const int Y_ndim, \ @@ -1515,7 +1528,7 @@ CAFFE2_SPECIALIZED_BROADCAST(double) namespace { template -CAFFE2_EXPORT void RowwiseMoments( +C10_EXPORT void RowwiseMoments( const int rows, const int cols, const T* X, @@ -1529,7 +1542,7 @@ CAFFE2_EXPORT void RowwiseMoments( } template -CAFFE2_EXPORT void ColwiseMoments( +C10_EXPORT void ColwiseMoments( const int rows, const int cols, const T* X, @@ -1551,7 +1564,7 @@ CAFFE2_EXPORT void ColwiseMoments( } template -CAFFE2_EXPORT void BothEndsMoments( +C10_EXPORT void BothEndsMoments( const int pre, const int mid, const int nxt, @@ -1576,7 +1589,7 @@ CAFFE2_EXPORT void BothEndsMoments( } template -CAFFE2_EXPORT void MomentsImpl( +C10_EXPORT void MomentsImpl( const int num_dims, const int* dims, const int num_axes, @@ -1643,7 +1656,7 @@ CAFFE2_EXPORT void MomentsImpl( #define CAFFE2_SPECIALIZED_MOMENTS(T) \ template <> \ - CAFFE2_EXPORT void Moments( \ + C10_EXPORT void Moments( \ const int num_dims, \ const int* dims, \ const int num_axes, \ @@ -1674,7 +1687,7 @@ CAFFE2_SPECIALIZED_INV_STD(float) #define CAFFE2_SPECIALIZED_ROWWISEMAX(T) \ template <> \ - CAFFE2_EXPORT void RowwiseMax( \ + C10_EXPORT void RowwiseMax( \ const int N, const int D, const T* x, T* y, CPUContext*) { \ EigenVectorMap(y, N) = \ ConstEigenMatrixMap(x, D, N).colwise().maxCoeff(); \ @@ -1684,7 +1697,7 @@ CAFFE2_SPECIALIZED_ROWWISEMAX(float) #define CAFFE2_SPECIALIZED_COLWISEMAX(T) \ template <> \ - CAFFE2_EXPORT void ColwiseMax( \ + C10_EXPORT void ColwiseMax( \ const int N, const int D, const T* x, T* y, CPUContext*) { \ EigenVectorMap(y, D) = \ ConstEigenMatrixMap(x, D, N).rowwise().maxCoeff(); \ @@ -1694,7 +1707,7 @@ CAFFE2_SPECIALIZED_COLWISEMAX(float) #define CAFFE2_SPECIALIZED_ELEMWISEMAX(T) \ template <> \ - CAFFE2_EXPORT void ElemwiseMax( \ + C10_EXPORT void ElemwiseMax( \ const int N, const T* x, const T* y, T* z, CPUContext* /*context*/) { \ std::transform(x, x + N, y, z, [](const T& x_i, const T& y_i) { \ return std::max(x_i, y_i); \ @@ -1705,7 +1718,7 @@ CAFFE2_SPECIALIZED_ELEMWISEMAX(float) #define 
CAFFE2_SPECIALIZED_MAXIMUM(T) \ template <> \ - CAFFE2_EXPORT void Maximum( \ + C10_EXPORT void Maximum( \ const int N, const float alpha, const T* x, T* y, CPUContext* context) { \ std::transform( \ x, x + N, y, [&alpha](const T& x_i) { return std::max(x_i, alpha); }); \ @@ -1718,7 +1731,7 @@ CAFFE2_SPECIALIZED_MAXIMUM(float) #define DELEGATE_EIGEN_2D_BROADCAST_1ST_BINARY_FUNCTION(T, Func, expr) \ template <> \ - CAFFE2_EXPORT void Rowwise##Func( \ + C10_EXPORT void Rowwise##Func( \ const int rows, \ const int cols, \ const T* A, \ @@ -1735,7 +1748,7 @@ CAFFE2_SPECIALIZED_MAXIMUM(float) } \ } \ template <> \ - CAFFE2_EXPORT void Colwise##Func( \ + C10_EXPORT void Colwise##Func( \ const int rows, \ const int cols, \ const T* A, \ @@ -1755,7 +1768,7 @@ CAFFE2_SPECIALIZED_MAXIMUM(float) #define DELEGATE_EIGEN_2D_BROADCAST_2ND_BINARY_FUNCTION(T, Func, expr) \ template <> \ - CAFFE2_EXPORT void Rowwise##Func( \ + C10_EXPORT void Rowwise##Func( \ const int rows, \ const int cols, \ const T* A, \ @@ -1772,7 +1785,7 @@ CAFFE2_SPECIALIZED_MAXIMUM(float) } \ } \ template <> \ - CAFFE2_EXPORT void Colwise##Func( \ + C10_EXPORT void Colwise##Func( \ const int rows, \ const int cols, \ const T* A, \ @@ -1808,7 +1821,7 @@ DEFINE_EIGEN_2D_BROADCAST_BINARY_FUNCTION(Mul, *) #define DEFINE_EIGEN_2D_BROADCAST_SUB_FUNCTION(T) \ template <> \ - CAFFE2_EXPORT void RowwiseSub( \ + C10_EXPORT void RowwiseSub( \ const int rows, \ const int cols, \ const T* A, \ @@ -1820,7 +1833,7 @@ DEFINE_EIGEN_2D_BROADCAST_BINARY_FUNCTION(Mul, *) ConstEigenVectorArrayMap(A, cols); \ } \ template <> \ - CAFFE2_EXPORT void ColwiseSub( \ + C10_EXPORT void ColwiseSub( \ const int rows, \ const int cols, \ const T* A, \ @@ -1842,7 +1855,7 @@ DEFINE_EIGEN_2D_BROADCAST_SUB_FUNCTION(std::int64_t) #define DEFINE_EIGEN_2D_BROADCAST_DIV_FUNCTION(T) \ template <> \ - CAFFE2_EXPORT void RowwiseDiv( \ + C10_EXPORT void RowwiseDiv( \ const int rows, \ const int cols, \ const T* A, \ @@ -1854,7 +1867,7 @@ DEFINE_EIGEN_2D_BROADCAST_SUB_FUNCTION(std::int64_t) ConstEigenVectorArrayMap(A, cols); \ } \ template <> \ - CAFFE2_EXPORT void ColwiseDiv( \ + C10_EXPORT void ColwiseDiv( \ const int rows, \ const int cols, \ const T* A, \ @@ -1878,7 +1891,7 @@ DELEGATE_EIGEN_2D_BROADCAST_2ND_BINARY_FUNCTION(std::int64_t, Div, /) #undef DELEGATE_EIGEN_2D_BROADCAST_2ND_BINARY_FUNCTION template <> -CAFFE2_EXPORT void Not( +C10_EXPORT void Not( const int N, const bool* x, bool* y, @@ -1893,7 +1906,7 @@ CAFFE2_EXPORT void Not( #define CAFFE2_SPECIALIZED_CPU_ADD_STRIPED_BATCH(T) \ template <> \ - CAFFE2_EXPORT void AddStripedBatch( \ + C10_EXPORT void AddStripedBatch( \ const int N, \ const T* first, \ T* y, \ @@ -1911,7 +1924,7 @@ CAFFE2_SPECIALIZED_CPU_ADD_STRIPED_BATCH(float); namespace { template -CAFFE2_EXPORT void RowwiseBinaryOp( +C10_EXPORT void RowwiseBinaryOp( const int rows, const int cols, const BinaryOperator& op, @@ -1929,7 +1942,7 @@ CAFFE2_EXPORT void RowwiseBinaryOp( } template -CAFFE2_EXPORT void ColwiseBinaryOp( +C10_EXPORT void ColwiseBinaryOp( const int rows, const int cols, const BinaryOperator& op, @@ -1947,7 +1960,7 @@ CAFFE2_EXPORT void ColwiseBinaryOp( } template -CAFFE2_EXPORT void BroadcastBinaryOpImpl( +C10_EXPORT void BroadcastBinaryOpImpl( const int ndim, const int* A_dims, const int* B_dims, @@ -1971,7 +1984,7 @@ CAFFE2_EXPORT void BroadcastBinaryOpImpl( #define DELEGATE_1D_BINARY_FUNCTION(TIn, TOut, Func, Op) \ template <> \ - CAFFE2_EXPORT void Func( \ + C10_EXPORT void Func( \ const int N, const TIn* A, const TIn* B, TOut* C, 
CPUContext*) { \ std::transform(A, A + N, B, C, Op()); \ } @@ -2011,7 +2024,7 @@ DEFINE_1D_BITWISE_BINARY_FUNCTION(BitwiseXor, std::bit_xor) #define DELEGATE_2D_BROADCAST_BINARY_FUNCTION(TIn, TOut, Func, Op) \ template <> \ - CAFFE2_EXPORT void Rowwise##Func( \ + C10_EXPORT void Rowwise##Func( \ const int rows, \ const int cols, \ const TIn* A, \ @@ -2021,7 +2034,7 @@ DEFINE_1D_BITWISE_BINARY_FUNCTION(BitwiseXor, std::bit_xor) RowwiseBinaryOp, true>(rows, cols, Op(), A, B, C); \ } \ template <> \ - CAFFE2_EXPORT void Rowwise##Func( \ + C10_EXPORT void Rowwise##Func( \ const int rows, \ const int cols, \ const TIn* A, \ @@ -2032,7 +2045,7 @@ DEFINE_1D_BITWISE_BINARY_FUNCTION(BitwiseXor, std::bit_xor) rows, cols, Op(), A, B, C); \ } \ template <> \ - CAFFE2_EXPORT void Colwise##Func( \ + C10_EXPORT void Colwise##Func( \ const int rows, \ const int cols, \ const TIn* A, \ @@ -2042,7 +2055,7 @@ DEFINE_1D_BITWISE_BINARY_FUNCTION(BitwiseXor, std::bit_xor) ColwiseBinaryOp, true>(rows, cols, Op(), A, B, C); \ } \ template <> \ - CAFFE2_EXPORT void Colwise##Func( \ + C10_EXPORT void Colwise##Func( \ const int rows, \ const int cols, \ const TIn* A, \ @@ -2086,28 +2099,28 @@ DEFINE_2D_BROADCAST_BITWISE_BINARY_FUNCTION(BitwiseXor, std::bit_xor) #undef DELEGATE_2D_BROADCAST_BINARY_FUNCTION -#define DEFINE_2D_BROADCAST_1ST_DIV_FUNCTION(T) \ - template <> \ - CAFFE2_EXPORT void RowwiseDiv( \ - const int rows, \ - const int cols, \ - const T* A, \ - const T* B, \ - T* C, \ - CPUContext*) { \ - RowwiseBinaryOp, true>( \ - rows, cols, std::divides(), A, B, C); \ - } \ - template <> \ - CAFFE2_EXPORT void ColwiseDiv( \ - const int rows, \ - const int cols, \ - const T* A, \ - const T* B, \ - T* C, \ - CPUContext*) { \ - ColwiseBinaryOp, true>( \ - rows, cols, std::divides(), A, B, C); \ +#define DEFINE_2D_BROADCAST_1ST_DIV_FUNCTION(T) \ + template <> \ + C10_EXPORT void RowwiseDiv( \ + const int rows, \ + const int cols, \ + const T* A, \ + const T* B, \ + T* C, \ + CPUContext*) { \ + RowwiseBinaryOp, true>( \ + rows, cols, std::divides(), A, B, C); \ + } \ + template <> \ + C10_EXPORT void ColwiseDiv( \ + const int rows, \ + const int cols, \ + const T* A, \ + const T* B, \ + T* C, \ + CPUContext*) { \ + ColwiseBinaryOp, true>( \ + rows, cols, std::divides(), A, B, C); \ } DEFINE_2D_BROADCAST_1ST_DIV_FUNCTION(std::int32_t) DEFINE_2D_BROADCAST_1ST_DIV_FUNCTION(std::int64_t) @@ -2115,7 +2128,7 @@ DEFINE_2D_BROADCAST_1ST_DIV_FUNCTION(std::int64_t) #define DELEGATE_BROADCAST_BINARY_FUNCTION(TIn, TOut, Func, Op) \ template <> \ - CAFFE2_EXPORT void Func( \ + C10_EXPORT void Func( \ const int A_ndim, \ const int* A_dims, \ const int B_ndim, \ @@ -2258,7 +2271,7 @@ DEFINE_BROADCAST_BITWISE_BINARY_FUNCTION(BitwiseXor, std::bit_xor) #define CAFFE2_RAND_UNIFORM_REAL(T) \ template <> \ - CAFFE2_EXPORT void RandUniform( \ + C10_EXPORT void RandUniform( \ const size_t n, const T a, const T b, T* r, CPUContext* context) { \ std::uniform_real_distribution distribution(a, b); \ for (size_t i = 0; i < n; ++i) { \ @@ -2271,7 +2284,7 @@ CAFFE2_RAND_UNIFORM_REAL(double); #define CAFFE2_RAND_UNIFORM_CHAR(T) \ template <> \ - CAFFE2_EXPORT void RandUniform( \ + C10_EXPORT void RandUniform( \ const size_t n, const T a, const T b, T* r, CPUContext* context) { \ std::uniform_int_distribution distribution((short)a, (short)b); \ for (size_t i = 0; i < n; ++i) { \ @@ -2284,7 +2297,7 @@ CAFFE2_RAND_UNIFORM_CHAR(uint8_t); #define CAFFE2_RAND_UNIFORM_INT(T) \ template <> \ - CAFFE2_EXPORT void RandUniform( \ + C10_EXPORT void 
RandUniform( \ const size_t n, const T a, const T b, T* r, CPUContext* context) { \ std::uniform_int_distribution distribution(a, b); \ for (size_t i = 0; i < n; ++i) { \ @@ -2310,7 +2323,7 @@ CAFFE2_RAND_UNIFORM_INT(uint64_t); // each value. #define CAFFE2_RAND_FIXED_SUM(T) \ template <> \ - CAFFE2_EXPORT void RandFixedSum( \ + C10_EXPORT void RandFixedSum( \ const size_t n, \ const T a, \ const T b, \ @@ -2404,7 +2417,7 @@ Ind_t generate_stack_distance( } template -CAFFE2_EXPORT void generate_trace_lru( +C10_EXPORT void generate_trace_lru( std::vector& uni_ref, std::vector& cum_val, std::vector& cum_dis, @@ -2481,7 +2494,7 @@ CAFFE2_EXPORT void generate_trace_lru( // case we need to know the table id, to sample from the right distribution #define CAFFE2_RAND_SYNTHETIC_DATA(T) \ template <> \ - CAFFE2_EXPORT void RandSyntheticData( \ + C10_EXPORT void RandSyntheticData( \ const size_t n, const T a, const T b, T* r, CPUContext* context) { \ /* unique memory references */ \ std::vector mem_ref = {1, 2, 3, 4, 5, 6}; \ @@ -2518,32 +2531,33 @@ CAFFE2_RAND_SYNTHETIC_DATA(uint32_t); CAFFE2_RAND_SYNTHETIC_DATA(uint64_t); #undef CAFFE2_RAND_SYNTHETIC_DATA -#define CAFFE2_SPECIALIZED_RAND_UNIFORM_UNIQUE(T) \ - template <> \ - CAFFE2_EXPORT void RandUniformUnique( \ - const size_t n, \ - const T a, \ - const T b, \ - T* r, \ - const size_t m, \ - const T* avoid, \ - CPUContext* context) { \ - CAFFE_ENFORCE_LE( \ - n, b - a - m + 1, "Cannot satisfy the unique requirement"); \ - std::unordered_set avoid_set(n); \ - if (m) { \ - avoid_set.insert(avoid, avoid + m); \ - CAFFE_ENFORCE_EQ(m, avoid_set.size(), "ACAFFE2_EXPORT void should be unique"); \ - } \ - std::uniform_int_distribution distribution(a, b); \ - T v = 0; \ - for (size_t i = 0; i < n; ++i) { \ - do { \ - v = distribution(context->RandGenerator()); \ - } while (avoid_set.count(v)); \ - r[i] = v; \ - avoid_set.insert(v); \ - } \ +#define CAFFE2_SPECIALIZED_RAND_UNIFORM_UNIQUE(T) \ + template <> \ + C10_EXPORT void RandUniformUnique( \ + const size_t n, \ + const T a, \ + const T b, \ + T* r, \ + const size_t m, \ + const T* avoid, \ + CPUContext* context) { \ + CAFFE_ENFORCE_LE( \ + n, b - a - m + 1, "Cannot satisfy the unique requirement"); \ + std::unordered_set avoid_set(n); \ + if (m) { \ + avoid_set.insert(avoid, avoid + m); \ + CAFFE_ENFORCE_EQ( \ + m, avoid_set.size(), "AC10_EXPORT void should be unique"); \ + } \ + std::uniform_int_distribution distribution(a, b); \ + T v = 0; \ + for (size_t i = 0; i < n; ++i) { \ + do { \ + v = distribution(context->RandGenerator()); \ + } while (avoid_set.count(v)); \ + r[i] = v; \ + avoid_set.insert(v); \ + } \ } CAFFE2_SPECIALIZED_RAND_UNIFORM_UNIQUE(int32_t); @@ -2551,7 +2565,7 @@ CAFFE2_SPECIALIZED_RAND_UNIFORM_UNIQUE(int64_t); #undef CAFFE2_SPECIALIZED_RAND_UNIFORM_UNIQUE template <> -CAFFE2_EXPORT void RandGaussian( +C10_EXPORT void RandGaussian( const size_t n, const float mean, const float std, @@ -2565,7 +2579,7 @@ CAFFE2_EXPORT void RandGaussian( #define CAFFE2_SPECIALIZED_SUM(T) \ template <> \ - CAFFE2_EXPORT void Sum( \ + C10_EXPORT void Sum( \ const int N, \ const T* x, \ T* y, \ @@ -2581,7 +2595,7 @@ CAFFE2_SPECIALIZED_SUM(int64_t); #undef CAFFE2_SPECIALIZED_SUM template <> -CAFFE2_EXPORT void SumSqr( +C10_EXPORT void SumSqr( const int N, const float* x, float* y, @@ -2591,7 +2605,7 @@ CAFFE2_EXPORT void SumSqr( } template <> -CAFFE2_EXPORT void Select( +C10_EXPORT void Select( const int N, const int D, const float* x, @@ -2605,7 +2619,7 @@ CAFFE2_EXPORT void Select( } template <> 
-CAFFE2_EXPORT void CopyMatrix( +C10_EXPORT void CopyMatrix( const size_t itemsize, const int M, const int N, @@ -2648,7 +2662,7 @@ CAFFE2_EXPORT void CopyMatrix( #define DELEGATE_COPY_MATRIX_FUNCTION(T, Func) \ template <> \ - CAFFE2_EXPORT void CopyMatrix( \ + C10_EXPORT void CopyMatrix( \ const int M, \ const int N, \ const T* A, \ @@ -2659,7 +2673,7 @@ CAFFE2_EXPORT void CopyMatrix( Func('R', 'N', M, N, T(1), A, lda, B, ldb); \ } \ template <> \ - CAFFE2_EXPORT void CopyMatrix( \ + C10_EXPORT void CopyMatrix( \ const int M, \ const int N, \ const T* A, \ @@ -2690,7 +2704,7 @@ DELEGATE_COPY_MATRIX_FUNCTION(double, mkl_domatcopy) #define CAFFE2_SPECIALIZED_COPY_MATRIX(T) \ template <> \ - CAFFE2_EXPORT void CopyMatrix( \ + C10_EXPORT void CopyMatrix( \ const int M, \ const int N, \ const T* A, \ @@ -2720,7 +2734,7 @@ DELEGATE_COPY_MATRIX_FUNCTION(double, mkl_domatcopy) } \ } \ template <> \ - CAFFE2_EXPORT void CopyMatrix( \ + C10_EXPORT void CopyMatrix( \ const int M, \ const int N, \ const T* A, \ @@ -2759,7 +2773,7 @@ CAFFE2_SPECIALIZED_COPY_MATRIX(std::uint16_t) namespace { template -CAFFE2_EXPORT void Im2ColZeroPaddingAndNoDilationNCHW( +C10_EXPORT void Im2ColZeroPaddingAndNoDilationNCHW( const int C, const int H, const int W, @@ -2806,7 +2820,7 @@ CAFFE2_EXPORT void Im2ColZeroPaddingAndNoDilationNCHW( } template -CAFFE2_EXPORT void Col2ImZeroPaddingAndNoDilationNCHW( +C10_EXPORT void Col2ImZeroPaddingAndNoDilationNCHW( const int C, const int H, const int W, @@ -2842,7 +2856,7 @@ CAFFE2_EXPORT void Col2ImZeroPaddingAndNoDilationNCHW( } template -CAFFE2_EXPORT void Im2ColZeroPaddingAndNoDilationNHWC( +C10_EXPORT void Im2ColZeroPaddingAndNoDilationNHWC( const int C, const int H, const int W, @@ -2867,7 +2881,7 @@ CAFFE2_EXPORT void Im2ColZeroPaddingAndNoDilationNHWC( } template -CAFFE2_EXPORT void Col2ImZeroPaddingAndNoDilationNHWC( +C10_EXPORT void Col2ImZeroPaddingAndNoDilationNHWC( const int C, const int H, const int W, @@ -2894,7 +2908,7 @@ CAFFE2_EXPORT void Col2ImZeroPaddingAndNoDilationNHWC( } template -CAFFE2_EXPORT void Im2ColNdNCHWImpl( +C10_EXPORT void Im2ColNdNCHWImpl( const int N, const int img_size, const int col_size, @@ -2950,7 +2964,7 @@ CAFFE2_EXPORT void Im2ColNdNCHWImpl( } // namespace template <> -CAFFE2_EXPORT void Im2ColNd( +C10_EXPORT void Im2ColNd( const int N, const int img_size, const int col_size, @@ -2978,7 +2992,7 @@ CAFFE2_EXPORT void Im2ColNd( } template <> -CAFFE2_EXPORT void Col2ImNd( +C10_EXPORT void Col2ImNd( const int N, const int img_size, const int col_size, @@ -3006,7 +3020,7 @@ CAFFE2_EXPORT void Col2ImNd( } template <> -CAFFE2_EXPORT void Im2Col( +C10_EXPORT void Im2Col( const int C, const int H, const int W, @@ -3072,7 +3086,7 @@ CAFFE2_EXPORT void Im2Col( } template <> -CAFFE2_EXPORT void Im2Col( +C10_EXPORT void Im2Col( const int C, const int H, const int W, @@ -3172,7 +3186,7 @@ CAFFE2_EXPORT void Im2Col( } template <> -CAFFE2_EXPORT void Col2Im( +C10_EXPORT void Col2Im( const int C, const int H, const int W, @@ -3239,7 +3253,7 @@ CAFFE2_EXPORT void Col2Im( } template <> -CAFFE2_EXPORT void Col2Im( +C10_EXPORT void Col2Im( const int C, const int H, const int W, @@ -3335,7 +3349,7 @@ CAFFE2_EXPORT void Col2Im( } template <> -CAFFE2_EXPORT void BiasCHW( +C10_EXPORT void BiasCHW( const float* bias, const float* /*bias_multiplier*/, const int bias_channels, @@ -3420,7 +3434,7 @@ CAFFE2_EXPORT void BiasCHW( #define CAFFE2_SPECIALIZED_COPYVECTOR(T) \ template <> \ - CAFFE2_EXPORT void CopyVector( \ + C10_EXPORT void CopyVector( \ const int N, 
const T* src, T* dst, CPUContext* /*context*/) { \ if (src != dst && N > 0) { \ memcpy(dst, src, sizeof(T) * N); \ @@ -3633,7 +3647,7 @@ void TransposeCPUImpl( #define CAFFE2_SPECIALIZED_TRANSPOSE(T) \ template <> \ - CAFFE2_EXPORT void Transpose( \ + C10_EXPORT void Transpose( \ const int ndim, \ const int* dims, \ const int* axes, \ diff --git a/caffe2/utils/math_gpu_test.cc b/caffe2/utils/math_gpu_test.cc index 9be1c3db6c1d0..4b0247a0786fc 100644 --- a/caffe2/utils/math_gpu_test.cc +++ b/caffe2/utils/math_gpu_test.cc @@ -41,9 +41,9 @@ void executeGpuBinaryOpTest( Blob* bloby = ws.CreateBlob("Y"); Blob* bloby_host = ws.CreateBlob("Y_host"); - auto* tensorx0 = blobx0->GetMutableTensor(CUDA); - auto* tensorx1 = blobx1->GetMutableTensor(CUDA); - auto* tensory = bloby->GetMutableTensor(CUDA); + auto* tensorx0 = BlobGetMutableTensor(blobx0, CUDA); + auto* tensorx1 = BlobGetMutableTensor(blobx1, CUDA); + auto* tensory = BlobGetMutableTensor(bloby, CUDA); vector shapex0_vector{shapex0}; vector shapex1_vector{shapex1}; @@ -71,7 +71,7 @@ void executeGpuBinaryOpTest( context.FinishDeviceComputation(); // Copy result to CPU so we can inspect it - auto* tensory_host = bloby_host->GetMutableTensor(CPU); + auto* tensory_host = BlobGetMutableTensor(bloby_host, CPU); tensory_host->CopyFrom(*tensory, &context); context.FinishDeviceComputation(); @@ -94,7 +94,7 @@ TEST(MathUtilGPUTest, testAddStripedBatch) { vector shapex{33 * 9, 25}; vector shapey{33, 25}; - auto* tensorx = blobx->GetMutableTensor(CUDA); + auto* tensorx = BlobGetMutableTensor(blobx, CUDA); tensorx->Resize(shapex); int stripe = 33 * 25; vector tot(33, 0.0); @@ -110,7 +110,7 @@ TEST(MathUtilGPUTest, testAddStripedBatch) { } } - auto* tensory = bloby->GetMutableTensor(CUDA); + auto* tensory = BlobGetMutableTensor(bloby, CUDA); tensory->Resize(shapey); math::Set( stripe, 0.0, tensory->mutable_data(), &context); @@ -125,7 +125,7 @@ TEST(MathUtilGPUTest, testAddStripedBatch) { context.FinishDeviceComputation(); // Copy result to CPU so we can inspect it - auto* tensory_host = bloby_host->GetMutableTensor(CPU); + auto* tensory_host = BlobGetMutableTensor(bloby_host, CPU); tensory_host->CopyFrom(*tensory, &context); context.FinishDeviceComputation(); @@ -258,9 +258,9 @@ class GemmBatchedGPUTest Blob* X_blob = ws_.CreateBlob("X"); Blob* W_blob = ws_.CreateBlob("W"); Blob* Y_blob = ws_.CreateBlob("Y"); - X_ = X_blob->GetMutableTensor(CUDA); - W_ = W_blob->GetMutableTensor(CUDA); - Y_ = Y_blob->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(X_blob, CUDA); + W_ = BlobGetMutableTensor(W_blob, CUDA); + Y_ = BlobGetMutableTensor(Y_blob, CUDA); X_->Resize(std::vector{3, 5, 10}); W_->Resize(std::vector{3, 6, 10}); Y_->Resize(std::vector{3, 5, 6}); @@ -381,8 +381,8 @@ class ReduceTensorGPUTest : public testing::Test { cuda_context_ = make_unique(option_); Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_y = ws_.CreateBlob("Y"); - X_ = blob_x->GetMutableTensor(CUDA); - Y_ = blob_y->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + Y_ = BlobGetMutableTensor(blob_y, CUDA); } void SetUpData( @@ -402,7 +402,7 @@ class ReduceTensorGPUTest : public testing::Test { void VerifyResult(const std::vector& expected_output) { Blob* blob_y_host = ws_.CreateBlob("Y_host"); - auto* Y_host = blob_y_host->GetMutableTensor(CPU); + auto* Y_host = BlobGetMutableTensor(blob_y_host, CPU); Y_host->CopyFrom(*Y_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); ASSERT_EQ(expected_output.size(), Y_host->size()); @@ -664,8 +664,8 @@ class 
BroadcastGPUTest : public testing::Test { cuda_context_ = make_unique(option_); Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_y = ws_.CreateBlob("Y"); - X_ = blob_x->GetMutableTensor(CUDA); - Y_ = blob_y->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + Y_ = BlobGetMutableTensor(blob_y, CUDA); } void SetUpData( @@ -681,7 +681,7 @@ class BroadcastGPUTest : public testing::Test { void VerifyResult(const std::vector& expected_output) { Blob* blob_y_host = ws_.CreateBlob("Y_host"); - auto* Y_host = blob_y_host->GetMutableTensor(CPU); + auto* Y_host = BlobGetMutableTensor(blob_y_host, CPU); Y_host->CopyFrom(*Y_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); ASSERT_EQ(expected_output.size(), Y_host->size()); @@ -741,9 +741,9 @@ class MomentsGPUTest : public testing::Test { Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_mean = ws_.CreateBlob("mean"); Blob* blob_variance = ws_.CreateBlob("variance"); - X_ = blob_x->GetMutableTensor(CUDA); - mean_ = blob_mean->GetMutableTensor(CUDA); - variance_ = blob_variance->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + mean_ = BlobGetMutableTensor(blob_mean, CUDA); + variance_ = BlobGetMutableTensor(blob_variance, CUDA); } void SetUpData( @@ -766,10 +766,10 @@ class MomentsGPUTest : public testing::Test { const std::vector& mean_data, const std::vector& variance_data) { Blob* blob_mean_host = ws_.CreateBlob("mean_host"); - auto* mean_host = blob_mean_host->GetMutableTensor(CPU); + auto* mean_host = BlobGetMutableTensor(blob_mean_host, CPU); mean_host->CopyFrom(*mean_, cuda_context_.get()); Blob* blob_variance_host = ws_.CreateBlob("variance_host"); - auto* variance_host = blob_variance_host->GetMutableTensor(CPU); + auto* variance_host = BlobGetMutableTensor(blob_variance_host, CPU); variance_host->CopyFrom(*variance_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); @@ -868,8 +868,8 @@ class TransposeGPUTest : public testing::Test { cuda_context_ = make_unique(option_); Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_y = ws_.CreateBlob("Y"); - X_ = blob_x->GetMutableTensor(CUDA); - Y_ = blob_y->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + Y_ = BlobGetMutableTensor(blob_y, CUDA); } void SetUpData( @@ -890,7 +890,7 @@ class TransposeGPUTest : public testing::Test { void VerifyResult(const std::vector& expected_output) { Blob* blob_y_host = ws_.CreateBlob("Y_host"); - auto* Y_host = blob_y_host->GetMutableTensor(CPU); + auto* Y_host = BlobGetMutableTensor(blob_y_host, CPU); Y_host->CopyFrom(*Y_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); ASSERT_EQ(expected_output.size(), Y_host->size()); diff --git a/caffe2/utils/proto_convert.cc b/caffe2/utils/proto_convert.cc index 24984203bcb81..790bd274291dc 100644 --- a/caffe2/utils/proto_convert.cc +++ b/caffe2/utils/proto_convert.cc @@ -3,7 +3,7 @@ namespace caffe2 { -CAFFE2_EXPORT void ArgumentToAttributeProto( +C10_EXPORT void ArgumentToAttributeProto( const Argument& arg, ::torch::AttributeProto* attr) { CAFFE_ENFORCE(arg.has_name()); @@ -29,7 +29,7 @@ CAFFE2_EXPORT void ArgumentToAttributeProto( } } -CAFFE2_EXPORT void AttributeProtoToArgument( +C10_EXPORT void AttributeProtoToArgument( const ::torch::AttributeProto& attr, Argument* arg) { CAFFE_ENFORCE(attr.has_name()); @@ -94,7 +94,7 @@ CAFFE2_EXPORT void AttributeProtoToArgument( } } -CAFFE2_EXPORT void OperatorDefToNodeProto( +C10_EXPORT void OperatorDefToNodeProto( const OperatorDef& def, ::torch::NodeProto* node) { 
node->mutable_input()->CopyFrom(def.input()); @@ -141,7 +141,7 @@ CAFFE2_EXPORT void OperatorDefToNodeProto( } } -CAFFE2_EXPORT void NodeProtoToOperatorDef( +C10_EXPORT void NodeProtoToOperatorDef( const ::torch::NodeProto& node, OperatorDef* def) { def->mutable_input()->CopyFrom(node.input()); diff --git a/caffe2/utils/proto_utils.cc b/caffe2/utils/proto_utils.cc index 1daacff3eda2f..dc8e088eba97c 100644 --- a/caffe2/utils/proto_utils.cc +++ b/caffe2/utils/proto_utils.cc @@ -21,11 +21,11 @@ using ::google::protobuf::MessageLite; namespace caffe2 { -CAFFE2_EXPORT std::string DeviceTypeName(const int32_t& d) { +C10_EXPORT std::string DeviceTypeName(const int32_t& d) { return at::DeviceTypeName(static_cast(d)); } -CAFFE2_EXPORT int DeviceId(const DeviceOption& option) { +C10_EXPORT int DeviceId(const DeviceOption& option) { switch (option.device_type()) { case PROTO_CPU: return option.numa_node_id(); @@ -40,7 +40,7 @@ CAFFE2_EXPORT int DeviceId(const DeviceOption& option) { } } -CAFFE2_EXPORT bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs) { +C10_EXPORT bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs) { return ( lhs.device_type() == rhs.device_type() && lhs.cuda_gpu_id() == rhs.cuda_gpu_id() && @@ -49,7 +49,7 @@ CAFFE2_EXPORT bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs lhs.numa_node_id() == rhs.numa_node_id()); } -CAFFE2_EXPORT bool ReadStringFromFile(const char* filename, string* str) { +C10_EXPORT bool ReadStringFromFile(const char* filename, string* str) { std::ifstream ifs(filename, std::ios::in); if (!ifs) { VLOG(1) << "File cannot be opened: " << filename @@ -64,7 +64,7 @@ CAFFE2_EXPORT bool ReadStringFromFile(const char* filename, string* str) { return true; } -CAFFE2_EXPORT bool WriteStringToFile(const string& str, const char* filename) { +C10_EXPORT bool WriteStringToFile(const string& str, const char* filename) { std::ofstream ofs(filename, std::ios::out | std::ios::trunc); if (!ofs.is_open()) { VLOG(1) << "File cannot be created: " << filename @@ -102,11 +102,13 @@ class IfstreamInputStream : public ::google::protobuf::io::CopyingInputStream { }; } // namespace -CAFFE2_EXPORT string ProtoDebugString(const MessageLite& proto) { +C10_EXPORT string ProtoDebugString(const MessageLite& proto) { return proto.SerializeAsString(); } -CAFFE2_EXPORT bool ParseProtoFromLargeString(const string& str, MessageLite* proto) { +C10_EXPORT bool ParseProtoFromLargeString( + const string& str, + MessageLite* proto) { ::google::protobuf::io::ArrayInputStream input_stream(str.data(), str.size()); ::google::protobuf::io::CodedInputStream coded_stream(&input_stream); // Set PlanDef message size limit to 2G. 
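Editor's note on the export macros: the hunks above and below this point mechanically rename CAFFE2_EXPORT to C10_EXPORT, i.e. symbol-visibility annotations now come from the shared c10 macro layer instead of per-library definitions. For readers unfamiliar with the pattern, here is a minimal, hedged sketch of what such an export/import macro pair typically expands to; the real definitions live in c10/macros/Macros.h and cover more platforms and build modes, so the exact conditions below are illustrative assumptions, not the actual implementation. The new modules/observers/macros.h added later in this diff shows how a per-library *_API macro is then derived from the pair.

    // Sketch only: assumed platform split; the real macros are in c10/macros/Macros.h.
    #if defined(_WIN32)
    #  define C10_EXPORT __declspec(dllexport)   // building the DLL: export the symbol
    #  define C10_IMPORT __declspec(dllimport)   // consuming the DLL: import the symbol
    #else
    // On GCC/Clang, exported symbols are marked default-visible so they survive
    // -fvisibility=hidden builds; importing needs no special annotation.
    #  define C10_EXPORT __attribute__((__visibility__("default")))
    #  define C10_IMPORT C10_EXPORT
    #endif

    // A library-specific API macro (e.g. the CAFFE2_OBSERVER_API introduced in
    // modules/observers/macros.h below) then selects export when compiling that
    // library and import everywhere else:
    #ifdef CAFFE2_BUILD_OBSERVER_LIB
    #  define CAFFE2_OBSERVER_API C10_EXPORT
    #else
    #  define CAFFE2_OBSERVER_API C10_IMPORT
    #endif

This is why every separate shared library needs its own *_API macro: whether a given symbol is exported or imported depends on which library is currently being built.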
@@ -114,7 +116,9 @@ CAFFE2_EXPORT bool ParseProtoFromLargeString(const string& str, MessageLite* pro return proto->ParseFromCodedStream(&coded_stream); } -CAFFE2_EXPORT bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { +C10_EXPORT bool ReadProtoFromBinaryFile( + const char* filename, + MessageLite* proto) { ::google::protobuf::io::CopyingInputStreamAdaptor stream( new IfstreamInputStream(filename)); stream.SetOwnsCopyingStream(true); @@ -125,7 +129,7 @@ CAFFE2_EXPORT bool ReadProtoFromBinaryFile(const char* filename, MessageLite* pr return proto->ParseFromCodedStream(&coded_stream); } -CAFFE2_EXPORT void WriteProtoToBinaryFile( +C10_EXPORT void WriteProtoToBinaryFile( const MessageLite& /*proto*/, const char* /*filename*/) { LOG(FATAL) << "Not implemented yet."; @@ -144,16 +148,16 @@ using ::google::protobuf::io::CodedOutputStream; using ::google::protobuf::Message; namespace TextFormat { -CAFFE2_EXPORT bool ParseFromString(const string& spec, Message* proto) { +C10_EXPORT bool ParseFromString(const string& spec, Message* proto) { return ::google::protobuf::TextFormat::ParseFromString(spec, proto); } } // namespace TextFormat -CAFFE2_EXPORT string ProtoDebugString(const Message& proto) { +C10_EXPORT string ProtoDebugString(const Message& proto) { return proto.ShortDebugString(); } -CAFFE2_EXPORT bool ParseProtoFromLargeString(const string& str, Message* proto) { +C10_EXPORT bool ParseProtoFromLargeString(const string& str, Message* proto) { ::google::protobuf::io::ArrayInputStream input_stream(str.data(), str.size()); ::google::protobuf::io::CodedInputStream coded_stream(&input_stream); // Set PlanDef message size limit to 2G. @@ -161,7 +165,7 @@ CAFFE2_EXPORT bool ParseProtoFromLargeString(const string& str, Message* proto) return proto->ParseFromCodedStream(&coded_stream); } -CAFFE2_EXPORT bool ReadProtoFromTextFile(const char* filename, Message* proto) { +C10_EXPORT bool ReadProtoFromTextFile(const char* filename, Message* proto) { int fd = open(filename, O_RDONLY); CAFFE_ENFORCE_NE(fd, -1, "File not found: ", filename); FileInputStream* input = new FileInputStream(fd); @@ -171,7 +175,9 @@ CAFFE2_EXPORT bool ReadProtoFromTextFile(const char* filename, Message* proto) { return success; } -CAFFE2_EXPORT void WriteProtoToTextFile(const Message& proto, const char* filename) { +C10_EXPORT void WriteProtoToTextFile( + const Message& proto, + const char* filename) { int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); FileOutputStream* output = new FileOutputStream(fd); CAFFE_ENFORCE(google::protobuf::TextFormat::Print(proto, output)); @@ -179,7 +185,9 @@ CAFFE2_EXPORT void WriteProtoToTextFile(const Message& proto, const char* filena close(fd); } -CAFFE2_EXPORT bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { +C10_EXPORT bool ReadProtoFromBinaryFile( + const char* filename, + MessageLite* proto) { #if defined (_MSC_VER) // for MSC compiler binary flag needs to be specified int fd = open(filename, O_RDONLY | O_BINARY); #else @@ -198,7 +206,9 @@ CAFFE2_EXPORT bool ReadProtoFromBinaryFile(const char* filename, MessageLite* pr return success; } -CAFFE2_EXPORT void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) { +C10_EXPORT void WriteProtoToBinaryFile( + const MessageLite& proto, + const char* filename) { int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); CAFFE_ENFORCE_NE( fd, -1, "File cannot be created: ", filename, " error number: ", errno); @@ -213,8 +223,7 @@ CAFFE2_EXPORT void 
WriteProtoToBinaryFile(const MessageLite& proto, const char* #endif // CAFFE2_USE_LITE_PROTO - -CAFFE2_EXPORT ArgumentHelper::ArgumentHelper(const OperatorDef& def) { +C10_EXPORT ArgumentHelper::ArgumentHelper(const OperatorDef& def) { for (auto& arg : def.arg()) { if (arg_map_.count(arg.name())) { if (arg.SerializeAsString() != arg_map_[arg.name()].SerializeAsString()) { @@ -235,7 +244,7 @@ CAFFE2_EXPORT ArgumentHelper::ArgumentHelper(const OperatorDef& def) { } } -CAFFE2_EXPORT ArgumentHelper::ArgumentHelper(const NetDef& netdef) { +C10_EXPORT ArgumentHelper::ArgumentHelper(const NetDef& netdef) { for (auto& arg : netdef.arg()) { CAFFE_ENFORCE( arg_map_.count(arg.name()) == 0, @@ -245,7 +254,7 @@ CAFFE2_EXPORT ArgumentHelper::ArgumentHelper(const NetDef& netdef) { } } -CAFFE2_EXPORT bool ArgumentHelper::HasArgument(const string& name) const { +C10_EXPORT bool ArgumentHelper::HasArgument(const string& name) const { return arg_map_.count(name); } @@ -267,41 +276,42 @@ std::ostream& operator<<(std::ostream& output, const NetDef& n) { return output; } -#define INSTANTIATE_GET_SINGLE_ARGUMENT( \ - T, fieldname, enforce_lossless_conversion) \ - template <> \ - CAFFE2_EXPORT T ArgumentHelper::GetSingleArgument( \ - const string& name, const T& default_value) const { \ - if (arg_map_.count(name) == 0) { \ - VLOG(1) << "Using default parameter value " << default_value \ - << " for parameter " << name; \ - return default_value; \ - } \ - CAFFE_ENFORCE( \ - arg_map_.at(name).has_##fieldname(), \ - "Argument ", \ - name, \ - " does not have the right field: expected field " #fieldname); \ - auto value = arg_map_.at(name).fieldname(); \ - if (enforce_lossless_conversion) { \ - auto supportsConversion = \ - SupportsLosslessConversion(value); \ - CAFFE_ENFORCE( \ - supportsConversion, \ - "Value", \ - value, \ - " of argument ", \ - name, \ - "cannot be represented correctly in a target type"); \ - } \ - return static_cast(value); \ - } \ - template <> \ - CAFFE2_EXPORT bool ArgumentHelper::HasSingleArgumentOfType(const string& name) const { \ - if (arg_map_.count(name) == 0) { \ - return false; \ - } \ - return arg_map_.at(name).has_##fieldname(); \ +#define INSTANTIATE_GET_SINGLE_ARGUMENT( \ + T, fieldname, enforce_lossless_conversion) \ + template <> \ + C10_EXPORT T ArgumentHelper::GetSingleArgument( \ + const string& name, const T& default_value) const { \ + if (arg_map_.count(name) == 0) { \ + VLOG(1) << "Using default parameter value " << default_value \ + << " for parameter " << name; \ + return default_value; \ + } \ + CAFFE_ENFORCE( \ + arg_map_.at(name).has_##fieldname(), \ + "Argument ", \ + name, \ + " does not have the right field: expected field " #fieldname); \ + auto value = arg_map_.at(name).fieldname(); \ + if (enforce_lossless_conversion) { \ + auto supportsConversion = \ + SupportsLosslessConversion(value); \ + CAFFE_ENFORCE( \ + supportsConversion, \ + "Value", \ + value, \ + " of argument ", \ + name, \ + "cannot be represented correctly in a target type"); \ + } \ + return static_cast(value); \ + } \ + template <> \ + C10_EXPORT bool ArgumentHelper::HasSingleArgumentOfType( \ + const string& name) const { \ + if (arg_map_.count(name) == 0) { \ + return false; \ + } \ + return arg_map_.at(name).has_##fieldname(); \ } INSTANTIATE_GET_SINGLE_ARGUMENT(float, f, false) @@ -321,7 +331,7 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(NetDef, n, false) #define INSTANTIATE_GET_REPEATED_ARGUMENT( \ T, fieldname, enforce_lossless_conversion) \ template <> \ - CAFFE2_EXPORT vector 
ArgumentHelper::GetRepeatedArgument( \ + C10_EXPORT vector ArgumentHelper::GetRepeatedArgument( \ const string& name, const std::vector& default_value) const { \ if (arg_map_.count(name) == 0) { \ return default_value; \ @@ -358,14 +368,14 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings, false) INSTANTIATE_GET_REPEATED_ARGUMENT(NetDef, nets, false) #undef INSTANTIATE_GET_REPEATED_ARGUMENT -#define CAFFE2_MAKE_SINGULAR_ARGUMENT(T, fieldname) \ -template <> \ -CAFFE2_EXPORT Argument MakeArgument(const string& name, const T& value) { \ - Argument arg; \ - arg.set_name(name); \ - arg.set_##fieldname(value); \ - return arg; \ -} +#define CAFFE2_MAKE_SINGULAR_ARGUMENT(T, fieldname) \ + template <> \ + C10_EXPORT Argument MakeArgument(const string& name, const T& value) { \ + Argument arg; \ + arg.set_name(name); \ + arg.set_##fieldname(value); \ + return arg; \ + } CAFFE2_MAKE_SINGULAR_ARGUMENT(bool, i) CAFFE2_MAKE_SINGULAR_ARGUMENT(float, f) @@ -375,28 +385,29 @@ CAFFE2_MAKE_SINGULAR_ARGUMENT(string, s) #undef CAFFE2_MAKE_SINGULAR_ARGUMENT template <> -CAFFE2_EXPORT bool ArgumentHelper::RemoveArgument(OperatorDef& def, int index); +C10_EXPORT bool ArgumentHelper::RemoveArgument(OperatorDef& def, int index); template <> bool ArgumentHelper::RemoveArgument(NetDef& def, int index); template <> -CAFFE2_EXPORT Argument MakeArgument(const string& name, const MessageLite& value) { +C10_EXPORT Argument MakeArgument(const string& name, const MessageLite& value) { Argument arg; arg.set_name(name); arg.set_s(value.SerializeAsString()); return arg; } -#define CAFFE2_MAKE_REPEATED_ARGUMENT(T, fieldname) \ -template <> \ -CAFFE2_EXPORT Argument MakeArgument(const string& name, const vector& value) {\ - Argument arg; \ - arg.set_name(name); \ - for (const auto& v : value) { \ - arg.add_##fieldname(v); \ - } \ - return arg; \ -} +#define CAFFE2_MAKE_REPEATED_ARGUMENT(T, fieldname) \ + template <> \ + C10_EXPORT Argument MakeArgument( \ + const string& name, const vector& value) { \ + Argument arg; \ + arg.set_name(name); \ + for (const auto& v : value) { \ + arg.add_##fieldname(v); \ + } \ + return arg; \ + } CAFFE2_MAKE_REPEATED_ARGUMENT(float, floats) CAFFE2_MAKE_REPEATED_ARGUMENT(int, ints) @@ -404,7 +415,7 @@ CAFFE2_MAKE_REPEATED_ARGUMENT(int64_t, ints) CAFFE2_MAKE_REPEATED_ARGUMENT(string, strings) #undef CAFFE2_MAKE_REPEATED_ARGUMENT -CAFFE2_EXPORT bool HasOutput(const OperatorDef& op, const std::string& output) { +C10_EXPORT bool HasOutput(const OperatorDef& op, const std::string& output) { for (const auto& outp : op.output()) { if (outp == output) { return true; @@ -413,7 +424,7 @@ CAFFE2_EXPORT bool HasOutput(const OperatorDef& op, const std::string& output) { return false; } -CAFFE2_EXPORT bool HasInput(const OperatorDef& op, const std::string& input) { +C10_EXPORT bool HasInput(const OperatorDef& op, const std::string& input) { for (const auto& inp : op.input()) { if (inp == input) { return true; @@ -423,7 +434,7 @@ CAFFE2_EXPORT bool HasInput(const OperatorDef& op, const std::string& input) { } // Return the argument index or -1 if it does not exist. 
-CAFFE2_EXPORT int GetArgumentIndex( +C10_EXPORT int GetArgumentIndex( const google::protobuf::RepeatedPtrField& args, const string& name) { int index = 0; @@ -436,7 +447,9 @@ CAFFE2_EXPORT int GetArgumentIndex( return -1; } -CAFFE2_EXPORT const Argument& GetArgument(const OperatorDef& def, const string& name) { +C10_EXPORT const Argument& GetArgument( + const OperatorDef& def, + const string& name) { int index = GetArgumentIndex(def.arg(), name); if (index != -1) { return def.arg(index); @@ -449,7 +462,7 @@ CAFFE2_EXPORT const Argument& GetArgument(const OperatorDef& def, const string& } } -CAFFE2_EXPORT const Argument& GetArgument(const NetDef& def, const string& name) { +C10_EXPORT const Argument& GetArgument(const NetDef& def, const string& name) { int index = GetArgumentIndex(def.arg(), name); if (index != -1) { return def.arg(index); @@ -462,7 +475,7 @@ CAFFE2_EXPORT const Argument& GetArgument(const NetDef& def, const string& name) } } -CAFFE2_EXPORT bool GetFlagArgument( +C10_EXPORT bool GetFlagArgument( const google::protobuf::RepeatedPtrField& args, const string& name, bool default_value) { @@ -476,21 +489,19 @@ CAFFE2_EXPORT bool GetFlagArgument( return default_value; } -CAFFE2_EXPORT bool GetFlagArgument( +C10_EXPORT bool GetFlagArgument( const OperatorDef& def, const string& name, bool default_value) { return GetFlagArgument(def.arg(), name, default_value); } -CAFFE2_EXPORT bool GetFlagArgument( - const NetDef& def, - const string& name, - bool default_value) { +C10_EXPORT bool +GetFlagArgument(const NetDef& def, const string& name, bool default_value) { return GetFlagArgument(def.arg(), name, default_value); } -CAFFE2_EXPORT Argument* GetMutableArgument( +C10_EXPORT Argument* GetMutableArgument( const string& name, const bool create_if_missing, OperatorDef* def) { diff --git a/caffe2/utils/proto_utils.h b/caffe2/utils/proto_utils.h index dc7c365e86c9d..500ddf73434ab 100644 --- a/caffe2/utils/proto_utils.h +++ b/caffe2/utils/proto_utils.h @@ -194,7 +194,7 @@ CAFFE2_API bool HasInput(const OperatorDef& op, const std::string& input); * does not copy the operator def, so one would need to make sure that the * lifetime of the OperatorDef object outlives that of the ArgumentHelper. */ -class CAFFE2_EXPORT ArgumentHelper { +class C10_EXPORT ArgumentHelper { public: template static bool HasArgument(const Def& def, const string& name) { diff --git a/caffe2/utils/threadpool/WorkersPool.h b/caffe2/utils/threadpool/WorkersPool.h index 27b75d8ccd3a6..b2fc9f03b0777 100644 --- a/caffe2/utils/threadpool/WorkersPool.h +++ b/caffe2/utils/threadpool/WorkersPool.h @@ -360,7 +360,7 @@ class WorkersPool { counter_to_decrement_when_ready_.Wait(); } - AT_DISABLE_COPY_AND_ASSIGN(WorkersPool); + C10_DISABLE_COPY_AND_ASSIGN(WorkersPool); std::vector>> workers_; // The BlockingCounter used to wait for the workers. 
BlockingCounter counter_to_decrement_when_ready_; diff --git a/caffe2/utils/zmq_helper.h b/caffe2/utils/zmq_helper.h index cfd1d53a98af6..bd45be9192dca 100644 --- a/caffe2/utils/zmq_helper.h +++ b/caffe2/utils/zmq_helper.h @@ -26,7 +26,7 @@ class ZmqContext { private: void* ptr_; - AT_DISABLE_COPY_AND_ASSIGN(ZmqContext); + C10_DISABLE_COPY_AND_ASSIGN(ZmqContext); }; class ZmqMessage { @@ -48,7 +48,7 @@ class ZmqMessage { private: zmq_msg_t msg_; - AT_DISABLE_COPY_AND_ASSIGN(ZmqMessage); + C10_DISABLE_COPY_AND_ASSIGN(ZmqMessage); }; class ZmqSocket { diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 869a563d05a27..45e9c99c3265c 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -39,23 +39,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) endif() endif() -# ---[ git: used to generate git build string. -find_package(Git) -if(GIT_FOUND) - execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --always --dirty - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/.." - OUTPUT_VARIABLE CAFFE2_GIT_VERSION - RESULT_VARIABLE __git_result) - if(NOT ${__git_result} EQUAL 0) - set(CAFFE2_GIT_VERSION "unknown") - endif() -else() - message( - WARNING - "Cannot find git, so Caffe2 won't have any git build info available") -endif() - # ---[ BLAS if(NOT BUILD_ATEN_MOBILE) set(BLAS "MKL" CACHE STRING "Selected BLAS library") @@ -419,13 +402,15 @@ find_package(pybind11 CONFIG) if((DEFINED pybind11_DIR) AND pybind11_DIR) get_target_property(pybind11_INCLUDE_DIRS pybind11::pybind11 INTERFACE_INCLUDE_DIRECTORIES) else() - message("pybind11 config not found. Fallback to legacy find.") find_package(pybind11) endif() if(pybind11_FOUND) + message(STATUS "System pybind11 found") + message(STATUS "pybind11l include dirs: " ${pybind11_INCLUDE_DIRS}) include_directories(SYSTEM ${pybind11_INCLUDE_DIRS}) else() + message(STATUS "Using third_party/pybind11.") include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/pybind11/include) endif() diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake index b296e5f2e47ae..441a8e20cf068 100644 --- a/cmake/Modules/FindMKL.cmake +++ b/cmake/Modules/FindMKL.cmake @@ -314,15 +314,15 @@ if (USE_MKL AND USE_IDEEP) set(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep") set(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn") find_path(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include) - find_path(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) - if (NOT MKLDNN_INCLUDE_DIR) + find_path(MKLDNN_INCLUDE_DIR_HACK mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) + if (NOT MKLDNN_INCLUDE_DIR_HACK) execute_process(COMMAND git submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT}) - find_path(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) + find_path(MKLDNN_INCLUDE_DIR_HACK mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) endif() - if (MKLDNN_INCLUDE_DIR) - list(APPEND IDEEP_INCLUDE_DIR ${MKLDNN_INCLUDE_DIR}) - list(APPEND __ideep_looked_for MKLDNN_INCLUDE_DIR) + if (MKLDNN_INCLUDE_DIR_HACK) + list(APPEND IDEEP_INCLUDE_DIR ${MKLDNN_INCLUDE_DIR_HACK}) + list(APPEND __ideep_looked_for MKLDNN_INCLUDE_DIR_HACK) # to avoid adding conflicting submodels set(ORIG_WITH_TEST ${WITH_TEST}) set(WITH_TEST OFF) @@ -379,7 +379,7 @@ if (USE_MKL AND USE_IDEEP) endif() caffe_clear_vars(__ideep_looked_for __mklml_inner_libs) - endif() # MKLDNN_INCLUDE_DIR + endif() # MKLDNN_INCLUDE_DIR_HACK 
endif() # USE_IDEEP # Do nothing if MKL_FOUND was set before! diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 16d18ac7634d0..58eae123dd137 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -5,7 +5,6 @@ function (caffe2_print_configuration_summary) message(STATUS "General:") message(STATUS " CMake version : ${CMAKE_VERSION}") message(STATUS " CMake command : ${CMAKE_COMMAND}") - message(STATUS " Git version : ${CAFFE2_GIT_VERSION}") message(STATUS " System : ${CMAKE_SYSTEM_NAME}") message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") @@ -18,6 +17,8 @@ function (caffe2_print_configuration_summary) message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}") message(STATUS "") + message(STATUS " TORCH_VERSION : ${TORCH_VERSION}") + message(STATUS " CAFFE2_VERSION : ${CAFFE2_VERSION}") message(STATUS " BUILD_ATEN_MOBILE : ${BUILD_ATEN_MOBILE}") message(STATUS " BUILD_BINARY : ${BUILD_BINARY}") message(STATUS " BUILD_CUSTOM_PROTOBUF : ${BUILD_CUSTOM_PROTOBUF}") diff --git a/cmake/TorchConfig.cmake.in b/cmake/TorchConfig.cmake.in index 066a7e63f9c57..2b847815603a9 100644 --- a/cmake/TorchConfig.cmake.in +++ b/cmake/TorchConfig.cmake.in @@ -24,9 +24,13 @@ endif() # Include directories. if (EXISTS "${TORCH_INSTALL_PREFIX}/lib/include") - set(TORCH_INCLUDE_DIRS "${TORCH_INSTALL_PREFIX}/lib/include") + set(TORCH_INCLUDE_DIRS + ${TORCH_INSTALL_PREFIX}/lib/include + ${TORCH_INSTALL_PREFIX}/lib/include/torch/csrc/api/include) else() - set(TORCH_INCLUDE_DIRS "${TORCH_INSTALL_PREFIX}/include") + set(TORCH_INCLUDE_DIRS + ${TORCH_INSTALL_PREFIX}/include + ${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include) endif() # Library dependencies. @@ -45,7 +49,7 @@ if (@USE_CUDA@) set(TORCH_CUDA_LIBRARIES ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib ${CUDA_LIBRARIES}) - list(APPEND TORCH_INCLUDE_DIRS "${NVTOOLEXT_HOME}/include") + list(APPEND TORCH_INCLUDE_DIRS ${NVTOOLEXT_HOME}/include) elseif(APPLE) set(TORCH_CUDA_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib/libcudart.dylib @@ -66,8 +70,8 @@ endif() set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@") set_target_properties(torch PROPERTIES - IMPORTED_LOCATION ${TORCH_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${TORCH_INCLUDE_DIRS} - INTERFACE_COMPILE_OPTIONS ${TORCH_CXX_FLAGS} + IMPORTED_LOCATION "${TORCH_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${TORCH_INCLUDE_DIRS}" + INTERFACE_COMPILE_OPTIONS "${TORCH_CXX_FLAGS}" CXX_STANDARD 11 ) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index c212805a7b0dc..5505ae1f5c71b 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -113,6 +113,21 @@ function(caffe_parse_header_single_define LIBNAME HDR_PATH VARNAME) endif() endfunction() +################################################################################################ +# Parses a version string that might have values beyond major, minor, and patch +# and set version variables for the library. 
+# Usage: +# caffe2_parse_version_str( ) +function(caffe2_parse_version_str LIBNAME VERSIONSTR) + string(REGEX REPLACE "^([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${VERSIONSTR}") + string(REGEX REPLACE "^[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${VERSIONSTR}") + string(REGEX REPLACE "[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${VERSIONSTR}") + set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE) + set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE) + set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE) + set(${LIBNAME}_VERSION "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE) +endfunction() + ############################################################################## # Helper function to automatically generate __init__.py files where python # sources reside but there are no __init__.py present. diff --git a/modules/observers/macros.h b/modules/observers/macros.h new file mode 100644 index 0000000000000..e69b055d2a1d5 --- /dev/null +++ b/modules/observers/macros.h @@ -0,0 +1,7 @@ +#include "c10/macros/Macros.h" + +#ifdef CAFFE2_BUILD_OBSERVER_LIB +#define CAFFE2_OBSERVER_API C10_EXPORT +#else +#define CAFFE2_OBSERVER_API C10_IMPORT +#endif diff --git a/modules/observers/net_observer_reporter.h b/modules/observers/net_observer_reporter.h index 3650e4584f992..5619b69a636e7 100644 --- a/modules/observers/net_observer_reporter.h +++ b/modules/observers/net_observer_reporter.h @@ -4,6 +4,7 @@ #include "caffe2/core/common.h" #include "caffe2/core/net.h" +#include "observers/macros.h" namespace caffe2 { diff --git a/modules/observers/net_observer_reporter_print.h b/modules/observers/net_observer_reporter_print.h index eb712b8e71ea2..098a7f7573399 100644 --- a/modules/observers/net_observer_reporter_print.h +++ b/modules/observers/net_observer_reporter_print.h @@ -1,5 +1,6 @@ #pragma once +#include "observers/macros.h" #include "observers/net_observer_reporter.h" #include "caffe2/core/common.h" diff --git a/modules/observers/observer_config.h b/modules/observers/observer_config.h index e1a6b3a0ead8b..cc967263a66b9 100644 --- a/modules/observers/observer_config.h +++ b/modules/observers/observer_config.h @@ -1,5 +1,6 @@ #pragma once +#include "observers/macros.h" #include "observers/net_observer_reporter.h" #include "caffe2/core/common.h" diff --git a/modules/observers/perf_observer.h b/modules/observers/perf_observer.h index 6fb4063ffe480..11fb870a61961 100644 --- a/modules/observers/perf_observer.h +++ b/modules/observers/perf_observer.h @@ -4,6 +4,7 @@ #include "caffe2/core/net.h" #include "caffe2/core/observer.h" #include "caffe2/core/timer.h" +#include "observers/macros.h" #include diff --git a/modules/rocksdb/rocksdb.cc b/modules/rocksdb/rocksdb.cc index b4752b67ca569..4f8918df41389 100644 --- a/modules/rocksdb/rocksdb.cc +++ b/modules/rocksdb/rocksdb.cc @@ -67,7 +67,7 @@ class RocksDBTransaction : public Transaction { rocksdb::DB* db_; std::unique_ptr batch_; - AT_DISABLE_COPY_AND_ASSIGN(RocksDBTransaction); + C10_DISABLE_COPY_AND_ASSIGN(RocksDBTransaction); }; class RocksDB : public DB { diff --git a/setup.py b/setup.py index 381123b2b9ced..94455ed1cf7be 100644 --- a/setup.py +++ b/setup.py @@ -346,6 +346,7 @@ def build_libs(libs): build_libs_cmd = ['bash', os.path.join('..', 'tools', 'build_pytorch_libs.sh')] my_env = os.environ.copy() my_env["PYTORCH_PYTHON"] = sys.executable + my_env["PYTORCH_BUILD_VERSION"] = version 
my_env["CMAKE_PREFIX_PATH"] = full_site_packages my_env["NUM_JOBS"] = str(NUM_JOBS) my_env["ONNX_NAMESPACE"] = ONNX_NAMESPACE @@ -471,18 +472,9 @@ def check_file(f): if not same: shutil.copyfile(orig_file, sym_file) - # Copy headers necessary to compile C++ extensions. - # - # This is not perfect solution as build does not depend on any of - # the auto-generated code and auto-generated files will not be - # included in this copy. If we want to use auto-generated files, - # we need to find a better way to do this. - # More information can be found in conversation thread of PR #5772 - self.copy_tree('torch/lib/tmp_install/share', 'torch/share') self.copy_tree('third_party/pybind11/include/pybind11/', 'torch/lib/include/pybind11') - self.copy_file('torch/csrc/torch.h', 'torch/lib/include/torch/torch.h') build_dep_cmds = {} @@ -1208,9 +1200,17 @@ def make_relative_rpath(path): 'lib/include/ATen/cudnn/*.h', 'lib/include/ATen/detail/*.h', 'lib/include/caffe2/utils/*.h', + 'lib/include/c10/*.h', + 'lib/include/c10/macros/*.h', 'lib/include/torch/*.h', 'lib/include/torch/csrc/*.h', - 'lib/include/torch/csrc/api/include/torch/detail/ordered_dict.h', + 'lib/include/torch/csrc/api/include/torch/*.h', + 'lib/include/torch/csrc/api/include/torch/detail/*.h', + 'lib/include/torch/csrc/api/include/torch/nn/*.h', + 'lib/include/torch/csrc/api/include/torch/nn/modules/*.h', + 'lib/include/torch/csrc/api/include/torch/nn/parallel/*.h', + 'lib/include/torch/csrc/api/include/torch/optim/*.h', + 'lib/include/torch/csrc/api/include/torch/serialize/*.h', 'lib/include/torch/csrc/autograd/*.h', 'lib/include/torch/csrc/autograd/generated/*.h', 'lib/include/torch/csrc/cuda/*.h', diff --git a/test/cpp/api/any.cpp b/test/cpp/api/any.cpp index 0d8e98c4157ab..22eda0d1004d2 100644 --- a/test/cpp/api/any.cpp +++ b/test/cpp/api/any.cpp @@ -71,7 +71,7 @@ TEST_F( ASSERT_TRUE( any.forward(std::string("a"), std::string("ab"), std::string("abc")) .sum() - .toCInt() == 6); + .item() == 6); } TEST_F(AnyModuleTest, WrongArgumentType) { @@ -232,10 +232,10 @@ TEST_F(AnyModuleTest, ConvertsVariableToTensorCorrectly) { // mismatch). AnyModule any(M{}); ASSERT_TRUE( - any.forward(torch::autograd::Variable(torch::ones(5))).sum().toCFloat() == + any.forward(torch::autograd::Variable(torch::ones(5))).sum().item() == 5); // at::Tensors that are not variables work too. - ASSERT_EQ(any.forward(at::ones(5)).sum().toCFloat(), 5); + ASSERT_EQ(any.forward(at::ones(5)).sum().item(), 5); } namespace torch { diff --git a/test/cpp/api/integration.cpp b/test/cpp/api/integration.cpp index 131b0440a41a1..b2d10097b2393 100644 --- a/test/cpp/api/integration.cpp +++ b/test/cpp/api/integration.cpp @@ -63,10 +63,10 @@ class CartPole { } void step(int action) { - auto x = state[0].toCFloat(); - auto x_dot = state[1].toCFloat(); - auto theta = state[2].toCFloat(); - auto theta_dot = state[3].toCFloat(); + auto x = state[0].item(); + auto x_dot = state[1].item(); + auto theta = state[2].item(); + auto theta_dot = state[3].item(); auto force = (action == 1) ? 
force_mag : -force_mag; auto costheta = std::cos(theta); @@ -222,7 +222,7 @@ bool test_mnist( torch::NoGradGuard guard; auto result = std::get<1>(forward_op(tedata).max(1)); torch::Tensor correct = (result == telabel).toType(torch::kFloat32); - return correct.sum().toCFloat() > telabel.size(0) * 0.8; + return correct.sum().item() > telabel.size(0) * 0.8; } struct IntegrationTest : torch::test::SeedingFixture {}; @@ -251,7 +251,7 @@ TEST_F(IntegrationTest, CartPole) { auto out = forward(state); auto probs = torch::Tensor(std::get<0>(out)); auto value = torch::Tensor(std::get<1>(out)); - auto action = probs.multinomial(1)[0].toCInt(); + auto action = probs.multinomial(1)[0].item(); // Compute the log prob of a multinomial distribution. // This should probably be actually implemented in autogradpp... auto p = probs / probs.sum(-1, true); @@ -274,7 +274,7 @@ TEST_F(IntegrationTest, CartPole) { std::vector policy_loss; std::vector value_loss; for (auto i = 0U; i < saved_log_probs.size(); i++) { - auto r = rewards[i] - saved_values[i].toCFloat(); + auto r = rewards[i] - saved_values[i].item(); policy_loss.push_back(-r * saved_log_probs[i]); value_loss.push_back( torch::smooth_l1_loss(saved_values[i], torch::ones(1) * rewards[i])); diff --git a/test/cpp/api/jit.cpp b/test/cpp/api/jit.cpp index 34b3e8f630c2a..9aa6968df71f5 100644 --- a/test/cpp/api/jit.cpp +++ b/test/cpp/api/jit.cpp @@ -20,10 +20,10 @@ TEST(TorchScriptTest, CanCompileMultipleFunctions) { auto a = torch::ones(1); auto b = torch::ones(1); - ASSERT_EQ(1, module->run_method("test_mul", a, b).toTensor().toCLong()); + ASSERT_EQ(1, module->run_method("test_mul", a, b).toTensor().item()); - ASSERT_EQ(2, module->run_method("test_relu", a, b).toTensor().toCLong()); + ASSERT_EQ(2, module->run_method("test_relu", a, b).toTensor().item()); ASSERT_TRUE( - 0x200 == module->run_method("test_while", a, b).toTensor().toCLong()); + 0x200 == module->run_method("test_while", a, b).toTensor().item()); } diff --git a/test/cpp/api/misc.cpp b/test/cpp/api/misc.cpp index ca716d0ac0c95..b85cb9dcc1a86 100644 --- a/test/cpp/api/misc.cpp +++ b/test/cpp/api/misc.cpp @@ -49,5 +49,5 @@ TEST(NNInitTest, CanInitializeTensorThatRequiresGrad) { tensor.fill_(1), "a leaf Variable that requires grad " "has been used in an in-place operation"); - ASSERT_EQ(torch::nn::init::ones_(tensor).sum().toCInt(), 12); + ASSERT_EQ(torch::nn::init::ones_(tensor).sum().item(), 12); } diff --git a/test/cpp/api/module.cpp b/test/cpp/api/module.cpp index f2bca9501ae64..70d05d4240e77 100644 --- a/test/cpp/api/module.cpp +++ b/test/cpp/api/module.cpp @@ -41,13 +41,13 @@ TEST_F(ModuleTest, ZeroGrad) { for (auto& parameter : module->parameters()) { auto grad = parameter->grad(); ASSERT_TRUE(grad.defined()); - ASSERT_NE(grad.sum().toCFloat(), 0); + ASSERT_NE(grad.sum().item(), 0); } module->zero_grad(); for (auto& parameter : module->parameters()) { auto grad = parameter->grad(); ASSERT_TRUE(grad.defined()); - ASSERT_EQ(grad.sum().toCFloat(), 0); + ASSERT_EQ(grad.sum().item(), 0); } } @@ -72,7 +72,7 @@ TEST_F(ModuleTest, ZeroGradWithUndefined) { ASSERT_TRUE(module.x.grad().defined()); ASSERT_FALSE(module.y.grad().defined()); - ASSERT_EQ(module.x.grad().sum().toCFloat(), 0); + ASSERT_EQ(module.x.grad().sum().item(), 0); } TEST_F(ModuleTest, CanGetName) { diff --git a/test/cpp/api/modules.cpp b/test/cpp/api/modules.cpp index 11e54a97a1885..fd9416eb3b9b6 100644 --- a/test/cpp/api/modules.cpp +++ b/test/cpp/api/modules.cpp @@ -134,7 +134,7 @@ TEST_F(ModulesTest, SimpleContainer) { 
ASSERT_EQ(x.ndimension(), 2); ASSERT_EQ(x.size(0), 1000); ASSERT_EQ(x.size(1), 100); - ASSERT_EQ(x.min().toCFloat(), 0); + ASSERT_EQ(x.min().item(), 0); } TEST_F(ModulesTest, EmbeddingBasic) { @@ -181,12 +181,12 @@ TEST_F(ModulesTest, Dropout) { y.backward(); ASSERT_EQ(y.ndimension(), 1); ASSERT_EQ(y.size(0), 100); - ASSERT_LT(y.sum().toCFloat(), 130); // Probably - ASSERT_GT(y.sum().toCFloat(), 70); // Probably + ASSERT_LT(y.sum().item(), 130); // Probably + ASSERT_GT(y.sum().item(), 70); // Probably dropout->eval(); y = dropout->forward(x); - ASSERT_EQ(y.sum().toCFloat(), 100); + ASSERT_EQ(y.sum().item(), 100); } TEST_F(ModulesTest, Parameters) { @@ -228,15 +228,15 @@ TEST_F(ModulesTest, FunctionalCallsSuppliedFunction) { TEST_F(ModulesTest, FunctionalWithTorchFunction) { auto functional = Functional(torch::relu); - ASSERT_EQ(functional(torch::ones({})).toCFloat(), 1); - ASSERT_EQ(functional(torch::ones({})).toCFloat(), 1); - ASSERT_EQ(functional(torch::ones({}) * -1).toCFloat(), 0); + ASSERT_EQ(functional(torch::ones({})).item(), 1); + ASSERT_EQ(functional(torch::ones({})).item(), 1); + ASSERT_EQ(functional(torch::ones({}) * -1).item(), 0); } TEST_F(ModulesTest, FunctionalArgumentBinding) { auto functional = Functional(torch::elu, /*alpha=*/1, /*scale=*/0, /*input_scale=*/1); - ASSERT_EQ(functional(torch::ones({})).toCFloat(), 0); + ASSERT_EQ(functional(torch::ones({})).item(), 0); } TEST_F(ModulesTest, BatchNormStateful) { diff --git a/test/cpp/api/optim.cpp b/test/cpp/api/optim.cpp index 03f7ed92a9b35..944a31ca7e997 100644 --- a/test/cpp/api/optim.cpp +++ b/test/cpp/api/optim.cpp @@ -44,7 +44,7 @@ bool test_optimizer_xor(Options options) { auto labels = torch::empty({kBatchSize}); for (size_t i = 0; i < kBatchSize; i++) { inputs[i] = torch::randint(2, {2}, torch::kInt64); - labels[i] = inputs[i][0].toCLong() ^ inputs[i][1].toCLong(); + labels[i] = inputs[i][0].item() ^ inputs[i][1].item(); } inputs.set_requires_grad(true); optimizer.zero_grad(); @@ -54,7 +54,7 @@ bool test_optimizer_xor(Options options) { optimizer.step(); - running_loss = running_loss * 0.99 + loss.toCFloat() * 0.01; + running_loss = running_loss * 0.99 + loss.item() * 0.01; if (epoch > kMaximumNumberOfEpochs) { std::cout << "Loss is too high after epoch " << epoch << ": " << running_loss << std::endl; @@ -286,14 +286,14 @@ TEST(OptimTest, ZeroGrad) { for (const auto& parameter : model->parameters()) { ASSERT_TRUE(parameter->grad().defined()); - ASSERT_GT(parameter->grad().sum().toCFloat(), 0); + ASSERT_GT(parameter->grad().sum().item(), 0); } optimizer.zero_grad(); for (const auto& parameter : model->parameters()) { ASSERT_TRUE(parameter->grad().defined()); - ASSERT_EQ(parameter->grad().sum().toCFloat(), 0); + ASSERT_EQ(parameter->grad().sum().item(), 0); } } diff --git a/test/cpp/api/parallel.cpp b/test/cpp/api/parallel.cpp index 71bcc542f8439..a191078236447 100644 --- a/test/cpp/api/parallel.cpp +++ b/test/cpp/api/parallel.cpp @@ -38,7 +38,7 @@ TEST_F(ParallelTest, DifferentiableScatter_MultiCUDA) { ASSERT_TRUE(input.grad().defined()); ASSERT_TRUE(input.grad().device().is_cpu()); - ASSERT_EQ(input.grad().sum().toCInt(), 10); + ASSERT_EQ(input.grad().sum().item(), 10); } TEST_F(ParallelTest, DifferentiableGather_MultiCUDA) { @@ -62,11 +62,11 @@ TEST_F(ParallelTest, DifferentiableGather_MultiCUDA) { ASSERT_TRUE(a.grad().defined()); ASSERT_EQ(a.grad().device(), torch::Device(torch::kCUDA, 0)); - ASSERT_EQ(a.grad().sum().toCInt(), 5); + ASSERT_EQ(a.grad().sum().item(), 5); ASSERT_TRUE(b.grad().defined()); 
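Aside (illustration only, not part of the patch): the C++ test edits above and below replace the old typed accessors (toCFloat, toCInt, toCLong) with the item() accessor, the C++ counterpart of the scalar-extraction idiom Python exposes as Tensor.item(). A minimal Python sketch of that idiom:

    import torch

    s = torch.ones(5).sum()      # a zero-dimensional tensor
    assert s.item() == 5.0       # item() yields a plain Python number

    idx = torch.tensor([1, 2, 3])
    assert idx[0].item() == 1    # integer tensors give a Python int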
ASSERT_EQ(b.grad().device(), torch::Device(torch::kCUDA, 1)); - ASSERT_EQ(b.grad().sum().toCInt(), 5); + ASSERT_EQ(b.grad().sum().item(), 5); } TEST_F(ParallelTest, Replicate_MultiCUDA) { @@ -226,6 +226,6 @@ TEST_F(ParallelTest, DataParallelUsesAllAvailableCUDADevices_CUDA) { const auto device_count = torch::cuda::device_count(); ASSERT_EQ(output.numel(), device_count); for (size_t i = 0; i < device_count; ++i) { - ASSERT_EQ(output[i].toCInt(), i); + ASSERT_EQ(output[i].item(), i); } } diff --git a/test/cpp/api/rnn.cpp b/test/cpp/api/rnn.cpp index 96ffd37eb0f62..e0d511fb09938 100644 --- a/test/cpp/api/rnn.cpp +++ b/test/cpp/api/rnn.cpp @@ -56,7 +56,7 @@ bool test_RNN_xor(Func&& model_maker, bool cuda = false) { loss.backward(); optimizer.step(); - running_loss = running_loss * 0.99 + loss.toCFloat() * 0.01; + running_loss = running_loss * 0.99 + loss.item() * 0.01; if (epoch > max_epoch) { return false; } @@ -81,7 +81,7 @@ void check_lstm_sizes(RNNOutput output) { ASSERT_EQ(output.state.size(3), 64); // 64 hidden dims // Something is in the hiddens - ASSERT_GT(output.state.norm().toCFloat(), 0); + ASSERT_GT(output.state.norm().item(), 0); } struct RNNTest : torch::test::SeedingFixture {}; @@ -103,7 +103,7 @@ TEST_F(RNNTest, CheckOutputSizes) { torch::Tensor diff = next.state - output.state; // Hiddens changed - ASSERT_GT(diff.abs().sum().toCFloat(), 1e-3); + ASSERT_GT(diff.abs().sum().item(), 1e-3); } TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) { @@ -137,7 +137,7 @@ TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) { 0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003, 0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666}; for (size_t i = 0; i < 3 * 4 * 2; i++) { - ASSERT_LT(std::abs(flat[i].toCFloat() - c_out[i]), 1e-3); + ASSERT_LT(std::abs(flat[i].item() - c_out[i]), 1e-3); } ASSERT_EQ(out.state.ndimension(), 4); // (hx, cx) x layers x B x 2 @@ -163,7 +163,7 @@ TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) { 1.0931, 1.4911}; for (size_t i = 0; i < 16; i++) { - ASSERT_LT(std::abs(flat[i].toCFloat() - h_out[i]), 1e-3); + ASSERT_LT(std::abs(flat[i].item() - h_out[i]), 1e-3); } } @@ -206,7 +206,7 @@ TEST_F(RNNTest, Sizes_CUDA) { torch::Tensor diff = next.state - output.state; // Hiddens changed - ASSERT_GT(diff.abs().sum().toCFloat(), 1e-3); + ASSERT_GT(diff.abs().sum().item(), 1e-3); } TEST_F(RNNTest, EndToEndLSTM_CUDA) { diff --git a/test/cpp/api/serialize.cpp b/test/cpp/api/serialize.cpp index a37c00c2e3eff..0612029f53bca 100644 --- a/test/cpp/api/serialize.cpp +++ b/test/cpp/api/serialize.cpp @@ -90,7 +90,7 @@ TEST(Serialize, XOR) { auto labels = torch::empty({batch_size}); for (size_t i = 0; i < batch_size; i++) { inputs[i] = torch::randint(2, {2}, torch::kInt64); - labels[i] = inputs[i][0].toCLong() ^ inputs[i][1].toCLong(); + labels[i] = inputs[i][0].item() ^ inputs[i][1].item(); } auto x = model->forward(inputs); return torch::binary_cross_entropy(x, labels); @@ -112,7 +112,7 @@ TEST(Serialize, XOR) { loss.backward(); optimizer.step(); - running_loss = running_loss * 0.99 + loss.sum().toCFloat() * 0.01; + running_loss = running_loss * 0.99 + loss.sum().item() * 0.01; ASSERT_LT(epoch, 3000); epoch++; } @@ -122,7 +122,7 @@ TEST(Serialize, XOR) { torch::load(model2, tempfile.str()); auto loss = getLoss(model2, 100); - ASSERT_LT(loss.toCFloat(), 0.1); + ASSERT_LT(loss.item(), 0.1); } TEST(Serialize, Optim) { @@ -188,9 +188,9 @@ TEST(Serialize, Optim) { const auto& name = p.key; // Model 1 and 3 should be the same ASSERT_TRUE( - param1[name].norm().toCFloat() == param3[name].norm().toCFloat()); + 
param1[name].norm().item() == param3[name].norm().item()); ASSERT_TRUE( - param1[name].norm().toCFloat() != param2[name].norm().toCFloat()); + param1[name].norm().item() != param2[name].norm().item()); } } @@ -202,7 +202,7 @@ TEST(Serialize, Optim) { // auto labels = torch::empty({batch_size}); // for (size_t i = 0; i < batch_size; i++) { // inputs[i] = torch::randint(2, {2}, torch::kInt64); -// labels[i] = inputs[i][0].toCLong() ^ inputs[i][1].toCLong(); +// labels[i] = inputs[i][0].item() ^ inputs[i][1].item(); // } // auto x = model->forward(inputs); // return torch::binary_cross_entropy(x, labels); @@ -224,7 +224,7 @@ TEST(Serialize, Optim) { // loss.backward(); // optimizer.step(); // -// running_loss = running_loss * 0.99 + loss.sum().toCFloat() * 0.01; +// running_loss = running_loss * 0.99 + loss.sum().item() * 0.01; // ASSERT_LT(epoch, 3000); // epoch++; // } @@ -234,7 +234,7 @@ TEST(Serialize, Optim) { // torch::load(model2, tempfile.str()); // // auto loss = getLoss(model2, 100); -// ASSERT_LT(loss.toCFloat(), 0.1); +// ASSERT_LT(loss.item(), 0.1); // // model2->to(torch::kCUDA); // torch::test::TempFile tempfile2; @@ -242,5 +242,5 @@ TEST(Serialize, Optim) { // torch::load(model3, tempfile2.str()); // // loss = getLoss(model3, 100); -// ASSERT_LT(loss.toCFloat(), 0.1); +// ASSERT_LT(loss.item(), 0.1); // } diff --git a/test/cpp/api/tensor.cpp b/test/cpp/api/tensor.cpp index ad14298d86c96..3996132cc8479 100644 --- a/test/cpp/api/tensor.cpp +++ b/test/cpp/api/tensor.cpp @@ -104,7 +104,7 @@ TEST(TensorTest, ContainsCorrectValueForSingleValue) { auto tensor = at::tensor(123); ASSERT_EQ(tensor.numel(), 1); ASSERT_EQ(tensor.dtype(), at::kInt); - ASSERT_EQ(tensor[0].toCInt(), 123); + ASSERT_EQ(tensor[0].item(), 123); tensor = at::tensor(123.456f); ASSERT_EQ(tensor.numel(), 1); @@ -189,7 +189,7 @@ TEST(TensorTest, FromBlob) { auto tensor = torch::from_blob(v.data(), v.size(), torch::kInt32); ASSERT_TRUE(tensor.is_variable()); ASSERT_EQ(tensor.numel(), 3); - ASSERT_EQ(tensor[0].toCInt(), 1); - ASSERT_EQ(tensor[1].toCInt(), 2); - ASSERT_EQ(tensor[2].toCInt(), 3); + ASSERT_EQ(tensor[0].item(), 1); + ASSERT_EQ(tensor[1].item(), 2); + ASSERT_EQ(tensor[2].item(), 3); } diff --git a/test/cpp_extensions/complex_registration_extension.cpp b/test/cpp_extensions/complex_registration_extension.cpp index db75e3f67f777..21b05d060b190 100644 --- a/test/cpp_extensions/complex_registration_extension.cpp +++ b/test/cpp_extensions/complex_registration_extension.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/test/cpp_extensions/cpp_api_extension.cpp b/test/cpp_extensions/cpp_api_extension.cpp new file mode 100644 index 0000000000000..066ad64160fa5 --- /dev/null +++ b/test/cpp_extensions/cpp_api_extension.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +struct Net : torch::nn::Module { + Net(int64_t in, int64_t out) + : fc(in, out), + bn(torch::nn::BatchNormOptions(out).stateful(true)), + dropout(0.5) { + register_module("fc", fc); + register_module("bn", bn); + register_module("dropout", dropout); + } + + torch::Tensor forward(torch::Tensor x) { + return dropout->forward(bn->forward(torch::relu(fc->forward(x)))); + } + + void set_bias(torch::Tensor bias) { + fc->bias = bias; + } + + torch::Tensor get_bias() const { + return fc->bias; + } + + torch::nn::Linear fc; + torch::nn::BatchNorm bn; + torch::nn::Dropout dropout; +}; + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + torch::python::bind_module(m, "Net") + .def(py::init()) + .def("forward", &Net::forward) + .def("set_bias", 
&Net::set_bias) + .def("get_bias", &Net::get_bias); +} diff --git a/test/cpp_extensions/cuda_extension.cpp b/test/cpp_extensions/cuda_extension.cpp index 963850acc2795..9946b4f9cb97d 100644 --- a/test/cpp_extensions/cuda_extension.cpp +++ b/test/cpp_extensions/cuda_extension.cpp @@ -1,4 +1,4 @@ -#include +#include // Declare the function from cuda_extension.cu. It will be compiled // separately with nvcc and linked with the object file of cuda_extension.cpp diff --git a/test/cpp_extensions/cudnn_extension.cpp b/test/cpp_extensions/cudnn_extension.cpp index 7c3be3e471630..498e01a116a15 100644 --- a/test/cpp_extensions/cudnn_extension.cpp +++ b/test/cpp_extensions/cudnn_extension.cpp @@ -10,7 +10,7 @@ * 5) Return something (optional). */ -#include +#include #include // for TensorDescriptor #include // for CUDNN_CHECK diff --git a/test/cpp_extensions/doubler.h b/test/cpp_extensions/doubler.h index 2b22dca1284cd..d9e6aaea8c346 100644 --- a/test/cpp_extensions/doubler.h +++ b/test/cpp_extensions/doubler.h @@ -1,4 +1,4 @@ -#include +#include struct Doubler { Doubler(int A, int B) { diff --git a/test/cpp_extensions/extension.cpp b/test/cpp_extensions/extension.cpp index 8e79397296910..3ba27d92f32d7 100644 --- a/test/cpp_extensions/extension.cpp +++ b/test/cpp_extensions/extension.cpp @@ -1,4 +1,4 @@ -#include +#include at::Tensor sigmoid_add(at::Tensor x, at::Tensor y) { return x.sigmoid() + y.sigmoid(); diff --git a/test/cpp_extensions/half_support.cu b/test/cpp_extensions/half_support.cu index a3621bfe7c55f..9d420438fb526 100644 --- a/test/cpp_extensions/half_support.cu +++ b/test/cpp_extensions/half_support.cu @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/test/cpp_extensions/jit_extension.cpp b/test/cpp_extensions/jit_extension.cpp index e62be5b38ba1d..576e7fc9a1d3c 100644 --- a/test/cpp_extensions/jit_extension.cpp +++ b/test/cpp_extensions/jit_extension.cpp @@ -1,4 +1,4 @@ -#include +#include #include "doubler.h" diff --git a/test/cpp_extensions/jit_extension2.cpp b/test/cpp_extensions/jit_extension2.cpp index e197308c3d59e..cfd472137187a 100644 --- a/test/cpp_extensions/jit_extension2.cpp +++ b/test/cpp_extensions/jit_extension2.cpp @@ -1,4 +1,4 @@ -#include +#include using namespace at; diff --git a/test/expect/TestJit.test_cpp_cuda.expect b/test/expect/TestJit.test_cpp_cuda.expect index 451f1f9329601..8453308a0dfb5 100644 --- a/test/expect/TestJit.test_cpp_cuda.expect +++ b/test/expect/TestJit.test_cpp_cuda.expect @@ -65,6 +65,8 @@ graph(%0 : Dynamic %3 : Dynamic %4 : Dynamic) { %23 : Dynamic, %24 : Dynamic = prim::DifferentiableGraph_0(%0, %3, %1, %4, %2) + %7 : int = prim::Constant[value=1]() + %19 : int = prim::Constant[value=1]() return (%24, %23); } with prim::DifferentiableGraph_0 = graph(%1 : Dynamic @@ -74,20 +76,20 @@ with prim::DifferentiableGraph_0 = graph(%1 : Dynamic %17 : Dynamic) { %0 : Dynamic = aten::mm(%1, %2) %3 : Dynamic = aten::mm(%4, %5) - %6 : int = prim::Constant[value=1]() - %7 : Dynamic = aten::add(%0, %3, %6) - %8 : Dynamic, %9 : Dynamic, %10 : Dynamic, %11 : Dynamic = prim::ConstantChunk[chunks=4, dim=1](%7) + %7 : int = prim::Constant[value=1]() + %6 : Dynamic = aten::add(%0, %3, %7) + %8 : Dynamic, %9 : Dynamic, %10 : Dynamic, %11 : Dynamic = prim::ConstantChunk[chunks=4, dim=1](%6) %12 : Dynamic = aten::sigmoid(%8) %13 : Dynamic = aten::sigmoid(%11) %14 : Dynamic = aten::tanh(%10) %15 : Dynamic = aten::sigmoid(%9) %16 : Dynamic = aten::mul(%15, %17) %18 : Dynamic = aten::mul(%12, %14) - %19 : int = prim::Constant[value=1]() - %20 : Dynamic = 
aten::add(%16, %18, %19) - %21 : Dynamic = aten::tanh(%20) + %20 : int = prim::Constant[value=1]() + %19 : Dynamic = aten::add(%16, %18, %20) + %21 : Dynamic = aten::tanh(%19) %22 : Dynamic = aten::mul(%13, %21) - return (%20, %22); + return (%19, %22); } testDifferentiate diff --git a/test/expect/TestScript.test_lstm_fusion_cuda-backward.expect b/test/expect/TestScript.test_lstm_fusion_cuda-backward.expect index efb3d272bb4c2..cbdbc744b5e85 100644 --- a/test/expect/TestScript.test_lstm_fusion_cuda-backward.expect +++ b/test/expect/TestScript.test_lstm_fusion_cuda-backward.expect @@ -17,20 +17,18 @@ graph(%0 : Float(*, *) %cellgate : Float(*, *) %outgate : Float(*, *) %18 : Float(*, *)) { - %19 : int = prim::Constant[value=1]() - %20 : Float(*, *), %21 : Float(*, *) = prim::FusionGroup_0[device=0](%ingate, %forgetgate, %cellgate, %outgate, %11, %1, %18, %0) - %22 : Float(*, *) = aten::mul(%20, %19) - %23 : Float(*, *) = aten::t(%13) - %24 : Float(*, *) = aten::mm(%22, %23) - %25 : Float(*, *) = aten::t(%10) - %26 : Float(*, *) = aten::mm(%25, %22) - %27 : Float(*, *) = aten::t(%26) - %28 : Float(*, *) = aten::t(%12) - %29 : Float(*, *) = aten::mm(%20, %28) - %30 : Float(*, *) = aten::t(%9) - %31 : Float(*, *) = aten::mm(%30, %20) - %32 : Float(*, *) = aten::t(%31) - return (%32, %29, %27, %24, %22, %22, %21); + %19 : Float(*, *), %20 : Float(*, *) = prim::FusionGroup_0[device=0](%ingate, %forgetgate, %cellgate, %outgate, %11, %1, %18, %0) + %21 : Float(*, *) = aten::t(%13) + %22 : Float(*, *) = aten::mm(%19, %21) + %23 : Float(*, *) = aten::t(%10) + %24 : Float(*, *) = aten::mm(%23, %19) + %25 : Float(*, *) = aten::t(%24) + %26 : Float(*, *) = aten::t(%12) + %27 : Float(*, *) = aten::mm(%19, %26) + %28 : Float(*, *) = aten::t(%9) + %29 : Float(*, *) = aten::mm(%28, %19) + %30 : Float(*, *) = aten::t(%29) + return (%30, %27, %25, %22, %19, %19, %20); } with prim::FusionGroup_0 = graph(%0 : Float(*, *) %1 : Float(*, *) @@ -52,31 +50,29 @@ with prim::FusionGroup_0 = graph(%0 : Float(*, *) %17 : Float(*, *) = aten::add(%7, %14, %16) %18 : Float(*, *) = aten::mul(%17, %1) %19 : Float(*, *) = aten::mul(%5, %6) - %20 : int = prim::Constant[value=1]() - %21 : Float(*, *) = aten::mul(%17, %20) - %22 : Float(*, *) = aten::mul(%21, %2) - %23 : Float(*, *) = aten::mul(%21, %0) - %24 : Float(*, *) = aten::mul(%17, %4) - %25 : Float(*, *) = aten::neg(%3) - %26 : int = prim::Constant[value=1]() - %27 : Float(*, *) = aten::add(%25, %26, %26) - %28 : Float(*, *) = aten::mul(%19, %3) - %29 : Float(*, *) = aten::mul(%28, %27) - %30 : Float(*, *) = aten::mul(%2, %2) - %31 : Float(*, *) = aten::neg(%30) - %32 : int = prim::Constant[value=1]() - %33 : Float(*, *) = aten::add(%31, %32, %32) - %34 : Float(*, *) = aten::mul(%23, %33) - %35 : Float(*, *) = aten::neg(%1) - %36 : int = prim::Constant[value=1]() - %37 : Float(*, *) = aten::add(%35, %36, %36) - %38 : Float(*, *) = aten::mul(%24, %1) - %39 : Float(*, *) = aten::mul(%38, %37) - %40 : Float(*, *) = aten::neg(%0) - %41 : int = prim::Constant[value=1]() - %42 : Float(*, *) = aten::add(%40, %41, %41) - %43 : Float(*, *) = aten::mul(%22, %0) - %44 : Float(*, *) = aten::mul(%43, %42) - %45 : Float(*, *) = prim::FusedConcat[dim=1](%44, %39, %34, %29) - return (%45, %18); + %20 : Float(*, *) = aten::mul(%17, %2) + %21 : Float(*, *) = aten::mul(%17, %0) + %22 : Float(*, *) = aten::mul(%17, %4) + %23 : Float(*, *) = aten::neg(%3) + %24 : int = prim::Constant[value=1]() + %25 : Float(*, *) = aten::add(%23, %24, %24) + %26 : Float(*, *) = aten::mul(%19, %3) + %27 : 
Float(*, *) = aten::mul(%26, %25) + %28 : Float(*, *) = aten::mul(%2, %2) + %29 : Float(*, *) = aten::neg(%28) + %30 : int = prim::Constant[value=1]() + %31 : Float(*, *) = aten::add(%29, %30, %30) + %32 : Float(*, *) = aten::mul(%21, %31) + %33 : Float(*, *) = aten::neg(%1) + %34 : int = prim::Constant[value=1]() + %35 : Float(*, *) = aten::add(%33, %34, %34) + %36 : Float(*, *) = aten::mul(%22, %1) + %37 : Float(*, *) = aten::mul(%36, %35) + %38 : Float(*, *) = aten::neg(%0) + %39 : int = prim::Constant[value=1]() + %40 : Float(*, *) = aten::add(%38, %39, %39) + %41 : Float(*, *) = aten::mul(%20, %0) + %42 : Float(*, *) = aten::mul(%41, %40) + %43 : Float(*, *) = prim::FusedConcat[dim=1](%42, %37, %32, %27) + return (%43, %18); } diff --git a/test/expect/TestScript.test_milstm_fusion_cuda-backward.expect b/test/expect/TestScript.test_milstm_fusion_cuda-backward.expect index 1221d05e51925..b0dc85644751d 100644 --- a/test/expect/TestScript.test_milstm_fusion_cuda-backward.expect +++ b/test/expect/TestScript.test_milstm_fusion_cuda-backward.expect @@ -27,14 +27,17 @@ graph(%0 : Float(*, *) %outgate : Float(*, *) %27 : Float(*, *)) { %28 : Float(*, *) = prim::FusionGroup_0[device=0](%ingate, %forgetgate, %cellgate, %outgate, %17, %0, %27, %1) - %29 : Float(*, *), %30 : Float(*, *), %31 : Float(*, *), %32 : Float(*, *), %33 : Float(*, *), %34 : Float(*, *) = prim::FusionGroup_1[device=0](%14, %15, %Wx, %28, %Uz, %22, %16) - %35 : Float(*, *) = aten::t(%13) - %36 : Float(*, *) = aten::mm(%35, %31) - %37 : Float(*, *) = aten::t(%36) - %38 : Float(*, *) = aten::t(%12) - %39 : Float(*, *) = aten::mm(%38, %29) - %40 : Float(*, *) = aten::t(%39) - return (%40, %37, %30, %32, %33, %34); + %29 : Float(*, *) = aten::mul(%28, %Uz) + %30 : Float(*, *) = aten::mul(%28, %Wx) + %31 : Float(*, *) = prim::FusionGroup_1[device=0](%28, %22, %16) + %32 : Float(*, *), %33 : Float(*, *) = prim::FusionGroup_2[device=0](%14, %28, %15, %Wx, %Uz) + %34 : Float(*, *) = aten::t(%13) + %35 : Float(*, *) = aten::mm(%34, %31) + %36 : Float(*, *) = aten::t(%35) + %37 : Float(*, *) = aten::t(%12) + %38 : Float(*, *) = aten::mm(%37, %32) + %39 : Float(*, *) = aten::t(%38) + return (%39, %36, %33, %30, %29, %28); } with prim::FusionGroup_0 = graph(%0 : Float(*, *) %1 : Float(*, *) @@ -53,58 +56,51 @@ with prim::FusionGroup_0 = graph(%0 : Float(*, *) %14 : Float(*, *) = aten::mul(%9, %13) %15 : int = prim::Constant[value=1]() %16 : Float(*, *) = aten::add(%5, %14, %15) - %17 : int = prim::Constant[value=1]() - %18 : Float(*, *) = aten::mul(%16, %17) - %19 : Float(*, *) = aten::mul(%18, %2) - %20 : Float(*, *) = aten::mul(%18, %0) - %21 : Float(*, *) = aten::mul(%16, %4) - %22 : Float(*, *) = aten::neg(%3) - %23 : int = prim::Constant[value=1]() - %24 : Float(*, *) = aten::add(%22, %23, %23) - %25 : Float(*, *) = aten::mul(%8, %3) - %26 : Float(*, *) = aten::mul(%25, %24) - %27 : Float(*, *) = aten::mul(%2, %2) - %28 : Float(*, *) = aten::neg(%27) - %29 : int = prim::Constant[value=1]() - %30 : Float(*, *) = aten::add(%28, %29, %29) - %31 : Float(*, *) = aten::mul(%20, %30) - %32 : Float(*, *) = aten::neg(%1) - %33 : int = prim::Constant[value=1]() - %34 : Float(*, *) = aten::add(%32, %33, %33) - %35 : Float(*, *) = aten::mul(%21, %1) - %36 : Float(*, *) = aten::mul(%35, %34) - %37 : Float(*, *) = aten::neg(%0) - %38 : int = prim::Constant[value=1]() - %39 : Float(*, *) = aten::add(%37, %38, %38) - %40 : Float(*, *) = aten::mul(%19, %0) - %41 : Float(*, *) = aten::mul(%40, %39) - %42 : Float(*, *) = 
prim::FusedConcat[dim=1](%41, %36, %31, %26) - return (%42); + %17 : Float(*, *) = aten::mul(%16, %2) + %18 : Float(*, *) = aten::mul(%16, %0) + %19 : Float(*, *) = aten::mul(%16, %4) + %20 : Float(*, *) = aten::neg(%3) + %21 : int = prim::Constant[value=1]() + %22 : Float(*, *) = aten::add(%20, %21, %21) + %23 : Float(*, *) = aten::mul(%8, %3) + %24 : Float(*, *) = aten::mul(%23, %22) + %25 : Float(*, *) = aten::mul(%2, %2) + %26 : Float(*, *) = aten::neg(%25) + %27 : int = prim::Constant[value=1]() + %28 : Float(*, *) = aten::add(%26, %27, %27) + %29 : Float(*, *) = aten::mul(%18, %28) + %30 : Float(*, *) = aten::neg(%1) + %31 : int = prim::Constant[value=1]() + %32 : Float(*, *) = aten::add(%30, %31, %31) + %33 : Float(*, *) = aten::mul(%19, %1) + %34 : Float(*, *) = aten::mul(%33, %32) + %35 : Float(*, *) = aten::neg(%0) + %36 : int = prim::Constant[value=1]() + %37 : Float(*, *) = aten::add(%35, %36, %36) + %38 : Float(*, *) = aten::mul(%17, %0) + %39 : Float(*, *) = aten::mul(%38, %37) + %40 : Float(*, *) = prim::FusedConcat[dim=1](%39, %34, %29, %24) + return (%40); } -with prim::FusionGroup_1 = graph(%0 : Float(*) - %1 : Float(*) - %2 : Float(*, *) +with prim::FusionGroup_1 = graph(%0 : Float(*, *) + %1 : Float(*, *) + %2 : Float(*)) { + %3 : Float(*, *) = aten::mul(%0, %2) + %4 : Float(*, *) = aten::mul(%0, %1) + %5 : int = prim::Constant[value=1]() + %6 : Float(*, *) = aten::add(%3, %4, %5) + return (%6); +} +with prim::FusionGroup_2 = graph(%0 : Float(*) + %1 : Float(*, *) + %2 : Float(*) %3 : Float(*, *) - %4 : Float(*, *) - %5 : Float(*, *) - %6 : Float(*)) { - %7 : int = prim::Constant[value=1]() - %8 : int = prim::Constant[value=1]() + %4 : Float(*, *)) { + %5 : Float(*, *) = aten::mul(%1, %4) + %6 : Float(*, *) = aten::mul(%5, %3) + %7 : Float(*, *) = aten::mul(%1, %2) + %8 : Float(*, *) = aten::mul(%5, %0) %9 : int = prim::Constant[value=1]() - %10 : int = prim::Constant[value=1]() - %11 : Float(*, *) = aten::mul(%3, %10) - %12 : Float(*, *) = aten::mul(%11, %4) - %13 : Float(*, *) = aten::mul(%11, %2) - %14 : Float(*, *) = aten::mul(%11, %6) - %15 : Float(*, *) = aten::mul(%3, %5) - %16 : int = prim::Constant[value=1]() - %17 : int = prim::Constant[value=1]() - %18 : Float(*, *) = aten::add(%14, %15, %17) - %19 : Float(*, *) = aten::mul(%3, %4) - %20 : Float(*, *) = aten::mul(%19, %2) - %21 : Float(*, *) = aten::mul(%11, %1) - %22 : Float(*, *) = aten::mul(%19, %0) - %23 : int = prim::Constant[value=1]() - %24 : Float(*, *) = aten::add(%21, %22, %23) - return (%24, %20, %18, %13, %12, %11); + %10 : Float(*, *) = aten::add(%7, %8, %9) + return (%10, %6); } diff --git a/test/expect/TestScript.test_scalar_fusion.expect b/test/expect/TestScript.test_scalar_fusion.expect index 9d45a9f765d63..e2fd92a0f5739 100644 --- a/test/expect/TestScript.test_scalar_fusion.expect +++ b/test/expect/TestScript.test_scalar_fusion.expect @@ -1,12 +1,13 @@ graph(%x : Float() %y : Float()) { - %2 : Float() = prim::FusionGroup_0[device=-1](%x, %y) + %2 : Float() = prim::FusionGroup_0[device=-1](%y, %x) return (%2); } with prim::FusionGroup_0 = graph(%0 : Float() %1 : Float()) { - %2 : Float() = aten::type_as(%1, %0) - %3 : int = prim::Constant[value=1]() - %4 : Float() = aten::add(%0, %2, %3) - return (%4); + %2 : int = prim::Constant[value=2]() + %3 : Float() = aten::mul(%2, %1) + %4 : int = prim::Constant[value=1]() + %5 : Float() = aten::add(%3, %0, %4) + return (%5); } diff --git a/test/onnx/model_defs/squeezenet.py b/test/onnx/model_defs/squeezenet.py index e4ace18194ab7..2ee956b605cd1 100644 
--- a/test/onnx/model_defs/squeezenet.py +++ b/test/onnx/model_defs/squeezenet.py @@ -79,9 +79,9 @@ def __init__(self, version=1.0, num_classes=1000, ceil_mode=False): for m in self.modules(): if isinstance(m, nn.Conv2d): if m is final_conv: - init.normal(m.weight.data, mean=0.0, std=0.01) + init.normal_(m.weight.data, mean=0.0, std=0.01) else: - init.kaiming_uniform(m.weight.data) + init.kaiming_uniform_(m.weight.data) if m.bias is not None: m.bias.data.zero_() diff --git a/test/onnx/model_defs/super_resolution.py b/test/onnx/model_defs/super_resolution.py index d0ba46a22d05a..619d5f4a5b581 100644 --- a/test/onnx/model_defs/super_resolution.py +++ b/test/onnx/model_defs/super_resolution.py @@ -24,7 +24,7 @@ def forward(self, x): return x def _initialize_weights(self): - init.orthogonal(self.conv1.weight, init.calculate_gain('relu')) - init.orthogonal(self.conv2.weight, init.calculate_gain('relu')) - init.orthogonal(self.conv3.weight, init.calculate_gain('relu')) - init.orthogonal(self.conv4.weight) + init.orthogonal_(self.conv1.weight, init.calculate_gain('relu')) + init.orthogonal_(self.conv2.weight, init.calculate_gain('relu')) + init.orthogonal_(self.conv3.weight, init.calculate_gain('relu')) + init.orthogonal_(self.conv4.weight) diff --git a/test/test_autograd.py b/test/test_autograd.py index e755351c336b6..965fdab9c8b54 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -299,6 +299,33 @@ def hook(*grads): self.assertFalse(hook_called[0]) self.assertIsNone(x.grad) + def test_grad_nonleaf_register_hook(self): + # This checks an edge case for register_hook. + # We want to capture grad of a nonleaf tensor, + # but avoid segfault during backward of other nonleaf tensors + x = torch.randn(5, requires_grad=True) + x_list = x.unbind() + + x0 = x_list[0] + hook_results = [None] + + def hook(grad): + hook_results[0] = grad + x0.register_hook(hook) + + x_list[0].backward() + self.assertEqual(hook_results[0], torch.tensor(1.)) + expected_grad = torch.tensor([1., 0, 0, 0, 0]) + self.assertEqual(x.grad, expected_grad) + self.assertIsNone(x_list[0].grad) + + for i in range(1, 5, 1): + x_list[i].backward() + self.assertEqual(hook_results[0], None) + expected_grad[i] = 1.0 + self.assertEqual(x.grad, expected_grad) + self.assertIsNone(x_list[i].grad) + def test_sharded_grad(self): leaves = [torch.zeros(5, 5, requires_grad=True) for _ in range(10)] intermediates = [l * i + l * l for i, l in enumerate(leaves)] diff --git a/test/test_cpp_extensions.py b/test/test_cpp_extensions.py index 3702205e4c449..e5b1121784f07 100755 --- a/test/test_cpp_extensions.py +++ b/test/test_cpp_extensions.py @@ -23,6 +23,9 @@ TEST_CUDNN = TEST_CUDA and CUDNN_HEADER_EXISTS and torch.backends.cudnn.is_available() +IS_WINDOWS = sys.platform == 'win32' + + class TestCppExtension(common.TestCase): def setUp(self): if sys.platform != 'win32': @@ -189,7 +192,7 @@ def test_inline_jit_compile_extension_multiple_sources_and_no_functions(self): ''' cpp_source2 = ''' - #include + #include at::Tensor sin_add(at::Tensor x, at::Tensor y); PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("sin_add", &sin_add, "sin(x) + sin(y)"); @@ -265,7 +268,7 @@ def test_lenient_flag_handling_in_jit_extensions(self): cpp_sources=cpp_source, functions='tanh_add', extra_cflags=['-g\n\n', '-O0 -Wall'], - extra_include_paths=[' cpp_extensions\n', '../'], + extra_include_paths=[' cpp_extensions\n'], verbose=True) x = torch.zeros(100, dtype=torch.float32) @@ -341,6 +344,50 @@ def compile(code): module = compile('int f() { return 789; }') 
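Aside (illustration only, not part of the patch): the model-definition fixes above move from the deprecated torch.nn.init names to their trailing-underscore, in-place forms (init.normal_, init.kaiming_uniform_, init.orthogonal_). A small usage sketch with made-up layer names:

    import torch
    from torch import nn
    from torch.nn import init

    conv = nn.Conv2d(3, 8, kernel_size=3)
    final_conv = nn.Conv2d(8, 10, kernel_size=1)

    # The trailing-underscore initializers modify the tensor in place
    # (and return it); the non-underscore names were deprecated.
    init.kaiming_uniform_(conv.weight.data)
    init.normal_(final_conv.weight.data, mean=0.0, std=0.01)
    for m in (conv, final_conv):
        if m.bias is not None:
            m.bias.data.zero_()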
self.assertEqual(module.f(), 789) + @unittest.skipIf(IS_WINDOWS, "C++ API not yet supported on Windows") + def test_cpp_api_extension(self): + here = os.path.abspath(__file__) + pytorch_root = os.path.dirname(os.path.dirname(here)) + api_include = os.path.join(pytorch_root, 'torch', 'csrc', 'api', 'include') + module = torch.utils.cpp_extension.load( + name='cpp_api_extension', + sources='cpp_extensions/cpp_api_extension.cpp', + extra_include_paths=api_include, + extra_cflags=[] if IS_WINDOWS else ['-UTORCH_API_INCLUDE_EXTENSION_H'], + verbose=True) + + net = module.Net(3, 5) + + self.assertTrue(net.training) + net.eval() + self.assertFalse(net.training) + net.train() + self.assertTrue(net.training) + net.eval() + + input = torch.randn(2, 3, dtype=torch.float32) + output = net.forward(input) + self.assertEqual(output, net.forward(input)) + self.assertEqual(list(output.shape), [2, 5]) + + bias = net.get_bias() + self.assertEqual(list(bias.shape), [5]) + net.set_bias(bias + 1) + self.assertEqual(net.get_bias(), bias + 1) + output2 = net.forward(input) + + self.assertNotEqual(output + 1, output2) + + self.assertEqual(len(net.parameters()), 4) + + p = net.named_parameters() + self.assertEqual(type(p), dict) + self.assertEqual(len(p), 4) + self.assertIn('fc.weight', p) + self.assertIn('fc.bias', p) + self.assertIn('bn.weight', p) + self.assertIn('bn.bias', p) + if __name__ == '__main__': common.run_tests() diff --git a/test/test_cuda.py b/test/test_cuda.py index 5d8412192cba8..560aebbdc64e0 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -30,9 +30,11 @@ TestCase = object # noqa: F811 TEST_MAGMA = TEST_CUDA +TEST_LARGE_TENSOR = TEST_CUDA if TEST_CUDA: torch.ones(1).cuda() # has_magma shows up after cuda is initialized TEST_MAGMA = torch.cuda.has_magma + TEST_LARGE_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 9e9 floating_set = {torch.FloatTensor, torch.DoubleTensor, torch.cuda.FloatTensor, torch.cuda.DoubleTensor, torch.HalfTensor, torch.cuda.HalfTensor} @@ -889,23 +891,20 @@ def test_type_conversions(self): self.assertIsInstance(y.cuda().float().cpu().int(), torch.IntStorage) def test_mul_intertype_scalar(self): - x = torch.tensor(1.5, device='cuda') - y = torch.tensor(3, dtype=torch.int32, device='cuda') - - self.assertEqual(x * y, 4.5) - self.assertEqual(y * x, 4.5) - with self.assertRaisesRegex(RuntimeError, 'expected type'): - y *= x - x *= y - self.assertEqual(x, 4.5) - - x = torch.tensor(1.5, device='cuda', dtype=torch.float16) - self.assertEqual(x * y, 4.5) - # half * int currently promotes to double - with self.assertRaisesRegex(RuntimeError, 'expected type'): + def test_mul(dtype): + x = torch.tensor(1.5, dtype=dtype, device='cuda') + y = torch.tensor(3, dtype=torch.int32, device='cuda') + + self.assertEqual(x * y, 4.5) + self.assertEqual(y * x, 4.5) + with self.assertRaisesRegex(RuntimeError, 'expected type'): + y *= x x *= y - with self.assertRaisesRegex(RuntimeError, 'expected type'): - y *= x + self.assertEqual(x, 4.5) + + test_mul(torch.float16) + test_mul(torch.float32) + test_mul(torch.float64) @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected") @skipIfRocm @@ -918,6 +917,28 @@ def test_type_conversions_same_gpu(self): def test_neg(self): TestTorch._test_neg(self, lambda t: t.cuda()) + @unittest.skipIf(not TEST_LARGE_TENSOR, "not enough memory") + def test_arithmetic_large_tensor(self): + x = torch.empty(2**30, device='cuda') + + x.fill_(1) + self.assertEqual(x.sum(), 2**30) + + x += 1 + self.assertEqual(x.sum(), 2**31) + + x.fill_(1) + x -= 
0.5 + self.assertEqual(x.sum(), 2**29) + + x.fill_(1) + x *= 2 + self.assertEqual(x.sum(), 2**31) + + x.fill_(1) + x /= 2 + self.assertEqual(x.sum(), 2**29) + def _test_broadcast(self, input): if not TEST_MULTIGPU: raise unittest.SkipTest("only one GPU detected") diff --git a/test/test_distributions.py b/test/test_distributions.py index 5c710daa3a62b..86a63c6608637 100644 --- a/test/test_distributions.py +++ b/test/test_distributions.py @@ -102,12 +102,12 @@ def is_all_nan(tensor): ]), Example(Beta, [ { - 'concentration1': torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True), - 'concentration0': torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True), + 'concentration1': torch.randn(2, 3).exp().requires_grad_(), + 'concentration0': torch.randn(2, 3).exp().requires_grad_(), }, { - 'concentration1': torch.tensor(torch.exp(torch.randn(4)), requires_grad=True), - 'concentration0': torch.tensor(torch.exp(torch.randn(4)), requires_grad=True), + 'concentration1': torch.randn(4).exp().requires_grad_(), + 'concentration0': torch.randn(4).exp().requires_grad_(), }, ]), Example(Categorical, [ @@ -146,29 +146,29 @@ def is_all_nan(tensor): 'scale': torch.tensor([[1.0], [1.0]])} ]), Example(Chi2, [ - {'df': torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True)}, - {'df': torch.tensor(torch.exp(torch.randn(1)), requires_grad=True)}, + {'df': torch.randn(2, 3).exp().requires_grad_()}, + {'df': torch.randn(1).exp().requires_grad_()}, ]), Example(StudentT, [ - {'df': torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True)}, - {'df': torch.tensor(torch.exp(torch.randn(1)), requires_grad=True)}, + {'df': torch.randn(2, 3).exp().requires_grad_()}, + {'df': torch.randn(1).exp().requires_grad_()}, ]), Example(Dirichlet, [ - {'concentration': torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True)}, - {'concentration': torch.tensor(torch.exp(torch.randn(4)), requires_grad=True)}, + {'concentration': torch.randn(2, 3).exp().requires_grad_()}, + {'concentration': torch.randn(4).exp().requires_grad_()}, ]), Example(Exponential, [ - {'rate': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True)}, - {'rate': torch.tensor(torch.randn(1).abs(), requires_grad=True)}, + {'rate': torch.randn(5, 5).abs().requires_grad_()}, + {'rate': torch.randn(1).abs().requires_grad_()}, ]), Example(FisherSnedecor, [ { - 'df1': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), - 'df2': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), + 'df1': torch.randn(5, 5).abs().requires_grad_(), + 'df2': torch.randn(5, 5).abs().requires_grad_(), }, { - 'df1': torch.tensor(torch.randn(1).abs(), requires_grad=True), - 'df2': torch.tensor(torch.randn(1).abs(), requires_grad=True), + 'df1': torch.randn(1).abs().requires_grad_(), + 'df2': torch.randn(1).abs().requires_grad_(), }, { 'df1': torch.tensor([1.0]), @@ -177,22 +177,22 @@ def is_all_nan(tensor): ]), Example(Gamma, [ { - 'concentration': torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True), - 'rate': torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True), + 'concentration': torch.randn(2, 3).exp().requires_grad_(), + 'rate': torch.randn(2, 3).exp().requires_grad_(), }, { - 'concentration': torch.tensor(torch.exp(torch.randn(1)), requires_grad=True), - 'rate': torch.tensor(torch.exp(torch.randn(1)), requires_grad=True), + 'concentration': torch.randn(1).exp().requires_grad_(), + 'rate': torch.randn(1).exp().requires_grad_(), }, ]), Example(Gumbel, [ { 'loc': torch.randn(5, 5, requires_grad=True), - 'scale': 
torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), + 'scale': torch.randn(5, 5).abs().requires_grad_(), }, { 'loc': torch.randn(1, requires_grad=True), - 'scale': torch.tensor(torch.randn(1).abs(), requires_grad=True), + 'scale': torch.randn(1).abs().requires_grad_(), }, ]), Example(HalfCauchy, [ @@ -200,45 +200,45 @@ def is_all_nan(tensor): {'scale': torch.tensor([[1.0], [1.0]])} ]), Example(HalfNormal, [ - {'scale': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True)}, - {'scale': torch.tensor(torch.randn(1).abs(), requires_grad=True)}, + {'scale': torch.randn(5, 5).abs().requires_grad_()}, + {'scale': torch.randn(1).abs().requires_grad_()}, {'scale': torch.tensor([1e-5, 1e-5], requires_grad=True)} ]), Example(Independent, [ { 'base_distribution': Normal(torch.randn(2, 3, requires_grad=True), - torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)), + torch.randn(2, 3).abs().requires_grad_()), 'reinterpreted_batch_ndims': 0, }, { 'base_distribution': Normal(torch.randn(2, 3, requires_grad=True), - torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)), + torch.randn(2, 3).abs().requires_grad_()), 'reinterpreted_batch_ndims': 1, }, { 'base_distribution': Normal(torch.randn(2, 3, requires_grad=True), - torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)), + torch.randn(2, 3).abs().requires_grad_()), 'reinterpreted_batch_ndims': 2, }, { 'base_distribution': Normal(torch.randn(2, 3, 5, requires_grad=True), - torch.tensor(torch.randn(2, 3, 5).abs(), requires_grad=True)), + torch.randn(2, 3, 5).abs().requires_grad_()), 'reinterpreted_batch_ndims': 2, }, { 'base_distribution': Normal(torch.randn(2, 3, 5, requires_grad=True), - torch.tensor(torch.randn(2, 3, 5).abs(), requires_grad=True)), + torch.randn(2, 3, 5).abs().requires_grad_()), 'reinterpreted_batch_ndims': 3, }, ]), Example(Laplace, [ { 'loc': torch.randn(5, 5, requires_grad=True), - 'scale': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), + 'scale': torch.randn(5, 5).abs().requires_grad_(), }, { 'loc': torch.randn(1, requires_grad=True), - 'scale': torch.tensor(torch.randn(1).abs(), requires_grad=True), + 'scale': torch.randn(1).abs().requires_grad_(), }, { 'loc': torch.tensor([1.0, 0.0], requires_grad=True), @@ -248,11 +248,11 @@ def is_all_nan(tensor): Example(LogNormal, [ { 'loc': torch.randn(5, 5, requires_grad=True), - 'scale': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), + 'scale': torch.randn(5, 5).abs().requires_grad_(), }, { 'loc': torch.randn(1, requires_grad=True), - 'scale': torch.tensor(torch.randn(1).abs(), requires_grad=True), + 'scale': torch.randn(1).abs().requires_grad_(), }, { 'loc': torch.tensor([1.0, 0.0], requires_grad=True), @@ -310,11 +310,11 @@ def is_all_nan(tensor): Example(Normal, [ { 'loc': torch.randn(5, 5, requires_grad=True), - 'scale': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), + 'scale': torch.randn(5, 5).abs().requires_grad_(), }, { 'loc': torch.randn(1, requires_grad=True), - 'scale': torch.tensor(torch.randn(1).abs(), requires_grad=True), + 'scale': torch.randn(1).abs().requires_grad_(), }, { 'loc': torch.tensor([1.0, 0.0], requires_grad=True), @@ -332,8 +332,8 @@ def is_all_nan(tensor): 'alpha': 1.0 }, { - 'scale': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), - 'alpha': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True) + 'scale': torch.randn(5, 5).abs().requires_grad_(), + 'alpha': torch.randn(5, 5).abs().requires_grad_() }, { 'scale': torch.tensor([1.0]), @@ -342,10 +342,10 @@ def is_all_nan(tensor): ]), 
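Aside (illustration only, not part of the patch): the long run of test_distributions edits above and below replaces torch.tensor(<derived tensor>, requires_grad=True), which copies the data into a fresh leaf tensor, with an in-place requires_grad_() call on the derived tensor itself. A minimal sketch of the two idioms:

    import torch

    # Old pattern: torch.tensor(...) copies the data into a new leaf tensor.
    scale_old = torch.tensor(torch.randn(5, 5).abs(), requires_grad=True)

    # New pattern: requires_grad_() flips the flag in place on the tensor it
    # is called on and returns that same tensor, so it chains naturally.
    scale_new = torch.randn(5, 5).abs().requires_grad_()

    assert scale_new.requires_grad and scale_new.is_leaf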
Example(Poisson, [ { - 'rate': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), + 'rate': torch.randn(5, 5).abs().requires_grad_(), }, { - 'rate': torch.tensor(torch.randn(3).abs(), requires_grad=True), + 'rate': torch.randn(3).abs().requires_grad_(), }, { 'rate': 0.2, @@ -382,23 +382,23 @@ def is_all_nan(tensor): Example(TransformedDistribution, [ { 'base_distribution': Normal(torch.randn(2, 3, requires_grad=True), - torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)), + torch.randn(2, 3).abs().requires_grad_()), 'transforms': [], }, { 'base_distribution': Normal(torch.randn(2, 3, requires_grad=True), - torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)), + torch.randn(2, 3).abs().requires_grad_()), 'transforms': ExpTransform(), }, { 'base_distribution': Normal(torch.randn(2, 3, 5, requires_grad=True), - torch.tensor(torch.randn(2, 3, 5).abs(), requires_grad=True)), + torch.randn(2, 3, 5).abs().requires_grad_()), 'transforms': [AffineTransform(torch.randn(3, 5), torch.randn(3, 5)), ExpTransform()], }, { 'base_distribution': Normal(torch.randn(2, 3, 5, requires_grad=True), - torch.tensor(torch.randn(2, 3, 5).abs(), requires_grad=True)), + torch.randn(2, 3, 5).abs().requires_grad_()), 'transforms': AffineTransform(1, 2), }, ]), @@ -418,8 +418,8 @@ def is_all_nan(tensor): ]), Example(Weibull, [ { - 'scale': torch.tensor(torch.randn(5, 5).abs(), requires_grad=True), - 'concentration': torch.tensor(torch.randn(1).abs(), requires_grad=True) + 'scale': torch.randn(5, 5).abs().requires_grad_(), + 'concentration': torch.randn(1).abs().requires_grad_() } ]) ] @@ -922,7 +922,7 @@ def test_geometric_sample(self): 'Geometric(prob={})'.format(prob)) def test_binomial(self): - p = torch.tensor(torch.arange(0.05, 1, 0.1), requires_grad=True) + p = torch.arange(0.05, 1, 0.1).requires_grad_() for total_count in [1, 2, 10]: self._gradcheck_log_prob(lambda p: Binomial(total_count, p), [p]) self._gradcheck_log_prob(lambda p: Binomial(total_count, None, p.log()), [p]) @@ -931,7 +931,7 @@ def test_binomial(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_binomial_log_prob(self): - probs = torch.tensor(torch.arange(0.05, 1, 0.1)) + probs = torch.arange(0.05, 1, 0.1) for total_count in [1, 2, 10]: def ref_log_prob(idx, x, log_prob): @@ -987,7 +987,7 @@ def test_binomial_vectorized_count(self): self.assertEqual(samples.var(dim=0), bin1.variance, prec=0.02) def test_negative_binomial(self): - p = torch.tensor(torch.arange(0.05, 1, 0.1), requires_grad=True) + p = torch.arange(0.05, 1, 0.1).requires_grad_() for total_count in [1, 2, 10]: self._gradcheck_log_prob(lambda p: NegativeBinomial(total_count, p), [p]) self._gradcheck_log_prob(lambda p: NegativeBinomial(total_count, None, p.log()), [p]) @@ -996,7 +996,7 @@ def test_negative_binomial(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_negative_binomial_log_prob(self): - probs = torch.tensor(torch.arange(0.05, 1, 0.1)) + probs = torch.arange(0.05, 1, 0.1) for total_count in [1, 2, 10]: def ref_log_prob(idx, x, log_prob): @@ -1142,8 +1142,8 @@ def test_one_hot_categorical_enumerate_support(self): self._check_enumerate_support(OneHotCategorical, examples) def test_poisson_shape(self): - rate = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) - rate_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) + rate = torch.randn(2, 3).abs().requires_grad_() + rate_1d = torch.randn(1).abs().requires_grad_() self.assertEqual(Poisson(rate).sample().size(), (2, 3)) 
self.assertEqual(Poisson(rate).sample((7,)).size(), (7, 2, 3)) self.assertEqual(Poisson(rate_1d).sample().size(), (1,)) @@ -1152,8 +1152,8 @@ def test_poisson_shape(self): @unittest.skipIf(not TEST_NUMPY, "Numpy not found") def test_poisson_log_prob(self): - rate = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) - rate_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) + rate = torch.randn(2, 3).abs().requires_grad_() + rate_1d = torch.randn(1).abs().requires_grad_() def ref_log_prob(idx, x, log_prob): l = rate.view(-1)[idx].detach() @@ -1286,9 +1286,9 @@ def pmf(self, samples): def test_uniform(self): low = torch.zeros(5, 5, requires_grad=True) - high = torch.tensor(torch.ones(5, 5) * 3, requires_grad=True) + high = (torch.ones(5, 5) * 3).requires_grad_() low_1d = torch.zeros(1, requires_grad=True) - high_1d = torch.tensor(torch.ones(1) * 3, requires_grad=True) + high_1d = (torch.ones(1) * 3).requires_grad_() self.assertEqual(Uniform(low, high).sample().size(), (5, 5)) self.assertEqual(Uniform(low, high).sample((7,)).size(), (7, 5, 5)) self.assertEqual(Uniform(low_1d, high_1d).sample().size(), (1,)) @@ -1373,7 +1373,7 @@ def test_halfcauchy(self): scale.grad.zero_() def test_halfnormal(self): - std = torch.tensor(torch.randn(5, 5).abs(), requires_grad=True) + std = torch.randn(5, 5).abs().requires_grad_() std_1d = torch.randn(1, requires_grad=True) std_delta = torch.tensor([1e-5, 1e-5]) self.assertEqual(HalfNormal(std).sample().size(), (5, 5)) @@ -1399,7 +1399,7 @@ def test_halfnormal(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_halfnormal_logprob(self): - std = torch.tensor(torch.randn(5, 1).abs(), requires_grad=True) + std = torch.randn(5, 1).abs().requires_grad_() def ref_log_prob(idx, x, log_prob): s = std.view(-1)[idx].detach() @@ -1418,9 +1418,9 @@ def test_halfnormal_sample(self): def test_lognormal(self): mean = torch.randn(5, 5, requires_grad=True) - std = torch.tensor(torch.randn(5, 5).abs(), requires_grad=True) + std = torch.randn(5, 5).abs().requires_grad_() mean_1d = torch.randn(1, requires_grad=True) - std_1d = torch.randn(1, requires_grad=True) + std_1d = torch.randn(1).abs().requires_grad_() mean_delta = torch.tensor([1.0, 0.0]) std_delta = torch.tensor([1e-5, 1e-5]) self.assertEqual(LogNormal(mean, std).sample().size(), (5, 5)) @@ -1448,7 +1448,7 @@ def test_lognormal(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_lognormal_logprob(self): mean = torch.randn(5, 1, requires_grad=True) - std = torch.tensor(torch.randn(5, 1).abs(), requires_grad=True) + std = torch.randn(5, 1).abs().requires_grad_() def ref_log_prob(idx, x, log_prob): m = mean.view(-1)[idx].detach() @@ -1534,9 +1534,9 @@ def test_logisticnormal_sample(self): def test_normal(self): loc = torch.randn(5, 5, requires_grad=True) - scale = torch.tensor(torch.randn(5, 5).abs(), requires_grad=True) + scale = torch.randn(5, 5).abs().requires_grad_() loc_1d = torch.randn(1, requires_grad=True) - scale_1d = torch.randn(1, requires_grad=True) + scale_1d = torch.randn(1).abs().requires_grad_() loc_delta = torch.tensor([1.0, 0.0]) scale_delta = torch.tensor([1e-5, 1e-5]) self.assertEqual(Normal(loc, scale).sample().size(), (5, 5)) @@ -1591,11 +1591,11 @@ def test_lowrank_multivariate_normal_shape(self): # construct PSD covariance cov_factor = torch.randn(3, 1, requires_grad=True) - cov_diag = torch.tensor(torch.randn(3).abs(), requires_grad=True) + cov_diag = torch.randn(3).abs().requires_grad_() # construct batch of PSD covariances cov_factor_batched = 
torch.randn(6, 5, 3, 2, requires_grad=True) - cov_diag_batched = torch.tensor(torch.randn(6, 5, 3).abs(), requires_grad=True) + cov_diag_batched = torch.randn(6, 5, 3).abs().requires_grad_() # ensure that sample, batch, event shapes all handled correctly self.assertEqual(LowRankMultivariateNormal(mean, cov_factor, cov_diag) @@ -1635,7 +1635,7 @@ def test_lowrank_multivariate_normal_shape(self): def test_lowrank_multivariate_normal_log_prob(self): mean = torch.randn(3, requires_grad=True) cov_factor = torch.randn(3, 1, requires_grad=True) - cov_diag = torch.tensor(torch.randn(3).abs(), requires_grad=True) + cov_diag = torch.randn(3).abs().requires_grad_() cov = cov_factor.matmul(cov_factor.t()) + cov_diag.diag() # check that logprob values match scipy logpdf, @@ -1651,7 +1651,7 @@ def test_lowrank_multivariate_normal_log_prob(self): # Double-check that batched versions behave the same as unbatched mean = torch.randn(5, 3, requires_grad=True) cov_factor = torch.randn(5, 3, 2, requires_grad=True) - cov_diag = torch.tensor(torch.randn(5, 3).abs(), requires_grad=True) + cov_diag = torch.randn(5, 3).abs().requires_grad_() dist_batched = LowRankMultivariateNormal(mean, cov_factor, cov_diag) dist_unbatched = [LowRankMultivariateNormal(mean[i], cov_factor[i], cov_diag[i]) @@ -1669,7 +1669,7 @@ def test_lowrank_multivariate_normal_sample(self): set_rng_seed(0) # see Note [Randomized statistical tests] mean = torch.randn(5, requires_grad=True) cov_factor = torch.randn(5, 1, requires_grad=True) - cov_diag = torch.tensor(torch.randn(5).abs(), requires_grad=True) + cov_diag = torch.randn(5).abs().requires_grad_() cov = cov_factor.matmul(cov_factor.t()) + cov_diag.diag() self._check_sampler_sampler(LowRankMultivariateNormal(mean, cov_factor, cov_diag), @@ -1680,7 +1680,7 @@ def test_lowrank_multivariate_normal_sample(self): def test_lowrank_multivariate_normal_properties(self): loc = torch.randn(5) cov_factor = torch.randn(5, 2) - cov_diag = torch.tensor(torch.randn(5).abs()) + cov_diag = torch.randn(5).abs() cov = cov_factor.matmul(cov_factor.t()) + cov_diag.diag() m1 = LowRankMultivariateNormal(loc, cov_factor, cov_diag) m2 = MultivariateNormal(loc=loc, covariance_matrix=cov) @@ -1695,7 +1695,7 @@ def test_lowrank_multivariate_normal_moments(self): set_rng_seed(0) # see Note [Randomized statistical tests] mean = torch.randn(5) cov_factor = torch.randn(5, 2) - cov_diag = torch.tensor(torch.randn(5).abs()) + cov_diag = torch.randn(5).abs() d = LowRankMultivariateNormal(mean, cov_factor, cov_diag) samples = d.rsample((100000,)) empirical_mean = samples.mean(0) @@ -1710,13 +1710,13 @@ def test_multivariate_normal_shape(self): # construct PSD covariance tmp = torch.randn(3, 10) - cov = torch.tensor(torch.matmul(tmp, tmp.t()) / tmp.shape[-1], requires_grad=True) - prec = torch.tensor(cov.inverse(), requires_grad=True) - scale_tril = torch.tensor(torch.potrf(cov, upper=False), requires_grad=True) + cov = (torch.matmul(tmp, tmp.t()) / tmp.size(-1)).requires_grad_() + prec = cov.inverse().requires_grad_() + scale_tril = torch.potrf(cov, upper=False).requires_grad_() # construct batch of PSD covariances tmp = torch.randn(6, 5, 3, 10) - cov_batched = torch.tensor((tmp.unsqueeze(-2) * tmp.unsqueeze(-3)).mean(-1), requires_grad=True) + cov_batched = (tmp.unsqueeze(-2) * tmp.unsqueeze(-3)).mean(-1).requires_grad_() prec_batched = [C.inverse() for C in cov_batched.view((-1, 3, 3))] prec_batched = torch.stack(prec_batched).view(cov_batched.shape) scale_tril_batched = [torch.potrf(C, upper=False) for C in 
cov_batched.view((-1, 3, 3))] @@ -1753,9 +1753,9 @@ def test_multivariate_normal_shape(self): def test_multivariate_normal_log_prob(self): mean = torch.randn(3, requires_grad=True) tmp = torch.randn(3, 10) - cov = torch.tensor(torch.matmul(tmp, tmp.t()) / tmp.shape[-1], requires_grad=True) - prec = torch.tensor(cov.inverse(), requires_grad=True) - scale_tril = torch.tensor(torch.potrf(cov, upper=False), requires_grad=True) + cov = (torch.matmul(tmp, tmp.t()) / tmp.size(-1)).requires_grad_() + prec = cov.inverse().requires_grad_() + scale_tril = torch.potrf(cov, upper=False).requires_grad_() # check that logprob values match scipy logpdf, # and that covariance and scale_tril parameters are equivalent @@ -1774,7 +1774,7 @@ def test_multivariate_normal_log_prob(self): # Double-check that batched versions behave the same as unbatched mean = torch.randn(5, 3, requires_grad=True) tmp = torch.randn(5, 3, 10) - cov = torch.tensor((tmp.unsqueeze(-2) * tmp.unsqueeze(-3)).mean(-1), requires_grad=True) + cov = (tmp.unsqueeze(-2) * tmp.unsqueeze(-3)).mean(-1).requires_grad_() dist_batched = MultivariateNormal(mean, cov) dist_unbatched = [MultivariateNormal(mean[i], cov[i]) for i in range(mean.size(0))] @@ -1791,9 +1791,9 @@ def test_multivariate_normal_sample(self): set_rng_seed(0) # see Note [Randomized statistical tests] mean = torch.randn(3, requires_grad=True) tmp = torch.randn(3, 10) - cov = torch.tensor(torch.matmul(tmp, tmp.t()) / tmp.shape[-1], requires_grad=True) - prec = torch.tensor(cov.inverse(), requires_grad=True) - scale_tril = torch.tensor(torch.potrf(cov, upper=False), requires_grad=True) + cov = (torch.matmul(tmp, tmp.t()) / tmp.size(-1)).requires_grad_() + prec = cov.inverse().requires_grad_() + scale_tril = torch.potrf(cov, upper=False).requires_grad_() self._check_sampler_sampler(MultivariateNormal(mean, cov), scipy.stats.multivariate_normal(mean.detach().numpy(), cov.detach().numpy()), @@ -1828,8 +1828,8 @@ def test_multivariate_normal_moments(self): self.assertEqual(d.variance, empirical_var, prec=0.05) def test_exponential(self): - rate = torch.tensor(torch.randn(5, 5).abs(), requires_grad=True) - rate_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) + rate = torch.randn(5, 5).abs().requires_grad_() + rate_1d = torch.randn(1).abs().requires_grad_() self.assertEqual(Exponential(rate).sample().size(), (5, 5)) self.assertEqual(Exponential(rate).sample((7,)).size(), (7, 5, 5)) self.assertEqual(Exponential(rate_1d).sample((1,)).size(), (1, 1)) @@ -1864,7 +1864,7 @@ def test_exponential_sample(self): def test_laplace(self): loc = torch.randn(5, 5, requires_grad=True) - scale = torch.tensor(torch.randn(5, 5).abs(), requires_grad=True) + scale = torch.randn(5, 5).abs().requires_grad_() loc_1d = torch.randn(1, requires_grad=True) scale_1d = torch.randn(1, requires_grad=True) loc_delta = torch.tensor([1.0, 0.0]) @@ -1915,10 +1915,10 @@ def test_laplace_sample(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_gamma_shape(self): - alpha = torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True) - beta = torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True) - alpha_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True) - beta_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True) + alpha = torch.randn(2, 3).exp().requires_grad_() + beta = torch.randn(2, 3).exp().requires_grad_() + alpha_1d = torch.randn(1).exp().requires_grad_() + beta_1d = torch.randn(1).exp().requires_grad_() self.assertEqual(Gamma(alpha, 
beta).sample().size(), (2, 3)) self.assertEqual(Gamma(alpha, beta).sample((5,)).size(), (5, 2, 3)) self.assertEqual(Gamma(alpha_1d, beta_1d).sample((1,)).size(), (1, 1)) @@ -1937,10 +1937,10 @@ def ref_log_prob(idx, x, log_prob): @unittest.skipIf(not TEST_CUDA, "CUDA not found") @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_gamma_gpu_shape(self): - alpha = torch.tensor(torch.exp(torch.randn(2, 3).cuda()), requires_grad=True) - beta = torch.tensor(torch.exp(torch.randn(2, 3).cuda()), requires_grad=True) - alpha_1d = torch.tensor(torch.exp(torch.randn(1).cuda()), requires_grad=True) - beta_1d = torch.tensor(torch.exp(torch.randn(1).cuda()), requires_grad=True) + alpha = torch.randn(2, 3).cuda().exp().requires_grad_() + beta = torch.randn(2, 3).cuda().exp().requires_grad_() + alpha_1d = torch.randn(1).cuda().exp().requires_grad_() + beta_1d = torch.randn(1).cuda().exp().requires_grad_() self.assertEqual(Gamma(alpha, beta).sample().size(), (2, 3)) self.assertEqual(Gamma(alpha, beta).sample((5,)).size(), (5, 2, 3)) self.assertEqual(Gamma(alpha_1d, beta_1d).sample((1,)).size(), (1, 1)) @@ -1978,10 +1978,10 @@ def test_gamma_gpu_sample(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_pareto(self): - scale = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) - alpha = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) - scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) - alpha_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) + scale = torch.randn(2, 3).abs().requires_grad_() + alpha = torch.randn(2, 3).abs().requires_grad_() + scale_1d = torch.randn(1).abs().requires_grad_() + alpha_1d = torch.randn(1).abs().requires_grad_() self.assertEqual(Pareto(scale_1d, 0.5).mean, inf, allow_inf=True) self.assertEqual(Pareto(scale_1d, 0.5).variance, inf, allow_inf=True) self.assertEqual(Pareto(scale, alpha).sample().size(), (2, 3)) @@ -2010,9 +2010,9 @@ def test_pareto_sample(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_gumbel(self): loc = torch.randn(2, 3, requires_grad=True) - scale = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) + scale = torch.randn(2, 3).abs().requires_grad_() loc_1d = torch.randn(1, requires_grad=True) - scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) + scale_1d = torch.randn(1).abs().requires_grad_() self.assertEqual(Gumbel(loc, scale).sample().size(), (2, 3)) self.assertEqual(Gumbel(loc, scale).sample((5,)).size(), (5, 2, 3)) self.assertEqual(Gumbel(loc_1d, scale_1d).sample().size(), (1,)) @@ -2038,8 +2038,8 @@ def test_gumbel_sample(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_fishersnedecor(self): - df1 = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) - df2 = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) + df1 = torch.randn(2, 3).abs().requires_grad_() + df2 = torch.randn(2, 3).abs().requires_grad_() df1_1d = torch.randn(1).abs() df2_1d = torch.randn(1).abs() self.assertTrue(is_all_nan(FisherSnedecor(1, 2).mean)) @@ -2069,8 +2069,8 @@ def test_fishersnedecor_sample(self): @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_chi2_shape(self): - df = torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True) - df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True) + df = torch.randn(2, 3).exp().requires_grad_() + df_1d = torch.randn(1).exp().requires_grad_() self.assertEqual(Chi2(df).sample().size(), (2, 3)) self.assertEqual(Chi2(df).sample((5,)).size(), (5, 2, 3)) 
self.assertEqual(Chi2(df_1d).sample((1,)).size(), (1, 1)) @@ -2096,8 +2096,8 @@ def test_chi2_sample(self): @unittest.skipIf(not TEST_NUMPY, "Numpy not found") def test_studentT(self): - df = torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True) - df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True) + df = torch.randn(2, 3).exp().requires_grad_() + df_1d = torch.randn(1).exp().requires_grad_() self.assertTrue(is_all_nan(StudentT(1).mean)) self.assertTrue(is_all_nan(StudentT(1).variance)) self.assertEqual(StudentT(2).variance, inf, allow_inf=True) @@ -2137,8 +2137,8 @@ def test_studentT_log_prob(self): self.assertAlmostEqual(float(actual_log_prob[i]), float(expected_log_prob), places=3) def test_dirichlet_shape(self): - alpha = torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True) - alpha_1d = torch.tensor(torch.exp(torch.randn(4)), requires_grad=True) + alpha = torch.randn(2, 3).exp().requires_grad_() + alpha_1d = torch.randn(4).exp().requires_grad_() self.assertEqual(Dirichlet(alpha).sample().size(), (2, 3)) self.assertEqual(Dirichlet(alpha).sample((5,)).size(), (5, 2, 3)) self.assertEqual(Dirichlet(alpha_1d).sample().size(), (4,)) @@ -2165,10 +2165,10 @@ def test_dirichlet_sample(self): multivariate=True) def test_beta_shape(self): - con1 = torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True) - con0 = torch.tensor(torch.exp(torch.randn(2, 3)), requires_grad=True) - con1_1d = torch.tensor(torch.exp(torch.randn(4)), requires_grad=True) - con0_1d = torch.tensor(torch.exp(torch.randn(4)), requires_grad=True) + con1 = torch.randn(2, 3).exp().requires_grad_() + con0 = torch.randn(2, 3).exp().requires_grad_() + con1_1d = torch.randn(4).exp().requires_grad_() + con0_1d = torch.randn(4).exp().requires_grad_() self.assertEqual(Beta(con1, con0).sample().size(), (2, 3)) self.assertEqual(Beta(con1, con0).sample((5,)).size(), (5, 2, 3)) self.assertEqual(Beta(con1_1d, con0_1d).sample().size(), (4,)) @@ -2269,7 +2269,7 @@ def test_cdf_log_prob(self): for Dist, params in EXAMPLES: for i, param in enumerate(params): dist = Dist(**param) - samples = torch.tensor(dist.sample()) + samples = dist.sample() if samples.dtype.is_floating_point: samples.requires_grad_() try: @@ -3827,7 +3827,7 @@ def test_equality(self): def test_forward_inverse_cache(self): for transform in self.transforms: - x = torch.tensor(self._generate_data(transform), requires_grad=True) + x = self._generate_data(transform).requires_grad_() try: y = transform(x) except NotImplementedError: @@ -3854,7 +3854,7 @@ def test_forward_inverse_cache(self): def test_forward_inverse_no_cache(self): for transform in self.transforms: - x = torch.tensor(self._generate_data(transform), requires_grad=True) + x = self._generate_data(transform).requires_grad_() try: y = transform(x) x2 = transform.inv(y.clone()) # bypass cache @@ -3883,7 +3883,7 @@ def test_univariate_forward_jacobian(self): for transform in self.transforms: if transform.event_dim > 0: continue - x = torch.tensor(self._generate_data(transform), requires_grad=True) + x = self._generate_data(transform).requires_grad_() try: y = transform(x) actual = transform.log_abs_det_jacobian(x, y) @@ -3900,7 +3900,7 @@ def test_univariate_inverse_jacobian(self): for transform in self.transforms: if transform.event_dim > 0: continue - y = torch.tensor(self._generate_data(transform.inv), requires_grad=True) + y = self._generate_data(transform.inv).requires_grad_() try: x = transform.inv(y) actual = transform.log_abs_det_jacobian(x, y) @@ -3980,7 +3980,7 @@ def 
test_transformed_distribution_shapes(self): def test_jit_fwd(self): for transform in self.unique_transforms: - x = torch.tensor(self._generate_data(transform), requires_grad=True) + x = self._generate_data(transform).requires_grad_() def f(x): return transform(x) @@ -3991,12 +3991,12 @@ def f(x): continue # check on different inputs - x = torch.tensor(self._generate_data(transform), requires_grad=True) + x = self._generate_data(transform).requires_grad_() self.assertEqual(f(x), traced_f(x)) def test_jit_inv(self): for transform in self.unique_transforms: - y = torch.tensor(self._generate_data(transform.inv), requires_grad=True) + y = self._generate_data(transform.inv).requires_grad_() def f(y): return transform.inv(y) @@ -4007,12 +4007,12 @@ def f(y): continue # check on different inputs - y = torch.tensor(self._generate_data(transform.inv), requires_grad=True) + y = self._generate_data(transform.inv).requires_grad_() self.assertEqual(f(y), traced_f(y)) def test_jit_jacobian(self): for transform in self.unique_transforms: - x = torch.tensor(self._generate_data(transform), requires_grad=True) + x = self._generate_data(transform).requires_grad_() def f(x): y = transform(x) @@ -4024,7 +4024,7 @@ def f(x): continue # check on different inputs - x = torch.tensor(self._generate_data(transform), requires_grad=True) + x = self._generate_data(transform).requires_grad_() self.assertEqual(f(x), traced_f(x)) diff --git a/test/test_jit.py b/test/test_jit.py index 35597768fa033..e4281e5a795d3 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -23,7 +23,8 @@ import tempfile import shutil import warnings -from test_autograd import method_tests, create_input, unpack_variables, \ +from test_autograd import method_tests as autograd_method_tests +from test_autograd import create_input, unpack_variables, \ exclude_tensor_method, non_differentiable, EXCLUDE_GRADCHECK, EXCLUDE_FUNCTIONAL from copy import deepcopy import random @@ -531,6 +532,21 @@ def forward(self, input): input = torch.rand(3, 4) self.assertEqual(2 * input + 1, m(input)) + def test_diff_subgraph_clones_constants(self): + @torch.jit.script + def f(x, y): + return x + x + y + x + y + x + y + x + y + x + + def count_constants(graph): + return sum(node.kind() == 'prim::Constant' for node in graph.nodes()) + + graph = f.graph.copy() + self.run_pass('cse', graph) + self.run_pass('create_autodiff_subgraphs', graph) + nodes = list(graph.nodes()) + self.assertEqual(count_constants(graph), 1) + self.assertEqual(count_constants(nodes[1].g('Subgraph')), 1) + # Backwards tracing was broken for indexing by a constant, # because it's internally implemented using as_strided, # and we attempted to trace its derivative (which is not @@ -1208,13 +1224,18 @@ def run(**kwargs): def fn(x): return x + torch.ones(2, 3, **kwargs) - input = torch.ones(2, 3, **kwargs) + + input_kwargs = kwargs.copy() + if 'out' in input_kwargs: + del input_kwargs['out'] + input = torch.ones(2, 3, **input_kwargs) self.checkTrace(fn, (input,), inputs_require_grads=inputs_require_grads) # check we recorded 'ones' and did not just record a constant tfn = torch.jit.trace(fn, input) self.assertTrue("ones" in str(tfn.graph)) run() run(dtype=torch.int, inputs_require_grads=False) + run(out=torch.tensor([])) if RUN_CUDA: run(device="cuda:0") if RUN_CUDA_MULTI_GPU: @@ -3501,7 +3522,7 @@ def test_fuser_multiple_blocks(this, that, theother, meme): @enable_cpu_fuser def test_scalar_fusion(self): def fn(x, y): - return x + y.type_as(x) + return 2 * x + y x = torch.tensor(0.1, dtype=torch.float, 
device='cpu') y = torch.tensor(1, dtype=torch.float, device='cpu') @@ -7626,6 +7647,18 @@ def forward(self, x, y): EXCLUDE_TRACED = { 'test_split_dim', 'test_split_dim_neg0', + + # The following fail due to #12024. + # A prim::ListConstruct is involved and the indices get traced as DynamicType, + # which always require_grad. This causes a crash in autodiff. + 'test___getitem___adv_index', + 'test___getitem___adv_index_beg', + 'test___getitem___adv_index_comb', + 'test___getitem___adv_index_dup', + 'test___getitem___adv_index_sub', + 'test___getitem___adv_index_sub_2', + 'test___getitem___adv_index_sub_3', + 'test___getitem___adv_index_var', } EXCLUDE_TYPE_CHECK = { @@ -7736,11 +7769,17 @@ def new_fn(*tensors_): # create a trace function from input fn -def create_traced_fn(self, fn): +# +# disable_autodiff_subgraph_inlining: +# Don't inline autodiff subgraphs so we can test autodiff +def create_traced_fn(self, fn, + disable_autodiff_subgraph_inlining=False): def traced_fn(*inputs, **kwargs): fn_tensors, inputs_tensors = partial_apply_nontensors(fn, inputs, **kwargs) traced = torch.jit.trace(fn_tensors, inputs_tensors) self.assertExportImport(traced.graph, inputs_tensors) + if disable_autodiff_subgraph_inlining: + traced.debug_disable_autodiff_subgraph_inlining() output = traced(*inputs_tensors) traced_fn.last_graph = traced.graph_for(*inputs_tensors) return output @@ -7761,7 +7800,8 @@ def get_constant(x): # create a script function from (name, func_type, output_process_fn), # returns a function takes in (args, kwargs) and runs the compiled function and # then applies the post process fn to the outputs -def create_script_fn(self, method_name, func_type, output_process_fn): +def create_script_fn(self, method_name, func_type, output_process_fn, + disable_autodiff_subgraph_inlining=False): def script_fn(*args, **kwargs): formals = [] tensors = [] @@ -7792,6 +7832,8 @@ def script_fn(*args, **kwargs): import math CU = torch.jit.CompilationUnit(script) + if disable_autodiff_subgraph_inlining: + CU.the_method.debug_disable_autodiff_subgraph_inlining() self.assertExportImport(CU.the_method.graph, tensors) output = output_process_fn(CU.the_method(*tensors)) script_fn.last_graph = CU.the_method.graph_for(*tensors) @@ -8129,7 +8171,7 @@ def func(x): ]) -def add_test( +def add_autograd_test( name, self_size, args, @@ -8172,14 +8214,20 @@ def fn(*inputs, **kwargs): check_types = test_name not in EXCLUDE_TYPE_CHECK if not is_inplace and name not in EXCLUDE_GRADCHECK and not exclude_tensor_method(name, test_name): + # Test with disable_autodiff_subgraph_inlining, which forces the graph + # to contain DifferentiableGraph nodes whenever possible. 
This allows us + # to test autodiff; we assume that autograd is correct and use autodiff for backprop if test_name not in EXCLUDE_TRACED: - check_against_reference(self, create_traced_fn(self, fn), + check_against_reference(self, + create_traced_fn(self, fn, + disable_autodiff_subgraph_inlining=True), fn, (self_variable,) + args_variable, kwargs_variable, check_types=check_types) if not is_magic_method and test_name not in EXCLUDE_SCRIPT: check_against_reference(self, - create_script_fn(self, name, 'method', output_process_fn), + create_script_fn(self, name, 'method', output_process_fn, + disable_autodiff_subgraph_inlining=True), fn, (self_variable,) + args_variable, kwargs_variable, check_types=check_types) @@ -8193,12 +8241,15 @@ def fn(*inputs, **kwargs): f_args_tensor = (self_tensor,) + args_tensor if not is_inplace and test_name not in EXCLUDE_TRACED: - check_against_reference(self, create_traced_fn(self, fn), fn, - f_args_variable, kwargs_variable, check_types=check_types) + check_against_reference(self, + create_traced_fn(self, fn, + disable_autodiff_subgraph_inlining=True), + fn, f_args_variable, kwargs_variable, check_types=check_types) if not is_inplace and test_name not in EXCLUDE_SCRIPT: check_against_reference(self, - create_script_fn(self, name, 'functional', output_process_fn), + create_script_fn(self, name, 'functional', output_process_fn, + disable_autodiff_subgraph_inlining=True), fn, f_args_variable, kwargs_variable, check_types=check_types) @@ -8253,8 +8304,8 @@ def post_add_test(test_name, skipTestIf, do_test): setattr(TestJitGenerated, test_name, do_test) -for test in method_tests: - add_test(*test) +for test in autograd_method_tests: + add_autograd_test(*test) for test in nn_functional_tests: add_nn_test(*test) diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index d09a07a7b550c..9f65e7bd366b9 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -144,7 +144,7 @@ jit::tracer::ensureUnique("${name}", ${mutable_input}); """) -ADD_TRACE_INPUT = CodeTemplate("""jit::tracer::addInputs(node, "${input}", ${input});""") +ADD_TRACE_INPUT = CodeTemplate("""jit::tracer::addInputs(node, "${name}", ${input});""") POST_RECORD_TRACE = CodeTemplate("""\ if (tracer_state) { @@ -154,6 +154,18 @@ """) +FACTORY_FUNCTION_NAMES = None + + +def find_factory_functions(declarations): + global FACTORY_FUNCTION_NAMES + FACTORY_FUNCTION_NAMES = set() + + for declaration in declarations: + if any(arg['simple_type'] == 'TensorOptions' for arg in declaration['arguments']): + FACTORY_FUNCTION_NAMES.add(declaration['api_name']) + + def should_trace(declaration): # Operations involving Storage or Type are not traceable at the moment if any(arg['simple_type'] in {'Storage', 'Type'} for arg in declaration['arguments']): @@ -185,17 +197,30 @@ def record_trace_outputs(declaration): def format_trace(declaration): local = {} + local['trace_name'] = trace_name = uninplace_api_name(declaration['api_name']) + + # *_out functions take the result as a first argument, but since we're + # going to de-inplace the call, we need to remove it from the argument list + trace_inputs = declaration['arguments'] + if declaration['name'].endswith('_out'): + trace_inputs = trace_inputs[1:] + trace_input_spec = [(i['name'], i['name']) for i in trace_inputs] + + # factories are a bit special because their out-of-place overloads + # take an extra TensorOptions argument, which is missing in the _out function + has_factory_name = trace_name in 
FACTORY_FUNCTION_NAMES + is_out_overload = any(arg['name'] == 'result' for arg in declaration['arguments']) + if has_factory_name and is_out_overload: + trace_input_spec.append(('result', 'result.options()')) + + local['add_trace_inputs'] = \ + '\n'.join(ADD_TRACE_INPUT.substitute(name=name, input=value) for name, value in trace_input_spec) - add_trace_inputs = [] - for argument in declaration['arguments']: - add_trace_inputs.append(ADD_TRACE_INPUT.substitute(input=argument['name'])) - local['add_trace_inputs'] = '\n'.join(add_trace_inputs) - local['inplace_guard'] = '' # Record inplace operations as out-of-place operations (e.g., # not add_ but add) # TODO: Add a proper concept of side effects to the IR, and # properly record inplace operations. - local['trace_name'] = uninplace_api_name(declaration['api_name']) + local['inplace_guard'] = '' if local['trace_name'] != declaration['api_name']: local['inplace_guard'] = INPLACE_GUARD.substitute(name=declaration['api_name'], mutable_input=declaration['arguments'][0]['name']) @@ -214,6 +239,7 @@ def gen_variable_type(out, aten_declarations, template_path): implementation of each function dispatches to the base tensor type to compute the output. The grad_fn is attached to differentiable functions. """ + find_factory_functions(aten_declarations) VARIABLE_TYPE_H = CodeTemplate.from_file(template_path + '/VariableType.h') VARIABLE_TYPE_CPP = CodeTemplate.from_file(template_path + '/VariableType.cpp') diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp index 36cb420fb1be9..f30701a406517 100644 --- a/tools/autograd/templates/Functions.cpp +++ b/tools/autograd/templates/Functions.cpp @@ -219,7 +219,7 @@ Tensor prod_backward(Tensor grad, const Tensor& input, Tensor result, int64_t di Tensor zero_mask = (input == 0); Tensor slice_zero_count = zero_mask.sum(dim, true); - int64_t total_zeros = slice_zero_count.sum().toCLong(); + int64_t total_zeros = slice_zero_count.sum().item(); if (total_zeros == 0) { return (grad * result) / input; } else { @@ -321,7 +321,7 @@ Tensor cumprod_backward(const Tensor &grad, const Tensor &input, int64_t dim) { } // Simple case with nonzero elements in the input - if ((input != 0).all().toCByte()) { + if ((input != 0).all().item()) { Tensor result = at::cumprod(input, dim); return sum_scan_exclusive(result * grad, dim) / input; } @@ -1600,7 +1600,7 @@ Tensor symeig_backward(const std::vector &grads, cons // Invertible case is derived from Jacobi's formula, and also can be found at: // http://eprints.maths.ox.ac.uk/1079/1/NA-08-01.pdf Tensor det_backward(const Tensor & grad, const Tensor& self, const Tensor& det) { - auto det_val = det.toCDouble(); + auto det_val = det.item(); if (det_val != 0 /* invertible */) { return grad * det * self.inverse().t(); } else /* otherwise det = \prod(sigma) = 0, use svd */ { @@ -1612,7 +1612,7 @@ Tensor det_backward(const Tensor & grad, const Tensor& self, const Tensor& det) } Tensor logdet_backward(const Tensor & grad, const Tensor& self, const Tensor& logdet) { - auto logdet_val = logdet.toCDouble(); + auto logdet_val = logdet.item(); if (logdet_val != -INFINITY /* det != 0, invertible */) { return grad * self.inverse().t(); } else /* otherwise det = \prod(sigma) = 0, use svd */ { @@ -1628,7 +1628,7 @@ Tensor slogdet_backward(const std::vector &grads, const Tensor& self, const Tensor& signdet, const Tensor& logabsdet) { AT_ASSERTM(!grads[0].defined(), "slogdet's sign output should never have gradient"); - auto signdet_val = signdet.toCDouble(); + 
auto signdet_val = signdet.item(); if (signdet_val != 0 /* det != 0, invertible */) { return grads[1] * self.inverse().t(); } else /* otherwise det = \prod(sigma) = 0, use svd */ { diff --git a/tools/autograd/templates/VariableType.cpp b/tools/autograd/templates/VariableType.cpp index d697ec8a77420..24ac92dd63926 100644 --- a/tools/autograd/templates/VariableType.cpp +++ b/tools/autograd/templates/VariableType.cpp @@ -177,11 +177,11 @@ bool VariableType::isVariableType(const at::Type& type) { return type.is_variable(); } -at::Type* VariableType::getVariableTypeFromBaseType(const at::Type& baseType) { +at::TypeExtendedInterface* VariableType::getVariableTypeFromBaseType(const at::Type& baseType) { auto id = static_cast(baseType.ID()); if(id >= type_to_variable_type.size()) return nullptr; - return type_to_variable_type[id].get(); + return static_cast(type_to_variable_type[id].get()); } namespace { diff --git a/tools/autograd/templates/VariableType.h b/tools/autograd/templates/VariableType.h index 446fb5b889f47..045279d4cce64 100644 --- a/tools/autograd/templates/VariableType.h +++ b/tools/autograd/templates/VariableType.h @@ -53,7 +53,7 @@ struct TORCH_API VariableType final : public at::TypeDefault { Storage unsafeStorageFromTH(void * th_pointer, bool retain) const override; at::Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const override; - static at::Type* getVariableTypeFromBaseType(const at::Type& baseType); + static at::TypeExtendedInterface* getVariableTypeFromBaseType(const at::Type& baseType); static bool isVariableType(const at::Type& type); static std::vector allCUDATypes(); static std::vector allCPUTypes(); diff --git a/tools/autograd/templates/python_variable_methods.cpp b/tools/autograd/templates/python_variable_methods.cpp index d92ad3dbf7688..c10de2c19f6f7 100644 --- a/tools/autograd/templates/python_variable_methods.cpp +++ b/tools/autograd/templates/python_variable_methods.cpp @@ -156,7 +156,7 @@ static double dispatch_to_CDouble(const Tensor & self) { if (self.numel() != 1) { throw ValueError("only one element tensors can be converted to Python scalars"); } - return self.toCDouble(); + return self.item(); } static std::complex dispatch_to_CComplexDouble(const Tensor & self) { @@ -165,7 +165,7 @@ static std::complex dispatch_to_CComplexDouble(const Tensor & self) { if (self.numel() != 1) { throw ValueError("only one element tensors can be converted to Python scalars"); } - return self.toCComplexDouble(); + return self.item>(); } static int64_t dispatch_to_CLong(const Tensor & self) { @@ -174,7 +174,7 @@ static int64_t dispatch_to_CLong(const Tensor & self) { if (self.numel() != 1) { throw ValueError("only one element tensors can be converted to Python scalars"); } - return self.toCLong(); + return self.item(); } static PyObject * THPVariable_float_scalar(PyObject* self, PyObject* args) { @@ -190,7 +190,7 @@ static PyObject * THPVariable_integral_scalar(PyObject* self, PyObject* args) { jit::tracer::warn("Converting a tensor to a Python integer", jit::tracer::WARN_PYTHON_DATAFLOW); auto& self_ = reinterpret_cast(self)->cdata; if (isFloatingType(self_.type().scalarType())) { - // we can't dispatch to toCLong here because we want to avoid ATen overflow checks; + // we can't dispatch to item here because we want to avoid ATen overflow checks; // the python integral type (long in python2) can't overflow. 
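The `toCLong()` / `toCDouble()` / `toCByte()` accessors being retired throughout this patch all map onto the templated `Tensor::item<T>()`, which mirrors the Python-side `Tensor.item()`: it extracts the value of a one-element tensor and refuses anything larger. A quick Python illustration of those semantics (illustrative only, not part of the patch):

    import torch

    total = torch.tensor([[1, 2], [3, 4]]).sum()   # 0-dim tensor holding 10
    assert total.numel() == 1
    print(total.item())                            # -> 10, as a plain Python int

    # item() rejects tensors with more than one element, matching the
    # "only one element tensors can be converted to Python scalars" guard above.
    try:
        torch.arange(3).item()
    except (ValueError, RuntimeError):
        pass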
return THPUtils_packDoubleAsInt(dispatch_to_CDouble(self_)); } else { diff --git a/tools/build_pytorch_libs.bat b/tools/build_pytorch_libs.bat index 123ba5f303e09..c924b593efc23 100755 --- a/tools/build_pytorch_libs.bat +++ b/tools/build_pytorch_libs.bat @@ -172,6 +172,7 @@ goto:eof cd build cmake .. %CMAKE_GENERATOR_COMMAND% ^ -DCMAKE_BUILD_TYPE=%BUILD_TYPE% ^ + -DTORCH_BUILD_VERSION="%PYTORCH_BUILD_VERSION%" ^ -DBUILD_TORCH="%BUILD_TORCH%" ^ -DNVTOOLEXT_HOME="%NVTOOLEXT_HOME%" ^ -DNO_API=ON ^ diff --git a/tools/build_pytorch_libs.sh b/tools/build_pytorch_libs.sh index 01cb82f49c596..184c60b7c444f 100755 --- a/tools/build_pytorch_libs.sh +++ b/tools/build_pytorch_libs.sh @@ -273,6 +273,7 @@ function build_caffe2() { -DCMAKE_INSTALL_MESSAGE="LAZY" \ -DPYTHON_EXECUTABLE=$PYTORCH_PYTHON \ -DBUILDING_WITH_TORCH_LIBS=ON \ + -DTORCH_BUILD_VERSION="$PYTORCH_BUILD_VERSION" \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DBUILD_TORCH=$BUILD_TORCH \ -DBUILD_PYTHON=$BUILD_PYTHON \ diff --git a/tools/jit/gen_jit_dispatch.py b/tools/jit/gen_jit_dispatch.py index b7326e526baa8..f6fdc7505d996 100644 --- a/tools/jit/gen_jit_dispatch.py +++ b/tools/jit/gen_jit_dispatch.py @@ -351,7 +351,7 @@ def main(): help='path to Declarations.yaml') parser.add_argument('out', metavar='OUT', help='path to output directory') - parser.add_argument('template-path', metavar='TEMPLATE_PATH', + parser.add_argument('template_path', metavar='TEMPLATE_PATH', help='path to templates directory') args = parser.parse_args() gen_jit_dispatch(args.declarations, args.out, args.template_path) diff --git a/tools/setup_helpers/nccl.py b/tools/setup_helpers/nccl.py index 703446520870a..c1cc88657ebf6 100644 --- a/tools/setup_helpers/nccl.py +++ b/tools/setup_helpers/nccl.py @@ -33,9 +33,11 @@ os.path.join(ENV_ROOT, 'lib64') if ENV_ROOT is not None else None, os.path.join(CUDA_HOME, 'lib'), os.path.join(CUDA_HOME, 'lib64'), + '/usr/local/lib', '/usr/lib/x86_64-linux-gnu/', '/usr/lib/powerpc64le-linux-gnu/', '/usr/lib/aarch64-linux-gnu/', + '/usr/lib', ] + gather_paths([ 'LIBRARY_PATH', ]) + gather_paths([ @@ -45,7 +47,9 @@ INCLUDE_DIR, ENV_ROOT, os.path.join(ENV_ROOT, 'include') if ENV_ROOT is not None else None, - '/usr/include' + os.path.join(CUDA_HOME, 'include'), + '/usr/local/include', + '/usr/include', ])) if IS_CONDA: diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index be13aaa61b97b..ce337e93c8546 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -12,12 +12,6 @@ endif() option(BUILD_TEST "Build torch test binaries" ON) option(TORCH_STATIC "Build libtorch.a rather than libtorch.so" OFF) -# TODO: Unify with version from setup.py -set(TORCH_VERSION_MAJOR 0) -set(TORCH_VERSION_MINOR 4) -set(TORCH_VERSION_PATCH 1) -set(TORCH_VERSION "${TORCH_VERSION_MAJOR}.${TORCH_VERSION_MINOR}.${TORCH_VERSION_PATCH}") - set(TORCH_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(TORCH_ROOT "${TORCH_SRC_DIR}/..") @@ -411,7 +405,7 @@ endif() install(DIRECTORY "${TORCH_SRC_DIR}/csrc" DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch FILES_MATCHING PATTERN "*.h") -install(FILES "${TORCH_SRC_DIR}/script.h" +install(FILES "${TORCH_SRC_DIR}/script.h" "${TORCH_SRC_DIR}/extension.h" DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch) install(TARGETS torch diff --git a/torch/csrc/api/include/torch/nn/modules/dropout.h b/torch/csrc/api/include/torch/nn/modules/dropout.h index bfe230a597215..48b89642864ed 100644 --- a/torch/csrc/api/include/torch/nn/modules/dropout.h +++ b/torch/csrc/api/include/torch/nn/modules/dropout.h @@ -12,20 +12,18 @@ namespace nn { /// 
Options for `Dropout` and `FeatureDropout`. struct DropoutOptions { - DropoutOptions(double rate); + /* implicit */ DropoutOptions(double rate = 0.5); /// The probability with which a particular component of the input is set to /// zero. /// Changes to this parameter at runtime are effective. - TORCH_ARG(double, rate) = 0.5; + TORCH_ARG(double, rate); }; namespace detail { template class DropoutImplBase : public torch::nn::Cloneable { public: - explicit DropoutImplBase(double rate) - : DropoutImplBase(DropoutOptions(rate)) {} - explicit DropoutImplBase(DropoutOptions options_); + explicit DropoutImplBase(DropoutOptions options_ = DropoutOptions()); void reset() override; diff --git a/torch/csrc/api/include/torch/nn/modules/sequential.h b/torch/csrc/api/include/torch/nn/modules/sequential.h index e4839ac41a910..3ee80042020b1 100644 --- a/torch/csrc/api/include/torch/nn/modules/sequential.h +++ b/torch/csrc/api/include/torch/nn/modules/sequential.h @@ -92,6 +92,8 @@ class SequentialImpl : public Cloneable { using Iterator = std::vector::iterator; using ConstIterator = std::vector::const_iterator; + SequentialImpl() = default; + /// Constructs the `Sequential` from a variadic list of modules. template explicit SequentialImpl(Modules&&... modules) { diff --git a/torch/csrc/api/include/torch/nn/pimpl-inl.h b/torch/csrc/api/include/torch/nn/pimpl-inl.h new file mode 100644 index 0000000000000..9da1c38a8372d --- /dev/null +++ b/torch/csrc/api/include/torch/nn/pimpl-inl.h @@ -0,0 +1,47 @@ +// This class exists only to do SFINAE on abstract types `T` that are really +// `ModuleHolder`, because there's no good way to say that `T` is a +// `ModuleHolder` over some unknown type `ModuleType`. With this, you can do +// `enable_if_t>`. +struct ModuleHolderIndicator {}; + +// A type trait that is true for types that are `ModuleHolder`s. +template +using is_module_holder = std::is_base_of>; + +template +using disable_if_module_holder_t = disable_if_t::value>; + +// A collection of templates that answer the question whether a type `T` is a +// `ModuleHolder`, and if so whether its contained type is of type `C`. This is +// tricky because it is hard to short circuit in template metaprogramming. A +// naive and incorrect solution to this problem would be something like +// `disable_if::value && typename T::ContainedType == C>`. +// This would disable all types that are not `ModuleHolder`s, because even +// though the `is_module_holder::value` may be `false` for such types the +// `T::ContainedType` access would be ill-formed and thus fail the whole +// expression by the rules of SFINAE. Instead we have to use template +// specialization to statically branch on the first condition +// (`is_module_holder`) and are only then allowed to query +// `T::ContainedType` in the branch for which the condition was true. + +// Base template. +template +struct is_module_holder_of_impl; + +// False branch. `T` is not a `ModuleHolder` and thus not a `ModuleHolder` with +// contained type `C`. +template +struct is_module_holder_of_impl : std::false_type {}; + +// True branch. `T` is a `ModuleHolder` and thus we can legit access its +// `ContainedType` and compare it against `C`. +template +struct is_module_holder_of_impl + : std::is_same {}; + +// Helper template. 
+template +struct is_module_holder_of : is_module_holder_of_impl< + detail::is_module_holder::value, + torch::decay_t, + torch::decay_t> {}; diff --git a/torch/csrc/api/include/torch/nn/pimpl.h b/torch/csrc/api/include/torch/nn/pimpl.h index 48c331e148686..ecdd36af23187 100644 --- a/torch/csrc/api/include/torch/nn/pimpl.h +++ b/torch/csrc/api/include/torch/nn/pimpl.h @@ -10,17 +10,8 @@ namespace torch { namespace detail { -/// This class exists only to do SFINAE on abstract types `T` that are really -/// `ModuleHolder`, because there's no good way to say that `T` is a -/// `ModuleHolder` over some unknown type `ModuleType`. With this, you can do -/// `enable_if_t>`. -struct ModuleHolderIndicator {}; - -template -using is_module_holder = std::is_base_of>; - -template -using disable_if_module_holder_t = disable_if_t::value>; +// Dump all the template metaprogramming in this file. +#include "pimpl-inl.h" } // namespace detail namespace nn { @@ -40,7 +31,9 @@ class ModuleHolder : torch::detail::ModuleHolderIndicator { using ContainedType = Contained; /// Default constructs the contained module if if has a default constructor, - /// else produces a static error. NOTE: This uses the behavior of template + /// else produces a static error. + /// + /// NOTE: This uses the behavior of template /// classes in C++ that constructors (or any methods) are only compiled when /// actually used. ModuleHolder() : impl_(default_construct()) { @@ -58,9 +51,16 @@ class ModuleHolder : torch::detail::ModuleHolderIndicator { /// Constructs the `ModuleHolder` with a contained module, forwarding all /// arguments to its constructor. - template - explicit ModuleHolder(Ts&&... ts) - : impl_(new Contained(std::forward(ts)...)) {} + template < + typename Head, + typename... Tail, + typename = torch::disable_if_t< + detail::is_module_holder_of::value && + (sizeof...(Tail) == 0)>> + explicit ModuleHolder(Head&& head, Tail&&... tail) + : impl_(new Contained( + std::forward(head), + std::forward(tail)...)) {} /// Constructs the `ModuleHolder` from a pointer to the contained type. /// Example: `Linear(std::make_shared(...))`. @@ -158,15 +158,10 @@ class ModuleHolder : torch::detail::ModuleHolderIndicator { /// Defines a class `Name` which inherits from `nn::ModuleHolder` to provide a /// wrapper over a `std::shared_ptr`. -#define TORCH_MODULE_IMPL(Name, Impl) \ - class Name : public torch::nn::ModuleHolder { /* NOLINT */ \ - public: \ - using torch::nn::ModuleHolder::ModuleHolder; \ - Name(const Name&) = default; /* NOLINT */ \ - Name(Name&&) = default; /* NOLINT */ \ - Name(Name& other) : Name(static_cast(other)) {} /* NOLINT */ \ - Name& operator=(const Name&) = default; /* NOLINT */ \ - Name& operator=(Name&&) = default; /* NOLINT */ \ +#define TORCH_MODULE_IMPL(Name, Impl) \ + class Name : public torch::nn::ModuleHolder { /* NOLINT */ \ + public: \ + using torch::nn::ModuleHolder::ModuleHolder; \ } /// Like `TORCH_MODULE_IMPL`, but defaults the `Impl` name to `Impl`. 
diff --git a/torch/csrc/api/include/torch/optim/serialize.h b/torch/csrc/api/include/torch/optim/serialize.h index 163ebbdcf098b..1c85fa74e0062 100644 --- a/torch/csrc/api/include/torch/optim/serialize.h +++ b/torch/csrc/api/include/torch/optim/serialize.h @@ -51,7 +51,7 @@ void serialize( BufferContainer& buffers) { torch::Tensor size_tensor; archive.read(key + "/size", size_tensor); - const size_t size = size_tensor.toCLong(); + const size_t size = size_tensor.item(); for (size_t index = 0; index < size; ++index) { buffers.emplace_back(); archive.read( diff --git a/torch/csrc/api/include/torch/python.h b/torch/csrc/api/include/torch/python.h new file mode 100644 index 0000000000000..ba1da4599f439 --- /dev/null +++ b/torch/csrc/api/include/torch/python.h @@ -0,0 +1,107 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace torch { +namespace python { +namespace detail { +template +std::vector cursor_to_vector(const Cursor& cursor) { + std::vector vector; + vector.reserve(cursor.size()); + cursor.map( + std::back_inserter(vector), [](const Tensor& tensor) { return tensor; }); + return vector; +} + +template +std::unordered_map cursor_to_map(const Cursor& cursor) { + std::unordered_map map; + map.reserve(cursor.size()); + cursor.map_items( + std::inserter(map, map.end()), + [](const std::string& key, const Tensor& tensor) { + return std::make_pair(key, tensor); + }); + return map; +} +} // namespace detail + +/// Adds method bindings for a pybind11 `class_` that binds an `nn::Module` +/// subclass. +/// +/// Say you have a pybind11 class object created with `py::class_(m, +/// "Net")`. This function will add all the necessary `.def()` calls to bind the +/// `nn::Module` base class' methods, such as `train()`, `eval()` etc. into +/// Python. The exact list of supported methods and their Python signatures are: +/// - `train()` +/// - `eval()` +/// - `is_training() -> bool` +/// - `zero_grad()` +/// - `cuda()` +/// - `cpu()` +/// - `parameters() -> List` +/// - `named_parameters() -> Dict` +/// - `buffers() -> List` +/// - `named_buffers() -> Dict` +template +py::class_ add_module_bindings(py::class_ module) { + return module.def("train", [](M& module) { module.train(); }) + .def("eval", [](M& module) { module.eval(); }) + .def("clone", [](M& module) { return module.clone(); }) + .def_property_readonly( + "training", [](M& module) { return module.is_training(); }) + .def_property_readonly( + "training", [](M& module) { return module.is_training(); }) + .def("zero_grad", [](M& module) { module.zero_grad(); }) + .def("cuda", [](M& module) { module.to(torch::kCUDA); }) + .def("cpu", [](M& module) { module.to(torch::kCPU); }) + .def( + "parameters", + [](M& module) { + return detail::cursor_to_vector(module.parameters()); + }) + .def( + "named_parameters", + [](M& module) { return detail::cursor_to_map(module.parameters()); }) + .def( + "buffers", + [](M& module) { return detail::cursor_to_vector(module.buffers()); }) + .def("named_buffers", [](M& module) { + return detail::cursor_to_map(module.buffers()); + }); +} + +/// Creates a pybind11 class object for an `nn::Module` subclass type and adds +/// default bindings. +/// +/// After adding the default bindings, the class object is returned, such that +/// you can add more bindings. +/// +/// Example usage: +/// \rst +/// .. 
code-block:: +/// struct Net : torch::nn::Module { +/// Net(int in, int out) { } +/// torch::Tensor forward(torch::Tensor x) { return x; } +/// }; +/// +/// PYBIND11_MODULE(my_module, m) { +/// torch::python::bind_module(m, "Net") +/// .def(py::init()) +/// .def("forward", &Net::forward); +/// } +/// \endrst +template +py::class_ bind_module(py::module module, const char* name) { + return add_module_bindings(py::class_(module, name)); +} +} // namespace python +} // namespace torch diff --git a/torch/csrc/api/include/torch/torch.h b/torch/csrc/api/include/torch/torch.h index 9b6eae58d9c72..38bd5a571283f 100644 --- a/torch/csrc/api/include/torch/torch.h +++ b/torch/csrc/api/include/torch/torch.h @@ -1,8 +1,14 @@ #pragma once #include +#include #include #include #include #include #include + +#ifdef TORCH_API_INCLUDE_EXTENSION_H +#include +#warning "Including torch/torch.h for C++ extensions is deprecated. Please include torch/extension.h" +#endif // defined(TORCH_API_INCLUDE_EXTENSION_H) diff --git a/torch/csrc/api/src/optim/lbfgs.cpp b/torch/csrc/api/src/optim/lbfgs.cpp index 7f6104876bcf0..37c4b1dcaf425 100644 --- a/torch/csrc/api/src/optim/lbfgs.cpp +++ b/torch/csrc/api/src/optim/lbfgs.cpp @@ -31,7 +31,7 @@ void LBFGS::add_grad(const torch::Tensor& step_size, const Tensor& update) { Tensor& pd = autograd::Variable(parameter).data(); pd.add_( update.slice(0, offset, offset + numel, 1).view_as(pd), - step_size.toCFloat()); + step_size.item()); offset += numel; } } @@ -45,7 +45,7 @@ torch::Tensor LBFGS::step(LossClosure closure) { Tensor flat_grad = gather_flat_grad(); Tensor abs_grad_sum = flat_grad.abs().sum(); - if (abs_grad_sum.toCFloat() <= options.tolerance_grad_) { + if (abs_grad_sum.item() <= options.tolerance_grad_) { return loss; } @@ -65,7 +65,7 @@ torch::Tensor LBFGS::step(LossClosure closure) { Tensor s = d.mul(t); Tensor ys = y.dot(s); - if (ys.toCFloat() > 1e-10) { + if (ys.item() > 1e-10) { // updating memory if (old_dirs.size() == options.history_size_) { @@ -140,14 +140,15 @@ torch::Tensor LBFGS::step(LossClosure closure) { break; } else if (current_evals >= options.max_eval_) { break; - } else if (abs_grad_sum.toCFloat() <= options.tolerance_grad_) { + } else if (abs_grad_sum.item() <= options.tolerance_grad_) { break; - } else if (gtd.toCFloat() > -options.tolerance_grad_) { + } else if (gtd.item() > -options.tolerance_grad_) { break; - } else if (d.mul(t).abs_().sum().toCFloat() <= options.tolerance_change_) { + } else if ( + d.mul(t).abs_().sum().item() <= options.tolerance_change_) { break; } else if ( - std::abs(loss.toCFloat() - prev_loss.toCFloat()) < + std::abs(loss.item() - prev_loss.item()) < options.tolerance_change_) { break; } diff --git a/torch/csrc/api/src/optim/serialize.cpp b/torch/csrc/api/src/optim/serialize.cpp index fbda6af91f32c..24f9096c6ac36 100644 --- a/torch/csrc/api/src/optim/serialize.cpp +++ b/torch/csrc/api/src/optim/serialize.cpp @@ -31,7 +31,7 @@ void serialize( serialize(archive, key, tensors); steps.clear(); for (const auto& step : tensors) { - steps.push_back(step.toCLong()); + steps.push_back(step.item()); } } } // namespace detail diff --git a/torch/csrc/autograd/engine.cpp b/torch/csrc/autograd/engine.cpp index d0ecc017b42b5..1847bb65b08f8 100644 --- a/torch/csrc/autograd/engine.cpp +++ b/torch/csrc/autograd/engine.cpp @@ -407,7 +407,7 @@ auto Engine::evaluate_function(FunctionTask& task) -> void { for (int i = 0; i < num_outputs; ++i) { auto& output = outputs[i]; at::DeviceGuard guard(output); - if (output.defined() && 
output.ne(output).any().toCByte()) { + if (output.defined() && output.ne(output).any().item()) { std::stringstream ss; ss << "Function '" << fn.name() << "' returned nan values in its " << i << "th output."; throw std::runtime_error(ss.str()); diff --git a/torch/csrc/autograd/functions/tensor.cpp b/torch/csrc/autograd/functions/tensor.cpp index d5a94d49985bc..493a1aadd1755 100644 --- a/torch/csrc/autograd/functions/tensor.cpp +++ b/torch/csrc/autograd/functions/tensor.cpp @@ -60,7 +60,7 @@ auto CopySlices::apply(variable_list&& inputs) -> variable_list { throw std::runtime_error(ERR_BACKWARD_TWICE); } - auto result = grad.type().tensor(base.sizes(), base.strides()); + auto result = at::empty_strided(base.sizes(), base.strides(), grad.options()); result.copy_(grad); auto offset = view.storage_offset() - base.storage_offset(); diff --git a/torch/csrc/autograd/python_hook.cpp b/torch/csrc/autograd/python_hook.cpp index 3ceb1f4aa201c..af02e9e46997e 100644 --- a/torch/csrc/autograd/python_hook.cpp +++ b/torch/csrc/autograd/python_hook.cpp @@ -51,7 +51,7 @@ auto PyFunctionPreHook::operator()(const variable_list& values) -> variable_list } variable_list results(values); - results[value_idx] = ((THPVariable*)value.get())->cdata; + if (value != Py_None) results[value_idx] = ((THPVariable*)value.get())->cdata; return results; } diff --git a/torch/csrc/autograd/python_legacy_variable.cpp b/torch/csrc/autograd/python_legacy_variable.cpp index 3ba7ff94bc1fd..4c6ac18453c06 100644 --- a/torch/csrc/autograd/python_legacy_variable.cpp +++ b/torch/csrc/autograd/python_legacy_variable.cpp @@ -46,7 +46,7 @@ static PyObject *THPVariable_pynew(PyTypeObject* type, PyObject *args, PyObject if (!data || data == Py_None) { // For legacy serialization code, create an empty tensor. This is also used // by nn.Parameter() with no arguments. 
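Two idioms from the C++ hunks just above are worth spelling out: allocation now goes through options-aware factories such as `at::empty_strided(base.sizes(), base.strides(), grad.options())` rather than through a `Type` object, and the engine's nan check relies on a value comparing unequal to itself. A small Python sketch of both, purely illustrative and assuming only a stock `torch` install:

    import torch

    base = torch.randn(4, 3)

    # Options-style allocation: make a fresh tensor matching base's dtype/device,
    # the Python analogue of at::empty(sizes, tensor.options()).
    out = torch.empty(base.size(), dtype=base.dtype, device=base.device)
    out.copy_(base)

    # NaN detection via self-inequality, as in output.ne(output).any() above:
    # a float is NaN exactly when it is not equal to itself.
    has_nan = (out != out).any().item()
    print(bool(has_nan))   # False here, since randn never produces NaN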
- auto var = torch::tensors::get_default_tensor_type().tensor(); + auto var = at::empty({0}, torch::tensors::get_default_tensor_type().options()); tensor = static_cast(var).data(); } else if (THPVariable_Check(data)) { tensor = ((THPVariable*)data)->cdata.data(); diff --git a/torch/csrc/autograd/python_variable_indexing.cpp b/torch/csrc/autograd/python_variable_indexing.cpp index b50dddace66c5..abb588bee8fc5 100644 --- a/torch/csrc/autograd/python_variable_indexing.cpp +++ b/torch/csrc/autograd/python_variable_indexing.cpp @@ -173,7 +173,7 @@ static Variable applySlicing(const Variable& self, PyObject* index, variable_lis result = applySelect(result, dim, THPUtils_unpackLong(obj)); } else { result = result.unsqueeze(dim); - handle_var(boolToIndexingTensor(result, var.toCByte() != 0)); + handle_var(boolToIndexingTensor(result, var.item() != 0)); } } else { handle_var(var); diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index bd2e475645975..9de77efeb79fa 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -599,6 +599,6 @@ inline Variable::Variable(c10::intrusive_ptr self) inline Variable::Impl* Variable::get() const { AT_CHECK(defined(), "Called Variable::get() on an undefined Variable"); - return static_cast(tensor_impl_.get()); + return static_cast(impl_.get()); } }} // namespace torch::autograd diff --git a/torch/csrc/cuda/comm.cpp b/torch/csrc/cuda/comm.cpp index 716a1d30c3c9c..5531348ebdaf0 100644 --- a/torch/csrc/cuda/comm.cpp +++ b/torch/csrc/cuda/comm.cpp @@ -47,7 +47,7 @@ std::vector broadcast(const Tensor& tensor, IntList devices) { tensors.push_back(tensor); for (auto device : devices.slice(1)) { _device_guard.set_index(device); - tensors.push_back(type.tensor(tensor.sizes())); + tensors.push_back(at::empty(tensor.sizes(), type.options())); } nccl::broadcast(tensors); } else { diff --git a/torch/csrc/jit/autodiff.cpp b/torch/csrc/jit/autodiff.cpp index 251a6466ee3a4..009bf68ae3f6d 100644 --- a/torch/csrc/jit/autodiff.cpp +++ b/torch/csrc/jit/autodiff.cpp @@ -23,10 +23,20 @@ void wrapDim(int64_t & dim, const std::vector & sizes) { } bool isDifferentiable(Node * n) { + // TODO: scalar-tensor ops should be canonicalized static OperatorSet differentiable_ops = { "aten::add(Tensor self, Tensor other, *, Scalar alpha) -> Tensor", + "aten::add(Tensor self, Scalar other, Scalar alpha) -> Tensor", + "aten::add(Scalar other, Tensor self) -> Tensor", "aten::sub(Tensor self, Tensor other, *, Scalar alpha) -> Tensor", + "aten::sub(Tensor self, Scalar other, Scalar alpha) -> Tensor", + "aten::sub(Scalar other, Tensor self) -> Tensor", "aten::mul(Tensor self, Tensor other) -> Tensor", + "aten::mul(Tensor self, Scalar other) -> Tensor", + "aten::mul(Scalar other, Tensor self) -> Tensor", + "aten::div(Scalar other, Tensor self) -> Tensor", + "aten::div(Tensor self, Tensor other) -> Tensor", + "aten::div(Tensor self, Scalar other) -> Tensor", "aten::sigmoid(Tensor self) -> Tensor", "aten::tanh(Tensor self) -> Tensor", "aten::relu(Tensor self) -> Tensor", @@ -43,9 +53,39 @@ bool isDifferentiable(Node * n) { "aten::gt(Tensor self, Tensor other) -> Tensor", "aten::ge(Tensor self, Tensor other) -> Tensor", "aten::eq(Tensor self, Tensor other) -> Tensor", - "aten::ne(Tensor self, Tensor other) -> Tensor" + "aten::ne(Tensor self, Tensor other) -> Tensor", + "aten::abs(Tensor self) -> Tensor", + "aten::acos(Tensor self) -> Tensor", + "aten::asin(Tensor self) -> Tensor", + "aten::atan(Tensor self) -> Tensor", + "aten::ceil(Tensor self) -> 
Tensor", + "aten::cos(Tensor self) -> Tensor", + "aten::cosh(Tensor self) -> Tensor", + "aten::exp(Tensor self) -> Tensor", + "aten::expm1(Tensor self) -> Tensor", + "aten::floor(Tensor self) -> Tensor", + "aten::fmod(Tensor self, Scalar other) -> Tensor", + "aten::frac(Tensor self) -> Tensor", + "aten::log(Tensor self) -> Tensor", + "aten::log10(Tensor self) -> Tensor", + "aten::log1p(Tensor self) -> Tensor", + "aten::log2(Tensor self) -> Tensor", + "aten::reciprocal(Tensor self) -> Tensor", + "aten::remainder(Tensor self, Scalar other) -> Tensor", + "aten::round(Tensor self) -> Tensor", + "aten::rsqrt(Tensor self) -> Tensor", + "aten::sin(Tensor self) -> Tensor", + "aten::sinh(Tensor self) -> Tensor", + "aten::tan(Tensor self) -> Tensor", + "aten::trunc(Tensor self) -> Tensor", }; + // TODO: add support for the following fusible operators. + // They're a little tricky to implement; max/min require mutability for best perf + // "aten::atan2(Tensor self) -> Tensor", + // "aten::max(Tensor self) -> Tensor", + // "aten::min(Tensor self) -> Tensor" + if (n->kind() == prim::Constant || n->kind() == prim::AutogradAdd || n->kind() == prim::ConstantChunk) @@ -89,15 +129,42 @@ static std::vector gradientForNode(Node* node, ArrayRef grad_val if (node->matches("aten::add(Tensor self, Tensor other, *, Scalar alpha) -> Tensor")) { return {grads.at(0), grads.at(0) * node->namedInput(attr::alpha), nullptr}; + } else if (node->matches("aten::add(Tensor self, Scalar other, Scalar alpha) -> Tensor")) { + return {grads.at(0), nullptr, nullptr}; + + } else if (node->matches("aten::add(Scalar other, Tensor self) -> Tensor")) { + return {nullptr, grads.at(0)}; + } else if (node->kind() == prim::AutogradAdd) { return {grads.at(0), grads.at(0)}; } else if (node->matches("aten::sub(Tensor self, Tensor other, *, Scalar alpha) -> Tensor")) { return {grads.at(0), -grads.at(0) * node->namedInput(attr::alpha), nullptr}; + } else if (node->matches("aten::sub(Tensor self, Scalar other, Scalar alpha) -> Tensor")) { + return {grads.at(0), nullptr, nullptr}; + + } else if (node->matches("aten::sub(Scalar other, Tensor self) -> Tensor")) { + return {nullptr, -grads.at(0)}; + } else if (node->matches("aten::mul(Tensor self, Tensor other) -> Tensor")) { return {grads.at(0) * inputs.at(1), grads.at(0) * inputs.at(0)}; + } else if (node->matches("aten::mul(Tensor self, Scalar other) -> Tensor")) { + return {grads.at(0) * inputs.at(1), nullptr}; + + } else if (node->matches("aten::mul(Scalar other, Tensor self) -> Tensor")) { + return {nullptr, grads.at(0) * inputs.at(0)}; + + } else if (node->matches("aten::div(Tensor self, Tensor other) -> Tensor")) { + return {grads.at(0) / inputs.at(1), -grads.at(0) * inputs.at(0) / (inputs.at(1) * inputs.at(1))}; + + } else if (node->matches("aten::div(Tensor self, Scalar other) -> Tensor")) { + return {grads.at(0) / inputs.at(1), nullptr}; + + } else if (node->matches("aten::div(Scalar other, Tensor self) -> Tensor")) { + return {nullptr, -grads.at(0) * inputs.at(0) / (inputs.at(1) * inputs.at(1))}; + } else if (node->matches("aten::sigmoid(Tensor self) -> Tensor")) { return {grads.at(0) * outputs.at(0) * (1 - outputs.at(0))}; @@ -130,6 +197,78 @@ static std::vector gradientForNode(Node* node, ArrayRef grad_val } else if (node->matches("aten::neg(Tensor self) -> Tensor")) { return {-grads.at(0)}; + } else if (node->matches("aten::abs(Tensor self) -> Tensor")) { + return {grads.at(0) * inputs.at(0).sign()}; + + } else if (node->matches("aten::acos(Tensor self) -> Tensor")) { + return 
{grads.at(0) * -((-inputs.at(0) * inputs.at(0) + at::Scalar(1)).rsqrt())}; + + } else if (node->matches("aten::asin(Tensor self) -> Tensor")) { + return {grads.at(0) * (-inputs.at(0) * inputs.at(0) + at::Scalar(1)).rsqrt()}; + + } else if (node->matches("aten::atan(Tensor self) -> Tensor")) { + return {grads.at(0) / (inputs.at(0) * inputs.at(0) + at::Scalar(1))}; + + } else if (node->matches("aten::ceil(Tensor self) -> Tensor")) { + return {SymbolicVariable::zeros_like(grads.at(0))}; + + } else if (node->matches("aten::cos(Tensor self) -> Tensor")) { + return {grads.at(0) * -inputs.at(0).sin()}; + + } else if (node->matches("aten::cosh(Tensor self) -> Tensor")) { + return {grads.at(0) * inputs.at(0).sinh()}; + + } else if (node->matches("aten::exp(Tensor self) -> Tensor")) { + return {grads.at(0) * outputs.at(0)}; + + } else if (node->matches("aten::expm1(Tensor self) -> Tensor")) { + return {grads.at(0) * (outputs.at(0) + at::Scalar(1))}; + + } else if (node->matches("aten::floor(Tensor self) -> Tensor")) { + return {SymbolicVariable::zeros_like(grads.at(0))}; + + } else if (node->matches("aten::fmod(Tensor self, Scalar other) -> Tensor")) { + return {grads.at(0), nullptr}; + + } else if (node->matches("aten::frac(Tensor self) -> Tensor")) { + return {grads.at(0)}; + + } else if (node->matches("aten::log(Tensor self) -> Tensor")) { + return {grads.at(0) / inputs.at(0)}; + + } else if (node->matches("aten::log10(Tensor self) -> Tensor")) { + return {grads.at(0) / (inputs.at(0) * 2.3025850929940456)}; + + } else if (node->matches("aten::log1p(Tensor self) -> Tensor")) { + return {grads.at(0) / (inputs.at(0) + at::Scalar(1))}; + + } else if (node->matches("aten::log2(Tensor self) -> Tensor")) { + return {grads.at(0) / (inputs.at(0) * 0.6931471805599453)}; + + } else if (node->matches("aten::reciprocal(Tensor self) -> Tensor")) { + return {-grads.at(0) * outputs.at(0) * outputs.at(0)}; + + } else if (node->matches("aten::remainder(Tensor self, Scalar other) -> Tensor")) { + return {grads.at(0), nullptr}; + + } else if (node->matches("aten::round(Tensor self) -> Tensor")) { + return {SymbolicVariable::zeros_like(grads.at(0))}; + + } else if (node->matches("aten::rsqrt(Tensor self) -> Tensor")) { + return {grads.at(0) * outputs.at(0).pow(3.) * -0.5}; + + } else if (node->matches("aten::sin(Tensor self) -> Tensor")) { + return {grads.at(0) * inputs.at(0).cos()}; + + } else if (node->matches("aten::sinh(Tensor self) -> Tensor")) { + return {grads.at(0) * inputs.at(0).cosh()}; + + } else if (node->matches("aten::tan(Tensor self) -> Tensor")) { + return {grads.at(0) * (1. 
+ outputs.at(0) * outputs.at(0))}; + + } else if (node->matches("aten::trunc(Tensor self) -> Tensor")) { + return {SymbolicVariable::zeros_like(grads.at(0))}; + } else if (node->kind() == prim::ConstantChunk) { return {SymbolicVariable::cat(grads, node->i(attr::dim))}; diff --git a/torch/csrc/jit/batched/BatchTensor.cpp b/torch/csrc/jit/batched/BatchTensor.cpp index a843280912437..564b4b7e4449b 100644 --- a/torch/csrc/jit/batched/BatchTensor.cpp +++ b/torch/csrc/jit/batched/BatchTensor.cpp @@ -14,14 +14,14 @@ BatchTensor::BatchTensor(at::Tensor data, at::Tensor mask, at::Tensor dims){ } BatchTensor::BatchTensor(at::Tensor data, int64_t batch_size){ - dims = data.type().toScalarType(at::kByte).tensor(data.dim()); + dims = at::empty(data.dim(), data.options().dtype(at::kByte)); dims.fill_(0); std::vector sizes(data.dim() + 1, -1); sizes[0] = batch_size; this->data = data.unsqueeze(0).expand(sizes); std::vector mask_sizes(data.dim() + 1, 1); mask_sizes[0] = batch_size; - mask = data.type().toScalarType(at::kByte).tensor(mask_sizes); + mask = at::empty(mask_sizes, data.options().dtype(at::kByte)); mask.fill_(1); } @@ -34,17 +34,17 @@ BatchTensor::BatchTensor(const std::vector datalist, at::Tensor dims for(auto x : datalist){ sizes[i] = std::max(sizes[i], x.size(i)); } - mask_sizes[i] = *dims[i - 1].toByteData() ? sizes[i] : 1; + mask_sizes[i] = *dims[i - 1].data() ? sizes[i] : 1; } - data = datalist[0].type().tensor(sizes); + data = at::empty(sizes, datalist[0].options()); data.fill_(0); - mask = datalist[0].type().toScalarType(at::kByte).tensor(mask_sizes); + mask = at::empty(mask_sizes, datalist[0].options().dtype(at::kByte)); mask.fill_(0); for(std::size_t i = 0; i < datalist.size(); i++){ auto data_item = data.narrow(0, i, 1); auto mask_item = mask.narrow(0, i, 1); for(int64_t j = 0; j < dims.size(0); j++){ - if(*dims[j].toByteData()){ + if(*dims[j].data()){ data_item = data_item.narrow(j + 1, 0, datalist[i].size(j + 1)); mask_item = mask_item.narrow(j + 1, 0, datalist[i].size(j + 1)); } @@ -62,12 +62,12 @@ std::vector BatchTensor::examples() { data = data.sum(d, /*keepdim=*/true); while(data.dim() >= 1) data = data[0]; - return *data.toLongData(); + return *data.data(); }; for(int64_t i = 0; i < data.size(0); i++){ auto data_tmp = data.narrow(0, i, 1); for(int64_t d = 0; d < dims.size(0); d++){ - if(*dims[d].toByteData()){ + if(*dims[d].data()){ data_tmp = data_tmp.narrow(d + 1, 0, mask_sum(mask[i], d)); } } diff --git a/torch/csrc/jit/constants.cpp b/torch/csrc/jit/constants.cpp index 4cdb193d8434d..f1844d2bac665 100644 --- a/torch/csrc/jit/constants.cpp +++ b/torch/csrc/jit/constants.cpp @@ -16,6 +16,9 @@ Value* insertConstant( if(!ref.defined()) { throw constant_not_supported_error("undefined tensors cannot become constants"); } + if (ref.is_variable()) { + ref = autograd::Variable(ref).data(); + } n->output()->inferTypeFrom(ref); // note: before t_ because of std::move(ref) n->t_(attr::value, std::move(ref)); } else if(val.isInt()) { diff --git a/torch/csrc/jit/export.cpp b/torch/csrc/jit/export.cpp index 1984f35fcc897..437d0f6c77997 100644 --- a/torch/csrc/jit/export.cpp +++ b/torch/csrc/jit/export.cpp @@ -681,7 +681,7 @@ void ModuleEncoder::EncodeTensor( // NB: This new tensor is created to support cuda tensors. // Storages can be mutated when converting tensors from cuda to cpu, // and we need a cpu tensor to copy data from. 
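Circling back to the symbolic derivative table added in autodiff.cpp above: the constants 2.3025850929940456 and 0.6931471805599453 are ln 10 and ln 2, i.e. d/dx log10(x) = 1 / (x * ln 10) and d/dx log2(x) = 1 / (x * ln 2). A short numeric cross-check against autograd (illustrative only, not part of the patch):

    import math
    import torch

    x = (torch.rand(5, dtype=torch.double) + 0.1).requires_grad_()

    torch.log10(x).sum().backward()
    expected = 1.0 / (x.detach() * math.log(10.0))   # math.log(10) == 2.302585...
    print(torch.allclose(x.grad, expected))          # True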
- t = tensor.type().tensor( + t = at::getType(tensor).tensor( tensor.storage(), /* storageOffset = */ 0, /* size = */ { static_cast(tensor.type().elementSizeInBytes() * tensor.storage().size()) }, diff --git a/torch/csrc/jit/fusers/common/fused_kernel.cpp b/torch/csrc/jit/fusers/common/fused_kernel.cpp index 54a3c57b83a75..6095bb1374847 100644 --- a/torch/csrc/jit/fusers/common/fused_kernel.cpp +++ b/torch/csrc/jit/fusers/common/fused_kernel.cpp @@ -221,7 +221,7 @@ void FusedKernel::launch( outputs.clear(); outputs.reserve(outputDescriptors().size()); for(auto & od : outputDescriptors()) { - outputs.push_back(ref_type.toScalarType(od.scalar_type).tensor()); + outputs.push_back(at::empty({0}, ref_type.options().dtype(od.scalar_type))); } launch_with_tensors(inputs, outputs); diff --git a/torch/csrc/jit/graph_executor.cpp b/torch/csrc/jit/graph_executor.cpp index 492faade8de61..d071c46472155 100644 --- a/torch/csrc/jit/graph_executor.cpp +++ b/torch/csrc/jit/graph_executor.cpp @@ -361,6 +361,14 @@ struct GraphExecutorImpl { return state; } + // This function should be used only for testing purposes + void debugDisableAutodiffSubgraphInlining() { + // Allow single-node autodiff subgraphs + autodiffSubgraphNodeThreshold = 1; + // Don't inline autodiff subgraphs into autograd functions + autodiffSubgraphInlineThreshold = 1; + } + private: friend struct GraphExecutor; @@ -416,14 +424,14 @@ struct GraphExecutorImpl { // Phase 5. Apply non-differentiable optimizations to the graphs we've found // (or the whole grpah if we know we won't need its derivative). if (needsGradient(opt_graph)) { - auto diff_nodes = CreateAutodiffSubgraphs(*opt_graph); + auto diff_nodes = CreateAutodiffSubgraphs(*opt_graph, autodiffSubgraphNodeThreshold); for (Node * dnode : diff_nodes) { auto diff_graph = std::move(dnode->g(attr::Subgraph)); Gradient gradient = differentiate(diff_graph); runNondiffOptimization(gradient.f); packGradient(gradient, dnode); } - InlineAutodiffSubgraphs(opt_graph); + InlineAutodiffSubgraphs(opt_graph, autodiffSubgraphInlineThreshold); } else { runNondiffOptimization(opt_graph); } @@ -523,6 +531,10 @@ struct GraphExecutorImpl { // GraphExecutors can be accessed from multiple threads, so this thread needs to be // held every time we access the fallback or plan_cache. 
std::mutex compile_mutex; + + // Some tunable parameters + size_t autodiffSubgraphNodeThreshold = 2; + size_t autodiffSubgraphInlineThreshold = 5; }; GraphExecutor::GraphExecutor(std::shared_ptr graph, bool optimize) @@ -544,6 +556,10 @@ GraphExecutorState GraphExecutor::getDebugState() { return pImpl->getDebugState(); } +void GraphExecutor::debugDisableAutodiffSubgraphInlining() { + return pImpl->debugDisableAutodiffSubgraphInlining(); +} + void runRequiredPasses(const std::shared_ptr& g) { specializeUndef(*g); diff --git a/torch/csrc/jit/graph_executor.h b/torch/csrc/jit/graph_executor.h index 7e644273a5b07..08688a8c8cab3 100644 --- a/torch/csrc/jit/graph_executor.h +++ b/torch/csrc/jit/graph_executor.h @@ -36,6 +36,7 @@ struct TORCH_API GraphExecutor { std::shared_ptr graph() const; std::shared_ptr graphFor(const Stack& inputs) const; GraphExecutorState getDebugState(); + void debugDisableAutodiffSubgraphInlining(); private: std::shared_ptr pImpl; }; diff --git a/torch/csrc/jit/init.cpp b/torch/csrc/jit/init.cpp index 751035a00c0ba..98a7b01041932 100644 --- a/torch/csrc/jit/init.cpp +++ b/torch/csrc/jit/init.cpp @@ -14,6 +14,7 @@ #include "torch/csrc/jit/passes/erase_number_types.h" #include "torch/csrc/jit/passes/onnx/prepare_division_for_onnx.h" #include "torch/csrc/jit/passes/common_subexpression_elimination.h" +#include "torch/csrc/jit/passes/create_autodiff_subgraphs.h" #include "torch/csrc/jit/passes/peephole.h" #include "torch/csrc/jit/passes/canonicalize.h" #include "torch/csrc/jit/passes/onnx/peephole.h" @@ -106,6 +107,9 @@ void initJITBindings(PyObject *module) { return ConstantPropagation(g); }) .def("_jit_pass_erase_shape_information", EraseShapeInformation) + .def("_jit_pass_create_autodiff_subgraphs", [](Graph& graph) { + CreateAutodiffSubgraphs(graph); + }) .def("_jit_run_cpp_tests", [] { // We have to release the GIL inside this method, because if we happen to // initialize the autograd engine in these tests, the newly spawned worker threads will diff --git a/torch/csrc/jit/interpreter.h b/torch/csrc/jit/interpreter.h index 151a980d76a11..d28558d4d15b4 100644 --- a/torch/csrc/jit/interpreter.h +++ b/torch/csrc/jit/interpreter.h @@ -6,7 +6,7 @@ #include "torch/csrc/WindowsTorchApiMacro.h" namespace at { - struct Tensor; + class Tensor; } namespace torch { namespace jit { diff --git a/torch/csrc/jit/passes/create_autodiff_subgraphs.cpp b/torch/csrc/jit/passes/create_autodiff_subgraphs.cpp index 3554c22ddc70e..d1d73a36ea834 100644 --- a/torch/csrc/jit/passes/create_autodiff_subgraphs.cpp +++ b/torch/csrc/jit/passes/create_autodiff_subgraphs.cpp @@ -1,8 +1,11 @@ -#include +#include "torch/csrc/jit/passes/create_autodiff_subgraphs.h" + #include "torch/csrc/jit/ir.h" #include "torch/csrc/jit/autodiff.h" #include "torch/csrc/jit/assertions.h" +#include + namespace torch { namespace jit { struct Graph; @@ -30,6 +33,11 @@ Node* mergeNodes(Block * block, Symbol group_node_kind, ArrayRef nodes) { if(value_map.count(v) > 0) { return value_map[v]; } + if (auto value = toIValue(v)) { + Value * nv = new_graph->insertConstant(*value); + value_map[v] = nv; + return nv; + } Value * nv = new_graph->addInput()->setType(v->type()); group_node->addInput(v); value_map[v] = nv; @@ -69,8 +77,6 @@ Node* mergeNodes(Block * block, Symbol group_node_kind, ArrayRef nodes) { return group_node; } -} - void CreateAutodiffSubgraphs(Block * block, size_t threshold, std::vector& diff_graphs) { // This implementation is not optimal, but it is simple. 
// It just scans through the list in order looking for runs of @@ -90,8 +96,12 @@ void CreateAutodiffSubgraphs(Block * block, size_t threshold, std::vector for(Node * node : block->nodes()) { // Note: nodes() iterator stays valid since it is // always pointing _after_ the nodes that mergeNodes // mutates. - if(isDifferentiable(node)) { - groupable.push_back(node); + if (isDifferentiable(node)) { + // Constants are generally cheap to clone, so it's better to replicate them, + // instead of moving them out from the original graph. + if (node->kind() != prim::Constant) { + groupable.push_back(node); + } } else { if(groupable.size() >= threshold) { diff_graphs.push_back(mergeNodes(block, prim::DifferentiableGraph, groupable)); @@ -107,11 +117,12 @@ void CreateAutodiffSubgraphs(Block * block, size_t threshold, std::vector } } +} // anonymous namespace + std::vector CreateAutodiffSubgraphs(Graph & graph, size_t threshold) { std::vector diff_nodes; CreateAutodiffSubgraphs(graph.block(), threshold, diff_nodes); return diff_nodes; } - }} diff --git a/torch/csrc/jit/passes/create_autodiff_subgraphs.h b/torch/csrc/jit/passes/create_autodiff_subgraphs.h index 44a6683dc4ce3..1908b03e2568f 100644 --- a/torch/csrc/jit/passes/create_autodiff_subgraphs.h +++ b/torch/csrc/jit/passes/create_autodiff_subgraphs.h @@ -1,10 +1,12 @@ #pragma once + +#include "torch/csrc/jit/ir.h" +#include "torch/csrc/WindowsTorchApiMacro.h" + #include namespace torch { namespace jit { -struct Graph; - // insert GraphExecutor nodes that group together // subgraphs that are differentiable by the jit's autodiff passes // threshold - minimum number of nodes that will appear in a block diff --git a/torch/csrc/jit/passes/peephole.cpp b/torch/csrc/jit/passes/peephole.cpp index ab4a75375081a..176166218fc01 100644 --- a/torch/csrc/jit/passes/peephole.cpp +++ b/torch/csrc/jit/passes/peephole.cpp @@ -19,7 +19,7 @@ void PeepholeOptimize(Block * block) { auto* node = *it; for (Block * sub_block : node->blocks()) { - PeepholeOptimize(sub_block); + PeepholeOptimize(sub_block); } // XXX: remember that if you want to simplify an expression by combining multiple nodes @@ -41,8 +41,8 @@ void PeepholeOptimize(Block * block) { } } else if (node->matches("aten::type_as(Tensor self, Tensor other) -> Tensor")) { // x.type_as(y) == x iff x.type() == y.type() - auto self_type = node->input(0)->type()->cast(); - auto other_type = node->input(1)->type()->cast(); + auto self_type = node->input(0)->type()->cast(); + auto other_type = node->input(1)->type()->cast(); if (self_type && other_type && self_type->scalarType() == other_type->scalarType() && self_type->device() == other_type->device()) { @@ -100,6 +100,20 @@ void PeepholeOptimize(Block * block) { } } } + // TODO: this doesn't work with Scalar-Tensor ops! 
We should canonicalize those + } else if (node->matches("aten::mul(Tensor self, Scalar other) -> Tensor", /*with_const=*/attr::other) || + node->matches("aten::div(Tensor self, Scalar other) -> Tensor", /*with_const=*/attr::other)) { + // x * 1 == x / 1 == x + if (node->get(attr::other)->toDouble() == 1) { + node->output()->replaceAllUsesWith(node->input(0)); + } + } else if (node->matches("aten::add(Tensor self, Scalar other, Scalar alpha) -> Tensor", /*with_const=*/{attr::alpha, attr::other}) || + node->matches("aten::sub(Tensor self, Scalar other, Scalar alpha) -> Tensor", /*with_const=*/{attr::alpha, attr::other})) { + // x + 0 == x - 0 == x + if (node->get(attr::alpha)->toDouble() == 1 && + node->get(attr::other)->toDouble() == 0) { + node->output()->replaceAllUsesWith(node->input(0)); + } } else if(node->kind() == prim::TensorToNum || node->kind() == prim::ImplicitTensorToNum) { Node* input_node = node->input()->node(); if (input_node->kind() == prim::NumToTensor) { diff --git a/torch/csrc/jit/python_ir.cpp b/torch/csrc/jit/python_ir.cpp index 53e16cc0a09f9..5aa053f626faa 100644 --- a/torch/csrc/jit/python_ir.cpp +++ b/torch/csrc/jit/python_ir.cpp @@ -45,7 +45,7 @@ std::ostream& printPyObject(std::ostream & out, const THPObjectPtr& obj) { auto pytuple = pyobj.cast(); out << "("; size_t i = 0; - for (auto& o : pytuple) { + for (const auto& o : pytuple) { if (i > 0) { out << ", "; } diff --git a/torch/csrc/jit/register_prim_ops.cpp b/torch/csrc/jit/register_prim_ops.cpp index 1f8618121f1e2..71168cd3ee3d4 100644 --- a/torch/csrc/jit/register_prim_ops.cpp +++ b/torch/csrc/jit/register_prim_ops.cpp @@ -70,7 +70,7 @@ RegisterOperators reg({ at::Tensor a; pop(stack, a); at::DeviceGuard guard(a); - push(stack, a.toCLong()); + push(stack, a.item()); return 0; }; } else { @@ -78,7 +78,7 @@ RegisterOperators reg({ at::Tensor a; pop(stack, a); at::DeviceGuard guard(a); - push(stack, a.toCDouble()); + push(stack, a.item()); return 0; }; } @@ -92,7 +92,7 @@ RegisterOperators reg({ pop(stack, a); checkImplicitTensorToNum(a, /*to int*/true); at::DeviceGuard guard(a); - push(stack, a.toCLong()); + push(stack, a.item()); return 0; }; } else { @@ -101,7 +101,7 @@ RegisterOperators reg({ pop(stack, a); checkImplicitTensorToNum(a, /*to int*/false); at::DeviceGuard guard(a); - push(stack, a.toCDouble()); + push(stack, a.item()); return 0; }; } @@ -727,7 +727,7 @@ RegisterOperators reg2({ pop(stack, t); std::vector elems; for(int i = 0; i < t.size(0); i++){ - elems.push_back(*t[i].toIntData()); + elems.push_back(*t[i].data()); } push(stack, jit::IntList::create(elems)); return 0; diff --git a/torch/csrc/jit/script/init.cpp b/torch/csrc/jit/script/init.cpp index c09caf4c3702f..f0dfda81cc092 100644 --- a/torch/csrc/jit/script/init.cpp +++ b/torch/csrc/jit/script/init.cpp @@ -487,6 +487,12 @@ void initJitScriptBindings(PyObject* module) { } throw std::runtime_error("Attempted to call get_debug_state on a Module without a compiled forward()"); }) + .def("debug_disable_autodiff_subgraph_inlining", [](Module& self) { + if (self.find_method("forward")) { + Method & m = self.get_method("forward"); + m.debugDisableAutodiffSubgraphInlining(); + } + }) .def("forward", [](Module& self, py::args args, py::kwargs kwargs) { // We implement this in C++ to avoid incurring the pybind11 dispatch // overhead twice: once to call into the method lookup for "forward" @@ -515,6 +521,7 @@ void initJitScriptBindings(PyObject* module) { auto schema = extractSchemaFromDef(def, is_method); self.setSchema(schema); }) + 
.def("debug_disable_autodiff_subgraph_inlining", &Method::debugDisableAutodiffSubgraphInlining) .def("pretty_print_schema", &Method::pretty_print_schema); m.def("_jit_script_compile", [](const Def &def, ResolutionCallback rcb) { diff --git a/torch/csrc/jit/script/module.h b/torch/csrc/jit/script/module.h index caf084d074ba9..50ae9f48fb3c9 100644 --- a/torch/csrc/jit/script/module.h +++ b/torch/csrc/jit/script/module.h @@ -165,6 +165,10 @@ struct Method { return get_executor().getDebugState(); } + void debugDisableAutodiffSubgraphInlining() { + return get_executor().debugDisableAutodiffSubgraphInlining(); + } + bool is_optimized() { return optimize; } diff --git a/torch/csrc/jit/symbolic_variable.h b/torch/csrc/jit/symbolic_variable.h index 3e38b4323da32..daac5d48d1d89 100644 --- a/torch/csrc/jit/symbolic_variable.h +++ b/torch/csrc/jit/symbolic_variable.h @@ -56,6 +56,9 @@ struct SymbolicVariable { SymbolicVariable operator*(const SymbolicVariable rhs) const { return create(aten::mul, {*this, rhs})[0].typeLike(*this); } + SymbolicVariable operator/(const SymbolicVariable rhs) const { + return create(aten::div, {*this, rhs})[0].typeLike(*this); + } SymbolicVariable operator*(at::Scalar rhs) const { if (isConstInt(rhs, 1)) return *this; @@ -170,6 +173,30 @@ struct SymbolicVariable { Node * unpack = g->insertNode(g->create(prim::ListUnpack, {output_list}, inputs.size())); return fmap(unpack->outputs()); } + static SymbolicVariable zeros_like(const SymbolicVariable input) { + return create(t("zeros_like"), {input})[0]; + } + SymbolicVariable cos() const { + return create(t("cos"), {*this})[0]; + } + SymbolicVariable cosh() const { + return create(t("cosh"), {*this})[0]; + } + SymbolicVariable pow(at::Scalar other) const { + return create(t("pow"), {*this, insertConstant(other)})[0]; + } + SymbolicVariable rsqrt() const { + return create(t("rsqrt"), {*this})[0]; + } + SymbolicVariable sign() const { + return create(t("sign"), {*this})[0]; + } + SymbolicVariable sin() const { + return create(t("sin"), {*this})[0]; + } + SymbolicVariable sinh() const { + return create(t("sinh"), {*this})[0]; + } SymbolicVariable sum() const { return create(t("sum"), {*this})[0]; } diff --git a/torch/csrc/jit/test_jit.cpp b/torch/csrc/jit/test_jit.cpp index c853db1b1632d..9942437c9eb28 100644 --- a/torch/csrc/jit/test_jit.cpp +++ b/torch/csrc/jit/test_jit.cpp @@ -148,7 +148,7 @@ static void fusionTests() { auto outputs = debugLaunchGraph(graph, 0, {a,b}); CATCH_REQUIRE(outputs.size() == 1); auto o2 = a*b; - float max_diff = (o2 - outputs[0]).abs().max().toCDouble(); + float max_diff = (o2 - outputs[0]).abs().max().item(); //std::cout << "max diff: " << max_diff << "\n"; CATCH_REQUIRE(max_diff == 0); }; @@ -202,7 +202,7 @@ static void fusionTests() { auto outputs = debugLaunchGraph(graph, 0, inputs); CATCH_REQUIRE(outputs.size() == graph.outputs().size()); CATCH_REQUIRE(out0.is_same_size(outputs.front())); - float max_diff = (outputs.front() - out0).abs().max().toCDouble(); + float max_diff = (outputs.front() - out0).abs().max().item(); CATCH_REQUIRE(max_diff < 1e-6); }; @@ -236,9 +236,9 @@ static void fusionTests() { auto outputs = debugLaunchGraph(graph, 0, {a,b}); CATCH_REQUIRE(outputs.size() == 2); - float max_diff = (o_r - outputs[0]).abs().max().toCDouble(); + float max_diff = (o_r - outputs[0]).abs().max().item(); CATCH_REQUIRE(max_diff == 0); - float max_diff2 = (o2_r - outputs[1]).abs().max().toCDouble(); + float max_diff2 = (o2_r - outputs[1]).abs().max().item(); CATCH_REQUIRE(max_diff2 == 0); }; 
testConcat(0); @@ -325,16 +325,16 @@ at::Tensor t_def(at::Tensor x) { bool checkRtol(const at::Tensor& diff, const std::vector inputs) { double maxValue = 0.0; for (auto& tensor : inputs) { - maxValue = fmax(tensor.abs().max().toCFloat(), maxValue); + maxValue = fmax(tensor.abs().max().item(), maxValue); } - return diff.abs().max().toCFloat() < 2e-6 * maxValue; + return diff.abs().max().item() < 2e-6 * maxValue; } bool almostEqual(const at::Tensor & a, const at::Tensor & b) { return checkRtol(a - b,{a, b}); } bool exactlyEqual(const at::Tensor & a, const at::Tensor & b) { - return (a - b).abs().max().toCFloat() == 0.f; + return (a - b).abs().max().item() == 0.f; } std::pair @@ -533,7 +533,7 @@ struct ADTestSpec { variable_list get_grad_outputs(const variable_list& vars) { return fmap(vars, [](const Variable& v) -> Variable { - return v.type().tensor(v.sizes()).normal_(); + return at::randn(v.sizes(), v.options()); }); } @@ -873,7 +873,7 @@ void testControlFlow() { }; auto L = [](int64_t l) { return IValue(autograd::make_variable(scalar_to_tensor(at::Scalar(l)))); }; - auto V = [](IValue t) { return std::move(t).toTensor().toCLong(); }; + auto V = [](IValue t) { return std::move(t).toTensor().item(); }; auto run_binary = [&](const std::string & name, int64_t a, int64_t b) { return V(run(name, {L(a), L(b)})[0]); }; diff --git a/torch/csrc/tensor/python_tensor.cpp b/torch/csrc/tensor/python_tensor.cpp index 4a40cf243f3a6..1b85b1810b660 100644 --- a/torch/csrc/tensor/python_tensor.cpp +++ b/torch/csrc/tensor/python_tensor.cpp @@ -3,7 +3,6 @@ #include #include -#include "torch/csrc/torch.h" #include "torch/csrc/Dtype.h" #include "torch/csrc/DynamicTypes.h" #include "torch/csrc/Exceptions.h" @@ -17,6 +16,7 @@ #include "torch/csrc/utils/python_strings.h" #include "torch/csrc/utils/tensor_new.h" #include "torch/csrc/utils/tensor_types.h" +#include "torch/csrc/variable_tensor_functions.h" #include diff --git a/torch/csrc/tensor/python_tensor.h b/torch/csrc/tensor/python_tensor.h index 64ebbef786052..a8c282dd1e96a 100644 --- a/torch/csrc/tensor/python_tensor.h +++ b/torch/csrc/tensor/python_tensor.h @@ -5,7 +5,7 @@ namespace at { struct Type; struct Device; -struct Tensor; +class Tensor; } // namespace at namespace torch { namespace tensors { diff --git a/torch/csrc/torch.cpp b/torch/csrc/torch.cpp index d3f79cd49dbdc..656cae7f7e154 100644 --- a/torch/csrc/torch.cpp +++ b/torch/csrc/torch.cpp @@ -3,15 +3,15 @@ #include namespace torch { -at::Type& getVariableType(at::Backend backend, at::ScalarType type) { +at::TypeExtendedInterface& getVariableType(at::Backend backend, at::ScalarType type) { return *autograd::VariableType::getVariableTypeFromBaseType(at::getNonVariableType(backend, type)); } -at::Type& CPU(at::ScalarType type) { +at::TypeExtendedInterface& CPU(at::ScalarType type) { return torch::getVariableType(at::Backend::CPU, type); } -at::Type& CUDA(at::ScalarType type) { +at::TypeExtendedInterface& CUDA(at::ScalarType type) { return torch::getVariableType(at::Backend::CUDA, type); } diff --git a/torch/csrc/utils/pybind.h b/torch/csrc/utils/pybind.h index 2188681906d53..85fc443bb40b7 100644 --- a/torch/csrc/utils/pybind.h +++ b/torch/csrc/utils/pybind.h @@ -72,7 +72,7 @@ template<> struct type_caster { for (int idx = 0; idx < size; idx++) { PyObject* obj = tuple ? 
PyTuple_GET_ITEM(source, idx) : PyList_GET_ITEM(source, idx); if (THPVariable_Check(obj)) { - v_value[idx] = THPVariable_Unpack(obj).toCLong(); + v_value[idx] = THPVariable_Unpack(obj).item(); } else if (PyLong_Check(obj)) { // use THPUtils_unpackLong after it is safe to include python_numbers.h v_value[idx] = THPUtils_unpackLong(obj); diff --git a/torch/csrc/utils/python_arg_parser.h b/torch/csrc/utils/python_arg_parser.h index 9ff25d2d4e513..d4c15fd9482f0 100644 --- a/torch/csrc/utils/python_arg_parser.h +++ b/torch/csrc/utils/python_arg_parser.h @@ -300,7 +300,7 @@ inline std::vector PythonArgs::intlistWithDefault(int i, std::vector(); continue; } else { res[idx] = THPUtils_unpackIndex(obj); diff --git a/torch/csrc/utils/tensor_new.cpp b/torch/csrc/utils/tensor_new.cpp index 73b4adbf45a45..4c6a2855ea26c 100644 --- a/torch/csrc/utils/tensor_new.cpp +++ b/torch/csrc/utils/tensor_new.cpp @@ -280,12 +280,12 @@ Tensor legacy_sparse_tensor_ctor(const Type& type, PyObject* args, PyObject* kwa auto deviceOptional = r.deviceOptional(2); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return type.sparse_coo_tensor(r.tensor(0), r.tensor(1)); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), type.options()); } else if (r.idx == 3) { auto deviceOptional = r.deviceOptional(3); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return type.sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2)); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2), type.options()); } else if (r.idx == 4) { PyObject* arg = r.pyobject(0); auto deviceOptional = r.deviceOptional(1); @@ -314,7 +314,7 @@ Tensor legacy_sparse_tensor_new(const Type& type, PyObject* args, PyObject* kwar auto deviceOptional = r.deviceOptional(0); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return type.tensor(); + return at::empty({0}, type.options()); } else if (r.idx == 1) { auto cdata = reinterpret_cast(r.toInt64(0)); return type.unsafeTensorFromTH(cdata, true); @@ -324,14 +324,14 @@ Tensor legacy_sparse_tensor_new(const Type& type, PyObject* args, PyObject* kwar auto deviceOptional = r.deviceOptional(2); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return type.sparse_coo_tensor(r.tensor(0), r.tensor(1)); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), type.options()); } else if (r.idx == 3) { // Note: this signature doesn't have a dtype, even though it has a device; it probably shouldn't // have a device (we should infer it). 
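(Editor's note, not part of the patch: the tensor_new.cpp hunks follow the same factory migration as the earlier files — Type-bound constructors such as type.tensor() and type.sparse_coo_tensor(...) become free factory functions taking a TensorOptions. A hedged sketch of that pattern; the TensorOptions below merely stands in for type.options() and the shapes are made up for the example:)

```cpp
#include <ATen/ATen.h>

int main() {
  // Stand-in for type.options(); in the patch the options come from an at::Type.
  at::TensorOptions opts = at::TensorOptions().dtype(at::kFloat);

  at::Tensor empty = at::empty({0}, opts);      // was: type.tensor()
  at::Tensor sized = at::empty({2, 3}, opts);   // was: type.tensor(sizes)

  // was: type.sparse_coo_tensor(indices, values, sizes)
  at::Tensor indices = at::zeros({1, 2}, at::kLong);  // 1 sparse dim, 2 entries (both at index 0)
  at::Tensor values  = at::ones({2}, opts);
  at::Tensor sparse  = at::sparse_coo_tensor(indices, values, {4});

  return (empty.numel() == 0 && sized.size(0) == 2 && sparse.is_sparse()) ? 0 : 1;
}
```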
auto deviceOptional = r.deviceOptional(3); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return type.sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2)); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2), type.options()); } else if (r.idx == 4) { PyObject* arg = r.pyobject(0); auto deviceOptional = r.deviceOptional(1); @@ -374,7 +374,7 @@ Tensor legacy_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs) { auto deviceOptional = r.deviceOptional(0); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return type.tensor(); + return at::empty({0}, type.options()); } else if (r.idx == 1) { return new_with_storage(type, r.storage(0)); } else if (r.idx == 2) { @@ -420,7 +420,7 @@ Tensor legacy_tensor_new(const Type& type, PyObject* args, PyObject* kwargs) { auto deviceOptional = r.deviceOptional(0); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return type.tensor(); + return at::empty({0}, type.options()); } else if (r.idx == 1) { return new_with_storage(type, r.storage(0)); } else if (r.idx == 2) { @@ -472,7 +472,7 @@ Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs const auto& index_type = values.type().toScalarType(kLong); Tensor indices = internal_new_from_data(index_type, r.deviceOptional(3), r.pyobject(0), false, true, false); const auto& sparse_type_to_use = values.type().toBackend(values.type().is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU); - return sparse_type_to_use.sparse_coo_tensor(indices, values).set_requires_grad(r.toBool(4)); + return at::sparse_coo_tensor(indices, values, sparse_type_to_use.options()).set_requires_grad(r.toBool(4)); } else if (r.idx == 1) { bool type_inference = r.isNone(3); const auto& sparse_type = typeWithDefault(r, 3, 4, default_sparse_type); @@ -482,11 +482,11 @@ Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs const auto& index_type = values.type().toScalarType(kLong); Tensor indices = internal_new_from_data(index_type, r.deviceOptional(4), r.pyobject(0), false, true, false); const auto& sparse_type_to_use = values.type().toBackend(values.type().is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU); - return sparse_type_to_use.sparse_coo_tensor(indices, values, r.intlist(2)).set_requires_grad(r.toBool(5)); + return at::sparse_coo_tensor(indices, values, r.intlist(2), sparse_type_to_use.options()).set_requires_grad(r.toBool(5)); } else if (r.idx == 2) { const auto& sparse_type_to_use = typeWithDefault(r, 1, 2, default_sparse_type); at::DeviceGuard device_guard(r.device(2)); - return sparse_type_to_use.sparse_coo_tensor(r.intlist(0)).set_requires_grad(r.toBool(3)); + return at::sparse_coo_tensor(r.intlist(0), sparse_type_to_use.options()).set_requires_grad(r.toBool(3)); } throw std::runtime_error("sparse_coo_tensor(): invalid arguments"); } diff --git a/torch/csrc/variable_tensor_functions.h b/torch/csrc/variable_tensor_functions.h index 692fe60aaeeab..e18794a970fe9 100644 --- a/torch/csrc/variable_tensor_functions.h +++ b/torch/csrc/variable_tensor_functions.h @@ -13,20 +13,20 @@ namespace torch { // when we create new tensors. We also provide a few accessors like requires_grad // that make it easier to get to varible information when we have a at::Tensor -/// Returns a `Type` object for the given backend (e.g. `at::kCPU`) and +/// Returns a `TypeExtendedInterface` object for the given backend (e.g. 
`at::kCPU`) and /// `ScalarType` (e.g. `at::kDouble`). /// TODO: Eliminate this function as much as possible -THP_CLASS at::Type& getVariableType(at::Backend backend, at::ScalarType type); +THP_CLASS at::TypeExtendedInterface& getVariableType(at::Backend backend, at::ScalarType type); -/// Returns a `Type` object for the CPU backend and the given `ScalarType` +/// Returns a `TypeExtendedInterface` object for the CPU backend and the given `ScalarType` /// (e.g. `at::kDouble`). Equivalent to `getVariableType(kCPU, type)`. /// TODO: Eliminate this function as much as possible -THP_CLASS at::Type& CPU(at::ScalarType type); +THP_CLASS at::TypeExtendedInterface& CPU(at::ScalarType type); -/// Returns a `Type` object for the CUDA backend and the given `ScalarType` +/// Returns a `TypeExtendedInterface` object for the CUDA backend and the given `ScalarType` /// (e.g. `at::kDouble`). Equivalent to `getVariableType(kCUDA, type)`. /// TODO: Eliminate this function as much as possible -THP_CLASS at::Type& CUDA(at::ScalarType type); +THP_CLASS at::TypeExtendedInterface& CUDA(at::ScalarType type); /// Sets the `requires_grad` property of the given `Tensor`. THP_CLASS void set_requires_grad(at::Tensor& tensor, bool requires_grad) noexcept; diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 0568e4261f448..0da3f31b22b13 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -9,19 +9,19 @@ from . import ProcessGroupGloo -_MPI_AVAILBLE = True -_NCCL_AVAILBLE = True +_MPI_AVAILABLE = True +_NCCL_AVAILABLE = True try: from. import ProcessGroupMPI except ImportError: - _MPI_AVAILBLE = False + _MPI_AVAILABLE = False try: from. import ProcessGroupNCCL except ImportError: - _NCCL_AVAILBLE = False + _NCCL_AVAILABLE = False class DistBackend(object): @@ -166,7 +166,7 @@ def is_mpi_available(): Checks if MPI is available """ - return _MPI_AVAILBLE + return _MPI_AVAILABLE def is_nccl_available(): @@ -174,7 +174,7 @@ def is_nccl_available(): Checks if NCCL is available """ - return _NCCL_AVAILBLE + return _NCCL_AVAILABLE def is_initialized(): diff --git a/torch/distributions/gumbel.py b/torch/distributions/gumbel.py index b489c8754aa44..7b9deaa8c1a8a 100644 --- a/torch/distributions/gumbel.py +++ b/torch/distributions/gumbel.py @@ -33,7 +33,8 @@ def __init__(self, loc, scale, validate_args=None): if isinstance(loc, Number) and isinstance(scale, Number): base_dist = Uniform(finfo.tiny, 1 - finfo.eps) else: - base_dist = Uniform(self.loc.new(self.loc.size()).fill_(finfo.tiny), 1 - finfo.eps) + base_dist = Uniform(torch.full_like(self.loc, finfo.tiny), + torch.full_like(self.loc, 1 - finfo.eps)) transforms = [ExpTransform().inv, AffineTransform(loc=0, scale=-torch.ones_like(self.scale)), ExpTransform().inv, AffineTransform(loc=loc, scale=-self.scale)] super(Gumbel, self).__init__(base_dist, transforms, validate_args=validate_args) diff --git a/torch/distributions/transforms.py b/torch/distributions/transforms.py index a90cceefa5d6c..00a52164f9780 100644 --- a/torch/distributions/transforms.py +++ b/torch/distributions/transforms.py @@ -534,8 +534,8 @@ def _inverse_on_event(self, y): def _call(self, x): flat_x = x.contiguous().view((-1,) + x.shape[-2:]) - return torch.stack([self._call_on_event(z) for z in flat_x]).view(x.shape) + return torch.stack([self._call_on_event(flat_x[i]) for i in range(flat_x.size(0))]).view(x.shape) def _inverse(self, y): flat_y = y.contiguous().view((-1,) + y.shape[-2:]) - return 
torch.stack([self._inverse_on_event(z) for z in flat_y]).view(y.shape) + return torch.stack([self._inverse_on_event(flat_y[i]) for i in range(flat_y.size(0))]).view(y.shape) diff --git a/torch/distributions/weibull.py b/torch/distributions/weibull.py index 8b5afee400b78..97a50fdd6e4af 100644 --- a/torch/distributions/weibull.py +++ b/torch/distributions/weibull.py @@ -27,7 +27,7 @@ class Weibull(TransformedDistribution): def __init__(self, scale, concentration, validate_args=None): self.scale, self.concentration = broadcast_all(scale, concentration) self.concentration_reciprocal = self.concentration.reciprocal() - base_dist = Exponential(self.scale.new(self.scale.size()).fill_(1.0)) + base_dist = Exponential(torch.ones_like(self.scale)) transforms = [PowerTransform(exponent=self.concentration_reciprocal), AffineTransform(loc=0, scale=self.scale)] super(Weibull, self).__init__(base_dist, diff --git a/torch/csrc/torch.h b/torch/extension.h similarity index 79% rename from torch/csrc/torch.h rename to torch/extension.h index 5761b8ef57f64..828aefd572ae7 100644 --- a/torch/csrc/torch.h +++ b/torch/extension.h @@ -2,6 +2,5 @@ #include -#include #include #include diff --git a/torch/lib/THD/base/data_channels/DataChannelMPI.cpp b/torch/lib/THD/base/data_channels/DataChannelMPI.cpp index b23157581bdfc..e551da81d1356 100644 --- a/torch/lib/THD/base/data_channels/DataChannelMPI.cpp +++ b/torch/lib/THD/base/data_channels/DataChannelMPI.cpp @@ -153,7 +153,7 @@ at::Tensor DataChannelMPI::_newLikeFlat(std::vector& tensors) const at::DeviceGuard gpu_guard(t.is_cuda() ? t.get_device() : -1); std::vector sizes { static_cast(tensors.size()) }; // sizes = [output.size()] + input.sizes() sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); - return t.type().tensor(sizes); + return at::empty(sizes, t.options()); } diff --git a/torch/lib/c10d/ProcessGroupGloo.cpp b/torch/lib/c10d/ProcessGroupGloo.cpp index e56d996a36ba3..a521c36eacf88 100644 --- a/torch/lib/c10d/ProcessGroupGloo.cpp +++ b/torch/lib/c10d/ProcessGroupGloo.cpp @@ -518,7 +518,7 @@ EntryType ProcessGroupGloo::construct(const AlgorithmKey& key) { } deviceGuard.set_index(-1); #endif - entry->src[i] = key.type->tensor(srcSizes[i]); + entry->src[i] = at::empty(srcSizes[i], key.type->options()); } #ifdef USE_CUDA diff --git a/torch/lib/c10d/ProcessGroupMPI.cpp b/torch/lib/c10d/ProcessGroupMPI.cpp index 63846b443ea07..033d5d24cb26d 100644 --- a/torch/lib/c10d/ProcessGroupMPI.cpp +++ b/torch/lib/c10d/ProcessGroupMPI.cpp @@ -98,7 +98,7 @@ bool ProcessGroupMPI::WorkMPI::isCompleted() { } bool ProcessGroupMPI::WorkMPI::isSuccess() const { - return !workException_; + return !exception_; } void ProcessGroupMPI::WorkMPI::synchronize() {} @@ -124,14 +124,14 @@ void ProcessGroupMPI::WorkMPI::finishWithException( { std::unique_lock lock(workMutex_); completed_ = true; - workException_ = caughtWorkException; + exception_ = caughtWorkException; } workCV_.notify_all(); } const std::exception& ProcessGroupMPI::WorkMPI::exception() const { try { - std::rethrow_exception(workException_); + std::rethrow_exception(exception_); } catch (const std::exception& e) { return e; } @@ -169,6 +169,11 @@ bool ProcessGroupMPI::AsyncWork::isCompleted() { *srcRank_ = status_.MPI_SOURCE; } + // Populate exception if request was not successful + if (status_.MPI_ERROR != MPI_SUCCESS) { + populateException(); + } + return true; } @@ -194,19 +199,30 @@ bool ProcessGroupMPI::AsyncWork::wait() { *srcRank_ = status_.MPI_SOURCE; } - return status_.MPI_ERROR == MPI_SUCCESS; + auto ok = 
(status_.MPI_ERROR == MPI_SUCCESS); + + // Populate exception if request was not successful + if (!ok) { + populateException(); + } + + return ok; } const std::exception& ProcessGroupMPI::AsyncWork::exception() const { - if (request_ != MPI_REQUEST_NULL) { - throw std::runtime_error( - "Invalid call to AsyncWork::exception before work has completed"); + try { + std::rethrow_exception(exception_); + } catch (const std::exception& e) { + return e; } +} +void ProcessGroupMPI::AsyncWork::populateException() { std::array buf; int len = buf.size(); MPI_CHECK(MPI_Error_string(status_.MPI_ERROR, buf.data(), &len)); - return std::runtime_error(std::string(buf.data(), len)); + exception_ = + std::make_exception_ptr(std::runtime_error(std::string(buf.data(), len))); } // Static global states diff --git a/torch/lib/c10d/ProcessGroupMPI.hpp b/torch/lib/c10d/ProcessGroupMPI.hpp index 5bd2b303c1a4e..8d3018be90325 100644 --- a/torch/lib/c10d/ProcessGroupMPI.hpp +++ b/torch/lib/c10d/ProcessGroupMPI.hpp @@ -101,8 +101,7 @@ class ProcessGroupMPI : public ProcessGroup { std::mutex workMutex_; std::condition_variable workCV_; std::atomic completed_; - - std::exception_ptr workException_; + std::exception_ptr exception_; friend class ProcessGroupMPI; }; @@ -123,10 +122,13 @@ class ProcessGroupMPI : public ProcessGroup { const std::exception& exception() const override; protected: + void populateException(); + at::Tensor tensor_; MPI_Request request_; int* const srcRank_; MPI_Status status_; + std::exception_ptr exception_; }; // Constructor will spawn up the worker thread loop diff --git a/torch/lib/c10d/Utils.hpp b/torch/lib/c10d/Utils.hpp index 84032a0e3945f..29b7f3665d3ec 100644 --- a/torch/lib/c10d/Utils.hpp +++ b/torch/lib/c10d/Utils.hpp @@ -79,7 +79,7 @@ inline at::Tensor newLikeFlat( at::DeviceGuard gpuGuard(device); std::vector sizes{static_cast(tensors[deviceIdx].size())}; sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); - return t.type().tensor(sizes); + return at::empty(sizes, t.options()); } inline at::Tensor newLikeFlat(std::vector& tensors) { @@ -90,7 +90,7 @@ inline at::Tensor newLikeFlat(std::vector& tensors) { at::DeviceGuard gpuGuard(t.is_cuda() ? 
t.get_device() : -1); std::vector sizes{static_cast(tensors.size())}; sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); - return t.type().tensor(sizes); + return at::empty(sizes, t.options()); } inline std::vector> getSizes( diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py index 50d3f74b2a2c6..4b1c4cbc32bc0 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -170,6 +170,7 @@ class BuildExtension(build_ext): def build_extensions(self): self._check_abi() for extension in self.extensions: + self._add_compile_flag(extension, '-DTORCH_API_INCLUDE_EXTENSION_H') self._define_torch_extension_name(extension) self._add_gnu_abi_flag_if_binary(extension) @@ -290,6 +291,13 @@ def _check_abi(self): compiler = os.environ.get('CXX', 'c++') check_compiler_abi_compatibility(compiler) + def _add_compile_flag(self, extension, flag): + if isinstance(extension.extra_compile_args, dict): + for args in extension.extra_compile_args.values(): + args.append(flag) + else: + extension.extra_compile_args.append(flag) + def _define_torch_extension_name(self, extension): # pybind11 doesn't support dots in the names # so in order to support extensions in the packages @@ -298,11 +306,7 @@ def _define_torch_extension_name(self, extension): names = extension.name.split('.') name = names[-1] define = '-DTORCH_EXTENSION_NAME={}'.format(name) - if isinstance(extension.extra_compile_args, dict): - for args in extension.extra_compile_args.values(): - args.append(define) - else: - extension.extra_compile_args.append(define) + self._add_compile_flag(extension, define) def _add_gnu_abi_flag_if_binary(self, extension): # If the version string looks like a binary build, @@ -310,14 +314,9 @@ def _add_gnu_abi_flag_if_binary(self, extension): # if the extension is compiled with gcc >= 5.1, # then we have to define _GLIBCXX_USE_CXX11_ABI=0 # so that the std::string in the API is resolved to - # non-C++11 symbols. - define = '-D_GLIBCXX_USE_CXX11_ABI=0' + # non-C++11 symbols if _is_binary_build(): - if isinstance(extension.extra_compile_args, dict): - for args in extension.extra_compile_args.values(): - args.append(define) - else: - extension.extra_compile_args.append(define) + self._add_compile_flag(extension, '-D_GLIBCXX_USE_CXX11_ABI=0') def CppExtension(name, sources, *args, **kwargs): @@ -427,10 +426,12 @@ def include_paths(cuda=False): here = os.path.abspath(__file__) torch_path = os.path.dirname(os.path.dirname(here)) lib_include = os.path.join(torch_path, 'lib', 'include') - # Some internal (old) Torch headers don't properly prefix their includes, - # so we need to pass -Itorch/lib/include/TH as well. paths = [ lib_include, + # Remove this once torch/torch.h is officially no longer supported for C++ extensions. + os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), + # Some internal (old) Torch headers don't properly prefix their includes, + # so we need to pass -Itorch/lib/include/TH as well. os.path.join(lib_include, 'TH'), os.path.join(lib_include, 'THC') ] @@ -580,7 +581,7 @@ def load_inline(name, the necessary header includes, as well as the (pybind11) binding code. More precisely, strings passed to ``cpp_sources`` are first concatenated into a single ``.cpp`` file. This file is then prepended with ``#include - ``. + ``. Furthermore, if the ``functions`` argument is supplied, bindings will be automatically generated for each function specified. 
``functions`` can @@ -630,7 +631,7 @@ def load_inline(name, if isinstance(cuda_sources, str): cuda_sources = [cuda_sources] - cpp_sources.insert(0, '#include ') + cpp_sources.insert(0, '#include ') # If `functions` is supplied, we create the pybind11 bindings for the user. # Here, `functions` is (or becomes, after some processing) a map from @@ -854,7 +855,9 @@ def _build_extension_module(name, build_directory, verbose): # Python 2 and 3 compatible way of getting the error object. _, error, _ = sys.exc_info() # error.output contains the stdout and stderr of the build attempt. - message = "Error building extension '{}': {}".format(name, error.output.decode()) + message = "Error building extension '{}'".format(name) + if hasattr(error, 'output') and error.output: + message += ": {}".format(error.output.decode()) raise_from(RuntimeError(message), None) @@ -890,7 +893,7 @@ def _write_ninja_file(path, sources = [os.path.abspath(file) for file in sources] user_includes = [os.path.abspath(file) for file in extra_include_paths] - # include_paths() gives us the location of torch/torch.h + # include_paths() gives us the location of torch/extension.h system_includes = include_paths(with_cuda) # sysconfig.get_paths()['include'] gives us the location of Python.h system_includes.append(sysconfig.get_paths()['include']) @@ -901,6 +904,7 @@ def _write_ninja_file(path, system_includes.clear() common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)] + common_cflags.append('-DTORCH_API_INCLUDE_EXTENSION_H') common_cflags += ['-I{}'.format(include) for include in user_includes] common_cflags += ['-isystem {}'.format(include) for include in system_includes]
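(Editor's note, not part of the patch: taken together, the cpp_extension.py changes mean an extension source only needs the new umbrella header — torch/csrc/torch.h is now torch/extension.h — and the build helpers define TORCH_EXTENSION_NAME and TORCH_API_INCLUDE_EXTENSION_H for every extension. A minimal illustrative extension source; the function name and docstring are invented for the example:)

```cpp
// example_op.cpp -- built via torch.utils.cpp_extension (CppExtension or load/load_inline).
#include <torch/extension.h>   // was: #include <torch/torch.h>

at::Tensor add_one(const at::Tensor& input) {
  // Any ATen/autograd-aware op can live here.
  return input + 1;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("add_one", &add_one, "add 1 to every element of the input tensor");
}
```

(With load_inline, the same body can be passed as a string; the helper prepends the torch/extension.h include shown above.)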