diff --git a/aten/src/ATen/Backend.h b/aten/src/ATen/Backend.h index ccb96feeed238a..40db1ee67f2476 100644 --- a/aten/src/ATen/Backend.h +++ b/aten/src/ATen/Backend.h @@ -1,4 +1,9 @@ #pragma once + +#include +#include +#include + #include namespace at { @@ -40,6 +45,39 @@ static inline Backend toDense(Backend b) { } } +static inline Backend tensorTypeIdToBackend(TensorTypeId t) { + if (t == CPUTensorId()) { + return Backend::CPU; + } else if (t == CUDATensorId()) { + return Backend::CUDA; + } else if (t == SparseCPUTensorId()) { + return Backend::SparseCPU; + } else if (t == SparseCUDATensorId()) { + return Backend::SparseCUDA; + } else if (t == UndefinedTensorId()) { + return Backend::Undefined; + } else { + AT_ERROR("Unrecognized tensor type ID: ", t); + } +} + +static inline TensorTypeId backendToTensorTypeId(Backend b) { + switch (b) { + case Backend::CPU: + return CPUTensorId(); + case Backend::CUDA: + return CUDATensorId(); + case Backend::SparseCPU: + return SparseCPUTensorId(); + case Backend::SparseCUDA: + return SparseCUDATensorId(); + case Backend::Undefined: + return UndefinedTensorId(); + default: + throw std::runtime_error("Unknown backend"); + } +} + static inline const char* toString(Backend b) { switch (b) { case Backend::CPU: diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp index 03a5a6008e7d24..f4146153f798fe 100644 --- a/aten/src/ATen/SparseTensorImpl.cpp +++ b/aten/src/ATen/SparseTensorImpl.cpp @@ -3,6 +3,18 @@ namespace at { +namespace { + Backend sparseTensorIdToDenseBackend(TensorTypeId type_id) { + if (type_id == SparseCPUTensorId()) { + return Backend::CPU; + } else if (type_id == SparseCUDATensorId()) { + return Backend::CUDA; + } else { + AT_ERROR("Cannot construct SparseTensor with non-sparse tensor type ID ", type_id); + } + } +} + // An empty dense tensor defaults to a 1-dimensional tensor of size [0] // (recall, it is not a 0-dimensional tensor, because such a tensor would @@ -18,15 +30,13 @@ namespace at { // tensor and a [0] size values tensor for such an empty tensor. However, // we don't currently support zero-size dimensions, so we can't actually // do this; so we just allocate zero-size tensors for everything. -SparseTensorImpl::SparseTensorImpl(at::Backend backend, at::ScalarType scalar_type) - : TensorImpl(backend, scalar_type, nullptr, false) +SparseTensorImpl::SparseTensorImpl(at::TensorTypeId type_id, at::ScalarType scalar_type) + : TensorImpl(type_id, scalar_type, nullptr, false) , size_{0} , sparseDims_(1) , denseDims_(0) - , indices_(globalContext().getTypeOpt(toDense(backend), ScalarType::Long)->tensor()) - , values_(globalContext().getTypeOpt(toDense(backend), scalar_type)->tensor()) { - AT_ASSERT(backend == Backend::SparseCPU || backend == Backend::SparseCUDA); - } + , indices_(globalContext().getTypeOpt(sparseTensorIdToDenseBackend(type_id), ScalarType::Long)->tensor()) + , values_(globalContext().getTypeOpt(sparseTensorIdToDenseBackend(type_id), scalar_type)->tensor()) {} IntList SparseTensorImpl::sizes() const { return size_; diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h index 8c44200cee31bf..3a1fa66a4ca3aa 100644 --- a/aten/src/ATen/SparseTensorImpl.h +++ b/aten/src/ATen/SparseTensorImpl.h @@ -48,7 +48,7 @@ struct AT_API SparseTensorImpl : public TensorImpl { public: // Public for now... 
- explicit SparseTensorImpl(at::Backend, at::ScalarType); + explicit SparseTensorImpl(at::TensorTypeId, at::ScalarType); int64_t nnz() const { return nnz_; } int64_t sparseDims() const { return sparseDims_; } diff --git a/aten/src/ATen/TensorImpl.cpp b/aten/src/ATen/TensorImpl.cpp index 77c99c4be9f27f..e5f9bf98fb3a67 100644 --- a/aten/src/ATen/TensorImpl.cpp +++ b/aten/src/ATen/TensorImpl.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -12,7 +13,10 @@ namespace at { Type& TensorImpl::type() const { - Type* base_type = &globalContext().getType(backend_, scalar_type_); + // Select backend from the hard-coded ones that the legacy ATen dispatcher + // knows about + Backend backend = tensorTypeIdToBackend(type_id_); + Type* base_type = &globalContext().getType(backend, scalar_type_); if (is_variable_) { return detail::getVariableHooks().getVariableType(*base_type); } else { @@ -55,10 +59,9 @@ void Tensor::backward( pImpl->backward(std::move(gradient), keep_graph, create_graph); } -TensorImpl::TensorImpl(Backend backend, ScalarType scalar_type) { - backend_ = backend; - scalar_type_ = scalar_type; - auto type = &globalContext().getType(backend, scalar_type); +TensorImpl::TensorImpl(TensorTypeId type_id, ScalarType scalar_type) + : type_id_(type_id), scalar_type_(scalar_type) { + auto type = &globalContext().getType(tensorTypeIdToBackend(type_id), scalar_type); Storage* storage = type->storage(true).release(); StorageImpl* storage_impl = storage->pImpl(); tensor = new THTensor(storage_impl); diff --git a/aten/src/ATen/TensorImpl.h b/aten/src/ATen/TensorImpl.h index fb6de9b6592e7c..c9b701c02f324d 100644 --- a/aten/src/ATen/TensorImpl.h +++ b/aten/src/ATen/TensorImpl.h @@ -6,6 +6,8 @@ #include "ATen/Retainable.h" #include "ATen/ScalarType.h" #include "ATen/core/optional.h" +#include "ATen/core/TensorTypeId.h" +#include "ATen/core/TensorTypeIdRegistration.h" struct THTensor; @@ -18,9 +20,9 @@ struct Tensor; namespace at { struct AT_API TensorImpl : public Retainable { - explicit TensorImpl(Backend backend, ScalarType scalar_type, THTensor * tensor, bool is_variable) - : backend_(backend), scalar_type_(scalar_type), is_variable_(is_variable), tensor(tensor) {} - TensorImpl(Backend backend, ScalarType scalar_type); + explicit TensorImpl(TensorTypeId type_id, ScalarType scalar_type, THTensor * tensor, bool is_variable) + : type_id_(type_id), scalar_type_(scalar_type), is_variable_(is_variable), tensor(tensor) {} + TensorImpl(TensorTypeId type_id, ScalarType scalar_type); virtual ~TensorImpl(); @@ -94,7 +96,7 @@ struct AT_API TensorImpl : public Retainable { virtual void set_data(Tensor new_data); protected: - Backend backend_; + TensorTypeId type_id_; // INVARIANT: When storage is non-null, this scalar type must // agree with the scalar type in storage ScalarType scalar_type_; diff --git a/aten/src/ATen/UndefinedTensor.cpp b/aten/src/ATen/UndefinedTensor.cpp index 8a818538be4d4c..0390b412d9c2e6 100644 --- a/aten/src/ATen/UndefinedTensor.cpp +++ b/aten/src/ATen/UndefinedTensor.cpp @@ -6,7 +6,7 @@ namespace at { // should this use the globalContext? Can it get a context passed in somehow? 
UndefinedTensor::UndefinedTensor() -: TensorImpl(Backend::Undefined, ScalarType::Undefined, nullptr, /* is variable */ false) { +: TensorImpl(UndefinedTensorId(), ScalarType::Undefined, nullptr, /* is variable */ false) { } IntList UndefinedTensor::sizes() const { diff --git a/aten/src/ATen/UndefinedType.cpp b/aten/src/ATen/UndefinedType.cpp index a18bbc1e0e85a9..63e9098ede528c 100644 --- a/aten/src/ATen/UndefinedType.cpp +++ b/aten/src/ATen/UndefinedType.cpp @@ -4,7 +4,7 @@ namespace at { UndefinedType::UndefinedType(Context* context) - : Type(context, /*is_variable=*/false, /*is_undefined=*/true) {} + : Type(context, UndefinedTensorId(), /*is_variable=*/false, /*is_undefined=*/true) {} ScalarType UndefinedType::scalarType() const { return ScalarType::Undefined; } diff --git a/aten/src/ATen/core/DeviceType.h b/aten/src/ATen/core/DeviceType.h index 2d2a090fddf41d..5614d247af7ae5 100644 --- a/aten/src/ATen/core/DeviceType.h +++ b/aten/src/ATen/core/DeviceType.h @@ -1,3 +1,5 @@ +#pragma once + // This is directly synchronized with caffe2/proto/caffe2.proto, but // doesn't require me to figure out how to get Protobuf headers into // ATen/core (which would require a lot more build system hacking.) diff --git a/aten/src/ATen/core/IdWrapper.h b/aten/src/ATen/core/IdWrapper.h index 7d152269d9a8c2..58632ce111db57 100644 --- a/aten/src/ATen/core/IdWrapper.h +++ b/aten/src/ATen/core/IdWrapper.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace at { @@ -21,7 +22,7 @@ namespace at { * for you, given the underlying type supports it. */ template -class IdWrapper { +class AT_CORE_API IdWrapper { public: using underlying_type = UnderlyingType; using concrete_type = ConcreteType; diff --git a/aten/src/ATen/core/TensorTypeId.cpp b/aten/src/ATen/core/TensorTypeId.cpp index 605d303ad62ee3..a07c8326edb531 100644 --- a/aten/src/ATen/core/TensorTypeId.cpp +++ b/aten/src/ATen/core/TensorTypeId.cpp @@ -1,5 +1,9 @@ #include "ATen/core/TensorTypeId.h" +namespace at { + std::ostream& operator<<(std::ostream& str, at::TensorTypeId rhs) { return str << rhs.underlyingId(); } + +} // namespace at diff --git a/aten/src/ATen/core/TensorTypeId.h b/aten/src/ATen/core/TensorTypeId.h index 5fc411137e08b4..d01437bbe9197b 100644 --- a/aten/src/ATen/core/TensorTypeId.h +++ b/aten/src/ATen/core/TensorTypeId.h @@ -5,12 +5,7 @@ #include #include #include "ATen/core/IdWrapper.h" - -namespace at { -class TensorTypeId; -} - -std::ostream& operator<<(std::ostream&, at::TensorTypeId); +#include "ATen/core/Macros.h" namespace at { @@ -22,7 +17,7 @@ using _tensorTypeId_underlyingType = uint8_t; * Dynamic type ID of a Tensor argument. It represents something like * CPUTensor, etc. 
*/ -class TensorTypeId final +class AT_CORE_API TensorTypeId final : public at:: IdWrapper { public: @@ -37,9 +32,11 @@ class TensorTypeId final : IdWrapper(id) {} friend class TensorTypeIdCreator; - friend std::ostream& ::operator<<(std::ostream&, TensorTypeId); + friend AT_CORE_API std::ostream& operator<<(std::ostream&, TensorTypeId); }; +AT_CORE_API std::ostream& operator<<(std::ostream&, at::TensorTypeId); + } // namespace at AT_DEFINE_HASH_FOR_IDWRAPPER(at::TensorTypeId) diff --git a/aten/src/ATen/core/TensorTypeIdRegistration.cpp b/aten/src/ATen/core/TensorTypeIdRegistration.cpp index af0b992e51c6ff..17e2b2e7cd8baf 100644 --- a/aten/src/ATen/core/TensorTypeIdRegistration.cpp +++ b/aten/src/ATen/core/TensorTypeIdRegistration.cpp @@ -4,8 +4,6 @@ namespace at { -constexpr at::TensorTypeId TensorTypeIdCreator::max_id_; - TensorTypeIds::TensorTypeIds() : creator_(), registry_() {} TensorTypeIds& TensorTypeIds::singleton() { @@ -16,9 +14,10 @@ TensorTypeIds& TensorTypeIds::singleton() { TensorTypeIdCreator::TensorTypeIdCreator() : last_id_(0) {} at::TensorTypeId TensorTypeIdCreator::create() { + auto id = TensorTypeId(++last_id_); - if (id == max_id_) { + if (last_id_ == 0) { // overflow happened! // If this happens in prod, we have to change // details::_tensorTypeId_underlyingType to uint16_t. AT_ERROR( @@ -59,4 +58,10 @@ TensorTypeIdRegistrar::~TensorTypeIdRegistrar() { TensorTypeIds::singleton().deregister(id_); } +AT_DEFINE_TENSOR_TYPE(UndefinedTensorId); +AT_DEFINE_TENSOR_TYPE(CPUTensorId); +AT_DEFINE_TENSOR_TYPE(CUDATensorId); +AT_DEFINE_TENSOR_TYPE(SparseCPUTensorId); +AT_DEFINE_TENSOR_TYPE(SparseCUDATensorId); + } // namespace at diff --git a/aten/src/ATen/core/TensorTypeIdRegistration.h b/aten/src/ATen/core/TensorTypeIdRegistration.h index 0286115fdc66ac..a7b30932cebe85 100644 --- a/aten/src/ATen/core/TensorTypeIdRegistration.h +++ b/aten/src/ATen/core/TensorTypeIdRegistration.h @@ -16,7 +16,7 @@ namespace at { -class TensorTypeIdCreator final { +class AT_CORE_API TensorTypeIdCreator final { public: TensorTypeIdCreator(); @@ -29,13 +29,10 @@ class TensorTypeIdCreator final { private: std::atomic last_id_; - static constexpr at::TensorTypeId max_id_ = TensorTypeId( - std::numeric_limits::max()); - AT_DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator); }; -class TensorTypeIdRegistry final { +class AT_CORE_API TensorTypeIdRegistry final { public: TensorTypeIdRegistry(); @@ -49,7 +46,7 @@ class TensorTypeIdRegistry final { AT_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry); }; -class TensorTypeIds final { +class AT_CORE_API TensorTypeIds final { public: static TensorTypeIds& singleton(); @@ -71,7 +68,7 @@ inline constexpr at::TensorTypeId TensorTypeIds::undefined() noexcept { return TensorTypeIdCreator::undefined(); } -class TensorTypeIdRegistrar final { +class AT_CORE_API TensorTypeIdRegistrar final { public: TensorTypeIdRegistrar(); ~TensorTypeIdRegistrar(); @@ -88,12 +85,18 @@ inline at::TensorTypeId TensorTypeIdRegistrar::id() const noexcept { return id_; } -} // namespace at - -#define AT_DECLARE_TENSOR_TYPE(TensorName) at::TensorTypeId TensorName(); +#define AT_DECLARE_TENSOR_TYPE(TensorName) AT_CORE_API at::TensorTypeId TensorName(); #define AT_DEFINE_TENSOR_TYPE(TensorName) \ at::TensorTypeId TensorName() { \ static TensorTypeIdRegistrar registration_raii; \ return registration_raii.id(); \ } + +AT_DECLARE_TENSOR_TYPE(UndefinedTensorId); +AT_DECLARE_TENSOR_TYPE(CPUTensorId); // Caffe2 supported +AT_DECLARE_TENSOR_TYPE(CUDATensorId); // Caffe2 supported 
+AT_DECLARE_TENSOR_TYPE(SparseCPUTensorId); +AT_DECLARE_TENSOR_TYPE(SparseCUDATensorId); + +} // namespace at diff --git a/caffe2/core/typeid.cc b/aten/src/ATen/core/typeid.cpp similarity index 69% rename from caffe2/core/typeid.cc rename to aten/src/ATen/core/typeid.cpp index d4c5294f4b5d35..0ad13150f7c63c 100644 --- a/caffe2/core/typeid.cc +++ b/aten/src/ATen/core/typeid.cpp @@ -1,7 +1,5 @@ -#include "caffe2/core/typeid.h" -#include "caffe2/core/logging.h" -#include "caffe2/core/scope_guard.h" -#include "caffe2/core/tensor.h" +#include +#include #include @@ -28,26 +26,9 @@ std::mutex& gTypeRegistrationMutex() { return g_type_registration_mutex; } -#if defined(_MSC_VER) -// Windows does not have cxxabi.h, so we will simply return the original. -string Demangle(const char* name) { - return string(name); -} -#else -string Demangle(const char* name) { - int status = 0; - auto demangled = ::abi::__cxa_demangle(name, nullptr, nullptr, &status); - if (demangled) { - auto guard = caffe2::MakeGuard([demangled]() { free(demangled); }); - return string(demangled); - } - return name; -} -#endif - string GetExceptionString(const std::exception& e) { #ifdef __GXX_RTTI - return Demangle(typeid(e).name()) + ": " + e.what(); + return at::demangle(typeid(e).name()) + ": " + e.what(); #else return string("Exception (no RTTI available): ") + e.what(); #endif // __GXX_RTTI @@ -56,20 +37,21 @@ string GetExceptionString(const std::exception& e) { void TypeMeta::_ThrowRuntimeTypeLogicError(const std::string& msg) { // In earlier versions it used to be std::abort() but it's a bit hard-core // for a library - CAFFE_THROW(msg); + AT_ERROR(msg); } TypeIdentifier TypeIdentifier::createTypeId() { static std::atomic counter( TypeMeta::Id<_CaffeHighestPreallocatedTypeId>().underlyingId()); const TypeIdentifier::underlying_type new_value = ++counter; - if (new_value == std::numeric_limits::max()) { - throw std::logic_error("Ran out of available type ids. If you need more than 2^16 CAFFE_KNOWN_TYPEs, we need to increase TypeIdentifier to use more than 16 bit."); + if (new_value == + std::numeric_limits::max()) { + throw std::logic_error( + "Ran out of available type ids. If you need more than 2^16 CAFFE_KNOWN_TYPEs, we need to increase TypeIdentifier to use more than 16 bit."); } return TypeIdentifier(new_value); } -CAFFE_DEFINE_KNOWN_TYPE(Tensor); CAFFE_DEFINE_KNOWN_TYPE(float); CAFFE_DEFINE_KNOWN_TYPE(int); CAFFE_DEFINE_KNOWN_TYPE(std::string); @@ -102,9 +84,9 @@ namespace { // for uninitialized blob. You should not use this struct yourself - it is // intended to be only instantiated once here.
struct UninitializedTypeNameRegisterer { - UninitializedTypeNameRegisterer() { - gTypeNames()[TypeIdentifier::uninitialized()] = "nullptr (uninitialized)"; - } + UninitializedTypeNameRegisterer() { + gTypeNames()[TypeIdentifier::uninitialized()] = "nullptr (uninitialized)"; + } }; static UninitializedTypeNameRegisterer g_uninitialized_type_name_registerer; diff --git a/aten/src/ATen/core/typeid.h b/aten/src/ATen/core/typeid.h new file mode 100644 index 00000000000000..fd9131b5812246 --- /dev/null +++ b/aten/src/ATen/core/typeid.h @@ -0,0 +1,490 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __GXX_RTTI +#include +#endif + +#include + +#include "ATen/core/Error.h" +#include "ATen/core/Backtrace.h" +#include "ATen/core/Macros.h" +#include "ATen/core/Half.h" +#include "ATen/core/IdWrapper.h" + +// TODO: This file is still in the caffe2 namespace, despite living +// in the ATen directory. This is because the macro CAFFE_DECLARE_KNOWN_TYPE +// defines a template specialization, which relies on the namespace of TypeMeta +// matching the namespace where the macro is called. This requires us to +// fix all of the call-sites, which I want to do later. So the namespace +// is not fixed at the moment. + +namespace caffe2 { +class TypeIdentifier; +} + +std::ostream& operator<<(std::ostream& stream, caffe2::TypeIdentifier typeId); + +namespace caffe2 { + +class TypeMeta; + +/** + * A type id is a unique id for a given C++ type. + * You need to register your types using CAFFE_KNOWN_TYPE(MyType) to be able to + * use TypeIdentifier with custom types. This is for example used to store the + * dtype of tensors. + */ +class TypeIdentifier final : public at::IdWrapper { + public: + static TypeIdentifier createTypeId(); + + friend std::ostream& ::operator<<( + std::ostream& stream, + TypeIdentifier typeId); + friend bool operator<(TypeIdentifier lhs, TypeIdentifier rhs); + + // This is 8, because 0 is uint8_t (due to ScalarType BC constraint) + static constexpr TypeIdentifier uninitialized() { + return TypeIdentifier(8); + } + + private: + constexpr explicit TypeIdentifier(uint16_t id) : IdWrapper(id) {} + friend class TypeMeta; +}; + +// Allow usage in std::map / std::set +// TODO Disallow this and rather use std::unordered_map/set everywhere +inline bool operator<(TypeIdentifier lhs, TypeIdentifier rhs) { + return lhs.underlyingId() < rhs.underlyingId(); +} + +} // namespace caffe2 + +AT_DEFINE_HASH_FOR_IDWRAPPER(caffe2::TypeIdentifier) + +inline std::ostream& operator<<( + std::ostream& stream, + caffe2::TypeIdentifier typeId) { + return stream << typeId.underlyingId(); +} + +namespace caffe2 { + +std::unordered_map& gTypeNames(); +std::unordered_set& gRegisteredTypeNames(); + +// A utility function to return an exception std::string by prepending its +// exception type before its what() content. +std::string GetExceptionString(const std::exception& e); + +std::mutex& gTypeRegistrationMutex(); + +template +struct TypeNameRegisterer { + TypeNameRegisterer(TypeIdentifier id, const std::string& literal_name) { + std::lock_guard guard(gTypeRegistrationMutex()); +#ifdef __GXX_RTTI + (void)literal_name; + + std::string name = at::demangle(typeid(T).name()); + // If we are in RTTI mode, we will also use this opportunity to do sanity + // check if there are duplicated ids registered for the same type. This + // usually happens when one does not do RTLD_GLOBAL, which is often the + // case in Python. 
The way we do the check is to make sure that there are + // no duplicated names registered - this could be done by checking the + // uniqueness of names. + if (gRegisteredTypeNames().count(name)) { + AT_ERROR("typeid.h: Type name ", name, " was registered twice. " + "This should not happen. Things to check:\n" + "1. Did you add a new CAFFE_KNOWN_TYPE? If so, check that " + "it is not duplicated with an existing CAFFE_KNOWN_TYPE.\n" + "2. Did you build and install PyTorch and Caffe2 separately? " + "For example, this would be the case if you ran scripts/onnx/install.sh or " + "scripts/onnx/install-develop.sh prior to Aug 12, 2018 " + "(commit 1756daaa7530d). If so, rebuild using the environment variable " + " FULL_CAFFE2=1 (if you build latest master, the ONNX scripts are " + "updated to do this for you.) " + "For more context, see https://github.com/pytorch/pytorch/issues/10460"); + } + gRegisteredTypeNames().insert(name); + gTypeNames()[id] = name; +#else // __GXX_RTTI + if (literal_name.empty()) { + gTypeNames()[id] = "(RTTI disabled, cannot show name)"; + } else { + gTypeNames()[id] = literal_name; + } +#endif // __GXX_RTTI + } +}; + +/** + * TypeMeta is a thin class that allows us to store the type of a container such + * as a blob, or the data type of a tensor, with a unique run-time id. It also + * stores some additional data such as the item size and the name of the type + * for run-time inspection. + */ +class TypeMeta { + public: + using PlacementNew = void(void*, size_t); + using TypedCopy = void(const void*, void*, size_t); + using TypedDestructor = void(void*, size_t); + /** Create a dummy TypeMeta object. To create a TypeMeta object for a specific + * type, use TypeMeta::Make(). + */ + TypeMeta() noexcept + : id_(TypeIdentifier::uninitialized()), + itemsize_(0), + ctor_(nullptr), + copy_(nullptr), + dtor_(nullptr) {} + + /** + * Copy constructor. + */ + TypeMeta(const TypeMeta& src) noexcept = default; + + /** + * Assignment operator. + */ + TypeMeta& operator=(const TypeMeta& src) noexcept = default; + + TypeMeta(TypeMeta&& rhs) noexcept = default; + + private: + // TypeMeta can only be created by Make, making sure that we do not + // create incorrectly mixed up TypeMeta objects. + TypeMeta( + TypeIdentifier i, + size_t s, + PlacementNew* ctor, + TypedCopy* copy, + TypedDestructor* dtor) noexcept + : id_(i), itemsize_(s), ctor_(ctor), copy_(copy), dtor_(dtor) {} + + // Mechanism for throwing errors which can't be prevented at compile time + // due to type erasure. E.g. somebody calling TypeMeta::copy() for + // non-copiable type. Right now just throws exception but is implemented + // in .cpp to manage dependencies + static void _ThrowRuntimeTypeLogicError(const std::string& msg); + + public: + /** + * Returns the type id. + */ + const TypeIdentifier& id() const noexcept { + return id_; + } + /** + * Returns the size of the item. + */ + const size_t& itemsize() const noexcept { + return itemsize_; + } + /** + * Returns the placement new function pointer for individual items. + */ + PlacementNew* ctor() const noexcept { + return ctor_; + } + /** + * Returns the typed copy function pointer for individual items. + */ + TypedCopy* copy() const noexcept { + return copy_; + } + /** + * Returns the destructor function pointer for individual items. + */ + TypedDestructor* dtor() const noexcept { + return dtor_; + } + /** + * Returns a printable name for the type.
+ */ + const char* name() const noexcept { + auto it = gTypeNames().find(id_); + assert(it != gTypeNames().end()); + return it->second.c_str(); + } + + friend bool operator==(const TypeMeta& lhs, const TypeMeta& rhs) noexcept; + + template + bool Match() const { + return (id_ == Id()); + } + + // Below are static functions that can be called by passing a specific type. + + /** + * Returns the unique id for the given type T. The id is unique for the type T + * in the sense that for any two different types, their id are different; for + * the same type T, the id remains the same over different calls of the + * function. However, this is not guaranteed over different runs, as the id + * is generated during run-time. Do NOT serialize the id for storage. + */ + template + AT_CORE_API static TypeIdentifier Id(); + + /** + * Returns the item size of the type. This is equivalent to sizeof(T). + */ + template + static size_t ItemSize() { + return sizeof(T); + } + + /** + * Returns the registered printable name of the type. + * + * Works for only the ones registered with CAFFE_KNOWN_TYPE + */ + template + static const char* TypeName() { + auto it = gTypeNames().find(Id()); + assert(it != gTypeNames().end()); + return it->second.c_str(); + } + + /** + * Placement new function for the type. + */ + template + static void _Ctor(void* ptr, size_t n) { + T* typed_ptr = static_cast(ptr); + for (size_t i = 0; i < n; ++i) { + new (typed_ptr + i) T; + } + } + + template + static void _CtorNotDefault(void* /*ptr*/, size_t /*n*/) { + _ThrowRuntimeTypeLogicError( + "Type " + std::string(at::demangle_type()) + + " is not default-constructible."); + } + + template < + typename T, + typename std::enable_if::value>::type* = + nullptr> + static inline PlacementNew* _PickCtor() { + return _Ctor; + } + + template < + typename T, + typename std::enable_if::value>::type* = + nullptr> + static inline PlacementNew* _PickCtor() { + return _CtorNotDefault; + } + + /** + * Typed copy function for classes. + */ + template + static void _Copy(const void* src, void* dst, size_t n) { + const T* typed_src = static_cast(src); + T* typed_dst = static_cast(dst); + for (size_t i = 0; i < n; ++i) { + typed_dst[i] = typed_src[i]; + } + } + + /** + * A placeholder function for types that do not allow assignment. + */ + template + static void _CopyNotAllowed( + const void* /*src*/, + void* /*dst*/, + size_t /*n*/) { + _ThrowRuntimeTypeLogicError( + "Type " + std::string(at::demangle_type()) + + " does not allow assignment."); + } + + template < + typename T, + typename std::enable_if::value>::type* = + nullptr> + static inline TypedCopy* _PickCopy() { + return _Copy; + } + + template < + typename T, + typename std::enable_if::value>::type* = + nullptr> + static inline TypedCopy* _PickCopy() { + return _CopyNotAllowed; + } + + /** + * Destructor for non-fundamental types. + */ + template + static void _Dtor(void* ptr, size_t n) { + T* typed_ptr = static_cast(ptr); + for (size_t i = 0; i < n; ++i) { + typed_ptr[i].~T(); + } + } + + /** + * Returns a TypeMeta object that corresponds to the typename T. 
+ */ + template + static typename std::enable_if< + std::is_fundamental::value || std::is_pointer::value, + TypeMeta>::type + Make() { + return TypeMeta(Id(), ItemSize(), nullptr, nullptr, nullptr); + } + + template + static typename std::enable_if< + !(std::is_fundamental::value || std::is_pointer::value), + TypeMeta>::type + Make() { + return TypeMeta( + Id(), ItemSize(), _PickCtor(), _PickCopy(), _Dtor); + } + + private: + TypeIdentifier id_; + size_t itemsize_; + PlacementNew* ctor_; + TypedCopy* copy_; + TypedDestructor* dtor_; +}; + +inline bool operator==(const TypeMeta& lhs, const TypeMeta& rhs) noexcept { + return (lhs.id_ == rhs.id_); +} +inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept { + return !operator==(lhs, rhs); +} + +/** + * Register unique id for a type so it can be used in TypeMeta context, e.g. be + * used as a type for Blob or for Tensor elements. + * + * CAFFE_KNOWN_TYPE does explicit instantiation of TypeMeta::Id template + * function and thus needs to be put in a single translation unit (.cpp file) + * for a given type T. Other translation units that use type T as a type of the + * caffe2::Blob or element type of caffe2::Tensor need to depend on the + * translation unit that contains CAFFE_KNOWN_TYPE declaration via regular + * linkage dependencies. + * + * NOTE: the macro needs to be invoked in ::caffe2 namespace + */ +// Implementation note: in MSVC, we will need to prepend the AT_CORE_API +// keyword in order to get things compiled properly. in Linux, gcc seems to +// create attribute ignored error for explicit template instantiations, see +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0537r0.html +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51930 +// and as a result, we define these two macros slightly differently. + +#ifdef _MSC_VER +#define CAFFE_KNOWN_TYPE(T) \ + template <> \ + AT_CORE_API TypeIdentifier TypeMeta::Id() { \ + static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \ + static TypeNameRegisterer registerer(type_id, #T); \ + return type_id; \ + } +#else // _MSC_VER +#define CAFFE_KNOWN_TYPE(T) \ + template <> \ + TypeIdentifier TypeMeta::Id() { \ + static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \ + static TypeNameRegisterer registerer(type_id, #T); \ + return type_id; \ + } +#endif + +/** + * CAFFE_DECLARE_KNOWN_TYPE and CAFFE_DEFINE_KNOWN_TYPE are used + * to preallocate ids for types that are queried very often so that they + * can be resolved at compile time. Please use CAFFE_KNOWN_TYPE() instead + * for your own types to allocate dynamic ids for them. + */ +#ifdef _MSC_VER +#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \ + template <> \ + inline AT_CORE_API TypeIdentifier TypeMeta::Id() { \ + return TypeIdentifier(PreallocatedId); \ + } +#else // _MSC_VER +#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \ + template <> \ + inline TypeIdentifier TypeMeta::Id() { \ + return TypeIdentifier(PreallocatedId); \ + } +#endif + +#define CONCAT_IMPL(x, y) x##y +#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y) + +#define CAFFE_DEFINE_KNOWN_TYPE(T) \ + namespace { \ + TypeNameRegisterer MACRO_CONCAT(registerer, __COUNTER__)( \ + TypeMeta::Id(), \ + #T); \ + } + +class Tensor; + +// Note: we have preallocated the numbers 0-8 so they line up exactly +// with at::ScalarType's numbering. All other numbers do not matter. +// +// Notably, the "uninitialized" type id is 8, not 0, for hysterical raisins. 
+ +struct _CaffeHighestPreallocatedTypeId final {}; + +CAFFE_DECLARE_KNOWN_TYPE(0, uint8_t); +CAFFE_DECLARE_KNOWN_TYPE(1, int8_t); +CAFFE_DECLARE_KNOWN_TYPE(2, int16_t); +CAFFE_DECLARE_KNOWN_TYPE(3, int); +CAFFE_DECLARE_KNOWN_TYPE(4, int64_t); +CAFFE_DECLARE_KNOWN_TYPE(5, at::Half); +CAFFE_DECLARE_KNOWN_TYPE(6, float); +CAFFE_DECLARE_KNOWN_TYPE(7, double); +// 8 = undefined type id + +CAFFE_DECLARE_KNOWN_TYPE(9, Tensor); +CAFFE_DECLARE_KNOWN_TYPE(10, std::string); +CAFFE_DECLARE_KNOWN_TYPE(11, bool); +CAFFE_DECLARE_KNOWN_TYPE(12, uint16_t); +CAFFE_DECLARE_KNOWN_TYPE(13, char); +CAFFE_DECLARE_KNOWN_TYPE(14, std::unique_ptr); +CAFFE_DECLARE_KNOWN_TYPE(15, std::unique_ptr>); +CAFFE_DECLARE_KNOWN_TYPE(16, std::vector); +CAFFE_DECLARE_KNOWN_TYPE(17, std::vector); +CAFFE_DECLARE_KNOWN_TYPE(18, std::vector); +CAFFE_DECLARE_KNOWN_TYPE(19, bool*); +CAFFE_DECLARE_KNOWN_TYPE(20, char*); +CAFFE_DECLARE_KNOWN_TYPE(21, int*); + +#ifdef CAFFE2_UNIQUE_LONG_TYPEMETA +CAFFE_DECLARE_KNOWN_TYPE(22, long); +CAFFE_DECLARE_KNOWN_TYPE(23, std::vector); +#endif // CAFFE2_UNIQUE_LONG_TYPEMETA + +CAFFE_DECLARE_KNOWN_TYPE(24, _CaffeHighestPreallocatedTypeId); +} // namespace caffe2 diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py index fdbc6f2612e8de..b19b962a23dc0e 100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -180,7 +180,7 @@ def TypedDict(name, attrs, total=True): # type: ignore }""") BUFFER_DEFINITION = CodeTemplate("""\ -auto ${name}_ = new TensorImpl(Backend::${Backend}, ScalarType::${ScalarName}, ${THTensor}_new(), false); +auto ${name}_ = new TensorImpl(${Backend}TensorId(), ScalarType::${ScalarName}, ${THTensor}_new(), false); auto ${name} = Tensor(${name}_, false);""") CONDITIONAL_INITIALIZER = CodeTemplate("""\ @@ -320,23 +320,23 @@ def __init__(self, reason): CHECKED_USE_NULLABLE = CodeTemplate('${arg_name}_ ? 
${usage} : NULL') ALLOC_NOARGS_WRAP = { - 'THTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::${ScalarName})', - 'THBoolTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Byte)', - 'THIndexTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Long)', - 'THIntegerTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Int)', + 'THTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::${ScalarName})', + 'THBoolTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Byte)', + 'THIndexTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Long)', + 'THIntegerTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Int)', 'THSTensor*': 'detail::new_Sparse${Tensor}()', - 'THDenseTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::${ScalarName})', - 'THDenseIndexTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Long)' + 'THDenseTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::${ScalarName})', + 'THDenseIndexTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Long)' } ALLOC_WRAP = { - 'THTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::${ScalarName}, ${arguments}, false)', - 'THBoolTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Byte, ${arguments}, false)', - 'THIndexTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Long, ${arguments}, false)', - 'THIntegerTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Int, ${arguments}, false)', + 'THTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::${ScalarName}, ${arguments}, false)', + 'THBoolTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Byte, ${arguments}, false)', + 'THIndexTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Long, ${arguments}, false)', + 'THIntegerTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Int, ${arguments}, false)', 'THSTensor*': 'new Sparse${Tensor}(${arguments})', - 'THDenseTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::${ScalarName}, ${arguments}, false)', - 'THDenseIndexTensor*': 'new TensorImpl(Backend::${Backend}, ScalarType::Long, ${arguments}, false)', + 'THDenseTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::${ScalarName}, ${arguments}, false)', + 'THDenseIndexTensor*': 'new TensorImpl(${Backend}TensorId(), ScalarType::Long, ${arguments}, false)', } # Replacements for constants when calling into TH diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 7a7e8be5c7ff6a..6eb9b8cfb58071 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -62,8 +62,13 @@ SparseTensor new_sparse(const SparseType& dtype) { AT_ASSERT(!dtype.is_undefined()); AT_ASSERT(!dtype.is_variable()); AT_ASSERT(dtype.is_sparse()); - // TODO: Hmm... 
this const_cast business seems a bit dodgy - return SparseTensor(new SparseTensorImpl(dtype.backend(), dtype.scalarType()), /* retain */ false); + TensorTypeId type_id; + if (dtype.is_cuda()) { + type_id = SparseCUDATensorId(); + } else { + type_id = SparseCPUTensorId(); + } + return SparseTensor(new SparseTensorImpl(type_id, dtype.scalarType()), /* retain */ false); } /*** Helper methods ***/ diff --git a/aten/src/ATen/templates/SparseTypeDerived.cpp b/aten/src/ATen/templates/SparseTypeDerived.cpp index 977bbbb079460d..6c3094e71aa0df 100644 --- a/aten/src/ATen/templates/SparseTypeDerived.cpp +++ b/aten/src/ATen/templates/SparseTypeDerived.cpp @@ -28,7 +28,7 @@ namespace at { ${Type}::${Type}(Context* context) - : Type(context, /*is_variable=*/false, /*is_undefined=*/false) {} + : Type(context, ${Backend}TensorId(), /*is_variable=*/false, /*is_undefined=*/false) {} ScalarType ${Type}::scalarType() const { return ScalarType::${ScalarName}; } diff --git a/aten/src/ATen/templates/Type.h b/aten/src/ATen/templates/Type.h index a01c3c5e3447f5..10036a5286b5bc 100644 --- a/aten/src/ATen/templates/Type.h +++ b/aten/src/ATen/templates/Type.h @@ -13,6 +13,7 @@ #include "ATen/Tensor.h" #include "ATen/core/ArrayRef.h" #include "ATen/core/Half.h" +#include "ATen/core/TensorTypeIdRegistration.h" #include "THNN/Reduction.h" #include @@ -44,8 +45,8 @@ enum class TypeID { }; struct AT_API Type { - explicit Type(Context* context, bool is_variable, bool is_undefined) - : context(context), is_variable_(is_variable), is_undefined_(is_undefined) {} + explicit Type(Context* context, TensorTypeId type_id, bool is_variable, bool is_undefined) + : context(context), type_id_(type_id), is_variable_(is_variable), is_undefined_(is_undefined) {} virtual ~Type() {} virtual ScalarType scalarType() const = 0; virtual Backend backend() const = 0; @@ -79,6 +80,9 @@ struct AT_API Type { // for external dispatch virtual TypeID ID() const = 0; + // New-style TensorTypeId that supports open registration. 
+ TensorTypeId type_id() const { return type_id_; } + Tensor copy(const Tensor & src, bool non_blocking=false) const; Tensor & copy_(Tensor & self, const Tensor & src, bool non_blocking=false) const; virtual Tensor & s_copy_(Tensor & self, const Tensor & src, bool non_blocking) const = 0; @@ -98,6 +102,7 @@ struct AT_API Type { ${type_method_declarations} protected: Context* context; + TensorTypeId type_id_; bool is_variable_; bool is_undefined_; diff --git a/aten/src/ATen/templates/TypeDerived.cpp b/aten/src/ATen/templates/TypeDerived.cpp index a4ad1dfc205211..2bd041e37a7fc5 100644 --- a/aten/src/ATen/templates/TypeDerived.cpp +++ b/aten/src/ATen/templates/TypeDerived.cpp @@ -39,7 +39,7 @@ static int getPointerDevice(void* ptr) { #endif ${Type}::${Type}(Context* context) - : Type(context, /*is_variable=*/false, /*is_undefined=*/false) {} + : Type(context, ${Backend}TensorId(), /*is_variable=*/false, /*is_undefined=*/false) {} ScalarType ${Type}::scalarType() const { return ScalarType::${ScalarName}; } @@ -95,7 +95,7 @@ std::unique_ptr ${Type}::storageWithAllocator(int64_t size, Allocator* Tensor ${Type}::unsafeTensorFromTH(void * th_pointer, bool retain) const { if (retain) ${THTensor}_retain(${state,} (${THTensor}*) th_pointer); - return Tensor(new TensorImpl(Backend::${Backend}, ScalarType::${ScalarName}, + return Tensor(new TensorImpl(${Backend}TensorId(), ScalarType::${ScalarName}, (${THTensor}*)(th_pointer), false), false); } std::unique_ptr ${Type}::unsafeStorageFromTH(void * th_pointer, bool retain) const { diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index f5cdc310ae18b8..9aba3ffc9bda4f 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -59,8 +59,7 @@ else() list(APPEND Caffe2_CPU_SRCS ${ATen_CORE_SRCS}) list(APPEND Caffe2_CPU_INCLUDE ${ATen_CORE_INCLUDE}) list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CORE_TEST_SRCS}) - # TODO: We should probably install the headers, but I don't know - # how to do that. + # See cmake/Codegen.cmake for header installation endif() # ---[ Torch build @@ -326,6 +325,14 @@ if(USE_CUDA) target_link_libraries( caffe2_gpu PUBLIC caffe2 ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) + # See Note [Supporting both static and dynamic libraries on Window] + # TODO: I'm actually not sure why this is necessary, because caffe2_gpu + # should depend on caffe2 (which itself would give us the necessary + # macro definition). + if (MSVC AND NOT BUILD_SHARED_LIBS) + target_compile_options(caffe2_gpu PUBLIC "-DAT_CORE_STATIC_WINDOWS=1") + endif() + # Set standard properties on the target aten_set_target_props(caffe2_gpu) diff --git a/caffe2/core/flags.h b/caffe2/core/flags.h index 6d8904560fe915..5a7adf7d143af0 100644 --- a/caffe2/core/flags.h +++ b/caffe2/core/flags.h @@ -26,12 +26,12 @@ namespace caffe2 { /** * Sets the usage message when a commandline tool is called with "--help". */ -void SetUsageMessage(const string& str); +CAFFE2_API void SetUsageMessage(const string& str); /** * Returns the usage message for the commandline tool set by SetUsageMessage. */ -const char* UsageMessage(); +CAFFE2_API const char* UsageMessage(); /** * Parses the commandline flags. @@ -41,11 +41,11 @@ const char* UsageMessage(); * commandline args that caffe2 does not deal with. Note that following * convention, argv[0] contains the binary name and is not parsed. */ -bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv); +CAFFE2_API bool ParseCaffeCommandLineFlags(int* pargc, char*** pargv); /** * Checks if the commandline flags has already been passed. 
*/ -bool CommandLineFlagsHasBeenParsed(); +CAFFE2_API bool CommandLineFlagsHasBeenParsed(); } // namespace caffe2 @@ -56,6 +56,10 @@ bool CommandLineFlagsHasBeenParsed(); #ifdef CAFFE2_USE_GFLAGS +//////////////////////////////////////////////////////////////////////////////// +// Begin gflags section: most functions are basically rerouted to gflags. +//////////////////////////////////////////////////////////////////////////////// + #include // gflags before 2.0 uses namespace google and after 2.1 uses namespace gflags. @@ -64,41 +68,70 @@ bool CommandLineFlagsHasBeenParsed(); namespace gflags = google; #endif // GFLAGS_GFLAGS_H_ -#define CAFFE2_GFLAGS_DEF_WRAPPER(type, name, default_value, help_str) \ +// Motivation about the gflags wrapper: +// (1) We would need to make sure that the gflags version and the non-gflags +// version of Caffe2 are going to expose the same flags abstraction. One should +// explicitly use caffe2::FLAGS_flag_name to access the flags. +// (2) For flag names, it is recommended to start with caffe2_ to distinguish it +// from regular gflags flags. For example, do +// CAFFE2_DEFINE_BOOL(caffe2_my_flag, true, "An example"); +// to allow one to use caffe2::FLAGS_caffe2_my_flag. +// (3) Gflags has a design issue that does not properly expose the global flags, +// if one builds the library with -fvisibility=hidden. The current gflags (as of +// Aug 2018) only deals with the Windows case using dllexport, and not the Linux +// counterparts. As a result, we will explicitly use CAFFE2_EXPORT to export the +// flags defined in Caffe2. This is done via a global reference, so the flag +// itself is not duplicated - under the hood it is the same global gflags flag. +#define CAFFE2_GFLAGS_DEF_WRAPPER( \ + type, real_type, name, default_value, help_str) \ DEFINE_##type(name, default_value, help_str); \ namespace caffe2 { \ - using ::FLAGS_##name; \ + CAFFE2_EXPORT real_type& FLAGS_##name = ::FLAGS_##name; \ } #define CAFFE2_DEFINE_int(name, default_value, help_str) \ - CAFFE2_GFLAGS_DEF_WRAPPER(int32, name, default_value, help_str) + CAFFE2_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str) #define CAFFE2_DEFINE_int64(name, default_value, help_str) \ - CAFFE2_GFLAGS_DEF_WRAPPER(int64, name, default_value, help_str) + CAFFE2_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str) #define CAFFE2_DEFINE_double(name, default_value, help_str) \ - CAFFE2_GFLAGS_DEF_WRAPPER(double, name, default_value, help_str) + CAFFE2_GFLAGS_DEF_WRAPPER(double, double, name, default_value, help_str) #define CAFFE2_DEFINE_bool(name, default_value, help_str) \ - CAFFE2_GFLAGS_DEF_WRAPPER(bool, name, default_value, help_str) -#define CAFFE2_DEFINE_string(name, default_value, help_str) \ - CAFFE2_GFLAGS_DEF_WRAPPER(string, name, default_value, help_str) + CAFFE2_GFLAGS_DEF_WRAPPER(bool, bool, name, default_value, help_str) +#define CAFFE2_DEFINE_string(name, default_value, help_str) \ + CAFFE2_GFLAGS_DEF_WRAPPER( \ + string, ::fLS::clstring, name, default_value, help_str) // DECLARE_typed_var should be used in header files and in the global namespace.
-#define CAFFE2_GFLAGS_DECLARE_WRAPPER(type, name) \ - DECLARE_##type(name); \ - namespace caffe2 { \ - using ::FLAGS_##name; \ +#define CAFFE2_GFLAGS_DECLARE_WRAPPER(type, real_type, name) \ + DECLARE_##type(name); \ + namespace caffe2 { \ + extern real_type& FLAGS_##name ; \ } // namespace caffe2 -#define CAFFE2_DECLARE_int(name) CAFFE2_GFLAGS_DECLARE_WRAPPER(int32, name) -#define CAFFE2_DECLARE_int64(name) CAFFE2_GFLAGS_DECLARE_WRAPPER(int64, name) -#define CAFFE2_DECLARE_double(name) CAFFE2_GFLAGS_DECLARE_WRAPPER(double, name) -#define CAFFE2_DECLARE_bool(name) CAFFE2_GFLAGS_DECLARE_WRAPPER(bool, name) -#define CAFFE2_DECLARE_string(name) CAFFE2_GFLAGS_DECLARE_WRAPPER(string, name) +#define CAFFE2_DECLARE_int(name) \ + CAFFE2_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name) +#define CAFFE2_DECLARE_int64(name) \ + CAFFE2_GFLAGS_DECLARE_WRAPPER(int64, gflags::int64, name) +#define CAFFE2_DECLARE_double(name) \ + CAFFE2_GFLAGS_DECLARE_WRAPPER(double, double, name) +#define CAFFE2_DECLARE_bool(name) \ + CAFFE2_GFLAGS_DECLARE_WRAPPER(bool, bool, name) +#define CAFFE2_DECLARE_string(name) \ + CAFFE2_GFLAGS_DECLARE_WRAPPER(string, ::fLS::clstring, name) + +//////////////////////////////////////////////////////////////////////////////// +// End gflags section. +//////////////////////////////////////////////////////////////////////////////// #else // CAFFE2_USE_GFLAGS +//////////////////////////////////////////////////////////////////////////////// +// Begin non-gflags section: providing equivalent functionality. +//////////////////////////////////////////////////////////////////////////////// + namespace caffe2 { -class Caffe2FlagParser { +class CAFFE2_API Caffe2FlagParser { public: Caffe2FlagParser() {} bool success() { return success_; } @@ -117,29 +150,29 @@ CAFFE_DECLARE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&); // write the CAFFE2_DEFINE_* and CAFFE2_DECLARE_* macros outside any namespace // as well. 
-#define CAFFE2_DEFINE_typed_var(type, name, default_value, help_str) \ - namespace caffe2 { \ - CAFFE2_EXPORT type FLAGS_##name = default_value; \ - namespace { \ - class Caffe2FlagParser_##name : public Caffe2FlagParser { \ - public: \ - explicit Caffe2FlagParser_##name(const string& content) { \ - success_ = Caffe2FlagParser::Parse(content, &FLAGS_##name); \ - } \ - }; \ - } \ - RegistererCaffe2FlagsRegistry g_Caffe2FlagsRegistry_##name( \ - #name, \ - Caffe2FlagsRegistry(), \ - RegistererCaffe2FlagsRegistry::DefaultCreator, \ - "(" #type ", default " #default_value ") " help_str); \ +#define CAFFE2_DEFINE_typed_var(type, name, default_value, help_str) \ + namespace caffe2 { \ + CAFFE2_EXPORT type FLAGS_##name = default_value; \ + namespace { \ + class Caffe2FlagParser_##name : public Caffe2FlagParser { \ + public: \ + explicit Caffe2FlagParser_##name(const string& content) { \ + success_ = Caffe2FlagParser::Parse(content, &FLAGS_##name); \ + } \ + }; \ + } \ + RegistererCaffe2FlagsRegistry g_Caffe2FlagsRegistry_##name( \ + #name, \ + Caffe2FlagsRegistry(), \ + RegistererCaffe2FlagsRegistry::DefaultCreator, \ + "(" #type ", default " #default_value ") " help_str); \ } #define CAFFE2_DEFINE_int(name, default_value, help_str) \ CAFFE2_DEFINE_typed_var(int, name, default_value, help_str) -#define CAFFE2_DEFINE_int64(name, default_value, help_str) \ +#define CAFFE2_DEFINE_int64(name, default_value, help_str) \ CAFFE2_DEFINE_typed_var(int64_t, name, default_value, help_str) -#define CAFFE2_DEFINE_double(name, default_value, help_str) \ +#define CAFFE2_DEFINE_double(name, default_value, help_str) \ CAFFE2_DEFINE_typed_var(double, name, default_value, help_str) #define CAFFE2_DEFINE_bool(name, default_value, help_str) \ CAFFE2_DEFINE_typed_var(bool, name, default_value, help_str) @@ -147,9 +180,9 @@ CAFFE_DECLARE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&); CAFFE2_DEFINE_typed_var(string, name, default_value, help_str) // DECLARE_typed_var should be used in header files and in the global namespace. -#define CAFFE2_DECLARE_typed_var(type, name) \ - namespace caffe2 { \ - CAFFE2_IMPORT extern type FLAGS_##name; \ +#define CAFFE2_DECLARE_typed_var(type, name) \ + namespace caffe2 { \ + CAFFE2_IMPORT extern type FLAGS_##name; \ } // namespace caffe2 #define CAFFE2_DECLARE_int(name) CAFFE2_DECLARE_typed_var(int, name) @@ -158,6 +191,10 @@ CAFFE_DECLARE_REGISTRY(Caffe2FlagsRegistry, Caffe2FlagParser, const string&); #define CAFFE2_DECLARE_bool(name) CAFFE2_DECLARE_typed_var(bool, name) #define CAFFE2_DECLARE_string(name) CAFFE2_DECLARE_typed_var(string, name) +//////////////////////////////////////////////////////////////////////////////// +// End non-gflags section. 
+//////////////////////////////////////////////////////////////////////////////// + #endif // CAFFE2_USE_GFLAGS #endif // CAFFE2_CORE_FLAGS_H_ diff --git a/caffe2/core/flags_test.cc b/caffe2/core/flags_test.cc new file mode 100644 index 00000000000000..28bbe3d0688265 --- /dev/null +++ b/caffe2/core/flags_test.cc @@ -0,0 +1,27 @@ +#include +#include "caffe2/core/macros.h" +#include "caffe2/core/flags.h" +#include "caffe2/core/logging.h" + +CAFFE2_DEFINE_bool(caffe2_flags_test_only_flag, true, "Only used in test."); + +namespace caffe2 { + +TEST(FlagsTest, TestGflagsCorrectness) { +#ifdef CAFFE2_USE_GFLAGS + EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, true); + EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, true); + // Change the caffe2 namespace and check global + FLAGS_caffe2_flags_test_only_flag = false; + EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, false); + EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, false); + // Change global and check caffe2 namespace + ::FLAGS_caffe2_flags_test_only_flag = true; + EXPECT_EQ(FLAGS_caffe2_flags_test_only_flag, true); + EXPECT_EQ(::FLAGS_caffe2_flags_test_only_flag, true); +#else // CAFFE2_USE_GFLAGS + LOG(INFO) << "Caffe2 is not built with gflags. Nothing to test here."; +#endif +} + +} // namespace caffe2 diff --git a/caffe2/core/logging.cc b/caffe2/core/logging.cc index 7de8f9aacd262a..0c49f97932634c 100644 --- a/caffe2/core/logging.cc +++ b/caffe2/core/logging.cc @@ -69,20 +69,27 @@ std::function GetOperatorLogger() { #ifdef CAFFE2_USE_GOOGLE_GLOG #ifdef CAFFE2_USE_GFLAGS +// When GLOG depends on GFLAGS, these variables are being defined in GLOG +// directly via the GFLAGS definition, so we will use DECLARE_* to declare +// them, and use them in Caffe2. // GLOG's minloglevel -CAFFE2_DECLARE_int(minloglevel); +DECLARE_int32(minloglevel); // GLOG's verbose log value. -CAFFE2_DECLARE_int(v); +DECLARE_int32(v); // GLOG's logtostderr value -CAFFE2_DECLARE_bool(logtostderr); - -#else +DECLARE_bool(logtostderr); +#endif // CAFFE2_USE_GFLAGS +// Provide easy access to the above variables, regardless whether GLOG is +// dependent on GFLAGS or not. Note that the namespace (fLI, fLB) is actually +// consistent between GLOG and GFLAGS, so we can do the below declaration +// consistently. 
+namespace caffe2 { using fLI::FLAGS_minloglevel; using fLI::FLAGS_v; using fLB::FLAGS_logtostderr; +} // namespace caffe2 -#endif // CAFFE2_USE_GFLAGS CAFFE2_DEFINE_int(caffe2_log_level, google::GLOG_ERROR, "The minimum log level that caffe2 will output."); diff --git a/caffe2/core/registry.h b/caffe2/core/registry.h index f5e0932228a977..d55b1e181cefde 100644 --- a/caffe2/core/registry.h +++ b/caffe2/core/registry.h @@ -178,7 +178,7 @@ class Registerer { key, \ RegistryName(), \ Registerer##RegistryName::DefaultCreator<__VA_ARGS__>, \ - DemangleType<__VA_ARGS__>()); \ + at::demangle_type<__VA_ARGS__>()); \ } // CAFFE_DECLARE_REGISTRY and CAFFE_DEFINE_REGISTRY are hard-wired to use string diff --git a/caffe2/core/tensor.cc b/caffe2/core/tensor.cc index fa494902b1d5f4..624854515e9e2b 100644 --- a/caffe2/core/tensor.cc +++ b/caffe2/core/tensor.cc @@ -16,6 +16,8 @@ CAFFE2_DEFINE_int64( namespace caffe2 { +CAFFE_DEFINE_KNOWN_TYPE(Tensor); + TensorPrinter::TensorPrinter( const std::string& tensor_name, const std::string& file_name, diff --git a/caffe2/core/typeid.h b/caffe2/core/typeid.h index 609c67a61dbf20..412de1d96a2286 100644 --- a/caffe2/core/typeid.h +++ b/caffe2/core/typeid.h @@ -1,480 +1,7 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef __GXX_RTTI -#include -#endif - -#include - -#include "ATen/core/Half.h" -#include "caffe2/core/common.h" -#include "ATen/core/IdWrapper.h" - -namespace caffe2 { -class TypeIdentifier; -} - -std::ostream& operator<<(std::ostream& stream, caffe2::TypeIdentifier typeId); - -namespace caffe2 { - -class TypeMeta; - -/** - * A type id is a unique id for a given C++ type. - * You need to register your types using CAFFE_KNOWN_TYPE(MyType) to be able to use TypeIdentifier with custom types. - * This is for example used to store the dtype of tensors. - */ -class TypeIdentifier final : public at::IdWrapper { -public: - static TypeIdentifier createTypeId(); - - friend std::ostream& ::operator<<(std::ostream& stream, TypeIdentifier typeId); - friend bool operator<(TypeIdentifier lhs, TypeIdentifier rhs); - - // This is 8, because 0 is uint8_t (due to ScalarType BC constraint) - static constexpr TypeIdentifier uninitialized() { - return TypeIdentifier(8); - } - -private: - constexpr explicit TypeIdentifier(uint16_t id): IdWrapper(id) {} - friend class TypeMeta; -}; - -// Allow usage in std::map / std::set -// TODO Disallow this and rather use std::unordered_map/set everywhere -inline bool operator<(TypeIdentifier lhs, TypeIdentifier rhs) { - return lhs.underlyingId() < rhs.underlyingId(); -} - -} - -AT_DEFINE_HASH_FOR_IDWRAPPER(caffe2::TypeIdentifier) - -inline std::ostream& operator<<(std::ostream& stream, caffe2::TypeIdentifier typeId) { - return stream << typeId.underlyingId(); -} - -namespace caffe2 { - -std::unordered_map& gTypeNames(); -std::unordered_set& gRegisteredTypeNames(); - -// A utility function to demangle a function name. -std::string Demangle(const char* name); - -/** - * Returns the printable name of the type. - * - * Works for all types, not only the ones registered with CAFFE_KNOWN_TYPE - */ -template -static const char* DemangleType() { -#ifdef __GXX_RTTI - static const std::string name = Demangle(typeid(T).name()); - return name.c_str(); -#else // __GXX_RTTI - return "(RTTI disabled, cannot show name)"; -#endif // __GXX_RTTI -} - -// A utility function to return an exception std::string by prepending its exception -// type before its what() content. 
-std::string GetExceptionString(const std::exception& e);
-
-std::mutex& gTypeRegistrationMutex();
-
-template <typename T>
-struct TypeNameRegisterer {
-  TypeNameRegisterer(TypeIdentifier id, const std::string& literal_name) {
-    std::lock_guard<std::mutex> guard(gTypeRegistrationMutex());
-#ifdef __GXX_RTTI
-    (void)literal_name;
-
-    std::string name = Demangle(typeid(T).name());
-    // If we are in RTTI mode, we will also use this opportunity to do sanity
-    // check if there are duplicated ids registered for the same type. This
-    // usually happens when one does not do RTLD_GLOBAL, which is often the
-    // case in Python. The way we do the check is to make sure that there are
-    // no duplicated names registered - this could be done by checking the
-    // uniqueness of names.
-    if (gRegisteredTypeNames().count(name)) {
-      std::cerr << "Type name " << name
-                << " registered twice. This should "
-                   "not happen. Do you have duplicated CAFFE_KNOWN_TYPE?"
-                << std::endl;
-      throw std::runtime_error("TypeNameRegisterer error with type " + name);
-    }
-    gRegisteredTypeNames().insert(name);
-    gTypeNames()[id] = name;
-#else // __GXX_RTTI
-    if (literal_name.empty()) {
-      gTypeNames()[id] = "(RTTI disabled, cannot show name)";
-    } else {
-      gTypeNames()[id] = literal_name;
-    }
-#endif // __GXX_RTTI
-  }
-};
-
-/**
- * TypeMeta is a thin class that allows us to store the type of a container
- * such as a blob, or the data type of a tensor, with a unique run-time id.
- * It also stores some additional data such as the item size and the name of
- * the type for run-time inspection.
- */
-class TypeMeta {
- public:
-  using PlacementNew = void (void*, size_t);
-  using TypedCopy = void (const void*, void*, size_t);
-  using TypedDestructor = void (void*, size_t);
-  /** Create a dummy TypeMeta object. To create a TypeMeta object for a
-   * specific type, use TypeMeta::Make<T>().
-   */
-  TypeMeta() noexcept
-      : id_(TypeIdentifier::uninitialized()), itemsize_(0), ctor_(nullptr), copy_(nullptr), dtor_(nullptr) {}
-
-  /**
-   * Copy constructor.
-   */
-  TypeMeta(const TypeMeta& src) noexcept = default;
-
-  /**
-   * Assignment operator.
-   */
-  TypeMeta& operator=(const TypeMeta& src) noexcept = default;
-
-  TypeMeta(TypeMeta &&rhs) noexcept = default;
-
- private:
-  // TypeMeta can only be created by Make, making sure that we do not
-  // create incorrectly mixed up TypeMeta objects.
-  TypeMeta(
-      TypeIdentifier i,
-      size_t s,
-      PlacementNew* ctor,
-      TypedCopy* copy,
-      TypedDestructor* dtor) noexcept
-      : id_(i), itemsize_(s), ctor_(ctor), copy_(copy), dtor_(dtor) {}
-
-  // Mechanism for throwing errors which can't be prevented at compile time
-  // due to type erasure. E.g. somebody calling TypeMeta::copy() for
-  // non-copyable type. Right now just throws exception but is implemented
-  // in .cpp to manage dependencies
-  static void _ThrowRuntimeTypeLogicError(const std::string& msg);
-
- public:
-  /**
-   * Returns the type id.
-   */
-  const TypeIdentifier& id() const noexcept {
-    return id_;
-  }
-  /**
-   * Returns the size of the item.
-   */
-  const size_t& itemsize() const noexcept {
-    return itemsize_;
-  }
-  /**
-   * Returns the placement new function pointer for individual items.
-   */
-  PlacementNew* ctor() const noexcept {
-    return ctor_;
-  }
-  /**
-   * Returns the typed copy function pointer for individual items.
-   */
-  TypedCopy* copy() const noexcept {
-    return copy_;
-  }
-  /**
-   * Returns the destructor function pointer for individual items.
-   */
-  TypedDestructor* dtor() const noexcept {
-    return dtor_;
-  }
-  /**
-   * Returns a printable name for the type.
-   */
-  const char* name() const noexcept {
-    auto it = gTypeNames().find(id_);
-    assert(it != gTypeNames().end());
-    return it->second.c_str();
-  }
-
-  friend bool operator==(const TypeMeta& lhs, const TypeMeta& rhs) noexcept;
-
-  template <typename T>
-  bool Match() const {
-    return (id_ == Id<T>());
-  }
-
-  // Below are static functions that can be called by passing a specific type.
-
-  /**
-   * Returns the unique id for the given type T. The id is unique for the type
-   * T in the sense that for any two different types, their ids are different;
-   * for the same type T, the id remains the same over different calls of the
-   * function. However, this is not guaranteed over different runs, as the id
-   * is generated during run-time. Do NOT serialize the id for storage.
-   */
-  template <typename T>
-  CAFFE2_API static TypeIdentifier Id();
-
-  /**
-   * Returns the item size of the type. This is equivalent to sizeof(T).
-   */
-  template <typename T>
-  static size_t ItemSize() {
-    return sizeof(T);
-  }
-
-  /**
-   * Returns the registered printable name of the type.
-   *
-   * Works for only the ones registered with CAFFE_KNOWN_TYPE
-   */
-  template <typename T>
-  static const char* TypeName() {
-    auto it = gTypeNames().find(Id<T>());
-    assert(it != gTypeNames().end());
-    return it->second.c_str();
-  }
-
-  /**
-   * Placement new function for the type.
-   */
-  template <typename T>
-  static void _Ctor(void* ptr, size_t n) {
-    T* typed_ptr = static_cast<T*>(ptr);
-    for (size_t i = 0; i < n; ++i) {
-      new (typed_ptr + i) T;
-    }
-  }
-
-  template <typename T>
-  static void _CtorNotDefault(void* /*ptr*/, size_t /*n*/) {
-    _ThrowRuntimeTypeLogicError(
-        "Type " + std::string(DemangleType<T>()) +
-        " is not default-constructible.");
-  }
-
-  template <
-      typename T,
-      typename std::enable_if<std::is_default_constructible<T>::value>::type* =
-          nullptr>
-  static inline PlacementNew* _PickCtor() {
-    return _Ctor<T>;
-  }
-
-  template <
-      typename T,
-      typename std::enable_if<!std::is_default_constructible<T>::value>::type* =
-          nullptr>
-  static inline PlacementNew* _PickCtor() {
-    return _CtorNotDefault<T>;
-  }
-
-  /**
-   * Typed copy function for classes.
-   */
-  template <typename T>
-  static void _Copy(const void* src, void* dst, size_t n) {
-    const T* typed_src = static_cast<const T*>(src);
-    T* typed_dst = static_cast<T*>(dst);
-    for (size_t i = 0; i < n; ++i) {
-      typed_dst[i] = typed_src[i];
-    }
-  }
-
-  /**
-   * A placeholder function for types that do not allow assignment.
-   */
-  template <typename T>
-  static void
-  _CopyNotAllowed(const void* /*src*/, void* /*dst*/, size_t /*n*/) {
-    _ThrowRuntimeTypeLogicError(
-        "Type " + std::string(DemangleType<T>()) +
-        " does not allow assignment.");
-  }
-
-  template <
-      typename T,
-      typename std::enable_if<std::is_copy_assignable<T>::value>::type* =
-          nullptr>
-  static inline TypedCopy* _PickCopy() {
-    return _Copy<T>;
-  }
-
-  template <
-      typename T,
-      typename std::enable_if<!std::is_copy_assignable<T>::value>::type* =
-          nullptr>
-  static inline TypedCopy* _PickCopy() {
-    return _CopyNotAllowed<T>;
-  }
-
-  /**
-   * Destructor for non-fundamental types.
-   */
-  template <typename T>
-  static void _Dtor(void* ptr, size_t n) {
-    T* typed_ptr = static_cast<T*>(ptr);
-    for (size_t i = 0; i < n; ++i) {
-      typed_ptr[i].~T();
-    }
-  }
-
-  /**
-   * Returns a TypeMeta object that corresponds to the typename T.
-   */
-  template <typename T>
-  static typename std::enable_if<
-      std::is_fundamental<T>::value || std::is_pointer<T>::value,
-      TypeMeta>::type
-  Make() {
-    return TypeMeta(Id<T>(), ItemSize<T>(), nullptr, nullptr, nullptr);
-  }
-
-  template <typename T>
-  static typename std::enable_if<
-      !(std::is_fundamental<T>::value || std::is_pointer<T>::value),
-      TypeMeta>::type
-  Make() {
-    return TypeMeta(
-        Id<T>(), ItemSize<T>(), _PickCtor<T>(), _PickCopy<T>(), _Dtor<T>);
-  }
-
- private:
-  TypeIdentifier id_;
-  size_t itemsize_;
-  PlacementNew* ctor_;
-  TypedCopy* copy_;
-  TypedDestructor* dtor_;
-};
-
-inline bool operator==(const TypeMeta& lhs, const TypeMeta& rhs) noexcept {
-  return (lhs.id_ == rhs.id_);
-}
-inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept {
-  return !operator==(lhs, rhs);
-}
-
-/**
- * Register unique id for a type so it can be used in TypeMeta context, e.g. be
- * used as a type for Blob or for Tensor elements.
- *
- * CAFFE_KNOWN_TYPE does explicit instantiation of the TypeMeta::Id<T> template
- * function and thus needs to be put in a single translation unit (.cpp file)
- * for a given type T. Other translation units that use type T as a type of the
- * caffe2::Blob or element type of caffe2::Tensor need to depend on the
- * translation unit that contains CAFFE_KNOWN_TYPE declaration via regular
- * linkage dependencies.
- *
- * NOTE: the macro needs to be invoked in ::caffe2 namespace
- */
-// Implementation note: in MSVC, we will need to prepend the CAFFE2_EXPORT
-// keyword in order to get things compiled properly. In Linux, gcc seems to
-// create an attribute-ignored error for explicit template instantiations, see
-//   http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0537r0.html
-//   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51930
-// and as a result, we define these two macros slightly differently.
-
-#ifdef _MSC_VER
-#define CAFFE_KNOWN_TYPE(T) \
-  template <> \
-  CAFFE2_EXPORT TypeIdentifier TypeMeta::Id<T>() { \
-    static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \
-    static TypeNameRegisterer<T> registerer(type_id, #T); \
-    return type_id; \
-  }
-#else // _MSC_VER
-#define CAFFE_KNOWN_TYPE(T) \
-  template <> \
-  TypeIdentifier TypeMeta::Id<T>() { \
-    static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \
-    static TypeNameRegisterer<T> registerer(type_id, #T); \
-    return type_id; \
-  }
-#endif
-
-/**
- * CAFFE_DECLARE_KNOWN_TYPE and CAFFE_DEFINE_KNOWN_TYPE are used
- * to preallocate ids for types that are queried very often so that they
- * can be resolved at compile time. Please use CAFFE_KNOWN_TYPE() instead
- * for your own types to allocate dynamic ids for them.
- */
-#ifdef _MSC_VER
-#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \
-  template <> \
-  inline CAFFE2_EXPORT TypeIdentifier TypeMeta::Id<T>() { \
-    return TypeIdentifier(PreallocatedId); \
-  }
-#else // _MSC_VER
-#define CAFFE_DECLARE_KNOWN_TYPE(PreallocatedId, T) \
-  template <> \
-  inline TypeIdentifier TypeMeta::Id<T>() { \
-    return TypeIdentifier(PreallocatedId); \
-  }
-#endif
-
-#define CONCAT_IMPL(x, y) x##y
-#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y)
-
-#define CAFFE_DEFINE_KNOWN_TYPE(T) \
-  namespace { \
-  TypeNameRegisterer<T> MACRO_CONCAT(registerer, __COUNTER__)( \
-      TypeMeta::Id<T>(), \
-      #T); \
-  }
-
-class Tensor;
-
-// Note: we have preallocated the numbers 0-8 so they line up exactly
-// with at::ScalarType's numbering. All other numbers do not matter.
-//
-// Notably, the "uninitialized" type id is 8, not 0, for hysterical raisins.
-
-struct _CaffeHighestPreallocatedTypeId final {};
-
-CAFFE_DECLARE_KNOWN_TYPE(0, uint8_t);
-CAFFE_DECLARE_KNOWN_TYPE(1, int8_t);
-CAFFE_DECLARE_KNOWN_TYPE(2, int16_t);
-CAFFE_DECLARE_KNOWN_TYPE(3, int);
-CAFFE_DECLARE_KNOWN_TYPE(4, int64_t);
-CAFFE_DECLARE_KNOWN_TYPE(5, at::Half);
-CAFFE_DECLARE_KNOWN_TYPE(6, float);
-CAFFE_DECLARE_KNOWN_TYPE(7, double);
-// 8 = undefined type id
-
-CAFFE_DECLARE_KNOWN_TYPE(9, Tensor);
-CAFFE_DECLARE_KNOWN_TYPE(10, std::string);
-CAFFE_DECLARE_KNOWN_TYPE(11, bool);
-CAFFE_DECLARE_KNOWN_TYPE(12, uint16_t);
-CAFFE_DECLARE_KNOWN_TYPE(13, char);
-CAFFE_DECLARE_KNOWN_TYPE(14, std::unique_ptr<std::mutex>);
-CAFFE_DECLARE_KNOWN_TYPE(15, std::unique_ptr<std::atomic<bool>>);
-CAFFE_DECLARE_KNOWN_TYPE(16, std::vector<int32_t>);
-CAFFE_DECLARE_KNOWN_TYPE(17, std::vector<int64_t>);
-CAFFE_DECLARE_KNOWN_TYPE(18, std::vector<unsigned long>);
-CAFFE_DECLARE_KNOWN_TYPE(19, bool*);
-CAFFE_DECLARE_KNOWN_TYPE(20, char*);
-CAFFE_DECLARE_KNOWN_TYPE(21, int*);
-
-#ifdef CAFFE2_UNIQUE_LONG_TYPEMETA
-CAFFE_DECLARE_KNOWN_TYPE(22, long);
-CAFFE_DECLARE_KNOWN_TYPE(23, std::vector<long>);
-#endif // CAFFE2_UNIQUE_LONG_TYPEMETA
-
-CAFFE_DECLARE_KNOWN_TYPE(24, _CaffeHighestPreallocatedTypeId);
-}
+// If I omit this header, the Windows build fails. The error message
+// was sufficiently bad that I couldn't figure out which downstream file
+// was missing the include of common.h. So keep it here for BC.
+#include "caffe2/core/common.h"
+#include <ATen/core/typeid.h>
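The registration machinery deleted above moves wholesale into ATen/core. As a reminder of how it is consumed, here is a minimal sketch; the header path assumes the new ATen/core location introduced by this diff, and `MyRecord` is a hypothetical user type:

```cpp
#include <ATen/core/typeid.h>

namespace caffe2 {

struct MyRecord {
  int x = 0;
};

// CAFFE_KNOWN_TYPE must appear in exactly one .cpp, in the ::caffe2
// namespace; it instantiates TypeMeta::Id<MyRecord>() with a dynamic id.
CAFFE_KNOWN_TYPE(MyRecord);

} // namespace caffe2

int main() {
  // Make<T>() picks ctor/copy/dtor through the _PickCtor/_PickCopy helpers;
  // a default-constructible, copy-assignable type gets all three.
  caffe2::TypeMeta meta = caffe2::TypeMeta::Make<caffe2::MyRecord>();
  bool matches = meta.Match<caffe2::MyRecord>(); // true
  return matches && meta.itemsize() == sizeof(caffe2::MyRecord) ? 0 : 1;
}
```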
diff --git a/caffe2/core/typeid_test.cc b/caffe2/core/typeid_test.cc
index cc62a108de7daf..c2cc42bd803e08 100644
--- a/caffe2/core/typeid_test.cc
+++ b/caffe2/core/typeid_test.cc
@@ -37,7 +37,7 @@ TEST(TypeMetaTest, Names) {
   EXPECT_TRUE(
       string(string_meta.name()) != typeid(string).name());
   EXPECT_TRUE(
-      string(string_meta.name()) == Demangle(typeid(string).name()));
+      string(string_meta.name()) == at::demangle(typeid(string).name()));
 #endif // __GXX_RTTI
 }
diff --git a/caffe2/db/CMakeLists.txt b/caffe2/db/CMakeLists.txt
index e96d6b79165a0b..d05d87d8107982 100644
--- a/caffe2/db/CMakeLists.txt
+++ b/caffe2/db/CMakeLists.txt
@@ -5,10 +5,14 @@ set(Caffe2_DB_COMMON_CPU_SRC
 set(Caffe2_DB_COMMON_GPU_SRC
   "${CMAKE_CURRENT_SOURCE_DIR}/create_db_op_gpu.cc"
 )
+set(Caffe2_DB_COMMON_HIP_SRC
+  "${CMAKE_CURRENT_SOURCE_DIR}/hip/create_db_op_hip.cc"
+)
 
 # Common files that are always going to be included.
 list(APPEND Caffe2_CPU_SRCS ${Caffe2_DB_COMMON_CPU_SRC})
 list(APPEND Caffe2_GPU_SRCS ${Caffe2_DB_COMMON_GPU_SRC})
+list(APPEND Caffe2_HIP_SRCS ${Caffe2_DB_COMMON_HIP_SRC})
 
 # DB specific files
 if (USE_LMDB)
@@ -25,3 +29,4 @@ endif()
 set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
 set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE)
+set(Caffe2_HIP_SRCS ${Caffe2_HIP_SRCS} PARENT_SCOPE)
diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake
index 3829219a933b5d..4e8d2268258416 100644
--- a/cmake/Codegen.cmake
+++ b/cmake/Codegen.cmake
@@ -22,7 +22,7 @@ install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
         FILES_MATCHING PATTERN "*.h")
 if (NOT BUILD_ATEN)
   install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
-          DESTINATION include/ATen/core
+          DESTINATION include/ATen
           FILES_MATCHING PATTERN "*.h")
 endif()
 install(FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
diff --git a/setup.py b/setup.py
index 3eb6ba3b6c9621..6a23eb852d7072 100644
--- a/setup.py
+++ b/setup.py
@@ -256,6 +256,7 @@ def patched_link(self, *args, **kwargs):
         version += '+' + sha[:7]
     except Exception:
         pass
+print("Building wheel {}-{}".format(package_name, version))
 
 
 class create_version_file(PytorchCommand):
@@ -776,7 +777,6 @@ def run(self):
     "torch/csrc/autograd/python_variable.cpp",
     "torch/csrc/autograd/python_variable_indexing.cpp",
     "torch/csrc/byte_order.cpp",
-    "torch/csrc/finalizer.cpp",
     "torch/csrc/jit/batched/BatchTensor.cpp",
     "torch/csrc/jit/init.cpp",
     "torch/csrc/jit/ivalue.cpp",
diff --git a/test/cpp/api/module.cpp b/test/cpp/api/module.cpp
index ea8b37d44db54b..c822560e5f1ea3 100644
--- a/test/cpp/api/module.cpp
+++ b/test/cpp/api/module.cpp
@@ -289,7 +289,7 @@ TEST_CASE("module/clone") {
     a->module->weight.data() += 1;
     a->module->value = 123;
 
-    auto b = std::static_pointer_cast(a->clone());
+    auto b = std::dynamic_pointer_cast(a->clone());
 
     REQUIRE(!pointer_equal(b->module->weight, a->module->weight));
     REQUIRE(
diff --git a/test/cpp/api/optim.cpp b/test/cpp/api/optim.cpp
index 186159c8e98edf..257bf15d546578 100644
--- a/test/cpp/api/optim.cpp
+++ b/test/cpp/api/optim.cpp
@@ -35,7 +35,8 @@ bool test_optimizer_xor(Options options) {
   const int64_t kBatchSize = 4;
   const int64_t kMaximumNumberOfEpochs = 3000;
 
-  auto optimizer = OptimizerClass(model->parameters(), options);
+  auto optimizer = OptimizerClass(std::vector<torch::Tensor>(), options);
+  optimizer.add_parameters(model->parameters());
 
   float running_loss = 1;
   int epoch = 0;
@@ -258,3 +259,22 @@ TEST_CASE("Optim/ExternalVectorOfParameters") {
   REQUIRE(parameters[1].allclose(original_parameters[1] - 1.0));
   REQUIRE(parameters[2].allclose(original_parameters[2] - 1.0));
 }
+
+TEST_CASE("Optim/AddParameter/LBFGS") {
+  torch::manual_seed(0);
+
+  std::vector<torch::Tensor> parameters = {torch::randn({5, 5})};
+  std::vector<torch::Tensor> original_parameters = {parameters[0].clone()};
+
+  // Set all gradients to one
+  for (auto& parameter : parameters) {
+    parameter.grad() = torch::ones_like(parameter);
+  }
+
+  LBFGS optimizer(std::vector<torch::Tensor>(), 1.0);
+  optimizer.add_parameters(parameters);
+
+  optimizer.step([]() { return torch::tensor(1); });
+
+  // REQUIRE this doesn't throw
+}
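The rewritten XOR test above exercises two-phase construction: an optimizer may now start with zero parameters and have them registered later via `add_parameters`. A hedged sketch of the same pattern outside the test harness, using only API names that appear in these tests:

```cpp
#include <torch/torch.h>

int main() {
  auto model = torch::nn::Linear(4, 2);
  // Construct with an empty parameter list...
  torch::optim::SGD optimizer(std::vector<torch::Tensor>(),
                              torch::optim::SGDOptions(0.1));
  // ...then attach the model's parameters afterwards.
  optimizer.add_parameters(model->parameters());

  optimizer.zero_grad();
  model->forward(torch::randn({8, 4})).sum().backward();
  optimizer.step(); // per-parameter buffers are grown lazily (see buffer_at)
  return 0;
}
```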
diff --git a/test/cpp/api/sequential.cpp b/test/cpp/api/sequential.cpp
index 4d855cb10c9f85..7d07ccccb5887a 100644
--- a/test/cpp/api/sequential.cpp
+++ b/test/cpp/api/sequential.cpp
@@ -278,7 +278,7 @@ TEST_CASE("sequential") {
   SECTION("Is cloneable") {
     Sequential sequential(Linear(3, 4), Functional(torch::relu), BatchNorm(3));
     Sequential clone =
-        std::static_pointer_cast<SequentialImpl>(sequential->clone());
+        std::dynamic_pointer_cast<SequentialImpl>(sequential->clone());
     REQUIRE(sequential->size() == clone->size());
 
     for (size_t i = 0; i < sequential->size(); ++i) {
@@ -309,7 +309,7 @@ TEST_CASE("sequential/clone-to-device", "[cuda]") {
   Sequential sequential(Linear(3, 4), Functional(torch::relu), BatchNorm(3));
   torch::Device device(torch::kCUDA, 0);
   Sequential clone =
-      std::static_pointer_cast<SequentialImpl>(sequential->clone(device));
+      std::dynamic_pointer_cast<SequentialImpl>(sequential->clone(device));
   for (const auto& p : clone->parameters()) {
     REQUIRE(p->device() == device);
   }
diff --git a/test/cpp/api/serialization.cpp b/test/cpp/api/serialization.cpp
index 5cc8cc9e7d27b7..7f8bbe27419231 100644
--- a/test/cpp/api/serialization.cpp
+++ b/test/cpp/api/serialization.cpp
@@ -228,6 +228,14 @@ TEST_CASE("serialization") {
   ss.seekg(0, std::ios::beg);
   torch::load(ss, model3.get());
 
+  auto param1 = model1->parameters();
+  auto param2 = model2->parameters();
+  auto param3 = model3->parameters();
+  for (const auto& p : param1) {
+    REQUIRE(param1[p.key].allclose(param2[p.key]));
+    REQUIRE(param2[p.key].allclose(param3[p.key]));
+  }
+
   // Make some optimizers with momentum (and thus state)
   auto optim1 = torch::optim::SGD(
       model1->parameters(), torch::optim::SGDOptions(1e-1).momentum(0.9));
@@ -240,9 +248,9 @@ TEST_CASE("serialization") {
   auto optim3_2 = torch::optim::SGD(
       model3->parameters(), torch::optim::SGDOptions(1e-1).momentum(0.9));
 
-  auto x = torch::ones({10, 5}, torch::requires_grad());
+  auto x = torch::ones({10, 5});
 
-  auto step = [&](torch::optim::Optimizer& optimizer, Linear model) {
+  auto step = [&x](torch::optim::Optimizer& optimizer, Linear model) {
     optimizer.zero_grad();
     auto y = model->forward(x).sum();
     y.backward();
@@ -264,11 +272,11 @@ TEST_CASE("serialization") {
   torch::load(ss, &optim3_2);
   step(optim3_2, model3);
 
-  auto param1 = model1->parameters();
-  auto param2 = model2->parameters();
-  auto param3 = model3->parameters();
-  for (auto& p : param1) {
-    auto& name = p.key;
+  param1 = model1->parameters();
+  param2 = model2->parameters();
+  param3 = model3->parameters();
+  for (const auto& p : param1) {
+    const auto& name = p.key;
     // Model 1 and 3 should be the same
     REQUIRE(param1[name].norm().toCFloat() == param3[name].norm().toCFloat());
     REQUIRE(param1[name].norm().toCFloat() != param2[name].norm().toCFloat());
diff --git a/tools/amd_build/build_caffe2_amd.py b/tools/amd_build/build_caffe2_amd.py
index 9726bc2ebed542..10f72d999cd0d3 100755
--- a/tools/amd_build/build_caffe2_amd.py
+++ b/tools/amd_build/build_caffe2_amd.py
@@ -17,6 +17,7 @@
     "caffe2/queue/*",
    "caffe2/**/*_test*",
     "caffe2/core/THCCachingAllocator*",
+    "caffe2/db/*",
 ]
 
 ignores = [
diff --git a/tools/autograd/templates/VariableType.cpp b/tools/autograd/templates/VariableType.cpp
index 4713581728ebb5..75a59063842911 100644
--- a/tools/autograd/templates/VariableType.cpp
+++ b/tools/autograd/templates/VariableType.cpp
@@ -43,7 +43,7 @@ using namespace torch::autograd::generated;
 namespace torch { namespace autograd {
 
 VariableType::VariableType(Context* context, Type* baseType)
-  : Type(context, /*is_variable=*/true, /*is_undefined=*/false)
+  : Type(context, baseType->type_id(), /*is_variable=*/true, /*is_undefined=*/false)
   , baseType(baseType)
   , id_(context->freshTypeID()) {
   str = std::string("Variable[") + baseType->toString() + "]";
diff --git a/torch/csrc/Storage.cpp b/torch/csrc/Storage.cpp
index 9838a282d90b35..4aae5f1c2fd494 100644
--- a/torch/csrc/Storage.cpp
+++ b/torch/csrc/Storage.cpp
@@ -13,7 +13,6 @@
 // See Note [TH abstraction violation]
 //    - Used to get at the allocator associated with a storage
 #include
-#include
 #include "THP.h"
 #include "copy_utils.h"
diff --git a/torch/csrc/api/include/torch/nn/cloneable.h b/torch/csrc/api/include/torch/nn/cloneable.h
index 759a3341511205..feb4baebaece16 100644
--- a/torch/csrc/api/include/torch/nn/cloneable.h
+++ b/torch/csrc/api/include/torch/nn/cloneable.h
@@ -21,7 +21,7 @@ namespace nn {
 /// `clone()` method. We do not want to use this pattern in the base class,
 /// because then storing a module would always require templatizing it.
 template <typename Derived>
-class Cloneable : public Module {
+class Cloneable : public virtual Module {
  public:
  using Module::Module;
diff --git a/torch/csrc/api/include/torch/nn/module.h b/torch/csrc/api/include/torch/nn/module.h
index 0254f3dce5fc4e..e8140659579afd 100644
--- a/torch/csrc/api/include/torch/nn/module.h
+++ b/torch/csrc/api/include/torch/nn/module.h
@@ -205,7 +205,7 @@ std::shared_ptr<ModuleType> Module::register_module(
     std::string name,
     std::shared_ptr<ModuleType> module) {
   auto& base_module = children_.insert(std::move(name), std::move(module));
-  return std::static_pointer_cast<ModuleType>(base_module);
+  return std::dynamic_pointer_cast<ModuleType>(base_module);
 }
 
 template <typename ModuleType>
diff --git a/torch/csrc/api/include/torch/nn/modules/any.h b/torch/csrc/api/include/torch/nn/modules/any.h
index 864f7245fcb69c..f95e81d636afe2 100644
--- a/torch/csrc/api/include/torch/nn/modules/any.h
+++ b/torch/csrc/api/include/torch/nn/modules/any.h
@@ -315,7 +315,7 @@ struct AnyModule::Holder : public AnyModule::Placeholder {
   std::unique_ptr<Placeholder> clone(
       at::optional<Device> device) const override {
     return torch::make_unique<Holder>(
-        std::static_pointer_cast<ModuleType>(module->clone(device)));
+        std::dynamic_pointer_cast<ModuleType>(module->clone(device)));
   }
 
   /// The actual concrete module instance.
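The switch from `static_pointer_cast` to `dynamic_pointer_cast` in these files is forced by `Cloneable` now inheriting *virtually* from `Module`: a static downcast from a virtual base does not compile. A self-contained illustration (toy types, not the real class hierarchy):

```cpp
#include <memory>

struct Module {
  virtual ~Module() = default; // polymorphic, so dynamic_cast is available
};

template <typename Derived>
struct Cloneable : public virtual Module {};

struct Linear : Cloneable<Linear> {};

int main() {
  std::shared_ptr<Module> base = std::make_shared<Linear>();
  // std::static_pointer_cast<Linear>(base); // ill-formed: virtual base
  auto linear = std::dynamic_pointer_cast<Linear>(base); // OK at runtime
  return linear != nullptr ? 0 : 1;
}
```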
diff --git a/torch/csrc/api/include/torch/optim/adagrad.h b/torch/csrc/api/include/torch/optim/adagrad.h
index 0e2b2be251b497..6e64f309f18b3d 100644
--- a/torch/csrc/api/include/torch/optim/adagrad.h
+++ b/torch/csrc/api/include/torch/optim/adagrad.h
@@ -29,13 +29,13 @@ class Adagrad : public Optimizer {
       ParameterContainer&& parameters,
       const AdagradOptions& options)
       : Optimizer(std::forward<ParameterContainer>(parameters)),
-        options_(options),
+        options(options),
         sum_(zero_buffers_like(parameters_)),
         step_(parameters_.size(), 0) {}
 
   void step() override;
 
-  const AdagradOptions& options() const noexcept;
+  AdagradOptions options;
 
   template <class Archive>
   void serialize(Archive& ar) {
@@ -45,12 +45,10 @@ class Adagrad : public Optimizer {
  private:
   friend class cereal::access;
-  Adagrad() : options_(0) {}
-
-  AdagradOptions options_;
+  Adagrad() : options(0) {}
 
   std::vector<Tensor> sum_;
-  std::vector step_;
+  std::vector step_;
 };
 } // namespace optim
 } // namespace torch
diff --git a/torch/csrc/api/include/torch/optim/adam.h b/torch/csrc/api/include/torch/optim/adam.h
index 5bf3ef04f0e110..7ad3a5190ce993 100644
--- a/torch/csrc/api/include/torch/optim/adam.h
+++ b/torch/csrc/api/include/torch/optim/adam.h
@@ -30,11 +30,11 @@ class Adam : public Optimizer {
   template <typename ParameterContainer>
   explicit Adam(ParameterContainer&& parameters, const AdamOptions& options)
       : Optimizer(std::forward<ParameterContainer>(parameters)),
-        options_(options),
+        options(options),
         step_buffers_(parameters_.size(), 0),
         exp_average_buffers_(zero_buffers_like(parameters_)),
         exp_average_sq_buffers_(zero_buffers_like(parameters_)) {
-    if (options_.amsgrad_) {
+    if (options.amsgrad_) {
       max_exp_average_sq_buffers_ = zero_buffers_like(parameters_);
     }
   }
@@ -49,13 +49,11 @@ class Adam : public Optimizer {
         CEREAL_NVP(max_exp_average_sq_buffers_));
   }
 
-  const AdamOptions& options() const noexcept;
+  AdamOptions options;
 
  private:
   friend class cereal::access;
-  Adam() : options_(0) {}
-
-  AdamOptions options_;
+  Adam() : options(0) {}
 
   std::vector<int64_t> step_buffers_;
   std::vector<Tensor> exp_average_buffers_;
diff --git a/torch/csrc/api/include/torch/optim/lbfgs.h b/torch/csrc/api/include/torch/optim/lbfgs.h
index fe969c84677e73..d812362ccfbe86 100644
--- a/torch/csrc/api/include/torch/optim/lbfgs.h
+++ b/torch/csrc/api/include/torch/optim/lbfgs.h
@@ -31,13 +31,13 @@ class LBFGS : public LossClosureOptimizer {
   template <typename ParameterContainer>
   explicit LBFGS(ParameterContainer&& parameters, const LBFGSOptions& options)
       : LossClosureOptimizer(std::forward<ParameterContainer>(parameters)),
-        options_(options),
-        ro(options_.history_size_),
-        al(options_.history_size_) {}
+        options(options),
+        ro(options.history_size_),
+        al(options.history_size_) {}
 
   torch::Tensor step(LossClosure closure) override;
 
-  const LBFGSOptions& options() const noexcept;
+  LBFGSOptions options;
 
   template <class Archive>
   void serialize(Archive& ar) {
@@ -52,13 +52,11 @@ class LBFGS : public LossClosureOptimizer {
  private:
   friend class cereal::access;
-  LBFGS() : options_(0) {}
+  LBFGS() : options(0) {}
 
   at::Tensor gather_flat_grad();
   void add_grad(const torch::Scalar& step_size, const at::Tensor& update);
 
-  LBFGSOptions options_;
-
   at::Tensor d{torch::empty({0})};
   at::Tensor H_diag{torch::empty({0})};
   at::Tensor prev_flat_grad{torch::empty({0})};
diff --git a/torch/csrc/api/include/torch/optim/optimizer.h b/torch/csrc/api/include/torch/optim/optimizer.h
index eed600ab759bc4..4f56c1f67236e2 100644
--- a/torch/csrc/api/include/torch/optim/optimizer.h
+++ b/torch/csrc/api/include/torch/optim/optimizer.h
@@ -4,6 +4,7 @@
 #include
 #include
+#include <algorithm>
 #include
 #include
 #include
@@ -64,9 +65,28 @@ class OptimizerBase {
     return result;
   }
 
+  /// Accesses a buffer at the given index.
+  /// Additionally, grows `buffers` with zero-initialized entries up to
+  /// `index` if it is out of range.
+  template <typename T>
+  T& buffer_at(std::vector<T>& buffers, size_t index) {
+    if (buffers.size() <= index) {
+      const auto old_size = buffers.size();
+      buffers.resize(index + 1);
+      std::fill(buffers.begin() + old_size, buffers.end(), T{0});
+    }
+    return buffers[index];
+  }
+
   /// Accesses a buffer at the given index, converts it to the type of the
   /// parameter at the corresponding index (a no-op if they match).
+  /// Additionally, grows `buffers` with zero-initialized entries up to
+  /// `index` if it is out of range.
   Tensor& buffer_at(std::vector<Tensor>& buffers, size_t index) {
+    if (buffers.size() <= index) {
+      for (auto i = buffers.size(); i <= index; i++) {
+        buffers.push_back(torch::zeros_like(parameters_.at(i)));
+      }
+    }
+    // Copy the buffer to the device and dtype of the parameter.
     const auto& parameter = parameters_.at(index);
     const auto& buffer = buffers.at(index);
     if (buffer.device() != parameter.device() ||
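Both `buffer_at` overloads above implement the same lazy-growth contract: indexing past the end extends the buffer vector with zeroed entries instead of throwing, which is what makes `add_parameters` after construction workable. A standalone restatement of the arithmetic-buffer policy:

```cpp
#include <cstdint>
#include <vector>

// Grow-on-demand indexing: out-of-range slots are value-initialized to T{0}.
template <typename T>
T& buffer_at(std::vector<T>& buffers, std::size_t index) {
  if (buffers.size() <= index) {
    buffers.resize(index + 1, T{0});
  }
  return buffers[index];
}

int main() {
  std::vector<int64_t> steps;
  buffer_at(steps, 3) += 1; // steps becomes {0, 0, 0, 1}
  return (steps.size() == 4 && steps[3] == 1) ? 0 : 1;
}
```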
diff --git a/torch/csrc/api/include/torch/optim/rmsprop.h b/torch/csrc/api/include/torch/optim/rmsprop.h
index e51cacc586db37..5077536a97a78f 100644
--- a/torch/csrc/api/include/torch/optim/rmsprop.h
+++ b/torch/csrc/api/include/torch/optim/rmsprop.h
@@ -33,7 +33,7 @@ class RMSprop : public Optimizer {
       ParameterContainer&& parameters,
       const RMSpropOptions& options)
       : Optimizer(std::forward<ParameterContainer>(parameters)),
-        options_(options),
+        options(options),
         square_average_buffers_(zero_buffers_like(parameters_)) {
     if (options.centered_ > 0) {
       grad_average_buffers_ = zero_buffers_like(parameters_);
@@ -45,7 +45,7 @@ class RMSprop : public Optimizer {
 
   void step() override;
 
-  const RMSpropOptions& options() const noexcept;
+  RMSpropOptions options;
 
   template <class Archive>
   void serialize(Archive& ar) {
@@ -56,9 +56,7 @@ class RMSprop : public Optimizer {
  private:
   friend class cereal::access;
-  RMSprop() : options_(0) {}
-
-  RMSpropOptions options_;
+  RMSprop() : options(0) {}
 
   std::vector<Tensor> square_average_buffers_;
   std::vector<Tensor> momentum_buffers_;
diff --git a/torch/csrc/api/include/torch/optim/sgd.h b/torch/csrc/api/include/torch/optim/sgd.h
index 47196074d380ee..9f58e4a7232915 100644
--- a/torch/csrc/api/include/torch/optim/sgd.h
+++ b/torch/csrc/api/include/torch/optim/sgd.h
@@ -30,8 +30,8 @@ class SGD : public Optimizer {
   template <typename ParameterContainer>
   explicit SGD(ParameterContainer&& parameters, const SGDOptions& options)
       : Optimizer(std::forward<ParameterContainer>(parameters)),
-        options_(options) {
-    if (options_.momentum_ > 0) {
+        options(options) {
+    if (options.momentum_ > 0) {
       momentum_buffers_ = zero_buffers_like(parameters_);
     }
   }
@@ -43,13 +43,12 @@ class SGD : public Optimizer {
     ar(CEREAL_NVP(momentum_buffers_));
   }
 
-  const SGDOptions& options() const noexcept;
+  SGDOptions options;
 
  private:
   friend class cereal::access;
-  SGD() : options_(0) {}
+  SGD() : options(0) {}
 
-  SGDOptions options_;
   std::vector<Tensor> momentum_buffers_;
 
   /// Counts how often `step()` is called, for dampening.
   size_t iteration_{0};
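With `options` promoted from a private member behind a const accessor to a plain public field, hyperparameters can be adjusted between steps. A hedged usage sketch (field and builder names, such as `learning_rate_` and `momentum`, are exactly as they appear in the headers above):

```cpp
#include <torch/torch.h>

int main() {
  auto model = torch::nn::Linear(3, 1);
  torch::optim::SGD optimizer(model->parameters(),
                              torch::optim::SGDOptions(0.1).momentum(0.9));
  for (int epoch = 0; epoch < 3; ++epoch) {
    optimizer.zero_grad();
    model->forward(torch::ones({2, 3})).sum().backward();
    optimizer.step();
    optimizer.options.learning_rate_ *= 0.5; // manual decay, now legal
  }
  return 0;
}
```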
diff --git a/torch/csrc/api/include/torch/serialization.h b/torch/csrc/api/include/torch/serialization.h
index 61b5b53b59b331..d28930a3ddb31e 100644
--- a/torch/csrc/api/include/torch/serialization.h
+++ b/torch/csrc/api/include/torch/serialization.h
@@ -4,6 +4,7 @@
 #include
 #include
+#include
 #include "cereal/archives/binary.hpp"
 #include "cereal/types/polymorphic.hpp"
@@ -168,12 +169,13 @@ loadBinary(BinaryInputArchive& archive, void* data, size_t size) {
 // Gradients will not be saved for variables
 template <class Archive>
 void save(Archive& archive, torch::Tensor const& tensor) {
+  torch::NoGradGuard guard;
   if (!tensor.defined()) {
     int32_t typeId = ::torch::detail::scalarTypeId(torch::Dtype::Undefined);
     archive(CEREAL_NVP(typeId));
     return;
   } else {
-    int32_t typeId = ::torch::detail::scalarTypeId(tensor.data().type().scalarType());
+    int32_t typeId = ::torch::detail::scalarTypeId(tensor.dtype());
     archive(CEREAL_NVP(typeId));
   }
   auto sizes = std::vector<int64_t>();
@@ -199,6 +201,7 @@ void save(Archive& archive, torch::Tensor const& tensor) {
  **/
 template <class Archive>
 void load(Archive& archive, torch::Tensor& tensor) {
+  torch::NoGradGuard guard;
   torch::Dtype type;
   int32_t typeId;
   archive(CEREAL_NVP(typeId));
@@ -214,19 +217,19 @@ void load(Archive& archive, torch::Tensor& tensor) {
   archive(CEREAL_NVP(backendId), CEREAL_NVP(sizes));
 
   at::Backend backend = ::torch::detail::backendFromId(backendId);
-  if (!tensor.defined() || tensor.data().type().scalarType() != type) {
+  if (!tensor.defined() || tensor.dtype() != type) {
     tensor = torch::empty({}, torch::getType(backend, type));
   }
   tensor.data().resize_(sizes);
 
   if (tensor.type().is_cuda()) {
     // should actually use cudamemcpy probably
-    auto cputensor = torch::empty(sizes, tensor.data().type().scalarType());
+    auto cputensor = torch::empty(sizes, tensor.dtype());
     agimpl::loadBinary(
         archive,
         cputensor.data_ptr(),
         cputensor.numel() * cputensor.type().elementSizeInBytes());
-    tensor.copy_(cputensor);
+    tensor.data().copy_(cputensor.data());
   } else {
     agimpl::loadBinary(
         archive,
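The `NoGradGuard` added to both paths above keeps the in-place `resize_`/`copy_` calls used during (de)serialization out of the autograd graph. The same guard pattern in user code, as a minimal sketch (the helper name is hypothetical):

```cpp
#include <torch/torch.h>

// Overwrite `dst` from `src` without recording the mutation for autograd.
void overwrite_inplace(torch::Tensor& dst, const torch::Tensor& src) {
  torch::NoGradGuard guard; // everything below is invisible to autograd
  dst.data().resize_(src.sizes());
  dst.data().copy_(src.data());
}

int main() {
  auto a = torch::zeros({2, 2}, torch::requires_grad());
  auto b = torch::ones({3, 3});
  overwrite_inplace(a, b); // a now holds b's data; no grad history recorded
  return 0;
}
```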
diff --git a/torch/csrc/api/src/optim/adagrad.cpp b/torch/csrc/api/src/optim/adagrad.cpp
index 7d87e0c3a03914..d0fa9afd79966d 100644
--- a/torch/csrc/api/src/optim/adagrad.cpp
+++ b/torch/csrc/api/src/optim/adagrad.cpp
@@ -12,10 +12,6 @@ namespace optim {
 AdagradOptions::AdagradOptions(double learning_rate)
     : learning_rate_(learning_rate) {}
 
-const AdagradOptions& Adagrad::options() const noexcept {
-  return options_;
-}
-
 /// Adapted from
 /// https://github.com/pytorch/pytorch/blob/master/torch/optim/adagrad.py
 void Adagrad::step() {
@@ -26,16 +22,16 @@ void Adagrad::step() {
       continue;
 
     auto d_p = Tensor(grad).data();
-    if (options_.weight_decay_ > 0) {
-      d_p.add_(p, options_.weight_decay_);
+    if (options.weight_decay_ > 0) {
+      d_p.add_(p, options.weight_decay_);
     }
-    step_.at(i) += 1.0;
-    auto clr = options_.learning_rate_ /
-        (1.0 + (step_.at(i) - 1.0) * options_.lr_decay_);
+    buffer_at(step_, i) += 1.0;
+    auto clr = options.learning_rate_ /
+        (1.0 + (buffer_at(step_, i) - 1.0) * options.lr_decay_);
     auto sum = buffer_at(sum_, i);
     sum.data().addcmul_(d_p, d_p, 1.0);
-    auto std = sum_.at(i).data().sqrt().add_(1e-10);
+    auto std = buffer_at(sum_, i).data().sqrt().add_(1e-10);
     p.addcdiv_(d_p, std, -clr);
   }
 }
diff --git a/torch/csrc/api/src/optim/adam.cpp b/torch/csrc/api/src/optim/adam.cpp
index e05b81ef7dd8eb..48a0c6a8562e9a 100644
--- a/torch/csrc/api/src/optim/adam.cpp
+++ b/torch/csrc/api/src/optim/adam.cpp
@@ -14,10 +14,6 @@ namespace optim {
 AdamOptions::AdamOptions(double learning_rate)
     : learning_rate_(learning_rate) {}
 
-const AdamOptions& Adam::options() const noexcept {
-  return options_;
-}
-
 void Adam::step() {
   for (size_t i = 0; i < parameters_.size(); ++i) {
     auto& grad = parameters_.at(i).grad();
@@ -29,32 +25,32 @@ void Adam::step() {
     auto exp_average = buffer_at(exp_average_buffers_, i).data();
     auto exp_average_sq = buffer_at(exp_average_sq_buffers_, i).data();
 
-    step_buffers_.at(i) += 1;
+    buffer_at(step_buffers_, i) += 1;
 
     auto d_p = torch::autograd::as_variable_ref(grad).data();
-    if (options_.weight_decay_ > 0) {
-      d_p.add_(p, options_.weight_decay_);
+    if (options.weight_decay_ > 0) {
+      d_p.add_(p, options.weight_decay_);
     }
 
-    exp_average.mul_(options_.beta1_).add_(d_p, 1 - options_.beta1_);
-    exp_average_sq.mul_(options_.beta2_)
-        .addcmul_(d_p, d_p, 1 - options_.beta2_);
+    exp_average.mul_(options.beta1_).add_(d_p, 1 - options.beta1_);
+    exp_average_sq.mul_(options.beta2_)
+        .addcmul_(d_p, d_p, 1 - options.beta2_);
 
     at::Tensor denom;
-    if (options_.amsgrad_) {
+    if (options.amsgrad_) {
       auto max_exp_average_sq = buffer_at(max_exp_average_sq_buffers_, i).data();
       torch::max_out(max_exp_average_sq, max_exp_average_sq, exp_average_sq);
-      denom = max_exp_average_sq.sqrt().add_(options_.eps_);
+      denom = max_exp_average_sq.sqrt().add_(options.eps_);
     } else {
-      denom = exp_average_sq.sqrt().add_(options_.eps_);
+      denom = exp_average_sq.sqrt().add_(options.eps_);
     }
 
     const auto bias_correction1 =
-        1 - std::pow(options_.beta1_, step_buffers_.at(i));
+        1 - std::pow(options.beta1_, buffer_at(step_buffers_, i));
     const auto bias_correction2 =
-        1 - std::pow(options_.beta2_, step_buffers_.at(i));
-    const auto step_size = options_.learning_rate_ *
+        1 - std::pow(options.beta2_, buffer_at(step_buffers_, i));
+    const auto step_size = options.learning_rate_ *
         std::sqrt(bias_correction2) / bias_correction1;
 
     p.addcdiv_(exp_average, denom, -step_size);
diff --git a/torch/csrc/api/src/optim/lbfgs.cpp b/torch/csrc/api/src/optim/lbfgs.cpp
index 8048abc51ed793..3aaf8a63ae2d62 100644
--- a/torch/csrc/api/src/optim/lbfgs.cpp
+++ b/torch/csrc/api/src/optim/lbfgs.cpp
@@ -15,10 +15,6 @@ namespace optim {
 LBFGSOptions::LBFGSOptions(double learning_rate)
     : learning_rate_(learning_rate) {}
 
-const LBFGSOptions& LBFGS::options() const noexcept {
-  return options_;
-}
-
 at::Tensor LBFGS::gather_flat_grad() {
   std::vector<at::Tensor> views;
   for (auto& parameter : parameters_) {
@@ -46,14 +42,14 @@ torch::Tensor LBFGS::step(LossClosure closure) {
   at::Tensor flat_grad = gather_flat_grad();
   torch::Scalar abs_grad_sum = torch::Scalar(flat_grad.abs().sum());
 
-  if (torch::Scalar(abs_grad_sum).toFloat() <= options_.tolerance_grad_) {
+  if (torch::Scalar(abs_grad_sum).toFloat() <= options.tolerance_grad_) {
     return loss;
   }
 
   at::Tensor ONE = flat_grad.type().scalarTensor(1);
 
   int64_t n_iter = 0;
-  while (n_iter < options_.max_iter_) {
+  while (n_iter < options.max_iter_) {
     n_iter++;
     state_n_iter++;
@@ -69,7 +65,7 @@ torch::Tensor LBFGS::step(LossClosure closure) {
 
     if (ys.toFloat() > 1e-10) {
       // updating memory
-      if (old_dirs.size() == options_.history_size_) {
+      if (old_dirs.size() == options.history_size_) {
         // shift history by one (limited memory)
         old_dirs.pop_front();
         old_stps.pop_front();
@@ -114,15 +110,15 @@ torch::Tensor LBFGS::step(LossClosure closure) {
     // reset initial guess for step size
     if (n_iter == 1) {
       t = torch::Scalar(
-          at::min(ONE, ONE / abs_grad_sum) * options_.learning_rate_);
+          at::min(ONE, ONE / abs_grad_sum) * options.learning_rate_);
     } else {
-      t = options_.learning_rate_;
+      t = options.learning_rate_;
     }
 
     torch::Scalar gtd = torch::Scalar(flat_grad.dot(d));
     add_grad(t, d);
     int64_t ls_func_evals = 0;
-    if (n_iter != options_.max_iter_) {
+    if (n_iter != options.max_iter_) {
       // re-evaluate function only if not in last iteration
       // the reason we do this: in a stochastic setting,
       // no use to re-evaluate that function here
@@ -138,21 +134,21 @@ torch::Tensor LBFGS::step(LossClosure closure) {
      * Check conditions
      */
 
-    if (n_iter == options_.max_iter_) {
+    if (n_iter == options.max_iter_) {
       break;
-    } else if (current_evals >= options_.max_eval_) {
+    } else if (current_evals >= options.max_eval_) {
       break;
-    } else if (abs_grad_sum.toFloat() <= options_.tolerance_grad_) {
+    } else if (abs_grad_sum.toFloat() <= options.tolerance_grad_) {
       break;
-    } else if (gtd.toFloat() > -options_.tolerance_grad_) {
+    } else if (gtd.toFloat() > -options.tolerance_grad_) {
       break;
     } else if (
         torch::Scalar(d.mul(t).abs_().sum()).toFloat() <=
-        options_.tolerance_change_) {
+        options.tolerance_change_) {
       break;
     } else if (
         std::abs(loss.toCFloat() - prev_loss.toFloat()) <
-        options_.tolerance_change_) {
+        options.tolerance_change_) {
       break;
     }
   }
diff --git a/torch/csrc/api/src/optim/rmsprop.cpp b/torch/csrc/api/src/optim/rmsprop.cpp
index abf2bf9a1f932b..e4d1eaa95418ac 100644
--- a/torch/csrc/api/src/optim/rmsprop.cpp
+++ b/torch/csrc/api/src/optim/rmsprop.cpp
@@ -12,10 +12,6 @@ namespace optim {
 RMSpropOptions::RMSpropOptions(double learning_rate)
     : learning_rate_(learning_rate) {}
 
-const RMSpropOptions& RMSprop::options() const noexcept {
-  return options_;
-}
-
 /// Adapted from
 /// https://github.com/pytorch/pytorch/blob/master/torch/optim/rmsprop.py
 void RMSprop::step() {
@@ -27,31 +23,31 @@ void RMSprop::step() {
     }
 
     auto d_p = torch::autograd::as_variable_ref(grad).data();
-    if (options_.weight_decay_ > 0) {
-      d_p.add_(p, options_.weight_decay_);
+    if (options.weight_decay_ > 0) {
+      d_p.add_(p, options.weight_decay_);
     }
 
     auto square_average = buffer_at(square_average_buffers_, i).data();
-    square_average.mul_(options_.alpha_)
-        .addcmul_(d_p, d_p, 1.0 - options_.alpha_);
+    square_average.mul_(options.alpha_)
+        .addcmul_(d_p, d_p, 1.0 - options.alpha_);
 
     at::Tensor average;
-    if (options_.centered_ > 0) {
+    if (options.centered_ > 0) {
       auto grad_average = buffer_at(grad_average_buffers_, i).data();
-      grad_average.mul_(options_.alpha_).add_(d_p, 1.0 - options_.alpha_);
+      grad_average.mul_(options.alpha_).add_(d_p, 1.0 - options.alpha_);
       average = square_average.addcmul(grad_average, grad_average, -1.0)
                     .sqrt()
-                    .add_(options_.eps_);
+                    .add_(options.eps_);
     } else {
-      average = square_average.sqrt().add_(options_.eps_);
+      average = square_average.sqrt().add_(options.eps_);
     }
 
-    if (options_.momentum_ > 0) {
+    if (options.momentum_ > 0) {
       auto momentum = buffer_at(momentum_buffers_, i).data();
-      momentum.mul_(options_.momentum_).addcdiv_(d_p, average);
-      p.add_(momentum, -options_.learning_rate_);
+      momentum.mul_(options.momentum_).addcdiv_(d_p, average);
+      p.add_(momentum, -options.learning_rate_);
     } else {
-      p.addcdiv_(d_p, average, -options_.learning_rate_);
+      p.addcdiv_(d_p, average, -options.learning_rate_);
     }
   }
 }
diff --git a/torch/csrc/api/src/optim/sgd.cpp b/torch/csrc/api/src/optim/sgd.cpp
index 4b8ee7ae12822a..9948c12c1c3d3c 100644
--- a/torch/csrc/api/src/optim/sgd.cpp
+++ b/torch/csrc/api/src/optim/sgd.cpp
@@ -10,10 +10,6 @@ namespace torch {
 namespace optim {
 SGDOptions::SGDOptions(double learning_rate) : learning_rate_(learning_rate) {}
 
-const SGDOptions& SGD::options() const noexcept {
-  return options_;
-}
-
 void SGD::step() {
   for (size_t i = 0; i < parameters_.size(); ++i) {
     auto& grad = parameters_.at(i).grad();
@@ -24,25 +20,25 @@ void SGD::step() {
     }
 
     auto d_p = torch::Tensor(grad).data();
-    if (options_.weight_decay_ > 0) {
-      d_p.add_(p, options_.weight_decay_);
+    if (options.weight_decay_ > 0) {
+      d_p.add_(p, options.weight_decay_);
     }
 
-    if (options_.momentum_ != 0) {
+    if (options.momentum_ != 0) {
       auto momentum = buffer_at(momentum_buffers_, i).data();
       if (iteration_ == 0) {
-        momentum.mul_(options_.momentum_).add_(d_p);
+        momentum.mul_(options.momentum_).add_(d_p);
       } else {
-        momentum.mul_(options_.momentum_).add_(d_p, 1 - options_.dampening_);
+        momentum.mul_(options.momentum_).add_(d_p, 1 - options.dampening_);
       }
 
-      if (options_.nesterov_) {
-        d_p = d_p.add(momentum, options_.momentum_);
+      if (options.nesterov_) {
+        d_p = d_p.add(momentum, options.momentum_);
       } else {
         d_p = momentum;
       }
     }
 
-    p.add_(d_p, -options_.learning_rate_);
+    p.add_(d_p, -options.learning_rate_);
   }
   iteration_ += 1;
 }
diff --git a/torch/csrc/autograd/variable.cpp b/torch/csrc/autograd/variable.cpp
index f8c88c7ddcdde5..9ebcdf6df993d3 100644
--- a/torch/csrc/autograd/variable.cpp
+++ b/torch/csrc/autograd/variable.cpp
@@ -22,7 +22,7 @@ namespace torch {
 namespace autograd {
 Variable::Impl::Impl(at::Tensor data, bool requires_grad, Edge gradient_edge)
-    : TensorImpl(data.type().backend(), data.type().scalarType(), nullptr, /* is variable */ true),
+    : TensorImpl(data.type().type_id(), data.type().scalarType(), nullptr, /* is variable */ true),
       data_(std::move(data)),
       grad_fn_(std::move(gradient_edge.function)),
       requires_grad_(false),
@@ -93,8 +93,7 @@ Tensor Variable::Impl::detach() const {
 
 void Variable::Impl::detach_() {
   if (is_view_) {
-    throw std::runtime_error(
-        "Can't detach views in-place. Use detach() instead");
+    AT_ERROR("Can't detach views in-place. Use detach() instead");
   }
   set_requires_grad(false);
   grad_fn_.reset();
@@ -131,7 +130,7 @@ void Variable::Impl::set_data(Tensor new_data) {
 
   // Updates metadata
   scalar_type_ = new_data.type().scalarType();
-  backend_ = new_data.type().backend();
+  type_id_ = new_data.type().type_id();
   is_variable_ = true;
   data_ = std::move(new_data);
 }
diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h
index 5fbfcf5f2bb8c0..32c33a87e2c503 100644
--- a/torch/csrc/autograd/variable.h
+++ b/torch/csrc/autograd/variable.h
@@ -298,7 +298,7 @@ struct Variable::Impl : public at::TensorImpl {
   }
 
   /// Accesses the gradient `Variable` of this `Variable`.
-  Tensor& grad() override {
+  Variable& grad() override {
     return grad_;
   }
   const Variable& grad() const override {
diff --git a/torch/csrc/cuda/Storage.cpp b/torch/csrc/cuda/Storage.cpp
index f767e94ccc38f4..da6f15c10f1d14 100644
--- a/torch/csrc/cuda/Storage.cpp
+++ b/torch/csrc/cuda/Storage.cpp
@@ -11,7 +11,6 @@
 #include "THCP.h"
 
 #include "override_macros.h"
-#include "torch/csrc/finalizer.h"
 #include "torch/csrc/copy_utils.h"
 #include "DynamicTypes.h"
diff --git a/torch/csrc/finalizer.cpp b/torch/csrc/finalizer.cpp
deleted file mode 100644
index b18f4f7bb095d2..00000000000000
--- a/torch/csrc/finalizer.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-#include
-#include
-
-namespace torch {
-
-} // namespace torch
diff --git a/torch/csrc/finalizer.h b/torch/csrc/finalizer.h
deleted file mode 100644
index 13b9fa5e7bdd04..00000000000000
--- a/torch/csrc/finalizer.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#pragma once
-
-#include
-#include
-#include
-#include
-
-namespace torch {
-
-struct PyObjectFinalizer : public THFinalizer {
-  THPObjectPtr pyobj_;
-  // TODO: This recursive structure can lead to a stack overflow if you
-  // put too many finalizers on the same object
-  std::unique_ptr<THFinalizer> next_;
-  PyObjectFinalizer(PyObject* pyobj) {
-    Py_XINCREF(pyobj);
-    pyobj_ = pyobj;
-  }
-  void operator()() override {
-    if (next_) { (*next_)(); }
-  }
-  ~PyObjectFinalizer() {
-    // We must manually ensure that we have the GIL before
-    // pyobj gets destroyed...
-    AutoGIL gil;
-    pyobj_ = nullptr;
-  }
-};
-
-} // namespace torch
diff --git a/torch/csrc/generic/StorageSharing.cpp b/torch/csrc/generic/StorageSharing.cpp
index dbbdedb03e17ba..c6e949a2085739 100644
--- a/torch/csrc/generic/StorageSharing.cpp
+++ b/torch/csrc/generic/StorageSharing.cpp
@@ -291,45 +291,23 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
 // pointer.
 //
 // NB: This does NOT preserve object identity when you call it multiple times
-static PyObject * THPStorage_(weakRef)(THPStorage *self, PyObject *weak_ref_class) {
+static PyObject * THPStorage_(weakRef)(THPStorage *self, PyObject *args) {
   HANDLE_TH_ERRORS
   THStorage* storage = self->cdata;
-  THStorage_weakRetain(storage);
-
-  THPObjectPtr args(Py_BuildValue("(N)", PyLong_FromVoidPtr(storage)));
-  if (!args) return NULL;
-  THPObjectPtr ref(PyObject_Call(weak_ref_class, args, NULL));
-  if (!ref) return NULL;
-
-  // We need to also add a finalizer with an owning reference to the weak class,
-  // so that we can keep the "weak" object live until it should actually be
-  // cleared from the map.
-  // Access to storage->finalizer protected by GIL
-  torch::PyObjectFinalizer* finalizer = new torch::PyObjectFinalizer(ref.get());
-  std::swap(storage->finalizer_, finalizer->next_);
-  storage->finalizer_.reset(finalizer);
-
-  return ref.release();
+  return PyLong_FromVoidPtr(storage);
   END_HANDLE_TH_ERRORS
 }
 
 PyObject * THPStorage_(newWithWeakPtr)(PyObject *_unused, PyObject *arg)
 {
   HANDLE_TH_ERRORS
-  THPObjectPtr ref(PyObject_GetAttrString(arg, "cdata"));
-  if (!ref) {
-    return NULL;
-  } else if (ref.get() == Py_None) {
-    Py_RETURN_NONE;
-  }
-  THPUtils_assert(THPUtils_checkLong(ref.get()),
-      "_new_with_weak_ptr(): arg.cdata must be an 'int'");
-  THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(ref.get());
+  THPUtils_assert(THPUtils_checkLong(arg),
+      "_new_with_weak_ptr(): arg must be an 'int'");
+  THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
   if (auto* storage = THStorage_weakLock(weak_storage)) {
     return THPStorage_(New)(storage);
   }
-
   Py_RETURN_NONE;
   END_HANDLE_TH_ERRORS
 }
@@ -349,6 +327,15 @@ PyObject * THPStorage_(freeWeakRef)(PyObject *_unused, PyObject *arg)
   END_HANDLE_TH_ERRORS
 }
 
+PyObject * THPStorage_(expired)(PyObject *_unused, PyObject *arg)
+{
+  HANDLE_TH_ERRORS
+  THPUtils_assert(THPUtils_checkLong(arg), "_expired(): arg must be an 'int'");
+  THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
+  return PyBool_FromLong(weak_storage->use_count() == 0);
+  END_HANDLE_TH_ERRORS
+}
+
 PyObject * THPStorage_(sharedFd)(THPStorage *self)
 {
   HANDLE_TH_ERRORS
@@ -390,8 +377,9 @@ static PyMethodDef THPStorage_(sharingMethods)[] = {
   {"_new_shared_filename", (PyCFunction)THPStorage_(newSharedFilename), METH_VARARGS | METH_STATIC, NULL},
   {"_new_using_filename", (PyCFunction)THPStorage_(pyNewFilenameStorage), METH_VARARGS | METH_STATIC, NULL},
 #endif
-  {"_weak_ref", (PyCFunction)THPStorage_(weakRef), METH_O, NULL},
+  {"_weak_ref", (PyCFunction)THPStorage_(weakRef), METH_NOARGS, NULL},
   {"_free_weak_ref", (PyCFunction)THPStorage_(freeWeakRef), METH_O | METH_STATIC, NULL},
+  {"_expired", (PyCFunction)THPStorage_(expired), METH_O | METH_STATIC, NULL},
   {"_shared_decref", (PyCFunction)THPStorage_(sharedDecref), METH_NOARGS, NULL},
   {"_shared_incref", (PyCFunction)THPStorage_(sharedIncref), METH_NOARGS, NULL},
   {"_get_shared_fd", (PyCFunction)THPStorage_(sharedFd), METH_NOARGS, NULL},
diff --git a/torch/csrc/jit/interpreter.cpp b/torch/csrc/jit/interpreter.cpp
index 48f6e1100ac2bc..3654816d479ad2 100644
--- a/torch/csrc/jit/interpreter.cpp
+++ b/torch/csrc/jit/interpreter.cpp
@@ -337,7 +337,7 @@ struct PreprocessGraph {
 struct ContainerTensor : public at::TensorImpl {
 public:
   ContainerTensor()
-  : TensorImpl(at::Backend::Undefined,at::ScalarType::Undefined, nullptr, /* is_variable */ false) {}
+  : TensorImpl(at::UndefinedTensorId(),at::ScalarType::Undefined, nullptr, /* is_variable */ false) {}
 
   virtual ~ContainerTensor() = default;
   virtual at::IntList sizes() const override {
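The `_weak_ref`/`_expired`/`_free_weak_ref` trio introduced above replaces the finalizer scheme with a standard weak-reference protocol, just spelled with raw `THStorage*` values boxed into Python ints. The semantics mirror `std::weak_ptr`, as this analogy shows (plain C++, not the TH API):

```cpp
#include <memory>

int main() {
  auto storage = std::make_shared<int>(42); // stands in for a THStorage
  std::weak_ptr<int> weak = storage;        // _weak_ref: non-owning handle
  bool before = weak.expired();             // _expired -> false (use_count > 0)
  storage.reset();                          // last strong reference dies
  bool after = weak.expired();              // _expired -> true (use_count == 0)
  return (!before && after) ? 0 : 1;
}
```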
diff --git a/torch/multiprocessing/reductions.py b/torch/multiprocessing/reductions.py
index 2cac31c75cde48..a346d56934b175 100644
--- a/torch/multiprocessing/reductions.py
+++ b/torch/multiprocessing/reductions.py
@@ -1,6 +1,7 @@
 import torch
 import os
 import weakref
+import threading
 import multiprocessing
 from multiprocessing.reduction import ForkingPickler
 import sys
@@ -15,19 +16,54 @@
     pass
 
 
-class StorageRef(object):
-    # An object with a cdata field which may be set to None. We subclass object
-    # instead of using a dict() to support weak references.
+class StorageWeakRef(object):
+    r"""A weak reference to a Storage.
+
+    The cdata member is a Python number containing the integer representation of
+    the Storage pointer."""
 
-    def __init__(self, ptr):
-        self.cdata = ptr
+    def __init__(self, storage):
+        self.cdata = storage._weak_ref()
+        # Save a direct reference to _free_weak_ref because the `torch` module
+        # might be cleared during Python shutdown before this module is cleared.
+        self._free_weak_ref = torch.Storage._free_weak_ref
+
+    def expired(self):
+        return torch.Storage._expired(self.cdata)
 
     def __del__(self):
-        torch.Storage._free_weak_ref(self.cdata)
+        self._free_weak_ref(self.cdata)
+
+
+class SharedCache(dict):
+    """dictionary from multiprocessing handles to StorageWeakRef"""
+
+    def __init__(self):
+        # free_dead_references() is called if the len exceeds the current
+        # limit. The limit scales with the number of remaining live objects.
+        self.limit = 128
+        self.lock = threading.Lock()
+
+    def __setitem__(self, key, storage_ref):
+        dict.__setitem__(self, key, storage_ref)
+        if len(self) > self.limit:
+            self.free_dead_references()
+
+    def free_dead_references(self):
+        # Multiple Python threads may call free_dead_references() concurrently.
+        # Without a lock, they may try deleting the same entry multiple times.
+        with self.lock:
+            live = 0
+            for key, storage_ref in list(self.items()):
+                if storage_ref.expired():
+                    del self[key]
+                else:
+                    live += 1
+            self.limit = max(128, live * 2)
 
 
-# mapping from handles to StorageRef objects
-shared_cache = weakref.WeakValueDictionary()
+# mapping from handles to StorageWeakRef objects
+shared_cache = SharedCache()
 
 
 def rebuild_event(handle):
@@ -55,7 +91,7 @@ def rebuild_cuda_tensor(tensor_cls, tensor_size, tensor_stride, tensor_offset,
     if storage is None:
         torch.cuda._lazy_init()
         storage = storage_cls._new_shared_cuda(storage_device, storage_handle, storage_size)
-        shared_cache[storage_handle] = storage._weak_ref(StorageRef)
+        shared_cache[storage_handle] = StorageWeakRef(storage)
 
     t = torch._utils._rebuild_tensor(storage, tensor_offset, tensor_size, tensor_stride)
     if tensor_cls == torch.nn.parameter.Parameter:
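`SharedCache.free_dead_references` above amortizes cleanup: a sweep only runs when an insert pushes the dict past `limit`, and the next limit is set to twice the surviving population, with a floor of 128. The same policy restated in C++ for readers of this mostly-C++ diff (the Python original also takes a lock, omitted here):

```cpp
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <map>
#include <memory>

struct SharedCache {
  std::map<int, std::weak_ptr<int>> entries; // handle -> weak storage ref
  std::size_t limit = 128;

  void insert(int key, std::weak_ptr<int> ref) {
    entries[key] = std::move(ref);
    if (entries.size() > limit) {
      free_dead_references();
    }
  }

  void free_dead_references() {
    // Drop every expired entry, then scale the next sweep threshold with
    // the number of survivors so sweeps stay amortized O(1) per insert.
    for (auto it = entries.begin(); it != entries.end();) {
      it = it->second.expired() ? entries.erase(it) : std::next(it);
    }
    limit = std::max<std::size_t>(128, entries.size() * 2);
  }
};
```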
@@ -125,11 +161,7 @@ def reduce_tensor(tensor):
         (device, handle, storage_size, storage_offset) = storage._share_cuda_()
         tensor_offset = tensor.storage_offset()
 
-        # WARNING! This call to _weak_ref could lead to O(n) deleter
-        # behavior, if you repeatedly call it on the same Storage (all
-        # other sites are guarded by shared_cache; maybe this site
-        # should be too?)
-        shared_cache[handle] = storage._weak_ref(StorageRef)
+        shared_cache[handle] = StorageWeakRef(storage)
 
         return (rebuild_cuda_tensor,
                 (type(tensor),
@@ -159,7 +191,7 @@ def storage_from_cache(cls, key):
     storage_ref = shared_cache.get(key)
     if storage_ref is None:
         return None
-    return cls._new_with_weak_ptr(storage_ref)
+    return cls._new_with_weak_ptr(storage_ref.cdata)
 
 
 def rebuild_storage_fd(cls, df, size):
@@ -172,7 +204,7 @@ def rebuild_storage_fd(cls, df, size):
         if storage is not None:
             return storage
         storage = cls._new_shared_fd(fd, size)
-        shared_cache[fd_id(fd)] = storage._weak_ref(StorageRef)
+        shared_cache[fd_id(fd)] = StorageWeakRef(storage)
         return storage
     finally:
         os.close(fd)
@@ -183,7 +215,7 @@ def rebuild_storage_filename(cls, manager, handle, size):
     if storage is not None:
         return storage._shared_decref()
     storage = cls._new_shared_filename(manager, handle, size)
-    shared_cache[handle] = storage._weak_ref(StorageRef)
+    shared_cache[handle] = StorageWeakRef(storage)
     return storage._shared_decref()
 
 
@@ -214,11 +246,7 @@ def reduce_storage(storage):
         metadata = (df, size)
         rebuild = rebuild_storage_fd
 
-    # WARNING! This call to _weak_ref could lead to O(n) deleter
-    # behavior, if you repeatedly call it on the same Storage (all
-    # other sites are guarded by shared_cache; maybe this site
-    # should be too?)
-    shared_cache[cache_key] = storage._weak_ref(StorageRef)
+    shared_cache[cache_key] = StorageWeakRef(storage)
 
     return (rebuild, (type(storage),) + metadata)