From f9d1b001e1a837df877aefce6fd6f5b06a2fc37f Mon Sep 17 00:00:00 2001 From: Gregory Chanan Date: Tue, 21 Aug 2018 08:46:06 -0700 Subject: [PATCH 01/94] Move THNN Reduction to ATen/core. (#10703) Summary: This is part of moving the (base) Type to ATen/core; Some Type methods have default argument of type THNN Reduction. Pull Request resolved: https://github.com/pytorch/pytorch/pull/10703 Differential Revision: D9406060 Pulled By: gchanan fbshipit-source-id: 789bb3387c58bd083cd526a602649105274e1ef6 --- aten/src/{THNN => ATen/core}/Reduction.h | 5 +---- aten/src/ATen/templates/Functions.h | 2 +- aten/src/ATen/templates/Type.h | 2 +- aten/src/ATen/test/basic.cpp | 2 +- aten/src/THCUNN/generic/THCUNN.h | 2 +- aten/src/THNN/CMakeLists.txt | 1 - aten/src/THNN/generic/THNN.h | 2 +- tools/autograd/templates/Functions.cpp | 2 +- 8 files changed, 7 insertions(+), 11 deletions(-) rename aten/src/{THNN => ATen/core}/Reduction.h (91%) diff --git a/aten/src/THNN/Reduction.h b/aten/src/ATen/core/Reduction.h similarity index 91% rename from aten/src/THNN/Reduction.h rename to aten/src/ATen/core/Reduction.h index fea4c2f8cdec73..6ec4fdf6870a44 100644 --- a/aten/src/THNN/Reduction.h +++ b/aten/src/ATen/core/Reduction.h @@ -1,5 +1,4 @@ -#ifndef REDUCE_H -#define REDUCE_H +#pragma once namespace Reduction { @@ -13,5 +12,3 @@ enum Reduction { END }; } - -#endif diff --git a/aten/src/ATen/templates/Functions.h b/aten/src/ATen/templates/Functions.h index 2c510a4f7172cc..88a01fa38a5b07 100644 --- a/aten/src/ATen/templates/Functions.h +++ b/aten/src/ATen/templates/Functions.h @@ -11,7 +11,7 @@ #include "ATen/NativeFunctions.h" #include "ATen/DeviceGuard.h" #include "ATen/TensorOptions.h" -#include "THNN/Reduction.h" +#include "ATen/core/Reduction.h" namespace at { diff --git a/aten/src/ATen/templates/Type.h b/aten/src/ATen/templates/Type.h index 4523b7b3efbc2e..da43ad61835d73 100644 --- a/aten/src/ATen/templates/Type.h +++ b/aten/src/ATen/templates/Type.h @@ -14,7 +14,7 @@ #include "ATen/core/ArrayRef.h" #include "ATen/core/Half.h" #include "ATen/core/TensorTypeIdRegistration.h" -#include "THNN/Reduction.h" +#include "ATen/core/Reduction.h" #include #include diff --git a/aten/src/ATen/test/basic.cpp b/aten/src/ATen/test/basic.cpp index cfd77986d626ce..1926db4a071018 100644 --- a/aten/src/ATen/test/basic.cpp +++ b/aten/src/ATen/test/basic.cpp @@ -2,7 +2,7 @@ #include "catch.hpp" #include "ATen/ATen.h" -#include "THNN/Reduction.h" +#include "ATen/core/Reduction.h" // for TH compat test only... 
struct THFloatTensor; diff --git a/aten/src/THCUNN/generic/THCUNN.h b/aten/src/THCUNN/generic/THCUNN.h index e9792d60642f68..d27750d9a995e4 100644 --- a/aten/src/THCUNN/generic/THCUNN.h +++ b/aten/src/THCUNN/generic/THCUNN.h @@ -2,7 +2,7 @@ #define THC_GENERIC_FILE "generic/THCUNN.h" #else -#include "Reduction.h" +#include <ATen/core/Reduction.h> THC_API void THNN_(Abs_updateOutput)( THCState *state, diff --git a/aten/src/THNN/CMakeLists.txt b/aten/src/THNN/CMakeLists.txt index e61624ca879a93..ab4bb755071cc2 100644 --- a/aten/src/THNN/CMakeLists.txt +++ b/aten/src/THNN/CMakeLists.txt @@ -1,5 +1,4 @@ set(ATen_CPU_SRCS ${ATen_CPU_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/init.cpp PARENT_SCOPE) -INSTALL(FILES THNN.h Reduction.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THNN") INSTALL(FILES generic/THNN.h DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THNN/generic") diff --git a/aten/src/THNN/generic/THNN.h b/aten/src/THNN/generic/THNN.h index 4b0bc949edb9e7..0b372e5cb4326e 100644 --- a/aten/src/THNN/generic/THNN.h +++ b/aten/src/THNN/generic/THNN.h @@ -2,7 +2,7 @@ #define TH_GENERIC_FILE "generic/THNN.h" #else -#include "Reduction.h" +#include <ATen/core/Reduction.h> TH_API void THNN_(Abs_updateOutput)( THNNState *state, // library's state diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp index 25e298317eed65..b9dcea3afecc54 100644 --- a/tools/autograd/templates/Functions.cpp +++ b/tools/autograd/templates/Functions.cpp @@ -4,7 +4,7 @@ #include #include #include -#include <THNN/Reduction.h> +#include <ATen/core/Reduction.h> // define constants like M_PI and C keywords for MSVC #ifdef _MSC_VER From 30ad13facaf9a61e2bc1f692abf90a54c4d2658e Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Tue, 21 Aug 2018 08:54:00 -0700 Subject: [PATCH 02/94] Avoid shadowing i, j vars in GeneralProposals test (#10721) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/10721 - Fix compilation warning "declaration of 'i' shadows a previous local [-Werror=shadow-compatible-local]" Reviewed By: newstzpz Differential Revision: D9419688 fbshipit-source-id: 76efc3688782ce4ead3c89e7069211736febfac2 --- caffe2/operators/generate_proposals_op_test.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/caffe2/operators/generate_proposals_op_test.cc b/caffe2/operators/generate_proposals_op_test.cc index a090842205b7fe..fb4c54581396d9 100644 --- a/caffe2/operators/generate_proposals_op_test.cc +++ b/caffe2/operators/generate_proposals_op_test.cc @@ -571,13 +571,15 @@ TEST(GenerateProposalsTest, TestRealDownSampledRotated) { vector<float> bbx_with_angle(num_boxes * 5); // bbx (deltas) is in shape (A * 4, H, W). Insert angle delta // at each spatial location for each anchor. - int i = 0, j = 0; - for (int a = 0; a < A; ++a) { - for (int k = 0; k < 4 * H * W; ++k) { - bbx_with_angle[i++] = bbx[j++]; - } - for (int k = 0; k < H * W; ++k) { - bbx_with_angle[i++] = delta_angle; + { + int i = 0, j = 0; + for (int a = 0; a < A; ++a) { + for (int k = 0; k < 4 * H * W; ++k) { + bbx_with_angle[i++] = bbx[j++]; + } + for (int k = 0; k < H * W; ++k) { + bbx_with_angle[i++] = delta_angle; + } } } From b0b513914972f569d838b8c88355c357eb641184 Mon Sep 17 00:00:00 2001 From: Jorghi12 Date: Tue, 21 Aug 2018 09:25:29 -0700 Subject: [PATCH 03/94] Set the BUILD_ENVIRONMENT variable before installing sccache. (#10640) Summary: Set the build environment before installing sccache in order to make sure the docker images have the links set up. 
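For illustration (not part of this diff), once the `ARG`/`ENV` pair is moved above the install steps, anything executed during the image build can branch on the variable; a made-up sketch:
```
import os

# Hypothetical sketch: BUILD_ENVIRONMENT is now baked into the image
# before the install scripts run, so e.g. the sccache setup can key off it.
build_env = os.environ.get('BUILD_ENVIRONMENT', '')
if 'rocm' in build_env:
    print('setting up sccache links for a ROCm build')
else:
    print('default sccache setup')
```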
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10640 Reviewed By: yf225 Differential Revision: D9399593 Pulled By: Jorghi12 fbshipit-source-id: a062fed8b7e83460fe9d50a7a27c0f20bcd766c4 --- docker/caffe2/jenkins/ubuntu-rocm/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile b/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile index 6e9738c3be7dab..c42d86b3ceef9d 100644 --- a/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile +++ b/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile @@ -5,6 +5,10 @@ ARG EC2 ADD ./install_base.sh install_base.sh RUN bash ./install_base.sh && rm install_base.sh +# Include BUILD_ENVIRONMENT environment variable in image +ARG BUILD_ENVIRONMENT +ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT} + # Install Python ARG PYTHON_VERSION ADD ./install_python.sh install_python.sh @@ -70,7 +74,3 @@ ARG JENKINS_GID ADD ./add_jenkins_user.sh add_jenkins_user.sh RUN if [ -n "${JENKINS}" ]; then bash ./add_jenkins_user.sh ${JENKINS_UID} ${JENKINS_GID}; fi RUN rm add_jenkins_user.sh - -# Include BUILD_ENVIRONMENT environment variable in image -ARG BUILD_ENVIRONMENT -ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT} From b23d59ce1a52d14b61003c4ad1a6f27a043f1465 Mon Sep 17 00:00:00 2001 From: Lu Fang Date: Tue, 21 Aug 2018 10:06:59 -0700 Subject: [PATCH 04/94] Make ONNX_ATEN_FALLBACK as internal default option Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/10629 Reviewed By: bddppq Differential Revision: D9381106 fbshipit-source-id: 03d42c95d17a70a68fe0f38dad68f1793996dfce --- torch/csrc/onnx/init.cpp | 6 ++++++ torch/onnx/__init__.py | 1 + torch/onnx/utils.py | 11 ++++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/torch/csrc/onnx/init.cpp b/torch/csrc/onnx/init.cpp index 64747c8c4b83a9..ea78e83ad01b36 100644 --- a/torch/csrc/onnx/init.cpp +++ b/torch/csrc/onnx/init.cpp @@ -29,5 +29,11 @@ void initONNXBindings(PyObject* module) { .value("ONNX_ATEN", OperatorExportTypes::ONNX_ATEN) .value("ONNX_ATEN_FALLBACK", OperatorExportTypes::ONNX_ATEN_FALLBACK) .value("RAW", OperatorExportTypes::RAW); + +#ifdef PYTORCH_ONNX_CAFFE2_BUNDLE + onnx.attr("PYTORCH_ONNX_CAFFE2_BUNDLE") = true; +#else + onnx.attr("PYTORCH_ONNX_CAFFE2_BUNDLE") = false; +#endif } }} // namespace torch::onnx diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py index 0514343da18284..32d2963f3c78c2 100644 --- a/torch/onnx/__init__.py +++ b/torch/onnx/__init__.py @@ -5,6 +5,7 @@ TensorProtoDataType = _C._onnx.TensorProtoDataType OperatorExportTypes = _C._onnx.OperatorExportTypes +PYTORCH_ONNX_CAFFE2_BUNDLE = _C._onnx.PYTORCH_ONNX_CAFFE2_BUNDLE ONNX_ARCHIVE_MODEL_PROTO_NAME = "__MODEL_PROTO" diff --git a/torch/onnx/utils.py b/torch/onnx/utils.py index cb6a862f993267..f566dbb53bff84 100644 --- a/torch/onnx/utils.py +++ b/torch/onnx/utils.py @@ -83,13 +83,22 @@ def export(model, args, f, export_params=True, verbose=False, training=False, by the functions in symbolic.py are exported as ATen ops. export_raw_ir (bool, default False): [DEPRECATED. use operator_export_type] export the internal IR directly instead of converting it to ONNX ops. + operator_export_type (enum, default OperatorExportTypes.ONNX): + OperatorExportTypes.ONNX: all ops are exported as regular ONNX ops. + OperatorExportTypes.ONNX_ATEN: all ops are exported as ATen ops. + OperatorExportTypes.ONNX_ATEN_FALLBACK: if symbolic is missing, + fall back on ATen op. + OperatorExportTypes.RAW: export raw ir. 
""" if aten or export_raw_ir: assert operator_export_type is None assert aten ^ export_raw_ir operator_export_type = OperatorExportTypes.ATEN if aten else OperatorExportTypes.RAW elif operator_export_type is None: - operator_export_type = OperatorExportTypes.ONNX + if torch.onnx.PYTORCH_ONNX_CAFFE2_BUNDLE: + operator_export_type = OperatorExportTypes.ONNX_ATEN_FALLBACK + else: + operator_export_type = OperatorExportTypes.ONNX _export(model, args, f, export_params, verbose, training, input_names, output_names, operator_export_type=operator_export_type) From 9c321a8779c7344788c012e26f9633bdc240a02e Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Tue, 21 Aug 2018 10:43:59 -0700 Subject: [PATCH 05/94] Add util function from core type to dtype (#10716) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/10716 title Reviewed By: idning Differential Revision: D9417357 fbshipit-source-id: 0f71805b1d64a46791d6ee4d8620763f878ffdb6 --- caffe2/python/schema.py | 7 +++++++ caffe2/python/schema_test.py | 9 ++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/caffe2/python/schema.py b/caffe2/python/schema.py index 31cfebcbc2e3aa..ab9998216ad025 100644 --- a/caffe2/python/schema.py +++ b/caffe2/python/schema.py @@ -1183,6 +1183,13 @@ def data_type_for_dtype(dtype): raise TypeError('Unknown dtype: ' + str(dtype.base)) +def dtype_for_core_type(core_type): + for np_type, dt in _DATA_TYPE_FOR_DTYPE: + if dt == core_type: + return np_type + raise TypeError('Unknown core type: ' + str(core_type)) + + def attach_metadata_to_scalars(field, metadata): for f in field.all_scalars(): f.set_metadata(metadata) diff --git a/caffe2/python/schema_test.py b/caffe2/python/schema_test.py index 3e78bc35a8559a..d4ec054622d8a9 100644 --- a/caffe2/python/schema_test.py +++ b/caffe2/python/schema_test.py @@ -403,4 +403,11 @@ def testScalarShape(self): self.assertEqual(s1_hard.field_type().shape, (1, )) s2 = schema.Scalar((np.int32, (2, 3))) - self.assertEqual(s2.field_type().shape, (2, 3)) \ No newline at end of file + self.assertEqual(s2.field_type().shape, (2, 3)) + + def testDtypeForCoreType(self): + dtype = schema.dtype_for_core_type(core.DataType.FLOAT16) + self.assertEqual(dtype, np.float16) + + with self.assertRaises(TypeError): + schema.dtype_for_core_type(100) From 8a1739b05d079ed768313ae3e7f67590029ac2d8 Mon Sep 17 00:00:00 2001 From: nadavbh12 Date: Tue, 21 Aug 2018 12:05:08 -0700 Subject: [PATCH 06/94] Add arguments __repr__ in Distribution base class Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/10373 Differential Revision: D9240316 Pulled By: ezyang fbshipit-source-id: f35c500f61f86e6be405e8bd4040db5146224984 --- torch/distributions/distribution.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/torch/distributions/distribution.py b/torch/distributions/distribution.py index 8e6be31f13d58b..9acb4f776e5efb 100644 --- a/torch/distributions/distribution.py +++ b/torch/distributions/distribution.py @@ -221,4 +221,8 @@ def _validate_sample(self, value): raise ValueError('The value argument must be within the support') def __repr__(self): - return self.__class__.__name__ + '()' + param_names = [k for k, _ in self.arg_constraints.items()] + args_string = ', '.join(['{}: {}'.format(p, self.__dict__[p] + if self.__dict__[p].dim() == 0 + else self.__dict__[p].size()) for p in param_names]) + return self.__class__.__name__ + '(' + args_string + ')' From edb34434ab4eb63d05473143426ec8b69fc50c69 Mon Sep 17 00:00:00 2001 From: Orion 
Reblitz-Richardson Date: Tue, 21 Aug 2018 13:29:49 -0700 Subject: [PATCH 07/94] More changes for hidden visibility (#10692) Summary: Let's run CI tests to see what fails given the changes that just landed in https://github.com/pytorch/pytorch/pull/10624 cc mingzhe09088 ezyang Yangqing Pull Request resolved: https://github.com/pytorch/pytorch/pull/10692 Reviewed By: mingzhe09088 Differential Revision: D9423617 Pulled By: orionr fbshipit-source-id: 3bda1f118d13f8dd8e823727c93167cae747d8cf --- aten/src/ATen/Device.h | 2 +- aten/src/ATen/Generator.h | 4 +++- aten/src/ATen/OptionsGuard.h | 2 +- aten/src/ATen/function_wrapper.py | 2 +- aten/src/ATen/templates/Tensor.h | 4 ++-- caffe2/contrib/script/compiler.h | 3 ++- caffe2/core/blob_stats.h | 2 +- caffe2/core/db.h | 12 ++++++------ caffe2/core/logging_is_not_google_glog.h | 4 ++-- caffe2/core/memonger.h | 4 ++-- caffe2/core/module.h | 8 ++++---- caffe2/core/net_async_base.h | 2 +- caffe2/core/net_async_scheduling.h | 2 +- caffe2/core/qtensor.h | 2 +- caffe2/core/types.h | 2 +- caffe2/observers/runcnt_observer.h | 5 +++-- caffe2/observers/time_observer.h | 9 +++++---- caffe2/onnx/backend.h | 10 +++++----- caffe2/onnx/backend_rep.h | 2 +- caffe2/operators/counter_ops.h | 2 +- caffe2/operators/create_scope_op.h | 2 +- caffe2/operators/rnn/recurrent_network_executor.h | 2 +- caffe2/operators/rnn/recurrent_network_op.h | 2 +- caffe2/operators/text_file_reader_utils.h | 14 +++++++------- caffe2/utils/math.h | 2 +- caffe2/utils/signal_handler.h | 4 ++-- caffe2/utils/smart_tensor_printer.h | 2 +- 27 files changed, 58 insertions(+), 53 deletions(-) diff --git a/aten/src/ATen/Device.h b/aten/src/ATen/Device.h index d48984a2063dc0..b11b6943fb6ad0 100644 --- a/aten/src/ATen/Device.h +++ b/aten/src/ATen/Device.h @@ -22,7 +22,7 @@ namespace at { /// 1. A negative index represents the current device, a non-negative index /// represents a specific, concrete device, /// 2. When the device type is CPU, the device index must be zero. -struct Device { +struct AT_API Device { using Type = at::DeviceType; /// Constructs a new `Device` from a `DeviceType` and an optional device diff --git a/aten/src/ATen/Generator.h b/aten/src/ATen/Generator.h index 7e2b68b32dddff..580ab704380beb 100644 --- a/aten/src/ATen/Generator.h +++ b/aten/src/ATen/Generator.h @@ -1,10 +1,12 @@ #pragma once +#include + #include namespace at { -struct Generator { +struct AT_API Generator { Generator() {}; Generator(const Generator& other) = delete; Generator(Generator&& other) = delete; diff --git a/aten/src/ATen/OptionsGuard.h b/aten/src/ATen/OptionsGuard.h index 83f819f1c34c29..225060bd9d13ce 100644 --- a/aten/src/ATen/OptionsGuard.h +++ b/aten/src/ATen/OptionsGuard.h @@ -13,7 +13,7 @@ struct DefaultTensorOptions { /// Returns the current thread local default options. /// Defined in OptionsGuard.cpp because we can't use optional in headers, due /// to Windows and other compilers. 
- static TensorOptions& get(); + AT_API static TensorOptions& get(); private: /// This is an optional because of compiler bugs that mis-initialize static diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py index 9f589017822bc4..0e80d37846024f 100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -119,7 +119,7 @@ def TypedDict(name, attrs, total=True): # type: ignore # add non-virtual declaration to Tensor.h TENSOR_METHOD_DECLARATION = CodeTemplate("""\ -AT_API ${return_type} ${api_name}(${method_formals_with_defaults})${const_mark}; +${return_type} ${api_name}(${method_formals_with_defaults})${const_mark}; """) # add non-virtual declaration to Tensor.cpp TENSOR_METHOD_DEFINITION = CodeTemplate("""\ diff --git a/aten/src/ATen/templates/Tensor.h b/aten/src/ATen/templates/Tensor.h index 0d27a88b773fbe..ae970d42b07e5e 100644 --- a/aten/src/ATen/templates/Tensor.h +++ b/aten/src/ATen/templates/Tensor.h @@ -41,7 +41,7 @@ namespace at { // // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and // special care must be taken to handle this. -struct Tensor : public detail::TensorBase { +struct AT_API Tensor : public detail::TensorBase { using TensorBase = detail::TensorBase; Tensor() : TensorBase() {} Tensor(TensorImpl * self, bool retain) : TensorBase(self, retain) {} @@ -204,7 +204,7 @@ struct Tensor : public detail::TensorBase { friend struct WeakTensor; }; -struct WeakTensor : public detail::WeakTensorBase { +struct AT_API WeakTensor : public detail::WeakTensorBase { using WeakTensorBase = detail::WeakTensorBase; WeakTensor() : WeakTensorBase() {} WeakTensor(TensorImpl * self, bool retain) : WeakTensorBase(self, retain) {} diff --git a/caffe2/contrib/script/compiler.h b/caffe2/contrib/script/compiler.h index 618c176b651487..0a15c33c57987d 100644 --- a/caffe2/contrib/script/compiler.h +++ b/caffe2/contrib/script/compiler.h @@ -7,7 +7,8 @@ namespace caffe2 { namespace script { struct CompilationUnitImpl; -struct CompilationUnit { + +struct CAFFE2_API CompilationUnit { CompilationUnit(); void define(const std::string& str); void defineExtern(const std::string& str, std::unique_ptr netdef); diff --git a/caffe2/core/blob_stats.h b/caffe2/core/blob_stats.h index 178bda0edd78bc..67f9e88e2edc62 100644 --- a/caffe2/core/blob_stats.h +++ b/caffe2/core/blob_stats.h @@ -41,6 +41,6 @@ namespace BlobStat { * Return size in bytes of the blob, if available for a blob of given type. * If not available, return 0. */ -size_t sizeBytes(const Blob& blob); +CAFFE2_API size_t sizeBytes(const Blob& blob); } } diff --git a/caffe2/core/db.h b/caffe2/core/db.h index 13b29664dac293..47865fe076f794 100644 --- a/caffe2/core/db.h +++ b/caffe2/core/db.h @@ -19,7 +19,7 @@ enum Mode { READ, WRITE, NEW }; /** * An abstract class for the cursor of the database while reading. */ -class Cursor { +class CAFFE2_API Cursor { public: Cursor() { } virtual ~Cursor() { } @@ -58,7 +58,7 @@ class Cursor { /** * An abstract class for the current database transaction while writing. */ -class Transaction { +class CAFFE2_API Transaction { public: Transaction() { } virtual ~Transaction() { } @@ -77,7 +77,7 @@ class Transaction { /** * An abstract class for accessing a database of key-value pairs. 
*/ -class DB { +class CAFFE2_API DB { public: DB(const string& /*source*/, Mode mode) : mode_(mode) {} virtual ~DB() { } @@ -141,7 +141,7 @@ inline bool DBExists(const string& db_type, const string& full_db_name) { /** * A reader wrapper for DB that also allows us to serialize it. */ -class DBReader { +class CAFFE2_API DBReader { public: friend class DBReaderSerializer; @@ -288,7 +288,7 @@ class DBReader { AT_DISABLE_COPY_AND_ASSIGN(DBReader); }; -class DBReaderSerializer : public BlobSerializerBase { +class CAFFE2_API DBReaderSerializer : public BlobSerializerBase { public: /** * Serializes a DBReader. Note that this blob has to contain DBReader, @@ -300,7 +300,7 @@ class DBReaderSerializer : public BlobSerializerBase { BlobSerializerBase::SerializationAcceptor acceptor) override; }; -class DBReaderDeserializer : public BlobDeserializerBase { +class CAFFE2_API DBReaderDeserializer : public BlobDeserializerBase { public: void Deserialize(const BlobProto& proto, Blob* blob) override; }; diff --git a/caffe2/core/logging_is_not_google_glog.h b/caffe2/core/logging_is_not_google_glog.h index 80b47e22fb8b2c..6e9046fb669123 100644 --- a/caffe2/core/logging_is_not_google_glog.h +++ b/caffe2/core/logging_is_not_google_glog.h @@ -27,7 +27,7 @@ const int INFO = 0; const char CAFFE2_SEVERITY_PREFIX[] = "FEWIV"; namespace caffe2 { -class MessageLogger { +class CAFFE2_API MessageLogger { public: MessageLogger(const char *file, int line, int severity); ~MessageLogger(); @@ -46,7 +46,7 @@ class MessageLogger { // This class is used to explicitly ignore values in the conditional // logging macros. This avoids compiler warnings like "value computed // is not used" and "statement has no effect". -class LoggerVoidify { +class CAFFE2_API LoggerVoidify { public: LoggerVoidify() { } // This has to be an operator with a precedence lower than << but diff --git a/caffe2/core/memonger.h b/caffe2/core/memonger.h index fe65ae5dac19ba..e34f35bab25de9 100644 --- a/caffe2/core/memonger.h +++ b/caffe2/core/memonger.h @@ -10,11 +10,11 @@ namespace caffe2 { namespace memonger { -NetDef optimize_inference_net( +CAFFE2_API NetDef optimize_inference_net( const NetDef& net, const std::set& static_blobs); -NetDef compute_blob_recycling_for_dag( +CAFFE2_API NetDef compute_blob_recycling_for_dag( const NetDef& net, const std::vector& heads, const std::vector& op_indices, diff --git a/caffe2/core/module.h b/caffe2/core/module.h index 75505676a27184..c44192033f8261 100644 --- a/caffe2/core/module.h +++ b/caffe2/core/module.h @@ -23,7 +23,7 @@ namespace caffe2 { * different modules. Currently, we only store the name and a simple * description of what this module does. */ -class ModuleSchema { +class CAFFE2_API ModuleSchema { public: ModuleSchema(const char* name, const char* description); @@ -45,12 +45,12 @@ class ModuleSchema { * the reason we do not include ".so" is for cross-platform compatibility * on platforms like mac os. */ -const CaffeMap& CurrentModules(); +CAFFE2_API const CaffeMap& CurrentModules(); /** * @brief Checks whether a module is already present in the current binary. */ -bool HasModule(const string& name); +CAFFE2_API bool HasModule(const string& name); /** * @brief Load a module. @@ -60,7 +60,7 @@ bool HasModule(const string& name); * full path option to only experimental modules. * filename: (optional) a filename that serves as a hint to load the module. 
*/ -void LoadModule(const string& name, const string& filename=""); +CAFFE2_API void LoadModule(const string& name, const string& filename=""); #define CAFFE2_MODULE(name, description) \ diff --git a/caffe2/core/net_async_base.h b/caffe2/core/net_async_base.h index 09510fdb16ad04..df55e2ee07a2e1 100644 --- a/caffe2/core/net_async_base.h +++ b/caffe2/core/net_async_base.h @@ -31,7 +31,7 @@ namespace tracing { class Tracer; } -class AsyncNetBase : public NetBase { +class CAFFE2_API AsyncNetBase : public NetBase { public: AsyncNetBase(const std::shared_ptr& net_def, Workspace* ws); ~AsyncNetBase() override; diff --git a/caffe2/core/net_async_scheduling.h b/caffe2/core/net_async_scheduling.h index 096e7e2b2362a4..8576fca1bb07f7 100644 --- a/caffe2/core/net_async_scheduling.h +++ b/caffe2/core/net_async_scheduling.h @@ -5,7 +5,7 @@ namespace caffe2 { -class AsyncSchedulingNet : public AsyncNetBase { +class CAFFE2_API AsyncSchedulingNet : public AsyncNetBase { public: AsyncSchedulingNet( const std::shared_ptr& net_def, diff --git a/caffe2/core/qtensor.h b/caffe2/core/qtensor.h index d857cfaf2866e8..652f2987bea442 100644 --- a/caffe2/core/qtensor.h +++ b/caffe2/core/qtensor.h @@ -14,7 +14,7 @@ namespace caffe2 { template -class QTensor { +class CAFFE2_API QTensor { public: QTensor() {} virtual ~QTensor() {} diff --git a/caffe2/core/types.h b/caffe2/core/types.h index 49c06b3fad7d77..532f3a05c1c55c 100644 --- a/caffe2/core/types.h +++ b/caffe2/core/types.h @@ -50,7 +50,7 @@ CAFFE2_API const TypeMeta& DataTypeToTypeMeta(const TensorProto::DataType& dt); static_assert(sizeof(unsigned short) == 2, "Short on this platform is not 16 bit."); namespace caffe2 { -typedef struct CAFFE2_ALIGNED(2) __f16 { uint16_t x; } float16; +typedef struct CAFFE2_API CAFFE2_ALIGNED(2) __f16 { uint16_t x; } float16; // Helpers to avoid using typeinfo with -rtti template diff --git a/caffe2/observers/runcnt_observer.h b/caffe2/observers/runcnt_observer.h index 2dbd5fb27f4591..9b7869d696343e 100644 --- a/caffe2/observers/runcnt_observer.h +++ b/caffe2/observers/runcnt_observer.h @@ -8,7 +8,8 @@ namespace caffe2 { class RunCountNetObserver; -class RunCountOperatorObserver final : public ObserverBase { + +class CAFFE2_API RunCountOperatorObserver final : public ObserverBase { public: explicit RunCountOperatorObserver(OperatorBase* op) = delete; RunCountOperatorObserver(OperatorBase* op, RunCountNetObserver* netObserver); @@ -25,7 +26,7 @@ class RunCountOperatorObserver final : public ObserverBase { RunCountNetObserver* netObserver_; }; -class RunCountNetObserver final : public OperatorAttachingNetObserver< +class CAFFE2_API RunCountNetObserver final : public OperatorAttachingNetObserver< RunCountOperatorObserver, RunCountNetObserver> { public: diff --git a/caffe2/observers/time_observer.h b/caffe2/observers/time_observer.h index cce06cf1789a76..7314587cd46d0d 100644 --- a/caffe2/observers/time_observer.h +++ b/caffe2/observers/time_observer.h @@ -13,7 +13,8 @@ namespace caffe2 { class TimeObserver; -class TimeCounter { + +class CAFFE2_API TimeCounter { public: explicit TimeCounter() {} inline float average_time() const { @@ -27,8 +28,8 @@ class TimeCounter { int iterations_ = 0; }; -class TimeOperatorObserver final : public TimeCounter, - public ObserverBase { +class CAFFE2_API TimeOperatorObserver final : public TimeCounter, + public ObserverBase { public: explicit TimeOperatorObserver(OperatorBase* subject) = delete; explicit TimeOperatorObserver( @@ -44,7 +45,7 @@ class TimeOperatorObserver final : public TimeCounter, 
void Stop() override; }; -class TimeObserver final +class CAFFE2_API TimeObserver final : public TimeCounter, public OperatorAttachingNetObserver { public: diff --git a/caffe2/onnx/backend.h b/caffe2/onnx/backend.h index 6aa5f271cc5c2a..6ea4fcd944f91d 100644 --- a/caffe2/onnx/backend.h +++ b/caffe2/onnx/backend.h @@ -25,7 +25,7 @@ using ::ONNX_NAMESPACE::ValueInfoProto; using ValueInfoMap = std::unordered_map; -class ConversionContext { +class CAFFE2_API ConversionContext { public: ConversionContext(const ValueInfoMap& value_infos, int opset_version) : value_infos_(value_infos), opset_version_(opset_version) {} @@ -44,7 +44,7 @@ class ConversionContext { // \brief This struct holds the converted ops after the onnx->c2 conversion. // Notice that for RNN ops, it may create ops in init_net. Hence we have the // `init_ops` field. -struct Caffe2Ops { +struct CAFFE2_API Caffe2Ops { ::google::protobuf::RepeatedPtrField init_ops; ::google::protobuf::RepeatedPtrField ops; ::google::protobuf::RepeatedPtrField interface_blobs; @@ -52,7 +52,7 @@ struct Caffe2Ops { // A convenient class to query attributes of a NodeProto. Note that the // NodeProto can not be modified during the query of OnnxAttributes object -class OnnxAttributes { +class CAFFE2_API OnnxAttributes { public: OnnxAttributes(const NodeProto& node); @@ -120,7 +120,7 @@ template <> const TensorProto* OnnxAttributes::get(const std::string& key) const; // convenient class for onnx node -struct OnnxNode { +struct CAFFE2_API OnnxNode { OnnxNode(const NodeProto& node_in) : node(node_in), attributes(node_in) {} const NodeProto& node; @@ -128,7 +128,7 @@ struct OnnxNode { OnnxAttributes attributes; }; -class Caffe2Backend { +class CAFFE2_API Caffe2Backend { public: // Since we still have this Python-C++ hybrid flow, we will need to take the // DummyName generator from Python as a pointer. 
In this case, Python env owns diff --git a/caffe2/onnx/backend_rep.h b/caffe2/onnx/backend_rep.h index fb46d19d10ba43..0c4d7f2c7ae4ec 100644 --- a/caffe2/onnx/backend_rep.h +++ b/caffe2/onnx/backend_rep.h @@ -8,7 +8,7 @@ #include namespace caffe2 { namespace onnx { -class Caffe2BackendRep { +class CAFFE2_API Caffe2BackendRep { public: void Run( const caffe2::Predictor::TensorVector& inputs, diff --git a/caffe2/operators/counter_ops.h b/caffe2/operators/counter_ops.h index d907d1cb98c6f9..e3342e9e009def 100644 --- a/caffe2/operators/counter_ops.h +++ b/caffe2/operators/counter_ops.h @@ -9,7 +9,7 @@ namespace caffe2 { template -class Counter { +class CAFFE2_API Counter { public: explicit Counter(T count) : count_(count) {} bool countDown() { diff --git a/caffe2/operators/create_scope_op.h b/caffe2/operators/create_scope_op.h index 41dcb170dfce15..cf2e64d085fb62 100644 --- a/caffe2/operators/create_scope_op.h +++ b/caffe2/operators/create_scope_op.h @@ -20,7 +20,7 @@ namespace detail { * Keeps track of forward and backward gradient workspaces in stack, * reuses previously created workspaces, non-thread safe */ -class WorkspaceStack { +class CAFFE2_API WorkspaceStack { public: explicit WorkspaceStack() : parent_ws_(nullptr), top_(-1) {} diff --git a/caffe2/operators/rnn/recurrent_network_executor.h b/caffe2/operators/rnn/recurrent_network_executor.h index 3afaedf577c601..7e37e562e77a50 100644 --- a/caffe2/operators/rnn/recurrent_network_executor.h +++ b/caffe2/operators/rnn/recurrent_network_executor.h @@ -466,7 +466,7 @@ std::unique_ptr createRNNExecutor( std::string timestep_blob, ArgumentHelper rnn_args); -class ThreadedRecurrentNetworkExecutor : public RecurrentNetworkExecutorBase { +class CAFFE2_API ThreadedRecurrentNetworkExecutor : public RecurrentNetworkExecutorBase { public: ThreadedRecurrentNetworkExecutor( const NetDef& step_net_def, diff --git a/caffe2/operators/rnn/recurrent_network_op.h b/caffe2/operators/rnn/recurrent_network_op.h index 51339e1c21862f..00595198b6db37 100644 --- a/caffe2/operators/rnn/recurrent_network_op.h +++ b/caffe2/operators/rnn/recurrent_network_op.h @@ -46,7 +46,7 @@ struct Link { int32_t window{1}; }; -struct ScratchWorkspaces { +struct CAFFE2_API ScratchWorkspaces { std::vector> stepWorkspaces; std::shared_ptr sharedBlobsWs = nullptr; }; diff --git a/caffe2/operators/text_file_reader_utils.h b/caffe2/operators/text_file_reader_utils.h index eaa94f43c4ea9d..15f81e53f51964 100644 --- a/caffe2/operators/text_file_reader_utils.h +++ b/caffe2/operators/text_file_reader_utils.h @@ -9,13 +9,13 @@ namespace caffe2 { -struct Token { +struct CAFFE2_API Token { int startDelimId; const char* start; const char* end; }; -class TokenizedString { +class CAFFE2_API TokenizedString { // holder for strings that have been modified std::vector> modifiedStrings_; std::vector tokens_; @@ -31,7 +31,7 @@ class TokenizedString { friend class Tokenizer; }; -class Tokenizer { +class CAFFE2_API Tokenizer { private: int startDelimId_; // state of the tokenizer @@ -48,18 +48,18 @@ class Tokenizer { void next(char* start, char* end, TokenizedString& tokenized); }; -struct CharRange { +struct CAFFE2_API CharRange { char* start; char* end; }; -struct StringProvider { +struct CAFFE2_API StringProvider { virtual void operator()(CharRange&) = 0; virtual void reset() = 0; virtual ~StringProvider() {} }; -class BufferedTokenizer { +class CAFFE2_API BufferedTokenizer { public: BufferedTokenizer(const Tokenizer& t, StringProvider* p, int numPasses = 1) : provider_(p), tokenizer_(t), 
tokenIndex_(0), numPasses_(numPasses) {} @@ -104,7 +104,7 @@ class BufferedTokenizer { int pass_{0}; }; -class FileReader : public StringProvider { +class CAFFE2_API FileReader : public StringProvider { public: explicit FileReader(const std::string& path, size_t bufferSize = 65536); ~FileReader(); diff --git a/caffe2/utils/math.h b/caffe2/utils/math.h index f78c105b2a4024..f9d4dd11e96600 100644 --- a/caffe2/utils/math.h +++ b/caffe2/utils/math.h @@ -23,7 +23,7 @@ class Tensor; // An empty class as a placeholder for a math function that has no specific // engine specified. -class DefaultEngine {}; +class CAFFE2_API DefaultEngine {}; namespace math { diff --git a/caffe2/utils/signal_handler.h b/caffe2/utils/signal_handler.h index e253d15ad5ea6f..e61ae84b947835 100644 --- a/caffe2/utils/signal_handler.h +++ b/caffe2/utils/signal_handler.h @@ -38,8 +38,8 @@ class CAFFE2_API SignalHandler { // This works by setting up certain fatal signal handlers. Previous fatal // signal handlers will still be called when the signal is raised. Defaults // to being off. -void setPrintStackTracesOnFatalSignal(bool print); -bool printStackTracesOnFatalSignal(); +CAFFE2_API void setPrintStackTracesOnFatalSignal(bool print); +CAFFE2_API bool printStackTracesOnFatalSignal(); #endif // defined(CAFFE2_SUPPORTS_SIGNAL_HANDLER) } // namespace caffe2 diff --git a/caffe2/utils/smart_tensor_printer.h b/caffe2/utils/smart_tensor_printer.h index 224f7d91e0e98f..48e1e47cf8efc8 100644 --- a/caffe2/utils/smart_tensor_printer.h +++ b/caffe2/utils/smart_tensor_printer.h @@ -8,7 +8,7 @@ namespace caffe2 { // explicit specify the type of the tensor while calling the Print() method. // It also supports a convenience function with a default constructed printer as // a static method. -class SmartTensorPrinter { +class CAFFE2_API SmartTensorPrinter { public: // The proliferation of constructors is to give the feature parity with // TensorPrinter From 04b773ab878e943109033bc3914ee4a12803b151 Mon Sep 17 00:00:00 2001 From: Shihao Xu Date: Tue, 21 Aug 2018 13:41:05 -0700 Subject: [PATCH 08/94] Support Loading to GPU (#10710) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/10710 Can't resume from checkpoint for workflows that use GPU. The problem is just we didn't leverage the already-provided GPU deserialization of Caffe2. `keep_device` arg of LoadOp. See https://fburl.com/y27ltaxw How a serialized BlobProto (contraining TensorProto) is loaded into GPU memory? - Load BlobProto from DB. https://fburl.com/pe1qaeyf - Deserialize the BlobProto into a Blob instance. https://fburl.com/5dirjuuh and https://fburl.com/stoho0x1 - Call Blob->Deserialized. https://fburl.com/bnureu32 - Deserializer Registration. https://fburl.com/wbu95ry7 https://fburl.com/ycetud8u - Create TensorCUDA Deserializer. https://fburl.com/2lirfuqj - Create Tensor on GPU and get TensorProto of BlobProto. https://fburl.com/7dre82zg - Copy TensorProto in CPU to Tensor on GPU. https://fburl.com/fr0qk2oe Cloned the GPU workflows for testing in D9125520. 
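As a minimal sketch of the mechanism (illustrative only; the blob and db names below are made up), `keep_device` can be passed to a standalone Load op:
```
from caffe2.python import core

# Hypothetical example: with keep_device=True, deserialized tensors are
# restored to the device recorded in their serialized TensorProto
# (e.g. a GPU), instead of being forced onto the op's own device.
load_op = core.CreateOperator(
    'Load',
    [],
    ['my_blob'],              # made-up blob name
    db='/tmp/my_checkpoint',  # made-up checkpoint path
    db_type='minidb',
    absolute_path=True,
    keep_device=True,
)
```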
Reviewed By: mraway Differential Revision: D9372950 fbshipit-source-id: 2bf70747bd71e8da16239197f7d2761d63f09ff8 --- caffe2/python/checkpoint.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/caffe2/python/checkpoint.py b/caffe2/python/checkpoint.py index e983f5c590f387..5236989e781598 100644 --- a/caffe2/python/checkpoint.py +++ b/caffe2/python/checkpoint.py @@ -217,7 +217,9 @@ def init( [], self._blob_names, db=full_db_name, db_type=db_type, - absolute_path=True) + absolute_path=True, + keep_device=True, + ) self._names_output = task.outputs()[0] return task @@ -280,7 +282,9 @@ def add_op(): self.blob_list(), db=self._current_db_name, db_type=db_type, - absolute_path=True) + absolute_path=True, + keep_device=True, + ) return self._timed_task('checkpoint_load', add_op) From e94ae99d24720e2a6c059207b1505db71e12efc3 Mon Sep 17 00:00:00 2001 From: Tongliang Liao Date: Tue, 21 Aug 2018 16:43:03 -0700 Subject: [PATCH 09/94] Delete copy constructor/assignment of class Observable explicitly. (#10593) Summary: This should resolve "error C2280: 'std::unique_ptr<_Ty,std::default_delete<_Ty>> &std::unique_ptr<_Ty,std::default_delete<_Ty>>::operator =(const std::unique_ptr<_Ty,std::default_delete<_Ty>> &)': attempting to reference a deleted function" from Visual Studio. It should also make the error message more human-readable in case something is really messed up. Pull Request resolved: https://github.com/pytorch/pytorch/pull/10593 Reviewed By: orionr Differential Revision: D9436397 Pulled By: mingzhe09088 fbshipit-source-id: 31711667297b4160196134a34365da734db1c61d --- caffe2/core/observer.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/caffe2/core/observer.h b/caffe2/core/observer.h index 7358301678f08b..e10ab0bb7eac69 100644 --- a/caffe2/core/observer.h +++ b/caffe2/core/observer.h @@ -2,6 +2,7 @@ #include #include + #include "caffe2/core/logging.h" namespace caffe2 { @@ -43,7 +44,15 @@ class ObserverBase { template <class T> class Observable { public: - virtual ~Observable(){}; + Observable() = default; + + Observable(Observable&&) = default; + Observable& operator =(Observable&&) = default; + + virtual ~Observable() = default; + + AT_DISABLE_COPY_AND_ASSIGN(Observable); + using Observer = ObserverBase<T>; /* Returns a reference to the observer after addition. */ From 71ddd837d7ec25d2bf6b09942e9438bba8d9deca Mon Sep 17 00:00:00 2001 From: Peter Goldsborough Date: Tue, 21 Aug 2018 18:15:37 -0700 Subject: [PATCH 10/94] Support custom ops in ScriptModule and tidy up test files (#10610) Summary: This PR adds support for using custom ops in ScriptModules, the last step for our custom op strategy. You can now write ``` import torch torch.ops.load_library('libcustom_ops.so') class Model(torch.jit.ScriptModule): def __init__(self): super(Model, self).__init__() @torch.jit.script_method def forward(self, input): return torch.ops.custom.op(input) + 1 model = Model() model.forward(torch.ones(5)) # Works model.save("model.pt") # Works model = torch.jit.load("model.pt") # Works ``` You can then load the `model.pt` in C++ and execute its `forward` method! Missing for this was the fact that the script compiler didn't know to convert `ops.custom.op` into a `BuiltinFunction` which then emits a function call. For this I came up with the following strategy inside `torch/csrc/jit/script/init.cpp`: 1. 
When we access `torch.ops`, we return a `CustomOpValue` (subclass of `PythonValue`), whose purpose is only to return a `CustomOpNamespaceValue` (subclass of `PythonValue`) whenever something under it is accessed. 2. `CustomOpNamespaceValue` will then for each field accessed on it return a `BuiltinFunction`. This doesn't reduce performance for any calls that are not to `torch.ops` (as opposed to inspecting every function call's name the call site, for example). I also had to fix `BuiltinFunction` to not assume the namespace is always `aten::`. A lot of other changes are just tidying up the Python and C++ test harness before I integrate it in CI. zdevito dzhulgakov Pull Request resolved: https://github.com/pytorch/pytorch/pull/10610 Differential Revision: D9387832 Pulled By: goldsborough fbshipit-source-id: c00f431db56c7502a66fe1f813fe78067f428ecb --- test/custom_operator/CMakeLists.txt | 8 +-- test/custom_operator/model.py | 30 +++++++++++ test/custom_operator/op.cpp | 10 +++- test/custom_operator/test.cpp | 25 --------- test/custom_operator/test.py | 12 ----- test/custom_operator/test_custom_ops.cpp | 52 ++++++++++++++++++ test/custom_operator/test_custom_ops.py | 54 +++++++++++++++++++ ...est_script_graph_contains_custom_op.expect | 2 +- torch/csrc/jit/script/compiler.cpp | 13 +++-- torch/csrc/jit/script/compiler.h | 22 ++++---- torch/csrc/jit/script/init.cpp | 51 ++++++++++++++++-- torch/csrc/jit/script/module.h | 13 +++++ torch/op.h | 1 + 13 files changed, 230 insertions(+), 63 deletions(-) create mode 100644 test/custom_operator/model.py delete mode 100644 test/custom_operator/test.cpp delete mode 100644 test/custom_operator/test.py create mode 100644 test/custom_operator/test_custom_ops.cpp create mode 100644 test/custom_operator/test_custom_ops.py diff --git a/test/custom_operator/CMakeLists.txt b/test/custom_operator/CMakeLists.txt index 15338cc57cd8da..114cfda92d83d6 100644 --- a/test/custom_operator/CMakeLists.txt +++ b/test/custom_operator/CMakeLists.txt @@ -1,10 +1,10 @@ # Basic CMake setup cmake_minimum_required(VERSION 3.0 FATAL_ERROR) -project(custom_op) +project(custom_ops) find_package(Torch REQUIRED) -torch_add_custom_op_library(custom_op op.cpp) +torch_add_custom_op_library(custom_ops op.cpp) -add_executable(custom_op_test test.cpp) -target_link_libraries(custom_op_test custom_op) +add_executable(test_custom_ops test_custom_ops.cpp) +target_link_libraries(test_custom_ops custom_ops) diff --git a/test/custom_operator/model.py b/test/custom_operator/model.py new file mode 100644 index 00000000000000..6f91bd81c577cd --- /dev/null +++ b/test/custom_operator/model.py @@ -0,0 +1,30 @@ +import argparse +import os.path + +import torch + + +class Model(torch.jit.ScriptModule): + def __init__(self): + super(Model, self).__init__() + + @torch.jit.script_method + def forward(self, input): + return torch.ops.custom.op_with_defaults(input)[0] + 1 + + +def main(): + parser = argparse.ArgumentParser( + description="Serialize a script module with custom ops" + ) + parser.add_argument("--export-script-module-to", required=True) + options = parser.parse_args() + + torch.ops.load_library(os.path.abspath('build/libcustom_ops.so')) + + model = Model() + model.save(options.export_script_module_to) + + +if __name__ == '__main__': + main() diff --git a/test/custom_operator/op.cpp b/test/custom_operator/op.cpp index ec24967d0e10e2..113476f581c52d 100644 --- a/test/custom_operator/op.cpp +++ b/test/custom_operator/op.cpp @@ -15,4 +15,12 @@ std::vector custom_op( return output; } -static 
torch::RegisterOperators registry("custom::op", &custom_op); +static auto registry = + torch::RegisterOperators() + // We parse the schema for the user. + .op("custom::op", &custom_op) + // User provided schema. Among other things, allows defaulting values, + // because we cannot infer default values from the signature. It also + // gives arguments meaningful names. + .op("custom::op_with_defaults(Tensor tensor, float scalar = 1, int repeat = 1) -> Tensor[]", + &custom_op); diff --git a/test/custom_operator/test.cpp b/test/custom_operator/test.cpp deleted file mode 100644 index 57ad66d08f320b..00000000000000 --- a/test/custom_operator/test.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include "op.h" - -#include -#include - -int main() { - auto& ops = torch::jit::getAllOperatorsFor( - torch::jit::Symbol::fromQualString("custom::op")); - assert(ops.size() == 1); - - auto& op = ops.front(); - assert(op->schema().name == "custom::op"); - - torch::jit::Stack stack; - torch::jit::push(stack, torch::ones(5), 2.0, 3); - op->getOperation()(stack); - std::vector output; - torch::jit::pop(stack, output); - - assert(output.size() == 3); - for (const auto& tensor : output) { - assert(tensor.allclose(torch::ones(5) * 2)); - } - std::cout << "success" << std::endl; -} diff --git a/test/custom_operator/test.py b/test/custom_operator/test.py deleted file mode 100644 index 2a04231ed99868..00000000000000 --- a/test/custom_operator/test.py +++ /dev/null @@ -1,12 +0,0 @@ -import os -import torch - -library_path = os.path.abspath('build/libcustom_op.so') -torch.ops.load_library(library_path) -assert library_path in torch.ops.loaded_libraries - -output = torch.ops.custom.op(torch.ones(5), 2.0, 3) -assert type(output) == list -assert len(output) == 3 -assert all(tensor.allclose(torch.ones(5) * 2) for tensor in output) -print('success') diff --git a/test/custom_operator/test_custom_ops.cpp b/test/custom_operator/test_custom_ops.cpp new file mode 100644 index 00000000000000..7389e17ad4f991 --- /dev/null +++ b/test/custom_operator/test_custom_ops.cpp @@ -0,0 +1,52 @@ +#include + +#include "op.h" + +#include +#include +#include + +#include + +void get_operator_from_registry_and_execute() { + auto& ops = torch::jit::getAllOperatorsFor( + torch::jit::Symbol::fromQualString("custom::op")); + assert(ops.size() == 1); + + auto& op = ops.front(); + assert(op->schema().name == "custom::op"); + + torch::jit::Stack stack; + torch::jit::push(stack, torch::ones(5), 2.0, 3); + op->getOperation()(stack); + std::vector output; + torch::jit::pop(stack, output); + + assert(output.size() == 3); + for (const auto& tensor : output) { + assert(tensor.allclose(torch::ones(5) * 2)); + } +} + +void load_serialized_module_with_custom_op_and_execute( + const char* path_to_exported_script_module) { + std::shared_ptr module = + torch::jit::load(path_to_exported_script_module); + assert(module != nullptr); + + std::vector inputs; + inputs.push_back(torch::ones(5)); + auto output = module->forward(inputs).toTensor(); + + assert(output.allclose(torch::ones(5) + 1)); +} + +int main(int argc, const char* argv[]) { + if (argc != 2) { + std::cerr << "usage: test_custom_ops \n"; + return -1; + } + get_operator_from_registry_and_execute(); + load_serialized_module_with_custom_op_and_execute(argv[1]); + std::cout << "ok\n"; +} diff --git a/test/custom_operator/test_custom_ops.py b/test/custom_operator/test_custom_ops.py new file mode 100644 index 00000000000000..f7e6b2fa7b9e5f --- /dev/null +++ b/test/custom_operator/test_custom_ops.py @@ -0,0 +1,54 @@ +import 
argparse +import os.path +import tempfile +import unittest + +import torch + +from model import Model + + +class TestCustomOperators(unittest.TestCase): + def setUp(self): + self.library_path = os.path.abspath('build/libcustom_ops.so') + torch.ops.load_library(self.library_path) + + def test_custom_library_is_loaded(self): + self.assertIn(self.library_path, torch.ops.loaded_libraries) + + def test_calling_custom_op(self): + output = torch.ops.custom.op(torch.ones(5), 2.0, 3) + self.assertEqual(type(output), list) + self.assertEqual(len(output), 3) + for tensor in output: + self.assertTrue(tensor.allclose(torch.ones(5) * 2)) + + output = torch.ops.custom.op_with_defaults(torch.ones(5)) + self.assertEqual(type(output), list) + self.assertEqual(len(output), 1) + self.assertTrue(output[0].allclose(torch.ones(5))) + + def test_calling_custom_op_inside_script_module(self): + model = Model() + output = model.forward(torch.ones(5)) + self.assertTrue(output.allclose(torch.ones(5) + 1)) + + def test_saving_and_loading_script_module_with_custom_op(self): + model = Model() + # Ideally we would like to not have to manually delete the file, but NamedTemporaryFile + # opens the file, and it cannot be opened multiple times in Windows. To support Windows, + # close the file after creation and try to remove it manually. + file = tempfile.NamedTemporaryFile(delete=False) + try: + file.close() + model.save(file.name) + loaded = torch.jit.load(file.name) + finally: + os.unlink(file.name) + + output = loaded.forward(torch.ones(5)) + self.assertTrue(output.allclose(torch.ones(5) + 1)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/expect/TestCustomOperators.test_script_graph_contains_custom_op.expect b/test/expect/TestCustomOperators.test_script_graph_contains_custom_op.expect index 04af7666fdb8d7..5ffa0bf99efe47 100644 --- a/test/expect/TestCustomOperators.test_script_graph_contains_custom_op.expect +++ b/test/expect/TestCustomOperators.test_script_graph_contains_custom_op.expect @@ -1,4 +1,4 @@ graph(%x : Dynamic) { - %1 : Dynamic = ^aten::relu()(%x) + %1 : Dynamic = aten::relu(%x) return (%1); } diff --git a/torch/csrc/jit/script/compiler.cpp b/torch/csrc/jit/script/compiler.cpp index 583d0155023e19..ebcad28fd285da 100644 --- a/torch/csrc/jit/script/compiler.cpp +++ b/torch/csrc/jit/script/compiler.cpp @@ -609,10 +609,9 @@ static Value* identity(const SourceRange& range, Value* v) { return v; } - std::shared_ptr BuiltinFunction::call( SourceRange loc, - Method & m, + Method& m, at::ArrayRef inputs_, at::ArrayRef attributes, size_t n_binders) { @@ -620,7 +619,8 @@ std::shared_ptr BuiltinFunction::call( if (value) inputs.push_back(*value); inputs.insert(inputs.end(), inputs_.begin(), inputs_.end()); - return std::make_shared(emitBuiltinCall(loc, *m.graph(), Symbol::aten(name), inputs, attributes, true)); + return std::make_shared( + emitBuiltinCall(loc, *m.graph(), symbol, inputs, attributes, true)); } struct to_ir { @@ -1511,9 +1511,12 @@ static const std::unordered_map &builtin_cast_methods( std::shared_ptr SimpleValue::attr(SourceRange loc, Method & m, const std::string& field) { // Allow method-style casts on Tensor types. e.g. 
x.int() if (value->type()->isSubtypeOf(DynamicType::get()) && builtin_cast_methods().count(field)) { - return std::make_shared(builtin_cast_methods().at(field), NamedValue(loc, "self", value)); + return std::make_shared( + Symbol::aten(builtin_cast_methods().at(field)), + NamedValue(loc, "self", value)); } - return std::make_shared(field, NamedValue(loc, "self", value)); + return std::make_shared( + Symbol::aten(field), NamedValue(loc, "self", value)); } std::vector inlineCallTo(Graph& g, Graph& callee, ArrayRef inputs) { diff --git a/torch/csrc/jit/script/compiler.h b/torch/csrc/jit/script/compiler.h index aa768b4ec053f3..36f43568ae1244 100644 --- a/torch/csrc/jit/script/compiler.h +++ b/torch/csrc/jit/script/compiler.h @@ -102,22 +102,24 @@ struct TORCH_API SimpleValue : public SugaredValue { }; struct TORCH_API BuiltinFunction : public SugaredValue { - BuiltinFunction(const std::string& name, at::optional value) - : name(name), value(std::move(value)) {} - std::string name; + BuiltinFunction(Symbol symbol, at::optional value) + : symbol(std::move(symbol)), value(std::move(value)) {} + + // The symbol of the function (e.g. `aten::relu`). + Symbol symbol; // if this is method, then this is the self argument. at::optional value; - virtual std::string kind() const override { + std::string kind() const override { return "builtin"; } - virtual std::shared_ptr call( - SourceRange loc, - Method & m, - at::ArrayRef attributes, - at::ArrayRef inputs, - size_t n_binders) override; + std::shared_ptr call( + SourceRange loc, + Method& m, + at::ArrayRef attributes, + at::ArrayRef inputs, + size_t n_binders) override; }; using Resolver = std::function attr( + SourceRange loc, + Method& m, + const std::string& field) override { + py::object member = getattr(loc, field); + const auto op_namespace = py::cast(self.attr("name")); + // The symbol name is the op namespace + the op (function) name, which is + // being accessed as the `field` here. + auto symbol = Symbol::fromQualString(op_namespace + "::" + field); + return std::make_shared( + std::move(symbol), at::nullopt); + } +}; + +// The `torch.ops` value. All it does is create `CustomOpNamespaceValue` +// objects when accessing attributes under it, e.g. `torch.ops.my_namespace`. 
+struct VISIBILITY_HIDDEN CustomOpsValue : public PythonValue { + explicit CustomOpsValue(py::object obj) : PythonValue(std::move(obj)) {} + + std::shared_ptr attr( + SourceRange loc, + Method& m, + const std::string& field) override { + py::object member = getattr(loc, field); + return std::make_shared(member); + } +}; + struct VISIBILITY_HIDDEN PythonModuleValue : public PythonValue { explicit PythonModuleValue(py::object mod) : PythonValue(mod) {} - std::shared_ptr attr(SourceRange loc, Method & m, const std::string& field) override { - py::object member = getattr(loc, field); - return toSugaredValue(member, m, loc); + std::shared_ptr attr( + SourceRange loc, + Method& m, + const std::string& field) override { + py::object member = getattr(loc, field); + return toSugaredValue(member, m, loc); } - private: }; struct VISIBILITY_HIDDEN BuiltinPythonModuleValue : public PythonModuleValue { @@ -182,7 +220,10 @@ struct VISIBILITY_HIDDEN BuiltinPythonModuleValue : public PythonModuleValue { // on the torch builtin modules py::object member = getattr(loc, field); if (py::isinstance(member)) { - return std::make_shared(field, at::nullopt); + return std::make_shared( + Symbol::aten(field), at::nullopt); + } else if (field == "ops") { + return std::make_shared(member); } return toSugaredValue(member, m, loc, /*is_constant =*/true); } diff --git a/torch/csrc/jit/script/module.h b/torch/csrc/jit/script/module.h index 3ed1a3da2b2b23..2c5d4bbc6618de 100644 --- a/torch/csrc/jit/script/module.h +++ b/torch/csrc/jit/script/module.h @@ -59,6 +59,15 @@ struct Method { } get_executor().run(stack); } + + IValue operator()(std::vector stack) { + run(stack); + if (stack.size() != 1) { + return Tuple::create(std::move(stack)); + } + return stack.front(); + } + std::shared_ptr graph_for(const Stack& inputs) { return get_executor().graphFor(inputs); } @@ -239,6 +248,10 @@ struct Module { optimize = o; } + IValue forward(std::vector inputs) { + return get_method("forward")(inputs); + } + void register_parameter(const std::string & name, autograd::Variable v, bool is_buffer) { if(auto p = parameters.find(name)){ *p->slot() = v; diff --git a/torch/op.h b/torch/op.h index 88e9dc6610121c..bf6f2aa080c133 100644 --- a/torch/op.h +++ b/torch/op.h @@ -2,6 +2,7 @@ #include #include +#include #include From 44f996f82c8487b505ddc7d886024797e17a8d18 Mon Sep 17 00:00:00 2001 From: tomdz Date: Tue, 21 Aug 2018 18:18:08 -0700 Subject: [PATCH 11/94] Py3 fixes for layer_model_helper.py (#10525) Summary: Fixes `__getattr__` to adhere to its Python API contract, and wraps `range()` call in a list since it does not return one anymore in Python 3. 
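For context, a standalone sketch of the two pitfalls (illustrative; `Helper` is a made-up class, not the layer code itself):
```
# __getattr__ must raise AttributeError, not ValueError, so that
# hasattr() and getattr(obj, name, default) behave as documented:
class Helper(object):
    def __getattr__(self, name):
        raise AttributeError(
            "Trying to create non-registered layer: {}".format(name))

print(hasattr(Helper(), 'missing'))  # False; a ValueError would propagate

# In Python 3, range() is a lazy sequence, not a list, so it never
# compares equal to a list with the same elements:
print(sorted([2, 0, 1]) == range(3))        # False on Python 3
print(sorted([2, 0, 1]) == list(range(3)))  # True
```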
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10525 Reviewed By: ezyang Differential Revision: D9441360 Pulled By: tomdz fbshipit-source-id: d489c0e7cefecc4699ca866fd55ddbfa629688d4 --- caffe2/python/layer_model_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/caffe2/python/layer_model_helper.py b/caffe2/python/layer_model_helper.py index 750f6aaf98fa3d..a085e3453ec7ca 100644 --- a/caffe2/python/layer_model_helper.py +++ b/caffe2/python/layer_model_helper.py @@ -529,7 +529,7 @@ def apply_operator(net, in_record, out_record, **kwargs): return self.add_layer(new_layer) return wrapper else: - raise ValueError( + raise AttributeError( "Trying to create non-registered layer: {}".format(layer)) @property @@ -651,5 +651,5 @@ def breakdown_map(self, breakdown_map): # and change the assertion accordingly assert isinstance(breakdown_map, dict) assert all(isinstance(k, six.string_types) for k in breakdown_map) - assert sorted(list(breakdown_map.values())) == range(len(breakdown_map)) + assert sorted(breakdown_map.values()) == list(range(len(breakdown_map))) self._breakdown_map = breakdown_map From 6325e5aa480f19080581e1397669d5731db4a2ad Mon Sep 17 00:00:00 2001 From: Aaron Jaech Date: Tue, 21 Aug 2018 18:29:17 -0700 Subject: [PATCH 12/94] fix typo in error message (#9827) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/9827 changed unitilized to uninitialized Reviewed By: jerryzh168 Differential Revision: D8995509 fbshipit-source-id: 94518d5542a7bff49fcb9a4505c0c7a959746f78 --- caffe2/python/pybind_state.h | 2 +- caffe2/python/pybind_state_mkl.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/caffe2/python/pybind_state.h b/caffe2/python/pybind_state.h index ed15bb17685a14..cb4d32700361b8 100644 --- a/caffe2/python/pybind_state.h +++ b/caffe2/python/pybind_state.h @@ -105,7 +105,7 @@ class TensorFetcher : public BlobFetcherBase { FetchedBlob FetchTensor(const Tensor& tensor, bool force_copy) { FetchedBlob result; - CAFFE_ENFORCE_GE(tensor.size(), 0, "Trying to fetch unitilized tensor"); + CAFFE_ENFORCE_GE(tensor.size(), 0, "Trying to fetch uninitialized tensor"); const int numpy_type = CaffeToNumpyType(tensor.meta()); CAFFE_ENFORCE( numpy_type != -1, diff --git a/caffe2/python/pybind_state_mkl.cc b/caffe2/python/pybind_state_mkl.cc index ea111139faf4f5..dd192c325f7de2 100644 --- a/caffe2/python/pybind_state_mkl.cc +++ b/caffe2/python/pybind_state_mkl.cc @@ -24,7 +24,7 @@ class MKLMemoryFetcher : public BlobFetcherBase { public: pybind11::object Fetch(const Blob& blob) override { const MKLMemory& src = blob.Get>(); - CAFFE_ENFORCE(src.buffer(), "Trying to fetch unitilized tensor"); + CAFFE_ENFORCE(src.buffer(), "Trying to fetch uninitialized tensor"); const int numpy_type = CaffeToNumpyType(TypeMeta::Make()); CAFFE_ENFORCE( numpy_type != -1, From ddf187c198f8e249e78351ba94e773bf9d21de3a Mon Sep 17 00:00:00 2001 From: James Reed Date: Tue, 21 Aug 2018 18:36:41 -0700 Subject: [PATCH 13/94] Dont assume serialized integral types were widened to int32 in raw_data (#10718) Summary: zdevito et al came to the conclusion that the ONNX spec does not mandate the widening conversion of integral types when serializing tensor data into raw_data, as opposed to serializing the data into int32_data. PyTorch recently made this change in the export code, which caused import in caffe2 to break because it did not match semantics. 
This fixes that Pull Request resolved: https://github.com/pytorch/pytorch/pull/10718 Differential Revision: D9423712 Pulled By: jamesr66a fbshipit-source-id: 479fbae67b028bf4f9c1ca1812c2c7b0c6cccd12 --- caffe2/onnx/backend.cc | 104 +++++++++++++++++--------- test/onnx/test_pytorch_onnx_caffe2.py | 22 ++++++ 2 files changed, 89 insertions(+), 37 deletions(-) diff --git a/caffe2/onnx/backend.cc b/caffe2/onnx/backend.cc index 64642ca7ea41e5..158926d77167fe 100644 --- a/caffe2/onnx/backend.cc +++ b/caffe2/onnx/backend.cc @@ -1404,6 +1404,59 @@ Caffe2BackendRep* Caffe2Backend::Prepare( return rep; } +template +void ConvertIntegralValueToCaffe2(caffe2::OperatorDef* c2_op, + caffe2::Argument* c2_values, + const TensorProto& onnx_tensor) { + c2_op->set_type( + onnx_tensor.data_type() == TensorProto::BOOL ? "GivenTensorBoolFill" + : "GivenTensorIntFill"); + ::google::protobuf::RepeatedField tmp; + const ::google::protobuf::RepeatedField* src = + &tmp; + bool converted = TryConvertingTensorRawValues(onnx_tensor, &tmp); + if (converted) { + for (const auto i : *src) { + c2_values->add_ints(i); + } + } else { + const ::google::protobuf::RepeatedField<::google::protobuf::int32> *int32_src = \ + &onnx_tensor.int32_data(); + for (const auto i : *int32_src) { + c2_values->add_ints(i); + } + } +} + +template <> +void ConvertIntegralValueToCaffe2<::google::protobuf::int64>(caffe2::OperatorDef* c2_op, + caffe2::Argument* c2_values, + const TensorProto& onnx_tensor) { + c2_op->set_type("GivenTensorInt64Fill"); + auto* ints = c2_values->mutable_ints(); + if (!TryConvertingTensorRawValues<::google::protobuf::int64>( + onnx_tensor, ints)) { + ints->CopyFrom(onnx_tensor.int64_data()); + } +} + +template <> +void ConvertIntegralValueToCaffe2<::google::protobuf::uint64>(caffe2::OperatorDef* c2_op, + caffe2::Argument* c2_values, + const TensorProto& onnx_tensor) { + c2_op->set_type("GivenTensorInt64Fill"); + ::google::protobuf::RepeatedField<::google::protobuf::uint64> tmp; + const ::google::protobuf::RepeatedField<::google::protobuf::uint64>* src = + &tmp; + if (!TryConvertingTensorRawValues<::google::protobuf::uint64>( + onnx_tensor, &tmp)) { + src = &onnx_tensor.uint64_data(); + } + for (const auto i : *src) { + c2_values->add_ints(i); + } +} + void Caffe2Backend::BuildTensorFillingOp( caffe2::OperatorDef* c2_op, const TensorProto& onnx_tensor, @@ -1435,44 +1488,21 @@ void Caffe2Backend::BuildTensorFillingOp( c2_values->add_floats(i); } } else if (onnx_tensor.data_type() == TensorProto::INT64) { - c2_op->set_type("GivenTensorInt64Fill"); - auto* ints = c2_values->mutable_ints(); - if (!TryConvertingTensorRawValues<::google::protobuf::int64>( - onnx_tensor, ints)) { - ints->CopyFrom(onnx_tensor.int64_data()); - } + ConvertIntegralValueToCaffe2<::google::protobuf::int64>(c2_op, c2_values, onnx_tensor); } else if (onnx_tensor.data_type() == TensorProto::UINT32) { - c2_op->set_type("GivenTensorInt64Fill"); - ::google::protobuf::RepeatedField<::google::protobuf::uint64> tmp; - const ::google::protobuf::RepeatedField<::google::protobuf::uint64>* src = - &tmp; - if (!TryConvertingTensorRawValues<::google::protobuf::uint64>( - onnx_tensor, &tmp)) { - src = &onnx_tensor.uint64_data(); - } - for (const auto i : *src) { - c2_values->add_ints(i); - } - } else if ( - onnx_tensor.data_type() == TensorProto::BOOL || - onnx_tensor.data_type() == TensorProto::UINT8 || - onnx_tensor.data_type() == TensorProto::INT8 || - onnx_tensor.data_type() == TensorProto::UINT16 || - onnx_tensor.data_type() == TensorProto::INT16 || - 
onnx_tensor.data_type() == TensorProto::INT32) { - c2_op->set_type( - onnx_tensor.data_type() == TensorProto::BOOL ? "GivenTensorBoolFill" - : "GivenTensorIntFill"); - ::google::protobuf::RepeatedField<::google::protobuf::int32> tmp; - const ::google::protobuf::RepeatedField<::google::protobuf::int32>* src = - &tmp; - if (!TryConvertingTensorRawValues<::google::protobuf::int32>( - onnx_tensor, &tmp)) { - src = &onnx_tensor.int32_data(); - } - for (const auto i : *src) { - c2_values->add_ints(i); - } + ConvertIntegralValueToCaffe2<::google::protobuf::uint64>(c2_op, c2_values, onnx_tensor); + } else if (onnx_tensor.data_type() == TensorProto::BOOL) { + ConvertIntegralValueToCaffe2<::google::protobuf::int8>(c2_op, c2_values, onnx_tensor); + } else if (onnx_tensor.data_type() == TensorProto::UINT8) { + ConvertIntegralValueToCaffe2<::google::protobuf::uint8>(c2_op, c2_values, onnx_tensor); + } else if (onnx_tensor.data_type() == TensorProto::INT8) { + ConvertIntegralValueToCaffe2<::google::protobuf::int8>(c2_op, c2_values, onnx_tensor); + } else if (onnx_tensor.data_type() == TensorProto::UINT16) { + ConvertIntegralValueToCaffe2<::google::protobuf::uint16>(c2_op, c2_values, onnx_tensor); + } else if (onnx_tensor.data_type() == TensorProto::INT16) { + ConvertIntegralValueToCaffe2<::google::protobuf::int16>(c2_op, c2_values, onnx_tensor); + } else if (onnx_tensor.data_type() == TensorProto::INT32) { + ConvertIntegralValueToCaffe2<::google::protobuf::int32>(c2_op, c2_values, onnx_tensor); } else if (onnx_tensor.data_type() == TensorProto::STRING) { c2_op->set_type("GivenTensorStringFill"); auto* strings = c2_values->mutable_strings(); diff --git a/test/onnx/test_pytorch_onnx_caffe2.py b/test/onnx/test_pytorch_onnx_caffe2.py index bf2cf4ebb719c3..220681067fd710 100644 --- a/test/onnx/test_pytorch_onnx_caffe2.py +++ b/test/onnx/test_pytorch_onnx_caffe2.py @@ -828,6 +828,28 @@ def forward(self, x, y): y = Variable(torch.randn(1, 5, 1)) self.run_model_test(MyModel(), train=False, input=(x, y), batch_size=BATCH_SIZE, use_gpu=False) + def test_int8_export(self): + class MyModel(torch.nn.Module): + def __init__(self): + super(MyModel, self).__init__() + self.param = torch.ByteTensor(3, 4).random_() + + def forward(self, x): + return x * self.param.float() + + import io + f = io.BytesIO() + from torch.onnx import ExportTypes + torch.onnx._export(MyModel(), (torch.rand(3, 4),), f, verbose=True, export_type=ExportTypes.ZIP_ARCHIVE) + + X = np.random.rand(3, 4).astype(np.float32) + + f.seek(0) + import caffe2.python.onnx.backend as c2 + model = c2.prepare_zip_archive(f) + model.run(X) + + # a bit of metaprogramming to set up all the rnn tests From a2ca634e04b17caf092b7982e8662a66f0e7d230 Mon Sep 17 00:00:00 2001 From: Bram Wasti Date: Tue, 21 Aug 2018 18:55:40 -0700 Subject: [PATCH 14/94] Add enforce back to converter.cc Summary: hotfix for B*8 Differential Revision: D9444060 fbshipit-source-id: 368f8463e684c39ec0ac18bcb11a7b6132d9f874 --- caffe2/opt/converter.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/caffe2/opt/converter.cc b/caffe2/opt/converter.cc index 10f7332ff1a50a..bcc6a6acc05f11 100644 --- a/caffe2/opt/converter.cc +++ b/caffe2/opt/converter.cc @@ -297,6 +297,11 @@ repr::NNModule convertToNNModule(caffe2::NetDef &net, std::unordered_mappushInstructionNode(opNode); } + CAFFE_ENFORCE( + externalInputNames.size() == 0, + "Attempting to convert an ill-formed network: \ + external_input contains unused blobs"); + for (const auto& outputName : net.external_output()) { CAFFE_ENFORCE( 
blobMap.count(outputName), "NetDef has ill-formed external_output"); From 1068ba667c9bdbf59059a16f7145006c4498522d Mon Sep 17 00:00:00 2001 From: Peter Goldsborough Date: Tue, 21 Aug 2018 19:26:34 -0700 Subject: [PATCH 15/94] Create at::linear (#10755) Summary: The optimized code for `linear()` which uses `addmm` when a bias is given was duplicated three times in the ATen and the C++ API. Let's just have `at::linear` and use that everywhere. apaszke ezyang (who mentioned this in #10481) Pull Request resolved: https://github.com/pytorch/pytorch/pull/10755 Differential Revision: D9443881 Pulled By: goldsborough fbshipit-source-id: a64862d1649b5961043d58401625ec267d97d9f3 --- aten/src/ATen/native/Linear.cpp | 19 ++++++++++++- aten/src/ATen/native/RNN.cpp | 21 +++----------- aten/src/ATen/native/native_functions.yaml | 3 ++ tools/autograd/gen_python_functions.py | 2 +- torch/csrc/api/src/nn/modules/linear.cpp | 15 ++-------- torch/csrc/api/src/nn/modules/rnn.cpp | 32 ++++++---------------- 6 files changed, 37 insertions(+), 55 deletions(-) diff --git a/aten/src/ATen/native/Linear.cpp b/aten/src/ATen/native/Linear.cpp index c82bf8ba0ae043..5137baa8683de8 100644 --- a/aten/src/ATen/native/Linear.cpp +++ b/aten/src/ATen/native/Linear.cpp @@ -1,10 +1,27 @@ #include "ATen/ATen.h" #include "ATen/NativeFunctions.h" #include "ATen/WrapDimUtilsMulti.h" + +#include #include +#include +#include +#include +#include namespace at { namespace native { +Tensor linear(const Tensor& input, const Tensor& weight, const Tensor& bias) { + if (input.dim() == 2 && bias.defined()) { + // Fused op is marginally faster. + return at::addmm(bias, input, weight.t()); + } + auto output = at::matmul(input, weight.t()); + if (bias.defined()) { + output.add_(bias); + } + return output; +} // sumproduct_pair computes `(left*right).sum(sumdims)` by means of permutation and // batch matrix multiplication @@ -299,7 +316,7 @@ Tensor einsum(std::string eqn, TensorList tensors) { } } preprocessed_op = preprocessed_op.permute(permutation); - // finally, we insert dimensions for idxes not in the operand + // finally, we insert dimensions for idxes not in the operand for (size_t dim = 0; dim < idx_to_dim.size(); dim++) { if (idx_to_dim[dim] == -1) { preprocessed_op = preprocessed_op.unsqueeze(dim); diff --git a/aten/src/ATen/native/RNN.cpp b/aten/src/ATen/native/RNN.cpp index b93b7c0d2627f7..0327ff3bf20ef5 100644 --- a/aten/src/ATen/native/RNN.cpp +++ b/aten/src/ATen/native/RNN.cpp @@ -134,19 +134,6 @@ tpair_of hidden_slice(const tpair_of& t, int64_t start, int64_t // It's a struct only because functional programming in C++ is a pain, and it's easier // to pass around "vtable pointers" than actual function pointers. 
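// Aside: a usage sketch of the consolidated function whose duplicated local
// definitions are deleted below (shapes illustrative; the signature is the
// one registered in native_functions.yaml further down):
//
//   auto input  = at::randn({8, 16});   // batch of 8, in_features = 16
//   auto weight = at::randn({32, 16});  // out_features = 32
//   auto bias   = at::randn({32});
//   auto output = at::linear(input, weight, bias);  // shape {8, 32}
//
// With a 2-D input and a defined bias this lowers to a single fused
// at::addmm(bias, input, weight.t()) call; otherwise it falls back to
// matmul plus an in-place bias add.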
-Tensor linear(const Tensor& input, const Tensor& weight, /* optional */ const Tensor& bias={}) { - if (input.dim() == 2 && bias.defined()) { - // fused op is marginally faster - return at::addmm(bias, input, weight.t()); - } - - auto output = at::matmul(input, weight.t()); - if (bias.defined()) { - output.add_(bias); - } - return output; -} - template struct Cell { using hidden_type = hidden_type_tmpl; @@ -157,7 +144,7 @@ struct Cell { template struct SimpleCell : Cell { hidden_type operator()(const Tensor& input, const hidden_type& hidden, const CellParams& params) const override { - return nonlinearity{}(linear(input, params.w_ih, params.b_ih) + linear(hidden, params.w_hh, params.b_hh)); + return nonlinearity{}(at::linear(input, params.w_ih, params.b_ih) + at::linear(hidden, params.w_hh, params.b_hh)); } }; @@ -175,7 +162,7 @@ struct LSTMCell : Cell> { return std::make_tuple(std::get<0>(result), std::get<1>(result)); } - auto gates = linear(input, params.w_ih, params.b_ih) + linear(hx, params.w_hh, params.b_hh); + auto gates = at::linear(input, params.w_ih, params.b_ih) + at::linear(hx, params.w_hh, params.b_hh); auto chunked_gates = gates.chunk(4, 1); auto ingate = chunked_gates[0].sigmoid(); @@ -200,8 +187,8 @@ struct GRUCell : Cell { return std::get<0>(result); } - auto igates = linear(input, params.w_ih, params.b_ih); - auto hgates = linear(hidden, params.w_hh, params.b_hh); + auto igates = at::linear(input, params.w_ih, params.b_ih); + auto hgates = at::linear(hidden, params.w_hh, params.b_hh); auto chunked_igates = igates.chunk(3, 1); auto chunked_hgates = hgates.chunk(3, 1); diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index d7e0ef45d4314b..d8d4e177231d98 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -928,6 +928,9 @@ - func: layer_norm(Tensor input, IntList normalized_shape, Tensor? weight={}, Tensor? bias={}, double eps=1e-5, bool cudnn_enable=True) -> Tensor variants: function +- func: linear(Tensor input, Tensor weight, Tensor bias={}) -> Tensor + variants: function + - func: linspace(Scalar start, Scalar end, TensorOptions options={}) -> Tensor variants: function diff --git a/tools/autograd/gen_python_functions.py b/tools/autograd/gen_python_functions.py index fc1503cbca7a7b..e1e8f795580a2b 100644 --- a/tools/autograd/gen_python_functions.py +++ b/tools/autograd/gen_python_functions.py @@ -28,7 +28,7 @@ '_cumsum.*', '_cumprod.*', '_sum.*', '_prod.*', '_th_.*', 'arange.*', 'range.*', '_gesv.*', '_getri.*', 'slice', '_local_scalar', '_local_scalar_dense', - 'max_pool1d', 'max_pool2d', 'max_pool3d' + 'max_pool1d', 'max_pool2d', 'max_pool3d', 'linear' ] # These function signatures are not exposed to Python. 
Note that this signature diff --git a/torch/csrc/api/src/nn/modules/linear.cpp b/torch/csrc/api/src/nn/modules/linear.cpp index f375c6ab7f1798..d95d6b3e90bf0d 100644 --- a/torch/csrc/api/src/nn/modules/linear.cpp +++ b/torch/csrc/api/src/nn/modules/linear.cpp @@ -22,24 +22,15 @@ void LinearImpl::reset() { } const auto stdv = 1.0 / std::sqrt(weight.size(1)); - NoGradGuard no_grad;; + NoGradGuard no_grad; for (auto& p : parameters()) { p->uniform_(-stdv, stdv); } } Tensor LinearImpl::forward(Tensor input) { - if (input.ndimension() == 2 && options.with_bias_) { - // Fused op is marginally faster - AT_ASSERT(input.size(1) == weight.size(1)); - return {torch::addmm(bias, input, weight.t())}; - } - - auto output = input.matmul(weight.t()); - if (options.with_bias_) { - output += bias; - } - return output; + AT_ASSERT(!options.with_bias_ || bias.defined()); + return torch::linear(input, weight, bias); } } // namespace nn } // namespace torch diff --git a/torch/csrc/api/src/nn/modules/rnn.cpp b/torch/csrc/api/src/nn/modules/rnn.cpp index 3bc1ae5fa6faf4..436447134ce9fd 100644 --- a/torch/csrc/api/src/nn/modules/rnn.cpp +++ b/torch/csrc/api/src/nn/modules/rnn.cpp @@ -20,22 +20,6 @@ namespace torch { namespace nn { -namespace { -Tensor linear(Tensor x, Tensor w, Tensor b) { - if (x.ndimension() == 2 && b.defined()) { - // Fused op is marginally faster - assert(x.size(1) == w.size(1)); - return torch::addmm(b, x, w.t()); - } - - auto output = x.matmul(w.t()); - if (b.defined()) { - output += b; - } - return output; -} -} // namespace - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RNNOptionsBase ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ namespace detail { @@ -90,7 +74,7 @@ void RNNImplBase::reset() { } const auto stdv = 1.0 / std::sqrt(options.hidden_size_); - NoGradGuard no_grad;; + NoGradGuard no_grad; for (auto& p : this->parameters()) { p->uniform_(-stdv, stdv); } @@ -198,7 +182,7 @@ void RNNImplBase::flatten_parameters_for_cudnn() { } { - NoGradGuard no_grad;; + NoGradGuard no_grad; flat_weights_ = torch::_cudnn_rnn_flatten_weight( flat_weights(), /*weight_stride=*/options.with_bias_ ? 4 : 2, @@ -337,8 +321,8 @@ Tensor RNNImpl::cell_forward(Tensor input, Tensor state, int64_t layer) { ? state : torch::zeros({input.size(0), options.hidden_size_}, input.options()); - auto h = linear(input, w_ih[layer], b_ih[layer]) + - linear(hx, w_hh[layer], b_hh[layer]); + auto h = torch::linear(input, w_ih[layer], b_ih[layer]) + + torch::linear(hx, w_hh[layer], b_hh[layer]); return torch::stack(activation_function_(h)); } @@ -359,8 +343,8 @@ Tensor LSTMImpl::cell_forward(Tensor input, Tensor state, int64_t layer) { auto hx = hid[0]; auto cx = hid[1]; - auto gates = linear(input, w_ih[layer], b_ih[layer]) + - linear(hx, w_hh[layer], b_hh[layer]); + auto gates = torch::linear(input, w_ih[layer], b_ih[layer]) + + torch::linear(hx, w_hh[layer], b_hh[layer]); auto chunked = gates.chunk(4, 1); auto in_gate = chunked[0].sigmoid(); @@ -387,8 +371,8 @@ Tensor GRUImpl::cell_forward(Tensor input, Tensor state, int64_t layer) { ? 
state : torch::zeros({input.size(0), options.hidden_size_}, input.options()); - auto gi = linear(input, w_ih[layer], b_ih[layer]); - auto gh = linear(input, w_hh[layer], b_hh[layer]); + auto gi = torch::linear(input, w_ih[layer], b_ih[layer]); + auto gh = torch::linear(input, w_hh[layer], b_hh[layer]); auto gic = gi.chunk(3, 1); auto ghc = gh.chunk(3, 1); From e5e2514f4e0c1ebde8fcd84e67a94e680d953153 Mon Sep 17 00:00:00 2001 From: Yiming Wu Date: Tue, 21 Aug 2018 19:32:47 -0700 Subject: [PATCH 16/94] fix debug_info arg in createOperator and improve reroute_tensor (#10736) Summary: -Fixed C2 core.CreateOperator debug info assignment -Improving core.Net.reroute_tensor Pull Request resolved: https://github.com/pytorch/pytorch/pull/10736 Differential Revision: D9426659 Pulled By: harouwu fbshipit-source-id: 90caf848c88854e17e568d5f6910dc6c81fd000a --- caffe2/python/core.py | 68 ++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/caffe2/python/core.py b/caffe2/python/core.py index c133dff17a3c44..bb070866689412 100644 --- a/caffe2/python/core.py +++ b/caffe2/python/core.py @@ -335,6 +335,7 @@ def CreateOperator( device_option=None, arg=None, engine=None, + debug_info=None, **kwargs ): """A function wrapper that allows one to create operators based on the @@ -367,6 +368,8 @@ def CreateOperator( operator.device_option.CopyFrom(scope.CurrentDeviceScope()) if engine is not None: operator.engine = engine + if debug_info is not None: + operator.debug_info = debug_info # random seed is defined in the device option, so we need to do special # care. @@ -1777,6 +1780,17 @@ def Proto(self): self._InvalidateLookupTables() return self._net + def insert_op_at_idx(self, op, op_idx): + r""" inserting operator at index. Will update external blob list. + """ + assert op_idx >= 0 + temp_ops = self.Proto().op[op_idx:] + del self.Proto().op[op_idx:] + self.Proto().op.extend([op]) + self.Proto().op.extend(temp_ops) + self.external_outputs.extend(op.output) + self.external_inputs.extend(op.input) + def reroute_tensor(self, tensor, new_producer, can_modify=None): r""" reroute tensor to new_producer. And feed new tensor to consumers and interseciton with can_modify if provided. @@ -1791,31 +1805,30 @@ def reroute_tensor(self, tensor, new_producer, can_modify=None): Note: assume no inplace blob in net """ - if tensor in self.external_inputs: - op_idx = -1 - else: - assert tensor in new_producer.input, \ - "new producer {} is not taking in {}".format(new_producer.type, tensor) - # assuming that the net has no inplace blob - # TODO: add ssa info in tensor - op_idx = -2 - for index, op in enumerate(self.Proto().op): - if_found = False - for o in op.output: - if o == tensor: - # tensor should not be modified yet. - if_found = True - op_idx = index + def _find_tensor_input_op(tensor): + if tensor in self.external_inputs: + op_idx = -1 + else: + assert tensor in new_producer.input, \ + "new producer {} is not taking in {}".format( + new_producer.type, tensor) + # assuming that the net has no inplace blob + op_idx = -2 + for index, op in enumerate(self.Proto().op): + if_found = False + for o in op.output: + if o == tensor: + # tensor should not be modified yet. 
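# To see why the rewritten reroute_tensor computes a max() over all of
# new_producer's inputs (a few lines below): a producer consuming C, first
# written by op 1, and D, first written by op 2, must be injected after
# *both* exist. A toy version of that computation:
#
#   producer_idx = {"C": 1, "D": 2}
#   insert_at = max(producer_idx[t] for t in ("C", "D")) + 1  # == 3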
+ if_found = True + op_idx = index + break + if if_found: break - if if_found: - break - - assert op_idx >= -1 - temp_ops = self.Proto().op[op_idx + 1:] - del self.Proto().op[op_idx + 1:] - self.Proto().op.extend([new_producer]) - self.Proto().op.extend(temp_ops) + return op_idx + # the place to inject new_producer is not just determined by tensor + op_idx = max(_find_tensor_input_op(t) for t in new_producer.input) + self.insert_op_at_idx(new_producer, op_idx + 1) new_tensor = new_producer.output[0] # modify external outputs if tensor in self.external_outputs: @@ -1825,10 +1838,11 @@ def reroute_tensor(self, tensor, new_producer, can_modify=None): # modify consumers reroute_cnt = 0 - for op in self.Proto().op: - if op in can_modify: # this is not necessarily true - remap_input(op, {tensor: new_tensor}) - reroute_cnt = reroute_cnt + 1 + if can_modify: + for op in self.Proto().op: + if op in can_modify: # this is not necessarily true + remap_input(op, {tensor: new_tensor}) + reroute_cnt = reroute_cnt + 1 return reroute_cnt def PopulateProtoWithFileName(self): From abb209ef253f47db0880f4482766d38c2c59212b Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 21 Aug 2018 21:03:48 -0700 Subject: [PATCH 17/94] Fixes *fft docs (#10760) Summary: cc cranmer fixes #10751 Pull Request resolved: https://github.com/pytorch/pytorch/pull/10760 Differential Revision: D9444473 Pulled By: SsnL fbshipit-source-id: a4036773a93981801c1283d69f86e30cb0fe3d6d --- torch/_torch_docs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py index 161c5917879c19..42cb4806c9fa23 100644 --- a/torch/_torch_docs.py +++ b/torch/_torch_docs.py @@ -5300,7 +5300,7 @@ def parse_kwargs(desc): .. math:: X[\omega_1, \dots, \omega_d] = - \frac{1}{\prod_{i=1}^d N_i} \sum_{n_1=0}^{N_1} \dots \sum_{n_d=0}^{N_d} x[n_1, \dots, n_d] + \sum_{n_1=0}^{N_1} \dots \sum_{n_d=0}^{N_d} x[n_1, \dots, n_d] e^{-j\ 2 \pi \sum_{i=0}^d \frac{\omega_i n_i}{N_i}}, where :math:`d` = :attr:`signal_ndim` is number of dimensions for the @@ -5490,7 +5490,7 @@ def parse_kwargs(desc): by :attr:`signal_ndim`. :attr:`input` must be a tensor with at least ``signal_ndim`` dimensions with optionally arbitrary number of leading batch dimensions. If :attr:`normalized` is set to ``True``, this normalizes the result -by multiplying it with :math:`\sqrt{\prod_{i=1}^K N_i}` so that the operator is +by dividing it with :math:`\sqrt{\prod_{i=1}^K N_i}` so that the operator is unitary, where :math:`N_i` is the size of signal dimension :math:`i`. The real-to-complex Fourier transform results follow conjugate symmetry: From 19031c68dc7c5f54a7eb8935ecd91d7120a5ce7f Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Tue, 21 Aug 2018 21:29:30 -0700 Subject: [PATCH 18/94] Use intrusive_ptr in Storage; replace unique_ptr with Storage (#10488) Summary: ``` Use intrusive_ptr in Storage; replace unique_ptr with Storage This patch does two major changes: - It replaces the use of Retainable in Storage with a new implementation based on intrusive_ptr. This will be necessary because Caffe2 will be using this class to implement intrusive_ptrs, and we need to line these up for the merge. One good thing about the new implementation is that the default copy/move constructors/assignment operators and destructor work automatically, instead of needing to be hardcoded into Storage/Tensor. 
- It replaces all places where we returned std::unique_ptr with Storage, collapsing an unnecessary double indirection that is no longer necessary now that we have correctly working copy/move constructors. I didn't initially want to do step (2), but it was very important to eliminate all bare uses of new Storage and new StorageImpl, and this making the API change was the most straightforward way to do this. HOW TO FIX YOUR CODE IN THE NEW API - You no longer need to dereference the result of tensor.storage() to pass it to set. So, instead of: x.set_(*y.storage()); just write: x.set_(y.storage()); - If you were accessing methods on StorageImpl via the pImpl() method, you must use the dot operator to run pImpl(). Even better; just drop pImpl, we now have method forwarding. So, instead of: storage->pImpl()->data(); just do: storage->data(); // storage.pImpl()->data() works too but is not as recommended - storage->getDevice() is no more; instead use storage->device().index() MISC CODE UPDATES - retain, release, weak_retain, weak_release and weak_lock are now reimplemented using the "blessed API", and renamed to make it clearer that their use is discouraged. - nvcc OS X and general OS X portability improvements to intrusive_ptr - A new comment in intrusive_ptr describing how stack allocated intrusive_ptr_targets work differently than heap allocated ones from c10::make_intrusive CAVEAT EMPTOR - THStorage_weakRetain used to work on strong pointers, but it NO LONGER works with intrusive_ptr. You must reclaim the strong pointer into a real strong pointer, construct a weak pointer from it, and then release the strong and weak pointers. See StorageSharing.cpp for an example. ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/10488 Reviewed By: gchanan Differential Revision: D9306134 Pulled By: ezyang fbshipit-source-id: 02d58ef62dab8e4da6131e1a24834a65c21048e2 --- aten/src/ATen/Declarations.cwrap | 2 +- aten/src/ATen/SparseTensorImpl.cpp | 5 +- aten/src/ATen/SparseTensorImpl.h | 3 +- aten/src/ATen/Storage.cpp | 11 +- aten/src/ATen/Storage.h | 45 ++++--- aten/src/ATen/StorageImpl.cpp | 8 -- aten/src/ATen/StorageImpl.h | 126 ++++++++++++------ aten/src/ATen/TensorImpl.cpp | 29 ++-- aten/src/ATen/TensorImpl.h | 19 ++- aten/src/ATen/UndefinedTensor.cpp | 6 +- aten/src/ATen/UndefinedTensor.h | 3 +- aten/src/ATen/UndefinedType.cpp | 10 +- aten/src/ATen/UndefinedType.h | 10 +- aten/src/ATen/Utils.h | 13 +- aten/src/ATen/core/intrusive_ptr.h | 56 +++++++- aten/src/ATen/function_wrapper.py | 12 +- aten/src/ATen/native/TensorShape.cpp | 4 +- aten/src/ATen/native/cuda/Gesv.cu | 4 +- aten/src/ATen/native/cudnn/RNN.cpp | 4 +- aten/src/ATen/templates/SparseTypeDerived.cpp | 10 +- aten/src/ATen/templates/Tensor.h | 2 +- aten/src/ATen/templates/Type.cpp | 4 +- aten/src/ATen/templates/Type.h | 10 +- aten/src/ATen/templates/TypeDerived.cpp | 32 +++-- aten/src/ATen/templates/TypeDerived.h | 10 +- aten/src/ATen/test/scalar_tensor_test.cpp | 4 +- aten/src/TH/THStorageFunctions.cpp | 36 ++--- aten/src/TH/THStorageFunctions.h | 1 - aten/src/TH/THStorageFunctions.hpp | 3 - aten/src/TH/THTensor.cpp | 11 +- aten/src/TH/THTensor.hpp | 9 +- aten/src/TH/generic/THStorage.cpp | 27 ++-- aten/src/THC/THCStorage.cpp | 8 +- aten/src/THC/THCTensor.cpp | 3 +- aten/src/THC/generic/THCStorage.cpp | 18 +-- test/cpp/api/rnn.cpp | 4 +- tools/autograd/gen_python_functions.py | 2 - tools/autograd/templates/VariableType.cpp | 10 +- tools/autograd/templates/VariableType.h | 10 +- .../templates/python_variable_methods.cpp | 4 +- 
torch/csrc/DynamicTypes.cpp | 6 +- torch/csrc/DynamicTypes.h | 2 +- torch/csrc/autograd/variable.cpp | 6 +- torch/csrc/autograd/variable.h | 3 +- torch/csrc/generic/StorageSharing.cpp | 13 +- torch/csrc/jit/export.cpp | 10 +- torch/csrc/jit/import.cpp | 8 +- torch/csrc/jit/interpreter.cpp | 2 +- torch/csrc/utils/python_arg_parser.h | 4 +- torch/csrc/utils/tensor_new.cpp | 6 +- torch/csrc/utils/tensor_numpy.cpp | 3 +- .../lib/THD/master_worker/worker/Dispatch.cpp | 4 +- torch/lib/THD/master_worker/worker/Worker.cpp | 2 +- torch/lib/THD/master_worker/worker/Worker.hpp | 2 +- .../master_worker/worker/dispatch/Storage.cpp | 14 +- torch/lib/c10d/Utils.hpp | 5 +- 56 files changed, 357 insertions(+), 321 deletions(-) diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap index adc815b1a5e2fe..966d57bf101d06 100644 --- a/aten/src/ATen/Declarations.cwrap +++ b/aten/src/ATen/Declarations.cwrap @@ -50,7 +50,7 @@ - THTensor* self - THStorage* source - CONSTANT 0 - - CONSTANT {static_cast<int64_t>(source.pImpl()->size())} + - CONSTANT {static_cast<int64_t>(source.size())} - CONSTANT {} - cname: setStorage arguments: diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp index 95708be4933198..5c1cf92b0e3614 100644 --- a/aten/src/ATen/SparseTensorImpl.cpp +++ b/aten/src/ATen/SparseTensorImpl.cpp @@ -62,10 +62,7 @@ TensorImpl* SparseTensorImpl::maybe_zero_dim(bool condition_when_zero_dim) { " changing dimensionality via maybe_zero_dim"); return this; } -std::unique_ptr<Storage> SparseTensorImpl::storage() { - AT_ERROR("sparse tensors do not have storage"); -} -at::StorageImpl* SparseTensorImpl::storageImpl() const { +const Storage& SparseTensorImpl::storage() { AT_ERROR("sparse tensors do not have storage"); } int64_t SparseTensorImpl::storage_offset() const { diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h index d0982a226483b5..326c4c7ff144c4 100644 --- a/aten/src/ATen/SparseTensorImpl.h +++ b/aten/src/ATen/SparseTensorImpl.h @@ -64,8 +64,7 @@ struct AT_API SparseTensorImpl : public TensorImpl { int64_t dim() const override; TensorImpl* maybe_zero_dim(bool condition_when_zero_dim) override; - std::unique_ptr<Storage> storage() override; - at::StorageImpl* storageImpl() const override; + const Storage& storage() override; int64_t storage_offset() const override; // Some ops do some manual size fiddling.
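The Storage diffs that follow are easier to read with the end state in mind. A sketch of the new call patterns on the caller side, assuming a tensor whose storage holds floats (the method names are the ones introduced below, mirroring the migration notes in the commit message):

```cpp
#include <ATen/ATen.h>

void inspect(const at::Tensor& t) {
  // storage() now returns const Storage& instead of
  // std::unique_ptr<Storage>, so there is nothing to dereference.
  const at::Storage& s = t.storage();
  float* p = s.data<float>();       // method forwarding replaces pImpl()->data()
  auto index = s.device().index();  // replaces getDevice()
  (void)p;
  (void)index;
}
```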
diff --git a/aten/src/ATen/Storage.cpp b/aten/src/ATen/Storage.cpp index f210315fbe3948..d5cbdd69df4009 100644 --- a/aten/src/ATen/Storage.cpp +++ b/aten/src/ATen/Storage.cpp @@ -8,7 +8,7 @@ Storage::Storage( size_t size, Allocator* allocator, bool resizable) - : storage_impl_(new StorageImpl(scalar_type, size, allocator, resizable)) {} + : storage_impl_(c10::make_intrusive<StorageImpl>(scalar_type, size, allocator, resizable)) {} Storage::Storage( at::ScalarType scalar_type, @@ -16,18 +16,11 @@ Storage::Storage( size_t size, const std::function<void(void*)>& deleter, bool resizable) - : storage_impl_(new StorageImpl( + : storage_impl_(c10::make_intrusive<StorageImpl>( scalar_type, size, std::move(data_ptr), /* allocator */ nullptr, resizable)) {} -Storage::~Storage() { - if (!storage_impl_) { - return; - } - storage_impl_->release(); -} - } // namespace at diff --git a/aten/src/ATen/Storage.h b/aten/src/ATen/Storage.h index 53927675009e57..5a0eac7b370ceb 100644 --- a/aten/src/ATen/Storage.h +++ b/aten/src/ATen/Storage.h @@ -6,8 +6,8 @@ namespace at { struct AT_API Storage { public: - Storage() = delete; - Storage(StorageImpl* storage_impl) : storage_impl_(storage_impl) {} + Storage() {} + Storage(StorageImpl* storage_impl) : storage_impl_(c10::intrusive_ptr<StorageImpl>::reclaim(storage_impl)) {} Storage( at::ScalarType, size_t size, @@ -19,29 +19,36 @@ struct AT_API Storage { size_t size, const std::function<void(void*)>& deleter, bool resizable = false); - ~Storage(); - // There are reasonable interpretations of these constructors, but they're to - // be implemented on demand. - Storage(Storage&) = delete; - Storage(const Storage&) = delete; - Storage(Storage&&) = delete; - Storage(const Storage&&) = delete; - void set_pImpl(StorageImpl* storage_impl) { - storage_impl_ = storage_impl; - } - StorageImpl* pImpl() { - return storage_impl_; + + template <typename T> + T* data() const { return storage_impl_->data<T>(); } + + template <typename T> + T* unsafe_data() const { return storage_impl_->unsafe_data<T>(); } + + size_t elementSize() const { return storage_impl_->elementSize(); } + ptrdiff_t size() const { return storage_impl_->size(); } + bool resizable() const { return storage_impl_->resizable(); } + // get() use here is to get const-correctness + void* data() const { return storage_impl_.get()->data(); } + const at::DataPtr& data_ptr() const { return storage_impl_->data_ptr(); } + DeviceType device_type() const { return storage_impl_->device_type(); } + at::Allocator* allocator() const { return storage_impl_.get()->allocator(); } + at::ScalarType scalar_type() const { return storage_impl_->scalar_type(); } + at::Device device() const { return storage_impl_->device(); } + + StorageImpl* unsafeReleaseStorageImpl() { + return storage_impl_.release(); } - StorageImpl* pImpl() const { - return storage_impl_; + StorageImpl* unsafeGetStorageImpl() const noexcept { + return storage_impl_.get(); } - StorageImpl* retained_pImpl() const { - storage_impl_->retain(); + operator bool() const { + return storage_impl_; } protected: - StorageImpl* storage_impl_; + c10::intrusive_ptr<StorageImpl> storage_impl_; }; } // namespace at diff --git a/aten/src/ATen/StorageImpl.cpp b/aten/src/ATen/StorageImpl.cpp index 3b8c83f6f0f43e..290bc78b638c98 100644 --- a/aten/src/ATen/StorageImpl.cpp +++ b/aten/src/ATen/StorageImpl.cpp @@ -28,12 +28,4 @@ StorageImpl::StorageImpl( allocator, resizable) {} -namespace detail { -Backend get_backend(StorageImpl* storage_impl) { - if (storage_impl->data_ptr().device().is_cuda()) { - return Backend::CUDA; - } - return Backend::CPU; -} -} // namespace detail } // namespace at diff
--git a/aten/src/ATen/StorageImpl.h b/aten/src/ATen/StorageImpl.h index b7bdec874056eb..9df3e828d14ad6 100644 --- a/aten/src/ATen/StorageImpl.h +++ b/aten/src/ATen/StorageImpl.h @@ -3,30 +3,11 @@ #include #include #include -#include #include -#include -// Note [Weak references for intrusive refcounting] -// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -// Here's the scheme: -// -// - refcount == number of strong references to the object -// weakcount == number of weak references to the object, -// plus one more if refcount > 0 -// -// - the underlying object stays live as long as there are any strong -// or weak pointers to it (weakcount > 0, since strong -// references count as a +1 to weakcount) -// -// - underlying_object::release_resources() is called when refcount == 0 -// -// - the underlying object is destructed when weakcount == 0 (which implies -// refcount == 0) -// -// - Once refcount == 0, it can never again be > 0 (the transition -// from > 0 to == 0 is monotonic) -// +#include + +#include struct THFinalizer { virtual void operator()() = 0; @@ -37,7 +18,7 @@ namespace at { struct Type; -struct AT_API StorageImpl : public Retainable { +struct AT_API StorageImpl : public c10::intrusive_ptr_target { public: StorageImpl() = delete; virtual ~StorageImpl() {}; @@ -55,9 +36,8 @@ struct AT_API StorageImpl : public Retainable { StorageImpl(StorageImpl&) = delete; StorageImpl(const StorageImpl&) = delete; // NB: Don't move ref count! - StorageImpl(StorageImpl&& other) = delete; - StorageImpl(const StorageImpl&&) = delete; - StorageImpl& operator=(StorageImpl&& other) = delete; + StorageImpl(StorageImpl&& other) = default; + StorageImpl& operator=(StorageImpl&& other) = default; // TODO: Rename this into th_data, and move it out of the class; // the real data shouldn't call th::from_type @@ -79,7 +59,7 @@ struct AT_API StorageImpl : public Retainable { return static_cast(this->data_ptr_.get()); } - void release_resources() { + void release_resources() override { if (finalizer_) { (*finalizer_)(); } @@ -89,7 +69,7 @@ struct AT_API StorageImpl : public Retainable { void operator=(const StorageImpl&) = delete; - virtual size_t elementSize() const { + size_t elementSize() const { return at::elementSize(scalar_type_); } @@ -108,8 +88,13 @@ struct AT_API StorageImpl : public Retainable { at::DataPtr& data_ptr() { return data_ptr_; }; - void set_data_ptr(at::DataPtr&& data_ptr) { - data_ptr_ = std::move(data_ptr); + const at::DataPtr& data_ptr() const { + return data_ptr_; + }; + // Returns the previous data_ptr + at::DataPtr set_data_ptr(at::DataPtr&& data_ptr) { + std::swap(data_ptr_, data_ptr); + return std::move(data_ptr); }; void* data() { return data_ptr_.get(); @@ -117,34 +102,97 @@ struct AT_API StorageImpl : public Retainable { const void* data() const { return data_ptr_.get(); }; + at::DeviceType device_type() const { + return data_ptr_.device().type(); + } at::Allocator* allocator() { return allocator_; }; - at::ScalarType& scalar_type() { + at::ScalarType scalar_type() const { return scalar_type_; }; const at::Allocator* allocator() const { return allocator_; }; - int getDevice() const { - return data_ptr_.device().index(); + // You generally shouldn't use this method, but it is occasionally + // useful if you want to override how a tensor will be reallocated, + // after it was already allocated (and its initial allocator was + // set) + void set_allocator(at::Allocator* allocator) { + allocator_ = allocator; + } + Device device() const { + return data_ptr_.device(); } void 
set_resizable(bool resizable) { resizable_ = resizable; } + // You should only call these functions if you have a raw StorageImpl* + // pointer; if you have intrusive_ptr<StorageImpl> this will be + // handled automatically. + // + // TODO: Eliminate as many uses of these functions as humanly possible + void _raw_incref() { + auto ptr = c10::intrusive_ptr<StorageImpl>::reclaim(this); + auto ptr_copy = ptr; + ptr_copy.release(); + ptr.release(); + } + void _raw_decref() { + // Let it die + c10::intrusive_ptr<StorageImpl>::reclaim(this); + // NB: You still "have" a pointer, but it's now invalid. + // If you want more safety, use the actual c10::intrusive_ptr class + } + StorageImpl* _raw_make_weak() { + // NB: this is a strong reference + auto ptr = c10::intrusive_ptr<StorageImpl>::reclaim(this); + c10::weak_intrusive_ptr<StorageImpl> wptr(ptr); + ptr.release(); + return wptr.release(); + } + void _raw_weak_incweakref() { + // NB: this is a weak reference + auto wptr = c10::weak_intrusive_ptr<StorageImpl>::reclaim(this); + auto wptr_copy = wptr; + wptr_copy.release(); + wptr.release(); + } + void _raw_weak_decweakref() { + // NB: this is a weak reference + // Let it die + c10::weak_intrusive_ptr<StorageImpl>::reclaim(this); + // NB: You still "have" a pointer, but it's now invalid. + // If you want more safety, use the actual c10::weak_intrusive_ptr class + } + StorageImpl* _raw_weak_lock() { + auto wptr = c10::weak_intrusive_ptr<StorageImpl>::reclaim(this); + auto ptr = wptr.lock(); + wptr.release(); + return ptr.release(); + } + // This gives the STRONG refcount of a STRONG pointer + uint32_t _raw_use_count() { + auto ptr = c10::intrusive_ptr<StorageImpl>::reclaim(this); + auto r = ptr.use_count(); + ptr.release(); + return r; + } + // This gives the STRONG refcount of a WEAK pointer + uint32_t _raw_weak_use_count() { + auto wptr = c10::weak_intrusive_ptr<StorageImpl>::reclaim(this); + auto r = wptr.use_count(); + wptr.release(); + return r; + } + private: at::ScalarType scalar_type_; at::DataPtr data_ptr_; ptrdiff_t size_; bool resizable_; - - public: at::Allocator* allocator_; std::unique_ptr<THFinalizer> finalizer_; }; - -namespace detail { -AT_API Backend get_backend(StorageImpl* storage_impl); -} } // namespace at diff --git a/aten/src/ATen/TensorImpl.cpp b/aten/src/ATen/TensorImpl.cpp index 939d06585fe2d3..f21e6a843bd20f 100644 --- a/aten/src/ATen/TensorImpl.cpp +++ b/aten/src/ATen/TensorImpl.cpp @@ -60,22 +60,20 @@ void Tensor::backward( } TensorImpl::TensorImpl(TensorTypeId type_id, ScalarType scalar_type, bool is_variable) - : TensorImpl(nullptr, type_id, scalar_type, is_variable) { + : TensorImpl({}, type_id, scalar_type, is_variable) { // UndefinedTensors and SparseTensors don't have storages.
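// Aside: each _raw_* helper above is built from the same release()/reclaim()
// round trip. A standalone sketch of that contract (Foo is a stand-in type;
// make_intrusive, release and reclaim are the c10 APIs from this patch):
//
//   struct Foo : c10::intrusive_ptr_target {};
//
//   auto p = c10::make_intrusive<Foo>();   // heap-allocated, refcount == 1
//   Foo* raw = p.release();                // hand off; refcount stays 1
//   auto q = c10::intrusive_ptr<Foo>::reclaim(raw);  // take ownership back
//
// reclaim() asserts refcount > 0, which is also how stack-allocated targets
// (whose counts start at zero) are rejected; see the Note added to
// intrusive_ptr.h below.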
if (type_id != UndefinedTensorId() && scalar_type != ScalarType::Undefined && type_id != SparseCPUTensorId() && type_id != SparseCUDATensorId()) { auto type = &globalContext().getType(tensorTypeIdToBackend(type_id), scalar_type); - auto storage = type->storage(true); - storage_ = storage->pImpl(); - storage_->retain(); + storage_ = type->storage(true); } } -TensorImpl::TensorImpl(StorageImpl* storage, TensorTypeId type_id, bool is_variable) - : TensorImpl(storage, type_id, storage->scalar_type(), is_variable) {} +TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, bool is_variable) + : TensorImpl(std::move(storage), type_id, storage.scalar_type(), is_variable) {} -TensorImpl::TensorImpl(StorageImpl* storage, TensorTypeId type_id, ScalarType scalar_type, bool is_variable) - : storage_(storage), +TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, ScalarType scalar_type, bool is_variable) + : storage_(std::move(storage)), storage_offset_(0), sizes_{0}, strides_{1}, @@ -83,13 +81,6 @@ TensorImpl::TensorImpl(StorageImpl* storage, TensorTypeId type_id, ScalarType sc scalar_type_(scalar_type), is_variable_(is_variable) {} -TensorImpl::~TensorImpl() { - if (storage_) { - storage_->release(); - storage_ = nullptr; - } -} - IntList TensorImpl::sizes() const { return sizes_; } @@ -100,8 +91,7 @@ IntList TensorImpl::strides() const { void TensorImpl::release_resources() { if (storage_) { - storage_->release(); - storage_ = nullptr; + storage_ = {}; } } @@ -127,9 +117,8 @@ TensorImpl* TensorImpl::maybe_zero_dim(bool condition_when_zero_dim) { return this; } -std::unique_ptr TensorImpl::storage() { - storage_->retain(); - return std::unique_ptr(new Storage(storage_)); +const Storage& TensorImpl::storage() { + return storage_; } } // namespace at diff --git a/aten/src/ATen/TensorImpl.h b/aten/src/ATen/TensorImpl.h index 8da16bd647a2c1..40b0e3574f0ef7 100644 --- a/aten/src/ATen/TensorImpl.h +++ b/aten/src/ATen/TensorImpl.h @@ -3,7 +3,9 @@ #include #include +#include "ATen/Retainable.h" #include "ATen/StorageImpl.h" +#include "ATen/Storage.h" #include "ATen/core/optional.h" #include "ATen/core/TensorTypeId.h" #include "ATen/core/TensorTypeIdRegistration.h" @@ -20,9 +22,7 @@ struct Tensor; namespace at { struct AT_API TensorImpl : public Retainable { TensorImpl(TensorTypeId type_id, ScalarType scalar_type, bool is_variable); - TensorImpl(StorageImpl* storage, TensorTypeId type_id, bool is_variable); - - virtual ~TensorImpl(); + TensorImpl(Storage&& storage, TensorTypeId type_id, bool is_variable); virtual void release_resources() override; @@ -35,7 +35,7 @@ struct AT_API TensorImpl : public Retainable { virtual IntList sizes() const; virtual IntList strides() const; virtual int64_t dim() const; - virtual std::unique_ptr storage(); + virtual const Storage& storage(); friend struct Type; int64_t numel() { @@ -95,7 +95,7 @@ struct AT_API TensorImpl : public Retainable { // TODO: make these protected // Note: storage->size() may be greater than the recorded size // of a tensor - at::StorageImpl* storage_; + at::Storage storage_; int64_t storage_offset_; std::vector sizes_; @@ -103,12 +103,12 @@ struct AT_API TensorImpl : public Retainable { template inline T * data() const { - return storageImpl()->data() + storage_offset_; + return storage_.data() + storage_offset_; } template inline T * unsafe_data() const { - return storageImpl()->unsafe_data() + storage_offset_; + return storage_.unsafe_data() + storage_offset_; } inline at::ScalarType scalar_type() const { @@ -132,9 +132,6 @@ 
struct AT_API TensorImpl : public Retainable { virtual int64_t size(int64_t d) const; virtual int64_t stride(int64_t d) const; - // TODO: get rid of this. - virtual at::StorageImpl* storageImpl() const { return storage_; } - protected: TensorTypeId type_id_; // INVARIANT: When storage is non-null, this scalar type must @@ -144,6 +141,6 @@ struct AT_API TensorImpl : public Retainable { bool is_wrapped_number_ = false; private: - TensorImpl(StorageImpl* storage, TensorTypeId type_id, ScalarType scalar_type, bool is_variable); + TensorImpl(Storage&& storage, TensorTypeId type_id, ScalarType scalar_type, bool is_variable); }; } // namespace at diff --git a/aten/src/ATen/UndefinedTensor.cpp b/aten/src/ATen/UndefinedTensor.cpp index 7a4aa4b325016b..f5157b58c019c4 100644 --- a/aten/src/ATen/UndefinedTensor.cpp +++ b/aten/src/ATen/UndefinedTensor.cpp @@ -25,14 +25,10 @@ int64_t UndefinedTensor::dim() const { AT_ERROR("dim() called on undefined Tensor"); } -std::unique_ptr UndefinedTensor::storage() { +const Storage& UndefinedTensor::storage() { AT_ERROR("storage() called on undefined Tensor"); } -at::StorageImpl* UndefinedTensor::storageImpl() const { - AT_ERROR("storageImpl() called on an undefined Tensor"); -} - int64_t UndefinedTensor::storage_offset() const { AT_ERROR("storage_offset() called on an undefined Tensor"); } diff --git a/aten/src/ATen/UndefinedTensor.h b/aten/src/ATen/UndefinedTensor.h index 4ec42743a228d6..3ef3b227291cb3 100644 --- a/aten/src/ATen/UndefinedTensor.h +++ b/aten/src/ATen/UndefinedTensor.h @@ -14,8 +14,7 @@ struct AT_API UndefinedTensor final : public TensorImpl { int64_t size(int64_t d) const override; int64_t stride(int64_t d) const override; int64_t dim() const override; - std::unique_ptr storage() override; - at::StorageImpl* storageImpl() const override; + const Storage& storage() override; int64_t storage_offset() const override; private: UndefinedTensor(); diff --git a/aten/src/ATen/UndefinedType.cpp b/aten/src/ATen/UndefinedType.cpp index 63e9098ede528c..60d9c884b8aef2 100644 --- a/aten/src/ATen/UndefinedType.cpp +++ b/aten/src/ATen/UndefinedType.cpp @@ -15,19 +15,19 @@ bool UndefinedType::is_cuda() const { return false; } bool UndefinedType::is_sparse() const { return false; } bool UndefinedType::is_distributed() const { return false; } -std::unique_ptr UndefinedType::storage(bool resizable) const { +Storage UndefinedType::storage(bool resizable) const { AT_ERROR("storage not defined for UndefinedType"); } -std::unique_ptr UndefinedType::storage(size_t size, bool resizable) const { +Storage UndefinedType::storage(size_t size, bool resizable) const { AT_ERROR("storage(size_t) not defined for UndefinedType"); } -std::unique_ptr UndefinedType::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { +Storage UndefinedType::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { AT_ERROR("storageFromBlob not defined for UndefinedType"); } -std::unique_ptr UndefinedType::unsafeStorageFromTH(void * th_pointer, bool retain) const { +Storage UndefinedType::unsafeStorageFromTH(void * th_pointer, bool retain) const { AT_ERROR("unsafeStorageFromTH not defined for UndefinedType"); } -std::unique_ptr UndefinedType::storageWithAllocator(int64_t size, Allocator* allocator) const { +Storage UndefinedType::storageWithAllocator(int64_t size, Allocator* allocator) const { AT_ERROR("storageWithAllocator not defined for UndefinedType"); } Tensor UndefinedType::unsafeTensorFromTH(void * th_pointer, bool retain) const { diff --git 
a/aten/src/ATen/UndefinedType.h b/aten/src/ATen/UndefinedType.h index ec6cedb18f1aba..9ca00cfb516ff7 100644 --- a/aten/src/ATen/UndefinedType.h +++ b/aten/src/ATen/UndefinedType.h @@ -19,10 +19,10 @@ struct UndefinedType final : public Type { virtual bool is_cuda() const override; virtual bool is_sparse() const override; virtual bool is_distributed() const override; - virtual std::unique_ptr storage(bool resizable = false) const override; - virtual std::unique_ptr storage(size_t size, bool resizable = false) const override; - virtual std::unique_ptr storageFromBlob(void * data, int64_t size, const std::function & deleter) const override; - virtual std::unique_ptr storageWithAllocator(int64_t size, Allocator* allocator) const override; + virtual Storage storage(bool resizable = false) const override; + virtual Storage storage(size_t size, bool resizable = false) const override; + virtual Storage storageFromBlob(void * data, int64_t size, const std::function & deleter) const override; + virtual Storage storageWithAllocator(int64_t size, Allocator* allocator) const override; virtual std::unique_ptr generator() const override; virtual const char * toString() const override; virtual size_t elementSizeInBytes() const override; @@ -30,7 +30,7 @@ struct UndefinedType final : public Type { virtual Type & toScalarType(ScalarType s) const override; virtual TypeID ID() const override; static const char * typeString(); - virtual std::unique_ptr unsafeStorageFromTH(void * th_pointer, bool retain) const override; + virtual Storage unsafeStorageFromTH(void * th_pointer, bool retain) const override; virtual Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const override; virtual Tensor & s_copy_(Tensor & self, const Tensor & src, bool non_blocking) const override; diff --git a/aten/src/ATen/Utils.h b/aten/src/ATen/Utils.h index 57195e84e23743..aae6349dc6360e 100644 --- a/aten/src/ATen/Utils.h +++ b/aten/src/ATen/Utils.h @@ -26,17 +26,16 @@ namespace at { AT_API int _crash_if_asan(int); -template -static inline T* checked_cast_storage(Storage* expr, const char * name, int pos, Backend backend, ScalarType scalar_type) { - if (at::detail::get_backend(expr->pImpl()) != backend) { - AT_ERROR("Expected object of backend ", backend, " but got backend ", at::detail::get_backend(expr->pImpl()), +static inline const Storage& checked_storage(const Storage& expr, const char * name, int pos, DeviceType device_type, ScalarType scalar_type) { + if (expr.device_type() != device_type) { + AT_ERROR("Expected object of device type ", device_type, " but got device type ", expr.data_ptr().device().type(), " for argument #", pos, " '", name, "'"); } - if (expr->pImpl()->scalar_type() != scalar_type) { - AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr->pImpl()->scalar_type(), + if (expr.scalar_type() != scalar_type) { + AT_ERROR("Expected object of scalar type ", scalar_type, " but got scalar type ", expr.scalar_type(), " for argument #", pos, " '", name, "'"); } - return static_cast(expr); + return expr; } template diff --git a/aten/src/ATen/core/intrusive_ptr.h b/aten/src/ATen/core/intrusive_ptr.h index 5a84aa3473a6a3..5e2fc427283a2b 100644 --- a/aten/src/ATen/core/intrusive_ptr.h +++ b/aten/src/ATen/core/intrusive_ptr.h @@ -15,6 +15,23 @@ namespace c10 { * used in an intrusive_ptr. 
*/ +// Note [Stack allocated intrusive_ptr_target safety] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// A well known problem with std::enable_shared_from_this is that it +// allows you to create a std::shared_ptr from a stack allocated object, +// which is totally bogus because the object will die once you return +// from the stack. In intrusive_ptr, we can detect that this has occurred, +// because we set the refcount/weakcount of objects which inherit from +// intrusive_ptr_target to zero, *unless* we can prove that the object +// was dynamically allocated (e.g., via make_intrusive). +// +// Thus, whenever you transmute a T* into a intrusive_ptr, we check +// and make sure that the refcount isn't zero (or, a more subtle +// test for weak_intrusive_ptr, for which the refcount may validly +// be zero, but the weak refcount better not be zero), because that +// tells us if the object was allocated by us. If it wasn't, no +// intrusive_ptr for you! + class intrusive_ptr_target { // Note [Weak references for intrusive refcounting] // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -71,6 +88,11 @@ class intrusive_ptr_target { constexpr intrusive_ptr_target() noexcept : refcount_(0), weakcount_(0) {} + // intrusive_ptr_target supports move: but refcount and weakcount don't + // participate (since they are intrinsic properties of the memory location) + intrusive_ptr_target(intrusive_ptr_target&& other) noexcept : intrusive_ptr_target() {} + intrusive_ptr_target& operator=(intrusive_ptr_target&& other) noexcept { return *this; } + private: /** * This is called when refcount reaches zero. @@ -252,6 +274,10 @@ class intrusive_ptr final { return target_; } + operator bool() const noexcept { + return target_ != NullType::singleton(); + } + void reset() noexcept { reset_(); } @@ -298,6 +324,7 @@ class intrusive_ptr final { * passed in *must* have been created using intrusive_ptr::release(). */ static intrusive_ptr reclaim(TTarget* owning_ptr) { + // See Note [Stack allocated intrusive_ptr_target safety] AT_ASSERTM( owning_ptr->refcount_.load() > 0, "intrusive_ptr: Can only intrusive_ptr::reclaim() owning pointers that were created using intrusive_ptr::release()."); @@ -488,6 +515,31 @@ class weak_intrusive_ptr final { rhs.target_ = tmp; } + // NB: This should ONLY be used by the std::hash implementation + // for weak_intrusive_ptr. Another way you could do this is + // friend std::hash, but this triggers two + // bugs: + // + // (1) It triggers an nvcc bug, where std::hash in a friend class + // declaration gets preprocessed into hash, which then cannot + // actually be found. The error in this case looks like: + // + // error: no template named 'hash'; did you mean 'std::hash'? + // + // (2) On OS X, std::hash is declared as a struct, not a class. + // This twings: + // + // error: class 'hash' was previously declared as a struct + // [-Werror,-Wmismatched-tags] + // + // Both of these are work-aroundable, but on the whole, I decided + // it would be simpler and easier to make work if we just expose + // an unsafe getter for target_ + // + TTarget* _unsafe_get_target() const noexcept { + return target_; + } + size_t use_count() const noexcept { if (target_ == NullType::singleton()) { return 0; @@ -533,6 +585,7 @@ class weak_intrusive_ptr final { * passed in *must* have been created using weak_intrusive_ptr::release(). 
*/ static weak_intrusive_ptr reclaim(TTarget* owning_weak_ptr) { + // See Note [Stack allocated intrusive_ptr_target safety] // if refcount > 0, weakcount must be >1 for weak references to exist. // see weak counting explanation at top of this file. // if refcount == 0, weakcount only must be >0. @@ -552,7 +605,6 @@ class weak_intrusive_ptr final { friend bool operator==( const weak_intrusive_ptr& lhs, const weak_intrusive_ptr& rhs) noexcept; - friend class std::hash; }; template @@ -598,7 +650,7 @@ struct hash> { template struct hash> { size_t operator()(const c10::weak_intrusive_ptr& x) const { - return std::hash()(x.target_); + return std::hash()(x._unsafe_get_target()); } }; } // namespace std diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py index 0e80d37846024f..232329514bc82d 100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -208,7 +208,7 @@ def __init__(self, reason): 'THIntegerTensor*': 'Tensor &', 'THDenseTensor*': 'Tensor &', 'THDenseIndexTensor*': 'Tensor &', - 'THStorage*': 'Storage &', + 'THStorage*': 'Storage', 'THGenerator*': 'Generator *', 'IntListSize': 'IntList', 'accreal': 'Scalar', @@ -288,9 +288,11 @@ def __init__(self, reason): 'Backend::${DenseBackend}, ScalarType::Long)'), 'THStorage*': CodeTemplate( - 'checked_cast_storage(' - '&${arg_name},"${arg_name}",${arg_pos}, ' - 'Backend::${Backend}, ScalarType::${ScalarName})'), + 'checked_storage(' + '${arg_name},"${arg_name}",${arg_pos}, ' + # We're punning here (Backend and DeviceType constructors coincide) + # but DeviceType is the correct way to classify storages + 'DeviceType::${Backend}, ScalarType::${ScalarName})'), 'THGenerator*': CodeTemplate( 'check_generator<${Backend}Generator>(${arg_name}, &globalContext().defaultGenerator(device_type()))'), @@ -313,7 +315,7 @@ def __init__(self, reason): 'THIntegerTensor*': '{}_', 'THDenseTensor*': '{}_', 'THDenseIndexTensor*': '{}_', - 'THStorage*': '{}_->pImpl()', + 'THStorage*': '{}_.unsafeGetStorageImpl()', 'THGenerator*': '{}_->generator', 'TensorList': "{0}_.data(), {0}_.size()", } diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index bff5685ce9b0b5..fb1510db38150b 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -139,11 +139,11 @@ Tensor expand_as(const Tensor& self, const Tensor& other) { } Tensor as_strided(const Tensor& self, IntList size, IntList stride, int64_t storage_offset) { - return self.type().tensor().set_(*self.storage(), storage_offset, size, stride); + return self.type().tensor().set_(self.storage(), storage_offset, size, stride); } Tensor &as_strided_(Tensor& self, IntList size, IntList stride, int64_t storage_offset) { - return self.set_(*self.storage(), storage_offset, size, stride); + return self.set_(self.storage(), storage_offset, size, stride); } Tensor as_strided(const Tensor& self, IntList size, IntList stride) { diff --git a/aten/src/ATen/native/cuda/Gesv.cu b/aten/src/ATen/native/cuda/Gesv.cu index 4d99f1b5ed22bb..0692dd0fea2901 100644 --- a/aten/src/ATen/native/cuda/Gesv.cu +++ b/aten/src/ATen/native/cuda/Gesv.cu @@ -72,7 +72,7 @@ static inline magma_int_t magma_int_cast(int64_t value, const char* varname) { // Creates an array of size elements of type T, backed by pinned memory // wrapped in a Storage template -static inline std::unique_ptr pin_memory(int64_t size, Tensor dummy) { +static inline Storage pin_memory(int64_t size, Tensor dummy) { int64_t adjusted_size = size * 
sizeof(T); auto* allocator = cuda::getPinnedMemoryAllocator(); auto& backend = dummy.type().toBackend(Backend::CPU).toScalarType(kByte); @@ -81,7 +81,7 @@ static inline std::unique_ptr pin_memory(int64_t size, Tensor dummy) { #define ALLOCATE_ARRAY(name, type, size, dummy_tensor) \ auto storage_##name = pin_memory(size, dummy_tensor); \ - name = reinterpret_cast(storage_##name->pImpl()->data()); + name = static_cast(storage_##name.data()); template static void applyGesv(Tensor& b, Tensor& A, std::vector infos) { diff --git a/aten/src/ATen/native/cudnn/RNN.cpp b/aten/src/ATen/native/cudnn/RNN.cpp index 6f2d13a1533bd7..8130dc4d99f044 100644 --- a/aten/src/ATen/native/cudnn/RNN.cpp +++ b/aten/src/ATen/native/cudnn/RNN.cpp @@ -459,7 +459,7 @@ namespace { mat_numel * num_linear_layers / 2, 1}; // Generate a new parameter tensor which is a view into the // weight_buf. - Tensor param = weight_buf.type().tensor().set_(*weight_buf.storage(), offset, size); + Tensor param = weight_buf.type().tensor().set_(weight_buf.storage(), offset, size); params.emplace_back(std::move(param)); layer_params_count++; } else { @@ -1148,7 +1148,7 @@ Tensor try_get_weight_buf( // Try to get parameter storage auto & any_param = parameters.at(0); auto param_storage = any_param.storage(); - auto weight_buf = any_param.type().tensor().set_(*param_storage); + auto weight_buf = any_param.type().tensor().set_(param_storage); if (weight_buf.size(0) < num_params) { return {}; } else if (weight_buf.size(0) > num_params) { diff --git a/aten/src/ATen/templates/SparseTypeDerived.cpp b/aten/src/ATen/templates/SparseTypeDerived.cpp index 8508cf4c5463ac..4a17004bb5ff8c 100644 --- a/aten/src/ATen/templates/SparseTypeDerived.cpp +++ b/aten/src/ATen/templates/SparseTypeDerived.cpp @@ -39,22 +39,22 @@ bool ${Type}::is_cuda() const { return backend() == Backend::CUDA || backend() = bool ${Type}::is_sparse() const { return backend() == Backend::SparseCPU || backend() == Backend::SparseCUDA; } bool ${Type}::is_distributed() const { return false; } -std::unique_ptr ${Type}::storage(bool resizable) const { +Storage ${Type}::storage(bool resizable) const { AT_ERROR("storage not supported on sparse"); } -std::unique_ptr ${Type}::storage(size_t size, bool resizable) const { +Storage ${Type}::storage(size_t size, bool resizable) const { AT_ERROR("storage not supported on sparse"); } -std::unique_ptr ${Type}::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { +Storage ${Type}::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { AT_ERROR("storage not supported on sparse"); } -std::unique_ptr ${Type}::storageWithAllocator(int64_t size, Allocator* allocator) const { +Storage ${Type}::storageWithAllocator(int64_t size, Allocator* allocator) const { AT_ERROR("storage not supported on sparse"); } Tensor ${Type}::unsafeTensorFromTH(void * th_pointer, bool retain) const { AT_ERROR("unsafeTensorFromTH not supported on sparse"); } -std::unique_ptr ${Type}::unsafeStorageFromTH(void * th_pointer, bool retain) const { +Storage ${Type}::unsafeStorageFromTH(void * th_pointer, bool retain) const { AT_ERROR("unsafeTensorFromTH not supported on sparse"); } std::unique_ptr ${Type}::generator() const { diff --git a/aten/src/ATen/templates/Tensor.h b/aten/src/ATen/templates/Tensor.h index ae970d42b07e5e..5a5c65d34e5cec 100644 --- a/aten/src/ATen/templates/Tensor.h +++ b/aten/src/ATen/templates/Tensor.h @@ -79,7 +79,7 @@ struct AT_API Tensor : public detail::TensorBase { Type & type() const { return 
pImpl->type(); } - std::unique_ptr storage() const { + const Storage& storage() const { return pImpl->storage(); } inline Tensor toType(const Type & t, bool non_blocking=false) const; diff --git a/aten/src/ATen/templates/Type.cpp b/aten/src/ATen/templates/Type.cpp index ea75f1c6628ee8..5da2e0e5152fb0 100644 --- a/aten/src/ATen/templates/Type.cpp +++ b/aten/src/ATen/templates/Type.cpp @@ -81,14 +81,14 @@ Tensor Type::tensorFromBlob(void * data, IntList sizes, const std::function & deleter) const { auto storage = storageFromBlob(data, computeStorageSize(sizes, strides), deleter); - return tensor(*storage, 0, sizes, strides); + return tensor(storage, 0, sizes, strides); } Tensor Type::tensorWithAllocator(IntList sizes, Allocator* allocator) const { return tensorWithAllocator(sizes, defaultStrides(sizes), std::move(allocator)); } Tensor Type::tensorWithAllocator(IntList sizes, IntList strides, Allocator* allocator) const { auto storage = storageWithAllocator(computeStorageSize(sizes, strides), std::move(allocator)); - return tensor(*storage, 0, sizes, strides); + return tensor(storage, 0, sizes, strides); } Tensor Type::scalarTensor(Scalar s) const { if(s.isBackedByTensor()) diff --git a/aten/src/ATen/templates/Type.h b/aten/src/ATen/templates/Type.h index da43ad61835d73..269baf761b2afb 100644 --- a/aten/src/ATen/templates/Type.h +++ b/aten/src/ATen/templates/Type.h @@ -57,13 +57,13 @@ struct AT_API Type { bool is_variable() const noexcept { return is_variable_; } bool is_undefined() const noexcept { return is_undefined_; } static void registerCPU(Context * context); - virtual std::unique_ptr storage(bool resizable = false) const = 0; - virtual std::unique_ptr storage(size_t size, bool resizable = false) const = 0; - virtual std::unique_ptr storageFromBlob(void * data, int64_t size, const std::function & deleter=noop_deleter) const = 0; - virtual std::unique_ptr storageWithAllocator(int64_t size, Allocator* allocator) const = 0; + virtual Storage storage(bool resizable = false) const = 0; + virtual Storage storage(size_t size, bool resizable = false) const = 0; + virtual Storage storageFromBlob(void * data, int64_t size, const std::function & deleter=noop_deleter) const = 0; + virtual Storage storageWithAllocator(int64_t size, Allocator* allocator) const = 0; virtual std::unique_ptr generator() const = 0; virtual Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const = 0; - virtual std::unique_ptr unsafeStorageFromTH(void * th_pointer, bool retain) const = 0; + virtual Storage unsafeStorageFromTH(void * th_pointer, bool retain) const = 0; virtual const char * toString() const = 0; virtual size_t elementSizeInBytes() const = 0; virtual Type & toBackend(Backend b) const; diff --git a/aten/src/ATen/templates/TypeDerived.cpp b/aten/src/ATen/templates/TypeDerived.cpp index 82942655a8514b..8a7be424de3838 100644 --- a/aten/src/ATen/templates/TypeDerived.cpp +++ b/aten/src/ATen/templates/TypeDerived.cpp @@ -50,8 +50,8 @@ bool ${Type}::is_cuda() const { return backend() == Backend::CUDA || backend() = bool ${Type}::is_sparse() const { return backend() == Backend::SparseCPU || backend() == Backend::SparseCUDA; } bool ${Type}::is_distributed() const { return false; } -std::unique_ptr ${Type}::storage(bool resizable) const { - return std::unique_ptr(new Storage( +Storage ${Type}::storage(bool resizable) const { + return Storage( ScalarType::${ScalarName}, 0, #if ${isCUDA} @@ -60,10 +60,10 @@ std::unique_ptr ${Type}::storage(bool resizable) const { getTHDefaultAllocator(), #endif resizable - )); + 
); } -std::unique_ptr ${Type}::storage(size_t size, bool resizable) const { - return std::unique_ptr(new Storage( +Storage ${Type}::storage(size_t size, bool resizable) const { + return Storage( ScalarType::${ScalarName}, size, #if ${isCUDA} @@ -72,25 +72,23 @@ std::unique_ptr ${Type}::storage(size_t size, bool resizable) const { getTHDefaultAllocator(), #endif resizable - )); + ); } -std::unique_ptr ${Type}::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { - return std::unique_ptr( - new Storage( +Storage ${Type}::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { + return Storage( ScalarType::${ScalarName}, InefficientStdFunctionContext::makeDataPtr(data, deleter, #if ${isCUDA} - Device(DeviceType::CUDA, getPointerDevice(data)) + Device(DeviceType::CUDA, getPointerDevice(data)) #else - DeviceType::CPU + DeviceType::CPU #endif ), size, - deleter)); + deleter); } -std::unique_ptr ${Type}::storageWithAllocator(int64_t size, Allocator* allocator) const { - return std::unique_ptr( - new Storage(ScalarType::${ScalarName}, size, allocator)); +Storage ${Type}::storageWithAllocator(int64_t size, Allocator* allocator) const { + return Storage(ScalarType::${ScalarName}, size, allocator); } Tensor ${Type}::unsafeTensorFromTH(void * th_pointer, bool retain) const { TensorImpl* pimpl = (TensorImpl*)(th_pointer); @@ -99,10 +97,10 @@ Tensor ${Type}::unsafeTensorFromTH(void * th_pointer, bool retain) const { } return Tensor(pimpl, false); } -std::unique_ptr ${Type}::unsafeStorageFromTH(void * th_pointer, bool retain) const { +Storage ${Type}::unsafeStorageFromTH(void * th_pointer, bool retain) const { if (retain) ${THStorage}_retain(${state,} (${THStorage}*) th_pointer); - return std::unique_ptr(new Storage((${THStorage}*) th_pointer)); + return Storage((${THStorage}*) th_pointer); } std::unique_ptr ${Type}::generator() const { return std::unique_ptr(new ${Generator}(context)); diff --git a/aten/src/ATen/templates/TypeDerived.h b/aten/src/ATen/templates/TypeDerived.h index 50e912b9a9ecac..e8613b62a333be 100644 --- a/aten/src/ATen/templates/TypeDerived.h +++ b/aten/src/ATen/templates/TypeDerived.h @@ -22,16 +22,16 @@ struct ${Type} final : public Type { virtual bool is_cuda() const override; virtual bool is_sparse() const override; virtual bool is_distributed() const override; - virtual std::unique_ptr storage(bool resizable = false) const override; - virtual std::unique_ptr storage(size_t size, bool resizable = false) const override; - virtual std::unique_ptr storageFromBlob(void * data, int64_t size, const std::function & deleter) const override; - virtual std::unique_ptr storageWithAllocator(int64_t size, Allocator* allocator) const override; + virtual Storage storage(bool resizable = false) const override; + virtual Storage storage(size_t size, bool resizable = false) const override; + virtual Storage storageFromBlob(void * data, int64_t size, const std::function & deleter) const override; + virtual Storage storageWithAllocator(int64_t size, Allocator* allocator) const override; virtual std::unique_ptr generator() const override; virtual const char * toString() const override; virtual size_t elementSizeInBytes() const override; virtual TypeID ID() const override; static const char * typeString(); - virtual std::unique_ptr unsafeStorageFromTH(void * th_pointer, bool retain) const override; + virtual Storage unsafeStorageFromTH(void * th_pointer, bool retain) const override; virtual Tensor unsafeTensorFromTH(void * th_pointer, bool retain) 
const override; // example diff --git a/aten/src/ATen/test/scalar_tensor_test.cpp b/aten/src/ATen/test/scalar_tensor_test.cpp index 0907c89e09b06d..d52dc27e20295e 100644 --- a/aten/src/ATen/test/scalar_tensor_test.cpp +++ b/aten/src/ATen/test/scalar_tensor_test.cpp @@ -190,7 +190,7 @@ void test(Type &T) { auto lhs = ones(*lhs_it, T); auto rhs = ones(*rhs_it, T); auto storage = T.storage(rhs.numel(), false); - lhs.set_(*storage); + lhs.set_(storage); // should not be dim 0 because an empty storage is dim 1; all other storages aren't scalars REQUIRE(lhs.dim() != 0); } @@ -199,7 +199,7 @@ void test(Type &T) { auto lhs = ones(*lhs_it, T); auto rhs = ones(*rhs_it, T); auto storage = T.storage(rhs.numel(), false); - lhs.set_(*storage, rhs.storage_offset(), rhs.sizes(), rhs.strides()); + lhs.set_(storage, rhs.storage_offset(), rhs.sizes(), rhs.strides()); require_equal_size_dim(lhs, rhs); } } diff --git a/aten/src/TH/THStorageFunctions.cpp b/aten/src/TH/THStorageFunctions.cpp index 9d117a6432e20e..b987b96613032c 100644 --- a/aten/src/TH/THStorageFunctions.cpp +++ b/aten/src/TH/THStorageFunctions.cpp @@ -1,4 +1,5 @@ #include +#include #include "THStorageFunctions.hpp" @@ -15,11 +16,11 @@ #include "THGenerateHalfType.h" THStorage* THStorage_new(at::ScalarType scalar_type) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( scalar_type, 0, getTHDefaultAllocator(), - true); + true).release(); return storage; } @@ -28,25 +29,7 @@ void THStorage_free(THStorage* storage) { if (!storage) { return; } - storage->release(); -} - -// Manually retains a weak reference -void THStorage_weakRetain(THStorage *weak_storage) { - weak_storage->weak_retain(); -} - -// Releases a weak reference -void THStorage_weakFree(THStorage *weak_storage) { - weak_storage->weak_release(); -} - -// Given a weak reference, returns a strong reference to a storage (which must -// be freed when done) or null if the storage is already dead. -THStorage* THStorage_weakLock(THStorage *weak_storage) { - if (weak_storage->weak_lock()) - return weak_storage; - return nullptr; + storage->_raw_decref(); } ptrdiff_t THStorage_size(const THStorage *self) @@ -57,20 +40,19 @@ ptrdiff_t THStorage_size(const THStorage *self) void THStorage_retain(THStorage *storage) { if (storage) { - storage->retain(); + storage->_raw_incref(); } } void THStorage_resize(THStorage* storage, ptrdiff_t size) { if (storage->resizable()) { /* case when the allocator does not have a realloc defined */ - at::DataPtr old_data; - std::swap(old_data, storage->data_ptr()); - ptrdiff_t old_size = storage->size(); + at::DataPtr new_data; if (size != 0) { - storage->set_data_ptr( - storage->allocator()->allocate(storage->elementSize() * size)); + new_data = storage->allocator()->allocate(storage->elementSize() * size); } + at::DataPtr old_data = storage->set_data_ptr(std::move(new_data)); + ptrdiff_t old_size = storage->size(); storage->set_size(size); if (old_data != nullptr) { ptrdiff_t copy_size = old_size; diff --git a/aten/src/TH/THStorageFunctions.h b/aten/src/TH/THStorageFunctions.h index 6d32207072fd53..c6058c4c55fe7e 100644 --- a/aten/src/TH/THStorageFunctions.h +++ b/aten/src/TH/THStorageFunctions.h @@ -19,4 +19,3 @@ // This exists to have a data-type independent way of freeing (necessary for THPPointer). 
TH_API void THStorage_free(THStorage *storage); -TH_API void THStorage_weakFree(THStorage *storage); diff --git a/aten/src/TH/THStorageFunctions.hpp b/aten/src/TH/THStorageFunctions.hpp index 117259e29e04ae..9fe0db5e5497f9 100644 --- a/aten/src/TH/THStorageFunctions.hpp +++ b/aten/src/TH/THStorageFunctions.hpp @@ -38,6 +38,3 @@ TH_API ptrdiff_t THStorage_size(const THStorage *self); TH_API void THStorage_retain(THStorage *storage); TH_API void THStorage_resize(THStorage *storage, ptrdiff_t size); - -TH_API void THStorage_weakRetain(THStorage *weak_storage); -TH_API THStorage* THStorage_weakLock(THStorage *weak_storage); diff --git a/aten/src/TH/THTensor.cpp b/aten/src/TH/THTensor.cpp index fc3d69c046c99c..bca21d5e65c374 100644 --- a/aten/src/TH/THTensor.cpp +++ b/aten/src/TH/THTensor.cpp @@ -39,11 +39,10 @@ void THTensor_setStorageNd(THTensor *self, THStorage *storage, ptrdiff_t storage THError("Tensor: invalid null storage"); } auto scalar_type = THTensor_getStoragePtr(self)->scalar_type(); - THStorage_free(THTensor_getStoragePtr(self)); if(storage) { + storage->_raw_incref(); THTensor_stealAndSetStoragePtr(self, storage); - THStorage_retain(THTensor_getStoragePtr(self)); } else { THTensor_stealAndSetStoragePtr(self, THStorage_new(scalar_type)); @@ -196,3 +195,11 @@ THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride, at::IntList } return newstride; } + +// NB: Steals ownership of storage +void THTensor_stealAndSetStoragePtr(THTensor* tensor, THStorage* storage) { + // Caffe2 might have tensors whose storages are null, but we + // don't allow it in PyTorch. + AT_ASSERT(storage); + tensor->storage_ = at::Storage(storage); +} diff --git a/aten/src/TH/THTensor.hpp b/aten/src/TH/THTensor.hpp index d5e1fba7c63a4d..9f5bd13c24728b 100644 --- a/aten/src/TH/THTensor.hpp +++ b/aten/src/TH/THTensor.hpp @@ -30,7 +30,7 @@ inline THStorage* THTensor_getStoragePtr(const THTensor* tensor) { AT_CHECK(tensor->storage_, "Cannot use PyTorch operations on a half-constructed " "tensor. If this tensor came from Caffe2, please call GetMutableData on " "it first; otherwise, this is a bug, please report it."); - return tensor->storage_; + return tensor->storage_.unsafeGetStorageImpl(); } inline void THTensor_resizeDim(THTensor* tensor, int64_t ndim) { @@ -127,12 +127,7 @@ inline void THTensor_setStorageOffset(THTensor* tensor, ptrdiff_t storage_offset } // NB: Steals ownership of storage -inline void THTensor_stealAndSetStoragePtr(THTensor* tensor, THStorage* storage) { - // Caffe2 might have tensors whose storages are null, but we - // don't allow it in PyTorch. 
- AT_ASSERT(storage); - tensor->storage_ = storage; -} +TH_API void THTensor_stealAndSetStoragePtr(THTensor* tensor, THStorage* storage); TH_API void THTensor_free(THTensor *self); TH_API void THTensor_setStorageNd(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, const int64_t *size, const int64_t *stride); diff --git a/aten/src/TH/generic/THStorage.cpp b/aten/src/TH/generic/THStorage.cpp index 6a3c178ebb4e3f..df7b95ff7c45c5 100644 --- a/aten/src/TH/generic/THStorage.cpp +++ b/aten/src/TH/generic/THStorage.cpp @@ -26,22 +26,22 @@ THStorage* THStorage_(new)(void) THStorage* THStorage_(newWithSize)(ptrdiff_t size) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( at::CTypeToScalarType>::to(), size, getTHDefaultAllocator(), - true); + true).release(); return storage; } THStorage* THStorage_(newWithAllocator)(ptrdiff_t size, at::Allocator *allocator) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( at::CTypeToScalarType>::to(), size, allocator, - true); + true).release(); return storage; } @@ -50,13 +50,13 @@ THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int { auto scalar_type = at::CTypeToScalarType>::to(); size_t actual_size = -1; - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( scalar_type, size, THMapAllocator::makeDataPtr( filename, flags, size * at::elementSize(scalar_type), &actual_size), /* allocator */ nullptr, - false); + false).release(); if (size <= 0) { storage->set_size(actual_size / at::elementSize(scalar_type)); @@ -115,12 +115,12 @@ void THStorage_(free)(THStorage *storage) THStorage* THStorage_(newWithDataAndAllocator)(at::DataPtr&& data, ptrdiff_t size, at::Allocator* allocator) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( at::CTypeToScalarType>::to(), size, std::move(data), allocator, - true); + true).release(); return storage; } @@ -150,16 +150,7 @@ real THStorage_(get)(const THStorage *self, ptrdiff_t idx) void THStorage_(swap)(THStorage *storage1, THStorage *storage2) { - std::swap(storage1->scalar_type(), storage2->scalar_type()); - std::swap(storage1->data_ptr(), storage2->data_ptr()); - ptrdiff_t tmp_size = storage1->size(); - storage1->set_size(storage2->size()); - storage2->set_size(tmp_size); - bool tmp_bool = storage1->resizable(); - storage1->set_resizable(storage2->resizable()); - storage2->set_resizable(tmp_bool); - std::swap(storage1->allocator_, storage2->allocator_); - std::swap(storage1->finalizer_, storage2->finalizer_); + std::swap(*storage1, *storage2); } #endif diff --git a/aten/src/THC/THCStorage.cpp b/aten/src/THC/THCStorage.cpp index 0fb6fea51f5d56..739f5dadbf9f31 100644 --- a/aten/src/THC/THCStorage.cpp +++ b/aten/src/THC/THCStorage.cpp @@ -8,6 +8,8 @@ #include "generic/THCStorage.cpp" #include "THCGenerateAllTypes.h" +#include + void THCStorage_resize(THCState *state, THCStorage *self, ptrdiff_t size) { THArgCheck(size >= 0, 2, "invalid size"); @@ -48,16 +50,16 @@ void THCStorage_resize(THCState *state, THCStorage *self, ptrdiff_t size) } int THCStorage_getDevice(THCState* state, const THCStorage* storage) { - return storage->getDevice(); + return storage->device().index(); } THC_API THCStorage* THCStorage_new( THCState* state, at::ScalarType scalar_type) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( scalar_type, 0, state->cudaDeviceAllocator, - true); + true).release(); return storage; } diff --git 
a/aten/src/THC/THCTensor.cpp b/aten/src/THC/THCTensor.cpp index 9861b126d5087b..f4255edd185d69 100644 --- a/aten/src/THC/THCTensor.cpp +++ b/aten/src/THC/THCTensor.cpp @@ -190,11 +190,10 @@ void THCTensor_setStorageNd(THCState *state, THCTensor *self, THCStorage *storag THError("Tensor: invalid null storage"); } auto scalar_type = THTensor_getStoragePtr(self)->scalar_type(); - THStorage_free(THTensor_getStoragePtr(self)); if (storage) { + storage->_raw_incref(); THTensor_stealAndSetStoragePtr(self, storage); - THStorage_retain(THTensor_getStoragePtr(self)); } else { THTensor_stealAndSetStoragePtr(self, THCStorage_new(state, scalar_type)); } diff --git a/aten/src/THC/generic/THCStorage.cpp b/aten/src/THC/generic/THCStorage.cpp index 4389d1fd41cd36..ed466fa27e98e8 100644 --- a/aten/src/THC/generic/THCStorage.cpp +++ b/aten/src/THC/generic/THCStorage.cpp @@ -2,6 +2,8 @@ #define THC_GENERIC_FILE "generic/THCStorage.cpp" #else +#include + real* THCStorage_(data)(THCState *state, const THCStorage *self) { return self->data(); @@ -40,32 +42,32 @@ real THCStorage_(get)(THCState *state, const THCStorage *self, ptrdiff_t index) THCStorage* THCStorage_(new)(THCState *state) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( at::CTypeToScalarType::to(), 0, state->cudaDeviceAllocator, - true); + true).release(); return storage; } THCStorage* THCStorage_(newWithSize)(THCState *state, ptrdiff_t size) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( at::CTypeToScalarType::to(), size, state->cudaDeviceAllocator, - true); + true).release(); return storage; } THCStorage* THCStorage_(newWithAllocator)(THCState *state, ptrdiff_t size, at::Allocator* allocator) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( at::CTypeToScalarType::to(), size, allocator, - true); + true).release(); return storage; } @@ -114,12 +116,12 @@ THCStorage* THCStorage_(newWithDataAndAllocator)( at::DataPtr&& data, ptrdiff_t size, at::Allocator* allocator) { - THStorage* storage = new THStorage( + THStorage* storage = c10::make_intrusive( at::CTypeToScalarType::to(), size, std::move(data), allocator, - true); + true).release(); return storage; } diff --git a/test/cpp/api/rnn.cpp b/test/cpp/api/rnn.cpp index 5462c8ebe2f7c3..92067bada0f737 100644 --- a/test/cpp/api/rnn.cpp +++ b/test/cpp/api/rnn.cpp @@ -110,7 +110,7 @@ TEST_CASE("rnn") { LSTM model(2, 2); for (auto& v : model->parameters()) { float size = v->numel(); - auto p = static_cast(v->storage()->pImpl()->data()); + auto p = static_cast(v->storage().data()); for (size_t i = 0; i < size; i++) { p[i] = i / size; } @@ -118,7 +118,7 @@ TEST_CASE("rnn") { auto x = torch::empty({3, 4, 2}, torch::requires_grad()); float size = x.numel(); - auto p = static_cast(x.storage()->pImpl()->data()); + auto p = static_cast(x.storage().data()); for (size_t i = 0; i < size; i++) { p[i] = (size - i) / size; } diff --git a/tools/autograd/gen_python_functions.py b/tools/autograd/gen_python_functions.py index e1e8f795580a2b..d61c5fb04a21c1 100644 --- a/tools/autograd/gen_python_functions.py +++ b/tools/autograd/gen_python_functions.py @@ -321,8 +321,6 @@ def parse_arg(arg, arg_index, unpack_args=False): body.append('auto {} = {};'.format(name, expr)) expr = name - if typename == 'Storage &': - expr = '*' + expr if typename == 'SparseTensorRef': expr = 'SparseTensorRef({})'.format(expr) diff --git a/tools/autograd/templates/VariableType.cpp b/tools/autograd/templates/VariableType.cpp index 
75a59063842911..89101a24714b72 100644 --- a/tools/autograd/templates/VariableType.cpp +++ b/tools/autograd/templates/VariableType.cpp @@ -59,19 +59,19 @@ bool VariableType::is_cuda() const { return baseType->is_cuda(); } bool VariableType::is_sparse() const { return baseType->is_sparse(); } bool VariableType::is_distributed() const { return baseType->is_distributed(); } -std::unique_ptr VariableType::storage(bool resizable) const { +Storage VariableType::storage(bool resizable) const { return baseType->storage(); } -std::unique_ptr VariableType::storage(size_t size, bool resizable) const { +Storage VariableType::storage(size_t size, bool resizable) const { return baseType->storage(size); } -std::unique_ptr VariableType::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { +Storage VariableType::storageFromBlob(void * data, int64_t size, const std::function & deleter) const { return baseType->storageFromBlob(data, size, deleter); } -std::unique_ptr VariableType::unsafeStorageFromTH(void * th_pointer, bool retain) const { +Storage VariableType::unsafeStorageFromTH(void * th_pointer, bool retain) const { return baseType->unsafeStorageFromTH(th_pointer, retain); } -std::unique_ptr VariableType::storageWithAllocator(int64_t size, Allocator* allocator) const { +Storage VariableType::storageWithAllocator(int64_t size, Allocator* allocator) const { return baseType->storageWithAllocator(size, allocator); } Tensor VariableType::unsafeTensorFromTH(void * th_pointer, bool retain) const { diff --git a/tools/autograd/templates/VariableType.h b/tools/autograd/templates/VariableType.h index a937183f6fc5fe..493c5a5fed73e3 100644 --- a/tools/autograd/templates/VariableType.h +++ b/tools/autograd/templates/VariableType.h @@ -37,10 +37,10 @@ struct TORCH_API VariableType final : public at::Type { virtual bool is_cuda() const override; virtual bool is_sparse() const override; virtual bool is_distributed() const override; - virtual std::unique_ptr storage(bool resizable = false) const override; - virtual std::unique_ptr storage(size_t size, bool resizable = false) const override; - virtual std::unique_ptr storageFromBlob(void * data, int64_t size, const std::function & deleter) const override; - virtual std::unique_ptr storageWithAllocator(int64_t size, at::Allocator* allocator) const override; + virtual Storage storage(bool resizable = false) const override; + virtual Storage storage(size_t size, bool resizable = false) const override; + virtual Storage storageFromBlob(void * data, int64_t size, const std::function & deleter) const override; + virtual Storage storageWithAllocator(int64_t size, at::Allocator* allocator) const override; virtual std::unique_ptr generator() const override; virtual const char * toString() const override; virtual at::TypeID ID() const override; @@ -48,7 +48,7 @@ struct TORCH_API VariableType final : public at::Type { virtual at::Type & toBackend(at::Backend b) const override; virtual at::Type & toScalarType(at::ScalarType s) const override; static const char * typeString(); - virtual std::unique_ptr unsafeStorageFromTH(void * th_pointer, bool retain) const override; + virtual Storage unsafeStorageFromTH(void * th_pointer, bool retain) const override; virtual at::Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const override; static at::Type* getType(const at::Type& baseType); diff --git a/tools/autograd/templates/python_variable_methods.cpp b/tools/autograd/templates/python_variable_methods.cpp index 07b02632b80ab4..f5e3e5d9f8a1e8 100644 --- 
a/tools/autograd/templates/python_variable_methods.cpp +++ b/tools/autograd/templates/python_variable_methods.cpp @@ -476,7 +476,7 @@ static PyObject * THPVariable_storage(PyObject* self, PyObject* arg) { HANDLE_TH_ERRORS auto& self_ = reinterpret_cast(self)->cdata; - return createPyObject(*self_.storage()); + return createPyObject(self_.storage()); END_HANDLE_TH_ERRORS } @@ -484,7 +484,7 @@ static PyObject * THPVariable_storage_type(PyObject* self, PyObject* arg) { HANDLE_TH_ERRORS auto& self_ = reinterpret_cast(self)->cdata; - auto storage = THPObjectPtr(createPyObject(*self_.storage())); + auto storage = THPObjectPtr(createPyObject(self_.storage())); auto storage_type = (PyObject*)Py_TYPE(storage); Py_INCREF(storage_type); return storage_type; diff --git a/torch/csrc/DynamicTypes.cpp b/torch/csrc/DynamicTypes.cpp index 0b2cf1f842db7c..d721e534e18d25 100644 --- a/torch/csrc/DynamicTypes.cpp +++ b/torch/csrc/DynamicTypes.cpp @@ -70,7 +70,7 @@ at::Type* get_type(const std::string& name, bool is_cuda, bool is_sparse) { PyTypeObject* getPyTypeObject(const at::Storage& storage) { auto attype = at::globalContext().getTypeOpt( - at::detail::get_backend(storage.pImpl()), storage.pImpl()->scalar_type()); + deviceTypeToBackend(storage.device_type()), storage.scalar_type()); auto it = attype_to_py_storage_type.find(attype); if (it != attype_to_py_storage_type.end()) { return it->second; @@ -136,7 +136,7 @@ PyObject* createPyObject(const at::Storage& storage) auto type = getPyTypeObject(storage); auto obj = THPObjectPtr(type->tp_alloc(type, 0)); if (!obj) throw python_error(); - ((THPVoidStorage*)obj.get())->cdata = (THVoidStorage *)storage.retained_pImpl(); + ((THPVoidStorage*)obj.get())->cdata = (THVoidStorage *)at::Storage(/* copy */ storage).unsafeReleaseStorageImpl(); return obj.release(); } @@ -144,7 +144,7 @@ bool isStorage(PyObject* obj) { return py_storage_type_to_attype.count(Py_TYPE(obj)); } -std::unique_ptr createStorage(PyObject* obj) +at::Storage createStorage(PyObject* obj) { auto it = py_storage_type_to_attype.find(Py_TYPE(obj)); if (it == py_storage_type_to_attype.end()) { diff --git a/torch/csrc/DynamicTypes.h b/torch/csrc/DynamicTypes.h index 25238cb68dd42d..f7808fc358f55e 100644 --- a/torch/csrc/DynamicTypes.h +++ b/torch/csrc/DynamicTypes.h @@ -29,7 +29,7 @@ void registerDtypeObject(THPDtype *dtype, at::ScalarType scalarType); void registerLayoutObject(THPLayout *layout, at::Backend backend); PyObject* createPyObject(const at::Storage& storage); -std::unique_ptr createStorage(PyObject* obj); +at::Storage createStorage(PyObject* obj); bool isStorage(PyObject* obj); THPDtype* getDtype(at::ScalarType scalarType); diff --git a/torch/csrc/autograd/variable.cpp b/torch/csrc/autograd/variable.cpp index f266296c28cdf9..cb35724a6bd234 100644 --- a/torch/csrc/autograd/variable.cpp +++ b/torch/csrc/autograd/variable.cpp @@ -64,14 +64,10 @@ const char* Variable::Impl::typeString() { return "VariableType"; } -std::unique_ptr Variable::Impl::storage() { +const at::Storage& Variable::Impl::storage() { return data_.storage(); } -at::StorageImpl* Variable::Impl::storageImpl() const { - return data_.unsafeGetTensorImpl()->storageImpl(); -} - int64_t Variable::Impl::storage_offset() const { return data_.storage_offset(); } diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 360caf4296ae96..4ae0785001be85 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -273,8 +273,7 @@ struct Variable::Impl : public at::TensorImpl { int64_t 
stride(int64_t d) const override; int64_t dim() const override; - std::unique_ptr storage() override; - at::StorageImpl* storageImpl() const override; + const at::Storage& storage() override; int64_t storage_offset() const override; static const char* typeString(); diff --git a/torch/csrc/generic/StorageSharing.cpp b/torch/csrc/generic/StorageSharing.cpp index c6e949a2085739..c7a19015b16467 100644 --- a/torch/csrc/generic/StorageSharing.cpp +++ b/torch/csrc/generic/StorageSharing.cpp @@ -215,9 +215,9 @@ static PyObject * THPStorage_(shareCuda)(THPStorage *self) { HANDLE_TH_ERRORS THWStorage *storage = self->cdata; - at::DeviceGuard device_guard(storage->getDevice()); + at::DeviceGuard device_guard(storage->device()); THPObjectPtr tuple(PyTuple_New(4)); - THPObjectPtr device(PyLong_FromLong(storage->getDevice())); + THPObjectPtr device(PyLong_FromLong(storage->device().index())); THPObjectPtr _handle(Py_None); Py_INCREF(Py_None); THPObjectPtr size(PyLong_FromLong(storage->size())); @@ -294,8 +294,7 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args) static PyObject * THPStorage_(weakRef)(THPStorage *self, PyObject *args) { HANDLE_TH_ERRORS THStorage* storage = self->cdata; - THStorage_weakRetain(storage); - return PyLong_FromVoidPtr(storage); + return PyLong_FromVoidPtr(storage->_raw_make_weak()); END_HANDLE_TH_ERRORS } @@ -305,7 +304,7 @@ PyObject * THPStorage_(newWithWeakPtr)(PyObject *_unused, PyObject *arg) THPUtils_assert(THPUtils_checkLong(arg), "_new_with_weak_ptr(): arg must be an 'int'"); THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg); - if (auto* storage = THStorage_weakLock(weak_storage)) { + if (auto* storage = weak_storage->_raw_weak_lock()) { return THPStorage_(New)(storage); } Py_RETURN_NONE; @@ -321,7 +320,7 @@ PyObject * THPStorage_(freeWeakRef)(PyObject *_unused, PyObject *arg) THPUtils_assert(THPUtils_checkLong(arg), "_free_weak_ref(): arg must be an 'int'"); THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg); - THStorage_weakFree(weak_storage); + weak_storage->_raw_weak_decweakref(); Py_RETURN_NONE; END_HANDLE_TH_ERRORS @@ -332,7 +331,7 @@ PyObject * THPStorage_(expired)(PyObject *_unused, PyObject *arg) HANDLE_TH_ERRORS THPUtils_assert(THPUtils_checkLong(arg), "_expired(): arg must be an 'int'"); THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg); - return PyBool_FromLong(weak_storage->use_count() == 0); + return PyBool_FromLong(weak_storage->_raw_weak_use_count() == 0); END_HANDLE_TH_ERRORS } diff --git a/torch/csrc/jit/export.cpp b/torch/csrc/jit/export.cpp index 03b979ada19acf..8b5e678f71c497 100644 --- a/torch/csrc/jit/export.cpp +++ b/torch/csrc/jit/export.cpp @@ -617,26 +617,26 @@ void ModuleEncoder::EncodeTensor( onnx::TensorProto *tensor_proto, const at::Tensor &tensor, const at::optional external_ref = {}) { - auto storage_ptr = tensor.storage()->pImpl(); + auto storage_ptr = tensor.storage().unsafeGetStorageImpl(); auto dedup_it = storage_dedup_map_.find(storage_ptr); if (dedup_it != storage_dedup_map_.end()) { tensor_proto->add_int64_data(dedup_it->second); } else { at::Tensor t = tensor; - if (at::detail::get_backend(tensor.storage()->pImpl()) == at::Backend::CUDA) { + if (tensor.storage().device_type() == at::DeviceType::CUDA) { // NB: This new tensor is created to support cuda tensors. // Storages can be mutated when converting tensors from cuda to cpu, // and we need a cpu tensor to copy data from. 
t = tensor.type().tensor( - *tensor.storage(), + tensor.storage(), /* storageOffset = */ 0, - /* size = */ { static_cast(tensor.type().elementSizeInBytes() * tensor.storage()->pImpl()->size()) }, + /* size = */ { static_cast(tensor.type().elementSizeInBytes() * tensor.storage().size()) }, /* strides = */ { 1 }) .cpu(); } auto record_number = file_writer_.writeRecord( - static_cast(t.storage()->pImpl()->data()), t.type().elementSizeInBytes() * t.numel()); + static_cast(t.storage().data()), t.type().elementSizeInBytes() * t.numel()); tensor_proto->add_int64_data(record_number); storage_dedup_map_[storage_ptr] = record_number; } diff --git a/torch/csrc/jit/import.cpp b/torch/csrc/jit/import.cpp index f1f64c8beae6e6..432384c1d56688 100644 --- a/torch/csrc/jit/import.cpp +++ b/torch/csrc/jit/import.cpp @@ -67,8 +67,8 @@ at::Tensor DecoderBase::buildTensor(const onnx::TensorProto& tensor_proto) { tensor.resize_(sizes); JIT_ASSERT( - tensor.storage()->pImpl()->size() * - tensor.storage()->pImpl()->elementSize() == + tensor.storage().size() * + tensor.storage().elementSize() == tensor_proto.raw_data().size()); std::memcpy(tensor.data_ptr(), tensor_proto.raw_data().data(), tensor_proto.raw_data().size()); @@ -301,7 +301,7 @@ at::Tensor ModuleDecoder::buildTensorCommon( auto storage = std::make_shared(at::CPU(type).tensor()); auto record = file_reader_.getRecordWithKey(record_number); storage->resize_({ static_cast(std::get<1>(record)) }); - std::memcpy(storage->storage()->pImpl()->data(), std::get<0>(record).get(), std::get<1>(record)); + std::memcpy(storage->storage().data(), std::get<0>(record).get(), std::get<1>(record)); storage_map_.insert(std::make_pair(record_number, storage)); storage_tensor = storage.get(); } else { @@ -309,7 +309,7 @@ at::Tensor ModuleDecoder::buildTensorCommon( } return at::CPU(onnxTypeToATenType(tensor_proto.data_type())).tensor( - *storage_tensor->storage().get(), storage_offset, dims, strides); + storage_tensor->storage(), storage_offset, dims, strides); } // Given a full name of a parameter or method, diff --git a/torch/csrc/jit/interpreter.cpp b/torch/csrc/jit/interpreter.cpp index 7deeba9ffc8c70..cc55506475d53b 100644 --- a/torch/csrc/jit/interpreter.cpp +++ b/torch/csrc/jit/interpreter.cpp @@ -349,7 +349,7 @@ struct ContainerTensor : public at::TensorImpl { virtual int64_t dim() const override { throw std::runtime_error("dim() on ContainerTensor"); } - virtual std::unique_ptr storage() override { + virtual const at::Storage& storage() override { throw std::runtime_error("storage() on ContainerTensor"); } }; diff --git a/torch/csrc/utils/python_arg_parser.h b/torch/csrc/utils/python_arg_parser.h index 2b389baaf12786..2b1861922567dd 100644 --- a/torch/csrc/utils/python_arg_parser.h +++ b/torch/csrc/utils/python_arg_parser.h @@ -120,7 +120,7 @@ struct PythonArgs { inline std::vector intlist(int i); inline std::vector intlistWithDefault(int i, std::vector default_intlist); inline at::Generator* generator(int i); - inline std::unique_ptr storage(int i); + inline at::Storage storage(int i); inline at::ScalarType scalartype(int i); inline at::ScalarType scalartypeWithDefault(int i, at::ScalarType default_scalartype); inline at::optional scalartypeOptional(int i); @@ -428,7 +428,7 @@ inline at::Generator* PythonArgs::generator(int i) { return reinterpret_cast(args[i])->cdata; } -inline std::unique_ptr PythonArgs::storage(int i) { +inline at::Storage PythonArgs::storage(int i) { if (!args[i]) return nullptr; return createStorage(args[i]); } diff --git 
a/torch/csrc/utils/tensor_new.cpp b/torch/csrc/utils/tensor_new.cpp index 060896c393ca9c..406e11dec7925f 100644 --- a/torch/csrc/utils/tensor_new.cpp +++ b/torch/csrc/utils/tensor_new.cpp @@ -71,7 +71,7 @@ Tensor new_with_sizes(const Type& type, int32_t device_index, IntList sizes) { return torch::empty(sizes, TensorOptions(type, device_index)); } -Tensor new_with_storage(const Type& type, Storage& storage) { +Tensor new_with_storage(const Type& type, Storage storage) { auto tensor = at::empty({}, type); tensor.set_(storage); return tensor; @@ -344,7 +344,7 @@ Tensor legacy_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs) { at::DeviceGuard device_guard(r.device(0)); return type.tensor(); } else if (r.idx == 1) { - return new_with_storage(type, *r.storage(0)); + return new_with_storage(type, r.storage(0)); } else if (r.idx == 2) { auto cdata = reinterpret_cast(r.toInt64(0)); return type.unsafeTensorFromTH(cdata, true); @@ -384,7 +384,7 @@ Tensor legacy_tensor_new(const Type& type, PyObject* args, PyObject* kwargs) { at::DeviceGuard device_guard(r.device(0)); return type.tensor(); } else if (r.idx == 1) { - return new_with_storage(type, *r.storage(0)); + return new_with_storage(type, r.storage(0)); } else if (r.idx == 2) { auto cdata = reinterpret_cast(r.toInt64(0)); return type.unsafeTensorFromTH(cdata, true); diff --git a/torch/csrc/utils/tensor_numpy.cpp b/torch/csrc/utils/tensor_numpy.cpp index 0cee4661208c63..480554510d0833 100644 --- a/torch/csrc/utils/tensor_numpy.cpp +++ b/torch/csrc/utils/tensor_numpy.cpp @@ -79,7 +79,8 @@ PyObject* tensor_to_numpy(const at::Tensor& tensor) { if (PyArray_SetBaseObject((PyArrayObject*)array.get(), py_tensor) == -1) { return NULL; } - tensor.storage()->pImpl()->set_resizable(false); + // Use the private storage API + tensor.storage().unsafeGetStorageImpl()->set_resizable(false); return array.release(); } diff --git a/torch/lib/THD/master_worker/worker/Dispatch.cpp b/torch/lib/THD/master_worker/worker/Dispatch.cpp index 35e7a38731d5cb..c0ba3cadabb455 100644 --- a/torch/lib/THD/master_worker/worker/Dispatch.cpp +++ b/torch/lib/THD/master_worker/worker/Dispatch.cpp @@ -31,8 +31,8 @@ at::Tensor& unpackRetrieveTensor(rpc::RPCMessage& message) { return workerTensors.at(unpackTensor(message)); } -at::Storage* unpackRetrieveStorage(rpc::RPCMessage& message) { - return workerStorages.at(unpackStorage(message)).get(); +at::Storage& unpackRetrieveStorage(rpc::RPCMessage& message) { + return workerStorages.at(unpackStorage(message)); } at::Generator* unpackRetrieveGenerator(rpc::RPCMessage& message) { diff --git a/torch/lib/THD/master_worker/worker/Worker.cpp b/torch/lib/THD/master_worker/worker/Worker.cpp index e5a5a2199af30c..732e20391e9683 100644 --- a/torch/lib/THD/master_worker/worker/Worker.cpp +++ b/torch/lib/THD/master_worker/worker/Worker.cpp @@ -12,7 +12,7 @@ namespace worker { std::unique_ptr workerCommandChannel; std::unordered_map workerTensors; -std::unordered_map> workerStorages; +std::unordered_map workerStorages; std::unordered_map> workerGenerators; } // namespace worker diff --git a/torch/lib/THD/master_worker/worker/Worker.hpp b/torch/lib/THD/master_worker/worker/Worker.hpp index c8d9c6f4e3b6da..5a65e0b0b4e125 100644 --- a/torch/lib/THD/master_worker/worker/Worker.hpp +++ b/torch/lib/THD/master_worker/worker/Worker.hpp @@ -9,7 +9,7 @@ namespace thd { namespace worker { extern std::unique_ptr workerCommandChannel; extern std::unordered_map workerTensors; -extern std::unordered_map> +extern std::unordered_map workerStorages; 
extern std::unordered_map> workerGenerators; diff --git a/torch/lib/THD/master_worker/worker/dispatch/Storage.cpp b/torch/lib/THD/master_worker/worker/dispatch/Storage.cpp index ca583a9db64b57..7172cb11a11d66 100644 --- a/torch/lib/THD/master_worker/worker/dispatch/Storage.cpp +++ b/torch/lib/THD/master_worker/worker/dispatch/Storage.cpp @@ -1,4 +1,4 @@ -static std::unique_ptr createStorage(RPCType type) { +static at::Storage createStorage(RPCType type) { if (type == RPCType::UCHAR) return at::getType(at::Backend::CPU, at::ScalarType::Byte).storage(); else if (type == RPCType::CHAR) @@ -16,14 +16,14 @@ static std::unique_ptr createStorage(RPCType type) { throw std::invalid_argument("passed character doesn't represent a storage type"); } -static std::unique_ptr createStorage(RPCType type, size_t size) { - std::unique_ptr storage = createStorage(type); +static at::Storage createStorage(RPCType type, size_t size) { + at::Storage storage = createStorage(type); storage->resize(size); return storage; } static void storageSet(rpc::RPCMessage& raw_message) { - at::Storage *storage = unpackRetrieveStorage(raw_message); + at::Storage& storage = unpackRetrieveStorage(raw_message); ptrdiff_t offset = unpackInteger(raw_message); RPCType type = peekType(raw_message); if (isInteger(type)) { @@ -40,7 +40,7 @@ static void storageSet(rpc::RPCMessage& raw_message) { } static void storageGet(rpc::RPCMessage& raw_message) { - at::Storage *storage = unpackRetrieveStorage(raw_message); + at::Storage& storage = unpackRetrieveStorage(raw_message); ptrdiff_t offset = unpackInteger(raw_message); RPCType type = unpackType(raw_message); finalize(raw_message); @@ -79,7 +79,7 @@ static void storageNewWithSize(rpc::RPCMessage& raw_message) { static void storageNewWithSizeN(rpc::RPCMessage& raw_message, size_t size) { RPCType storage_type = unpackType(raw_message); object_id_type storage_id = unpackStorage(raw_message); - std::unique_ptr storage = createStorage(storage_type, size); + at::Storage storage = createStorage(storage_type, size); RPCType value_type = peekType(raw_message); if (isInteger(value_type)) { int64_t values[size]; @@ -128,7 +128,7 @@ static void storageFree(rpc::RPCMessage& raw_message) { } static void storageResize(rpc::RPCMessage& raw_message) { - at::Storage *storage = unpackRetrieveStorage(raw_message); + at::Storage& storage = unpackRetrieveStorage(raw_message); int64_t new_size = unpackInteger(raw_message); finalize(raw_message); storage->resize(new_size); diff --git a/torch/lib/c10d/Utils.hpp b/torch/lib/c10d/Utils.hpp index 21c4fc8d25d697..f40ea72f30692f 100644 --- a/torch/lib/c10d/Utils.hpp +++ b/torch/lib/c10d/Utils.hpp @@ -96,7 +96,7 @@ inline std::vector getDevices(const std::vector& tensors) { std::vector devices(tensors.size(), -1); if (tensors[0].type().is_cuda()) { for (size_t i = 0; i < tensors.size(); i++) { - devices[i] = tensors[i].storage()->pImpl()->getDevice(); + devices[i] = tensors[i].storage().device().index(); } } return devices; @@ -106,7 +106,8 @@ template std::vector getDataPointers(const std::vector& tensors) { std::vector ptrs(tensors.size()); for (size_t i = 0; i < tensors.size(); i++) { - ptrs[i] = static_cast(tensors[i].storage()->pImpl()->data()); + // NB: This does NOT respect storage_offset from the tensor + ptrs[i] = static_cast(tensors[i].storage().data()); } return ptrs; } From 22446a361927ac60b83548b46f5b4ac45994eac1 Mon Sep 17 00:00:00 2001 From: Ahmed Aly Date: Wed, 22 Aug 2018 00:16:56 -0700 Subject: [PATCH 19/94] Productionize CRF layer in PyText (#10362) 
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/10362 This diff implements a manual export from PyText's CRF module to the caffe2 CRF layer. Note that most of the changes in caffe2/python/crf.py are just formatting changes, the only relevant change is the new class CRFUtils. Reviewed By: hikushalhere Differential Revision: D9234126 fbshipit-source-id: 1a67d709034660e8b3d5ac840560b56de63e3f69 --- caffe2/python/crf.py | 372 ++++++++++++++++++------------------------- 1 file changed, 155 insertions(+), 217 deletions(-) diff --git a/caffe2/python/crf.py b/caffe2/python/crf.py index fbe4f1bd24a87a..0551eea3886e7a 100644 --- a/caffe2/python/crf.py +++ b/caffe2/python/crf.py @@ -1,17 +1,16 @@ ## @package crf # Module caffe2.python.crf -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from caffe2.python import core, recurrent, model_helper, brew +from __future__ import absolute_import, division, print_function, unicode_literals + import numpy as np +from caffe2.python import brew, core, model_helper, recurrent + -''' +""" Due to a limitation in ReccurentNetworkOp, this layer only supports batch_size=1 In order to support batch_size > 1, we will have to implement the CRFUnit and its gradient in C++ and handle the different batches there. -''' +""" class CRFWithLoss(object): @@ -22,10 +21,10 @@ def __init__(self, model, num_classes, transitions_blob=None): if not transitions_blob: transitions_blob = self.model.param_init_net.UniformFill( [], - [core.ScopedBlobReference('crf_transitions')], + [core.ScopedBlobReference("crf_transitions")], shape=[self.num_classes_padded, self.num_classes_padded], min=-1.0, - max=1.0 + max=1.0, ) self.transitions = transitions_blob self.model.params.append(self.transitions) @@ -35,200 +34,107 @@ def crf_loss(self, predictions, labels, seq_lengths=None): # take a snapshot of it at the beginning since it can be updated # in between the operators that uses it when doing parallel updates transitions_snapshot = self.model.net.Copy( - self.transitions, core.ScopedBlobReference('transitions_snapshot') + self.transitions, core.ScopedBlobReference("transitions_snapshot") ) # Compute best path unary score from the logits path_unary_score = self._gather_entries_sum( predictions, labels, self.num_classes ) # Append BOS and EOS entries to the predictions and labels - predictions = self._pad_predictions(predictions) - labels = self._pad_labels(labels) + predictions = CRFWithLoss.pad_predictions( + predictions, self.model.param_init_net, self.model.net, self.num_classes + ) + labels = CRFWithLoss.pad_labels( + labels, self.model.param_init_net, self.model.net, self.num_classes + ) # Compute best path binary scores from the transitions matrix path_binary_score = self._path_binary_scores( labels, transitions_snapshot, seq_lengths ) path_total_score = self.model.net.Add( [path_binary_score, path_unary_score], - core.ScopedBlobReference('path_total') + core.ScopedBlobReference("path_total"), ) # Compute all paths score - zero_index = self.model.param_init_net.ConstantFill( - [], shape=[1], value=0 - ) + zero_index = self.model.param_init_net.ConstantFill([], shape=[1], value=0) initial_state = self.model.net.Gather( [predictions, zero_index], - core.ScopedBlobReference('rnn_initial'), - dense_gradient=True + core.ScopedBlobReference("rnn_initial"), + dense_gradient=True, ) input_data, _ = self.model.net.RemovePadding( - [predictions], - padding_width=1, - 
end_padding_width=0, - outputs=2, + [predictions], padding_width=1, end_padding_width=0, outputs=2 ) input_data = self.model.net.ExpandDims( - [input_data], - core.ScopedBlobReference('rnn_input_data'), - dims=[1] + [input_data], core.ScopedBlobReference("rnn_input_data"), dims=[1] ) # Due to a bug in RecurrentNetworkGradientOp, we need to copy the # transitions blob before sending it to the recurrent network transitions_copy = self.model.net.Copy( - transitions_snapshot, core.ScopedBlobReference('transitions_copy') + transitions_snapshot, core.ScopedBlobReference("transitions_copy") ) all_paths_scores = self._crf_forward( input_data, initial_state, transitions_copy ) loss = self.model.net.Sub( - [all_paths_scores, path_total_score], - core.ScopedBlobReference('crf_loss') + [all_paths_scores, path_total_score], core.ScopedBlobReference("crf_loss") ) return loss - def _pad_predictions(self, predictions): - # This function will introduce two labels for beginning of sequence - # And end of sequence, it will make the necessary udpates to the - # the predictions blob - - low_score = -1000.0 # An arbitray very low number - b_scores = np.array( - [[low_score] * self.num_classes + [0, low_score]] - ).astype(np.float32) - - e_scores = np.array( - [[low_score] * self.num_classes + [low_score, 0]] - ).astype(np.float32) - - b_scores = self.model.param_init_net.GivenTensorFill( - [], "b_scores", shape=[1, self.num_classes_padded], values=b_scores - ) - e_scores = self.model.param_init_net.GivenTensorFill( - [], "e_scores", shape=[1, self.num_classes_padded], values=e_scores - ) - - zero_index = self.model.net.ConstantFill( - [], shape=[1, ], value=0 - ) - length = self.model.net.Gather( - [self.model.net.Shape([predictions]), zero_index], - ) - length = self.model.net.Cast(length, to='int32') - t_range = self.model.net.LengthsRangeFill(length) - padding = self.model.net.ConstantFill([t_range], value=low_score) - padding = self.model.net.ExpandDims(padding, dims=[1]) - padded_predictions, _ = self.model.net.Concat( - [predictions, padding, padding], - outputs=2, - axis=1 - ) - padded_predictions_concat, _ = self.model.net.Concat( - [b_scores, padded_predictions, e_scores], - outputs=2, - axis=0 - ) - return padded_predictions_concat - - def _pad_labels(self, labels): - bos_i = self.num_classes - eos_i = self.num_classes + 1 - bos_i_b = self.model.param_init_net.ConstantFill( - [], shape=[1], value=bos_i - ) - eos_i_b = self.model.param_init_net.ConstantFill( - [], shape=[1], value=eos_i - ) - labels = self.model.net.Cast([labels], to='int64') - padded_labels, _ = self.model.net.Concat( - [bos_i_b, labels, eos_i_b], - axis=0, - outputs=2 - ) - return padded_labels - def _path_binary_scores(self, labels, transitions, seq_lengths=None): column_ids, _ = self.model.net.RemovePadding( - [labels], - outputs=2, - padding_width=1, - end_padding_width=0 + [labels], outputs=2, padding_width=1, end_padding_width=0 ) row_ids, _ = self.model.net.RemovePadding( - [labels], - outputs=2, - padding_width=0, - end_padding_width=1 + [labels], outputs=2, padding_width=0, end_padding_width=1 ) # Since there is no multi-dimensional gather, I flatten the matrix to # a 1-d vector and transform the ids to (row_ids * num_columns + # column_ids) and do gather in 1-d num_columns_blob = self.model.net.ConstantFill( - [row_ids], - value=self.num_classes_padded, + [row_ids], value=self.num_classes_padded ) flattened_ids = self.model.net.Mul([row_ids, num_columns_blob]) flattened_ids = self.model.net.Add([flattened_ids, column_ids]) 
flattened_transitions = self.model.net.FlattenToVec([transitions]) entries = self.model.net.Gather( - [flattened_transitions, flattened_ids], - dense_gradient=True + [flattened_transitions, flattened_ids], dense_gradient=True ) return self.model.ReduceFrontSum(entries) def _gather_entries_sum(self, in_data, indices, index_size): - indices = self.model.net.Cast([indices], to='int64') + indices = self.model.net.Cast([indices], to="int64") index_size_blob = self.model.param_init_net.ConstantFill( - [], - shape=[1], - value=index_size, - ) - query_one_hot = self.model.net.OneHot( - [indices, index_size_blob] + [], shape=[1], value=index_size ) + query_one_hot = self.model.net.OneHot([indices, index_size_blob]) flattend_query = self.model.net.FlattenToVec(query_one_hot) flattend_data = self.model.net.FlattenToVec(in_data) - query_scores = self.model.net.DotProduct( - [flattend_query, flattend_data] - ) + query_scores = self.model.net.DotProduct([flattend_query, flattend_data]) final_sum = self.model.net.ReduceFrontSum([query_scores]) return final_sum def _crf_forward( - self, - input_blob, - initial_state, - transitions_copy, - seq_lengths=None + self, input_blob, initial_state, transitions_copy, seq_lengths=None ): # Build the RNN net and get the last timestep output - out_last = self.build_crf_net( - input_blob, initial_state, transitions_copy - ) + out_last = self.build_crf_net(input_blob, initial_state, transitions_copy) out_last, _ = self.model.net.Reshape( - [out_last], - outputs=2, - shape=(self.num_classes_padded,) + [out_last], outputs=2, shape=(self.num_classes_padded,) ) zero_segment_id = self.model.param_init_net.ConstantFill( - [], - value=0, - shape=[self.num_classes_padded], - dtype=core.DataType.INT32, + [], value=0, shape=[self.num_classes_padded], dtype=core.DataType.INT32 ) # Compute the accumlated total score of all the paths accum_score = self.model.net.SortedSegmentRangeLogSumExp( [out_last, zero_segment_id] ) - accum_score, _ = self.model.net.Reshape( - accum_score, - outputs=2, - shape=() - ) + accum_score, _ = self.model.net.Reshape(accum_score, outputs=2, shape=()) return accum_score def build_crf_net(self, input_blob, initial_state, transitions): - ''' + """ Adds the crf_net recurrent operator to the model. model: model_helper.ModelHelper object new operators would be added @@ -239,94 +145,75 @@ def build_crf_net(self, input_blob, initial_state, transitions): ##Only supports batch-size 1## seq_lengths: blob containing sequence lengths (unused) - ''' + """ - scope = 'crf_net' + scope = "crf_net" - def s(name): - '' - # We have to manually scope due to our internal/external blob - # relationships. - return "{}/{}".format(str(scope), str(name)) + def s(name): + "" + # We have to manually scope due to our internal/external blob + # relationships. 
+ return "{}/{}".format(str(scope), str(name)) - step_model = model_helper.ModelHelper(name='crf_step', - param_model=self.model) - input_t, cell_t_prev, _ = ( - step_model.net.AddExternalInputs( - core.ScopedBlobReference('input_t'), - core.ScopedBlobReference('cell_t_prev'), - transitions - ) - ) - zero_segment_id = step_model.param_init_net.ConstantFill( - [], - [s('zero_segment_id')], - value=0, - shape=[self.num_classes_padded], - dtype=core.DataType.INT32, - ) + step_model = model_helper.ModelHelper(name="crf_step", param_model=self.model) + input_t, cell_t_prev, _ = step_model.net.AddExternalInputs( + core.ScopedBlobReference("input_t"), + core.ScopedBlobReference("cell_t_prev"), + transitions, + ) + zero_segment_id = step_model.param_init_net.ConstantFill( + [], + [s("zero_segment_id")], + value=0, + shape=[self.num_classes_padded], + dtype=core.DataType.INT32, + ) - # A hack to bypass model cloning for test - step_model.param_init_net.AddExternalOutput(zero_segment_id) - """ the CRF step """ - # Do tile - prev_transpose = brew.transpose( - step_model, - cell_t_prev, - [s('prev_transpose')], - axes=(0, 2, 1), - ) - prev_tiled = step_model.net.Tile( - prev_transpose, - [s('prev_tiled')], - tiles=self.num_classes_padded, - axis=2, - ) - input_t_tiled = step_model.net.Tile( - input_t, - [s('input_t_tiled')], - tiles=self.num_classes_padded, - axis=1, - ) - input_with_prev = step_model.net.Add( - [prev_tiled, input_t_tiled], - [s('input_with_prev')] - ) - all_with_transitions = step_model.net.Add( - [input_with_prev, transitions], - [s('prev_with_transitions')], - broadcast=1, - use_grad_hack=1, - ) - all_with_transitions_reshaped, _ = step_model.net.Reshape( - all_with_transitions, - [s('all_with_transitions_reshaped'), s('all_with_transitions_orig')], - shape=(self.num_classes_padded, self.num_classes_padded) - ) - cell_t = step_model.net.SortedSegmentRangeLogSumExp( - [all_with_transitions_reshaped, zero_segment_id], - [s('cell_t')], - ) - step_model.net.AddExternalOutputs(cell_t) - """ recurrent network """ - cell_input_blob = initial_state - out_all, out_last = recurrent.recurrent_net( - net=self.model.net, - cell_net=step_model.net, - inputs=[(input_t, input_blob)], - initial_cell_inputs=[ - (cell_t_prev, cell_input_blob), - ], - links={ - cell_t_prev: cell_t, - }, - scope=scope, - outputs_with_grads=(1,) - ) - return out_last + # A hack to bypass model cloning for test + step_model.param_init_net.AddExternalOutput(zero_segment_id) + """ the CRF step """ + # Do tile + prev_transpose = brew.transpose( + step_model, cell_t_prev, [s("prev_transpose")], axes=(0, 2, 1) + ) + prev_tiled = step_model.net.Tile( + prev_transpose, [s("prev_tiled")], tiles=self.num_classes_padded, axis=2 + ) + input_t_tiled = step_model.net.Tile( + input_t, [s("input_t_tiled")], tiles=self.num_classes_padded, axis=1 + ) + input_with_prev = step_model.net.Add( + [prev_tiled, input_t_tiled], [s("input_with_prev")] + ) + all_with_transitions = step_model.net.Add( + [input_with_prev, transitions], + [s("prev_with_transitions")], + broadcast=1, + use_grad_hack=1, + ) + all_with_transitions_reshaped, _ = step_model.net.Reshape( + all_with_transitions, + [s("all_with_transitions_reshaped"), s("all_with_transitions_orig")], + shape=(self.num_classes_padded, self.num_classes_padded), + ) + cell_t = step_model.net.SortedSegmentRangeLogSumExp( + [all_with_transitions_reshaped, zero_segment_id], [s("cell_t")] + ) + step_model.net.AddExternalOutputs(cell_t) + """ recurrent network """ + cell_input_blob = initial_state + 
out_all, out_last = recurrent.recurrent_net( + net=self.model.net, + cell_net=step_model.net, + inputs=[(input_t, input_blob)], + initial_cell_inputs=[(cell_t_prev, cell_input_blob)], + links={cell_t_prev: cell_t}, + scope=scope, + outputs_with_grads=(1,), + ) + return out_last def update_predictions(self, classes): - def crf_update_predictions_op(inputs, outputs): # This operator will compute the best path of classes by performing # Viterbi decoding and then updates the predictions to make the tag @@ -360,16 +247,67 @@ def crf_update_predictions_op(inputs, outputs): old_bests.append(old_best) # Swap the scores of the current best tag and the tag on the # Viterbi path - w_predictions[viterbi[i]], w_predictions[old_best] = \ - w_predictions[old_best], w_predictions[viterbi[i]] + w_predictions[viterbi[i]], w_predictions[old_best] = ( + w_predictions[old_best], + w_predictions[viterbi[i]], + ) new_predictions[i] = w_predictions # Remove the BOS and EOS entries from the predictions matrix orig_predictions = new_predictions[1:-1, 0:-2] outputs[0].reshape(orig_predictions.shape) outputs[0].data[...] = orig_predictions - padded_classes = self._pad_predictions(classes) + + padded_classes = CRFWithLoss.pad_predictions( + classes, self.model.param_init_net, self.model.net, self.num_classes + ) new_classes = self.model.net.Python(crf_update_predictions_op)( [padded_classes, self.transitions], - core.ScopedBlobReference('post_crf_classes') + core.ScopedBlobReference("post_crf_classes"), ) return new_classes + + @staticmethod + def pad_labels(labels, init_net, net, num_classes): + bos_i = num_classes + eos_i = num_classes + 1 + bos_i_b = init_net.ConstantFill([], shape=[1], value=bos_i) + eos_i_b = init_net.ConstantFill([], shape=[1], value=eos_i) + labels = net.Cast([labels], to="int64") + padded_labels, _ = net.Concat([bos_i_b, labels, eos_i_b], axis=0, outputs=2) + return padded_labels + + @staticmethod + def pad_predictions(predictions, init_net, net, num_classes): + # This function will introduce two labels for beginning of sequence + # And end of sequence, it will make the necessary udpates to the + # the predictions blob + + low_score = -1000.0 # An arbitray very low number + b_scores = np.array([[low_score] * num_classes + [0, low_score]]).astype( + np.float32 + ) + + e_scores = np.array([[low_score] * num_classes + [low_score, 0]]).astype( + np.float32 + ) + + b_scores = init_net.GivenTensorFill( + [], "b_scores", shape=[1, num_classes + 2], values=b_scores + ) + e_scores = init_net.GivenTensorFill( + [], "e_scores", shape=[1, num_classes + 2], values=e_scores + ) + + zero_index = net.ConstantFill([], shape=[1], value=0) + length = net.Gather([net.Shape([predictions]), zero_index]) + length = net.Cast(length, to="int32") + t_range = net.LengthsRangeFill(length) + padding = net.ConstantFill([t_range], value=low_score) + padding = net.ExpandDims(padding, dims=[1]) + padded_predictions, _ = net.Concat( + [predictions, padding, padding], outputs=2, axis=1 + ) + padded_predictions_concat, _ = net.Concat( + [b_scores, padded_predictions, e_scores], outputs=2, axis=0 + ) + return padded_predictions_concat From 2fe5fa78fab59b3384dedc33a526341a097d6ccc Mon Sep 17 00:00:00 2001 From: Pritam Damania Date: Wed, 22 Aug 2018 01:00:17 -0700 Subject: [PATCH 20/94] Use FinishDeviceComputation instead of adding events in Operator::SyncDevice Summary: The code in Operator::SyncDevice had some duplicate logic and using FinishDeviceComputation sufficed in this case. 
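For context, a minimal sketch of the event-based host/device synchronization that the deleted Operator<CUDAContext>::SyncDevice specialization (removed in operator_gpu.cc below) performed by hand, and that FinishDeviceComputation now covers. This is an illustrative standalone snippet, not Caffe2 code; the check() helper and the sync_stream_via_event() name are assumptions introduced here, and only standard CUDA runtime API calls are used.

#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Illustrative helper (not a Caffe2 API): abort with a readable
// message on any CUDA error.
static void check(cudaError_t err, const char* what) {
  if (err != cudaSuccess) {
    std::fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(err));
    std::exit(1);
  }
}

// Block the calling host thread until all work previously queued on
// `stream` has finished on the device, using a CUDA event for ordering.
void sync_stream_via_event(cudaStream_t stream) {
  cudaEvent_t ev;
  // Timing is disabled; the event is used purely for ordering.
  check(cudaEventCreateWithFlags(&ev, cudaEventDisableTiming), "create");
  // The event completes once everything queued on `stream` before
  // this call has executed.
  check(cudaEventRecord(ev, stream), "record");
  check(cudaEventSynchronize(ev), "synchronize");
  check(cudaEventDestroy(ev), "destroy");
}

Calling context_.FinishDeviceComputation() on the operator's context performs the equivalent wait on the operator's own stream, with error checking, which is why the hand-rolled event logic could be dropped.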
Reviewed By: yinghai Differential Revision: D9348288 fbshipit-source-id: d8d874bab491e6d448fcd5fa561a8b99d502753b --- caffe2/core/operator.h | 8 +++++--- caffe2/core/operator_gpu.cc | 26 -------------------------- 2 files changed, 5 insertions(+), 29 deletions(-) delete mode 100644 caffe2/core/operator_gpu.cc diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h index b1f31af6e33d8a..28f3df0386f2a8 100644 --- a/caffe2/core/operator.h +++ b/caffe2/core/operator.h @@ -338,7 +338,7 @@ class CAFFE2_API OperatorBase : public Observable { return !event_; } - virtual void SyncDevice() { + virtual void FinishDeviceComputation() { CAFFE_NOT_IMPLEMENTED; } @@ -598,12 +598,14 @@ class CAFFE2_API Operator : public OperatorBase { return HasAsyncPart() && context_.SupportsAsyncScheduling(); } + void FinishDeviceComputation() override { + context_.FinishDeviceComputation(); + } + const Context* getContext() const { return &context_; } - void SyncDevice() final {} - protected: void RecordEvent(const char* err_msg = nullptr) final { if (event_) { diff --git a/caffe2/core/operator_gpu.cc b/caffe2/core/operator_gpu.cc deleted file mode 100644 index 03f227f7453524..00000000000000 --- a/caffe2/core/operator_gpu.cc +++ /dev/null @@ -1,26 +0,0 @@ -#include "caffe2/core/context_gpu.h" -#include "caffe2/core/operator.h" - -namespace caffe2 { - -template <> -void Operator::SyncDevice() { - auto* context = getContext(); - int device; - cudaGetDevice(&device); - - cudaEvent_t ev; - cudaSetDevice(context->cuda_gpu_id()); - cudaEventCreateWithFlags(&ev, cudaEventDisableTiming); - cudaEventRecord(ev, context->cuda_stream()); - cudaEventSynchronize(ev); - cudaEventDestroy(ev); - cudaSetDevice(device); - - cudaError_t error = cudaGetLastError(); - if (error != cudaSuccess) { - CAFFE_THROW("Encountered CUDA error Stop: ", cudaGetErrorString(error)); - } -} - -} // namespace caffe2 From 227635142f124bfb2c2626916d237cb0c6033092 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Wed, 22 Aug 2018 08:45:44 -0700 Subject: [PATCH 21/94] Delete THD master_worker (#10731) Summary: Signed-off-by: Edward Z. 
Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/10731 Differential Revision: D9423675 Pulled By: ezyang fbshipit-source-id: 37221e11d84cc3672b944af598ea229a1d4c38cc --- CMakeLists.txt | 1 - setup.py | 12 +- tools/build_pytorch_libs.bat | 8 - tools/build_pytorch_libs.sh | 7 - tools/setup_helpers/dist_check.py | 1 - torch/csrc/distributed/Module.cpp | 99 +- torch/csrc/distributed/Storage.cpp | 18 - torch/csrc/distributed/Storage.h | 45 - torch/csrc/distributed/THDP.h | 7 - torch/lib/THD/CMakeLists.txt | 10 - torch/lib/THD/THD.h | 10 - torch/lib/THD/master_worker/README.md | 1 - .../THD/master_worker/common/ByteArray.cpp | 55 - .../THD/master_worker/common/ByteArray.hpp | 29 - .../master_worker/common/CommandChannel.cpp | 212 --- .../master_worker/common/CommandChannel.hpp | 55 - .../THD/master_worker/common/Functions.hpp | 253 ---- .../lib/THD/master_worker/common/RPC-inl.hpp | 109 -- torch/lib/THD/master_worker/common/RPC.cpp | 157 -- torch/lib/THD/master_worker/common/RPC.hpp | 53 - torch/lib/THD/master_worker/common/Traits.hpp | 66 - torch/lib/THD/master_worker/master/Master.cpp | 42 - torch/lib/THD/master_worker/master/Master.h | 7 - torch/lib/THD/master_worker/master/Master.hpp | 14 - torch/lib/THD/master_worker/master/State.cpp | 14 - torch/lib/THD/master_worker/master/State.h | 6 - torch/lib/THD/master_worker/master/State.hpp | 28 - .../THD/master_worker/master/THDRandom.cpp | 64 - .../lib/THD/master_worker/master/THDRandom.h | 21 - .../THD/master_worker/master/THDStorage.cpp | 14 - .../lib/THD/master_worker/master/THDStorage.h | 10 - .../THD/master_worker/master/THDTensor.cpp | 31 - .../lib/THD/master_worker/master/THDTensor.h | 27 - torch/lib/THD/master_worker/master/Utils.hpp | 19 - .../master/generic/THDStorage.cpp | 215 --- .../master_worker/master/generic/THDStorage.h | 53 - .../master/generic/THDTensor.cpp | 1334 ----------------- .../master_worker/master/generic/THDTensor.h | 215 --- .../master/generic/THDTensorCopy.cpp | 30 - .../master/generic/THDTensorCopy.h | 10 - .../master/generic/THDTensorLapack.cpp | 458 ------ .../master/generic/THDTensorLapack.h | 27 - .../master/generic/THDTensorMath.cpp | 933 ------------ .../master/generic/THDTensorMath.h | 150 -- .../master/generic/THDTensorMeta.cpp | 149 -- .../master/generic/THDTensorRandom.cpp | 129 -- .../master/generic/THDTensorRandom.h | 32 - .../lib/THD/master_worker/worker/Dispatch.cpp | 301 ---- .../lib/THD/master_worker/worker/Dispatch.hpp | 13 - torch/lib/THD/master_worker/worker/Worker.cpp | 43 - torch/lib/THD/master_worker/worker/Worker.h | 7 - torch/lib/THD/master_worker/worker/Worker.hpp | 16 - .../worker/dispatch/Communication.cpp | 14 - .../worker/dispatch/Generator.cpp | 36 - .../master_worker/worker/dispatch/Storage.cpp | 151 -- .../master_worker/worker/dispatch/Tensor.cpp | 979 ------------ .../worker/dispatch/TensorCopy.cpp | 12 - .../worker/dispatch/TensorLapack.cpp | 146 -- .../worker/dispatch/TensorMath.cpp | 626 -------- .../worker/dispatch/TensorRandom.cpp | 98 -- torch/lib/THD/test/command_channel_smoke.cpp | 138 -- torch/lib/THD/test/rpc_serialization.cpp | 67 - 62 files changed, 3 insertions(+), 7884 deletions(-) delete mode 100644 torch/csrc/distributed/Storage.cpp delete mode 100644 torch/csrc/distributed/Storage.h delete mode 100644 torch/lib/THD/master_worker/README.md delete mode 100644 torch/lib/THD/master_worker/common/ByteArray.cpp delete mode 100644 torch/lib/THD/master_worker/common/ByteArray.hpp delete mode 100644 torch/lib/THD/master_worker/common/CommandChannel.cpp 
delete mode 100644 torch/lib/THD/master_worker/common/CommandChannel.hpp delete mode 100644 torch/lib/THD/master_worker/common/Functions.hpp delete mode 100644 torch/lib/THD/master_worker/common/RPC-inl.hpp delete mode 100644 torch/lib/THD/master_worker/common/RPC.cpp delete mode 100644 torch/lib/THD/master_worker/common/RPC.hpp delete mode 100644 torch/lib/THD/master_worker/common/Traits.hpp delete mode 100644 torch/lib/THD/master_worker/master/Master.cpp delete mode 100644 torch/lib/THD/master_worker/master/Master.h delete mode 100644 torch/lib/THD/master_worker/master/Master.hpp delete mode 100644 torch/lib/THD/master_worker/master/State.cpp delete mode 100644 torch/lib/THD/master_worker/master/State.h delete mode 100644 torch/lib/THD/master_worker/master/State.hpp delete mode 100644 torch/lib/THD/master_worker/master/THDRandom.cpp delete mode 100644 torch/lib/THD/master_worker/master/THDRandom.h delete mode 100644 torch/lib/THD/master_worker/master/THDStorage.cpp delete mode 100644 torch/lib/THD/master_worker/master/THDStorage.h delete mode 100644 torch/lib/THD/master_worker/master/THDTensor.cpp delete mode 100644 torch/lib/THD/master_worker/master/THDTensor.h delete mode 100644 torch/lib/THD/master_worker/master/Utils.hpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDStorage.cpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDStorage.h delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensor.cpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensor.h delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorCopy.cpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorCopy.h delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorLapack.cpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorLapack.h delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorMath.cpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorMath.h delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorMeta.cpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorRandom.cpp delete mode 100644 torch/lib/THD/master_worker/master/generic/THDTensorRandom.h delete mode 100644 torch/lib/THD/master_worker/worker/Dispatch.cpp delete mode 100644 torch/lib/THD/master_worker/worker/Dispatch.hpp delete mode 100644 torch/lib/THD/master_worker/worker/Worker.cpp delete mode 100644 torch/lib/THD/master_worker/worker/Worker.h delete mode 100644 torch/lib/THD/master_worker/worker/Worker.hpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/Communication.cpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/Generator.cpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/Storage.cpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/Tensor.cpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/TensorCopy.cpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/TensorLapack.cpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/TensorMath.cpp delete mode 100644 torch/lib/THD/master_worker/worker/dispatch/TensorRandom.cpp delete mode 100644 torch/lib/THD/test/command_channel_smoke.cpp delete mode 100644 torch/lib/THD/test/rpc_serialization.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index edbd4381c70bab..3be31735fa39ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,7 +140,6 @@ cmake_dependent_option( USE_MKLML "Use MKLML interface in 
MKL BLAS" ON "BUILD_CAFFE2" OFF) option(USE_DISTRIBUTED "Use THD (distributed)" OFF) -option(USE_DISTRIBUTED_MW "Use THD (distributed) master worker" OFF) # Used when building Caffe2 through setup.py option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF) diff --git a/setup.py b/setup.py index 3abd279394720a..e8a5a128854aad 100644 --- a/setup.py +++ b/setup.py @@ -109,7 +109,7 @@ # Before we run the setup_helpers, let's look for NO_* and WITH_* # variables and hotpatch the environment with the USE_* equivalent -config_env_vars = ['CUDA', 'CUDNN', 'MKLDNN', 'NNPACK', 'DISTRIBUTED', 'DISTRIBUTED_MW', +config_env_vars = ['CUDA', 'CUDNN', 'MKLDNN', 'NNPACK', 'DISTRIBUTED', 'SYSTEM_NCCL', 'GLOO_IBVERBS'] @@ -138,7 +138,7 @@ def hotpatch_var(var): from tools.setup_helpers.generate_code import generate_code from tools.setup_helpers.ninja_builder import NinjaBuilder, ninja_build_ext from tools.setup_helpers.dist_check import USE_DISTRIBUTED, \ - USE_DISTRIBUTED_MW, USE_GLOO_IBVERBS, USE_C10D + USE_GLOO_IBVERBS, USE_C10D ################################################################################ # Parameters parsed from environment @@ -340,8 +340,6 @@ def build_libs(libs): build_libs_cmd += ['--use-mkldnn'] if USE_GLOO_IBVERBS: build_libs_cmd += ['--use-gloo-ibverbs'] - if USE_DISTRIBUTED_MW: - build_libs_cmd += ['--use-distributed-mw'] if FULL_CAFFE2: build_libs_cmd += ['--full-caffe2'] @@ -841,12 +839,6 @@ def run(self): main_sources += [ "torch/csrc/distributed/Module.cpp", ] - if USE_DISTRIBUTED_MW: - main_sources += [ - "torch/csrc/distributed/Tensor.cpp", - "torch/csrc/distributed/Storage.cpp", - ] - extra_compile_args += ['-DUSE_DISTRIBUTED_MW'] include_dirs += [tmp_install_path + "/include/THD"] main_link_args += [THD_LIB] diff --git a/tools/build_pytorch_libs.bat b/tools/build_pytorch_libs.bat index 2f8b3ae1c5ebce..568d83be1bc0c5 100755 --- a/tools/build_pytorch_libs.bat +++ b/tools/build_pytorch_libs.bat @@ -55,14 +55,6 @@ IF "%~1"=="--use-gloo-ibverbs" ( set /a USE_GLOO_IBVERBS=0 ) -IF "%~1"=="--use-distributed-mw" ( - set /a USE_DISTRIBUTED_MW=1 - echo Warning: distributed mw is enabled but build is not yet implemented 1>&2 - shift -) ELSE ( - set /a USE_DISTRIBUTED_MW=0 -) - set BUILD_TYPE=Release IF "%DEBUG%"=="1" ( set BUILD_TYPE=Debug diff --git a/tools/build_pytorch_libs.sh b/tools/build_pytorch_libs.sh index f53de42c90a60b..5934d9344d1927 100755 --- a/tools/build_pytorch_libs.sh +++ b/tools/build_pytorch_libs.sh @@ -16,7 +16,6 @@ USE_ROCM=0 USE_NNPACK=0 USE_MKLDNN=0 USE_GLOO_IBVERBS=0 -USE_DISTRIBUTED_MW=0 FULL_CAFFE2=0 while [[ $# -gt 0 ]]; do case "$1" in @@ -35,9 +34,6 @@ while [[ $# -gt 0 ]]; do --use-gloo-ibverbs) USE_GLOO_IBVERBS=1 ;; - --use-distributed-mw) - USE_DISTRIBUTED_MW=1 - ;; --full-caffe2) FULL_CAFFE2=1 ;; @@ -118,9 +114,6 @@ if [[ $USE_GLOO_IBVERBS -eq 1 ]]; then GLOO_FLAGS+=" -DUSE_IBVERBS=1 -DBUILD_SHARED_LIBS=1" THD_FLAGS="-DUSE_GLOO_IBVERBS=1" fi -if [[ $USE_DISTRIBUTED_MW -eq 1 ]]; then - THD_FLAGS+="-DUSE_DISTRIBUTED_MW=1" -fi CWRAP_FILES="\ $BASE_DIR/torch/lib/ATen/Declarations.cwrap;\ $BASE_DIR/torch/lib/THNN/generic/THNN.h;\ diff --git a/tools/setup_helpers/dist_check.py b/tools/setup_helpers/dist_check.py index faae4cdeb66528..7afffddd800751 100644 --- a/tools/setup_helpers/dist_check.py +++ b/tools/setup_helpers/dist_check.py @@ -7,7 +7,6 @@ # On ROCm, RCCL development isn't complete. 
https://github.com/ROCmSoftwarePlatform/rccl USE_DISTRIBUTED = not check_negative_env_flag("USE_DISTRIBUTED") and not IS_WINDOWS and not check_env_flag("USE_ROCM") -USE_DISTRIBUTED_MW = USE_DISTRIBUTED and check_env_flag("USE_DISTRIBUTED_MW") USE_GLOO_IBVERBS = False USE_C10D = USE_DISTRIBUTED and USE_CUDA and IS_LINUX diff --git a/torch/csrc/distributed/Module.cpp b/torch/csrc/distributed/Module.cpp index af6b12f63c8760..ddfee7cb0440d8 100644 --- a/torch/csrc/distributed/Module.cpp +++ b/torch/csrc/distributed/Module.cpp @@ -21,64 +21,6 @@ static std::unordered_map<std::string, THDChannelType> name2channel_type = { {"nccl", THDChannelNccl}, }; -static bool THDPModule_loadClasses(PyObject *self) -{ -#ifdef USE_DISTRIBUTED_MW -#define ASSERT_NOT_NULL(ptr) if (!(ptr)) { THPUtils_setError("couldn't load classes"); return false; } - PyObject *torch_module = PyImport_ImportModule("torch.distributed"); - if (!torch_module) { - THPUtils_setError("class loader couldn't access torch.distributed module"); - return false; - } - - if (!THDPDoubleTensor_postInit(torch_module)) return false; - if (!THDPFloatTensor_postInit(torch_module)) return false; - if (!THDPHalfTensor_postInit(torch_module)) return false; - if (!THDPLongTensor_postInit(torch_module)) return false; - if (!THDPIntTensor_postInit(torch_module)) return false; - if (!THDPShortTensor_postInit(torch_module)) return false; - if (!THDPCharTensor_postInit(torch_module)) return false; - if (!THDPByteTensor_postInit(torch_module)) return false; - - ASSERT_NOT_NULL(THDPDoubleStorageClass = PyObject_GetAttrString(torch_module,(char*)"DoubleStorage")); - ASSERT_NOT_NULL(THDPFloatStorageClass = PyObject_GetAttrString(torch_module,(char*)"FloatStorage")); - ASSERT_NOT_NULL(THDPHalfStorageClass = PyObject_GetAttrString(torch_module,(char*)"HalfStorage")); - ASSERT_NOT_NULL(THDPLongStorageClass = PyObject_GetAttrString(torch_module,(char*)"LongStorage")); - ASSERT_NOT_NULL(THDPIntStorageClass = PyObject_GetAttrString(torch_module,(char*)"IntStorage")); - ASSERT_NOT_NULL(THDPShortStorageClass = PyObject_GetAttrString(torch_module,(char*)"ShortStorage")); - ASSERT_NOT_NULL(THDPCharStorageClass = PyObject_GetAttrString(torch_module,(char*)"CharStorage")); - ASSERT_NOT_NULL(THDPByteStorageClass = PyObject_GetAttrString(torch_module,(char*)"ByteStorage")); - -#undef ASSERT_NOT_NULL -#endif - return true; -} - -static bool THDPModule_assignStateless(PyObject *self) -{ -#ifdef USE_DISTRIBUTED_MW -#define INIT_STATELESS(type) \ - stateless = PyObject_CallFunctionObjArgs((PyObject*)&TH_CONCAT_3(THDP, type, TensorStatelessType), NULL); \ - if (!stateless) { \ - return false; \ - } \ - if (PyObject_SetAttrString(TH_CONCAT_3(THDP,type,TensorClass), THP_STATELESS_ATTRIBUTE_NAME, stateless) == -1) { \ - return false; \ - } - PyObject *stateless; - INIT_STATELESS(Double); - INIT_STATELESS(Float); - INIT_STATELESS(Half); - INIT_STATELESS(Long); - INIT_STATELESS(Int); - INIT_STATELESS(Short); - INIT_STATELESS(Char); - INIT_STATELESS(Byte); -#undef INIT_STATELESS -#endif - return true; -} - -static std::unordered_map<PyObject*, THDReduceOp> obj2reduceop; static std::unordered_map<PyObject*, THDGroup> obj2group; @@ -126,38 +68,6 @@ PyObject* THDPModule_destroyProcessGroup(PyObject *_unused) { END_HANDLE_TH_ERRORS } -#ifdef USE_DISTRIBUTED_MW -PyObject* THDPModule_initMasterWorker(PyObject *_unused, PyObject *args) -{ - HANDLE_TH_ERRORS - if (PyTuple_GET_SIZE(args) != 5 || !THPUtils_checkString(PyTuple_GET_ITEM(args, 0)) || - !THPUtils_checkString(PyTuple_GET_ITEM(args, 1)) || - !THPUtils_checkLong(PyTuple_GET_ITEM(args, 2)) ||
!THPUtils_checkString(PyTuple_GET_ITEM(args, 3)) || - !THPUtils_checkLong(PyTuple_GET_ITEM(args, 4))) { - THPUtils_invalidArguments(args, NULL, "init_master_worker", 1, "(string backend, string init_method, int world_size, string group_name, int rank)"); - return NULL; - } - - std::string backend_name = THPUtils_unpackString(PyTuple_GET_ITEM(args, 0)); - std::string init_method = THPUtils_unpackString(PyTuple_GET_ITEM(args, 1)); - int world_size = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 2)); - std::string group_name = THPUtils_unpackString(PyTuple_GET_ITEM(args, 3)); - int rank = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 4)); - - THDChannelType channel_type = name2channel_type.at(backend_name); - { - AutoNoGIL nogil; - THDMasterWorkerInit(channel_type, init_method, world_size, group_name, rank); - } -#ifdef USE_CUDA - THDSetCudaStatePtr(&state); -#endif - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} -#endif - #ifdef USE_CUDA PyObject* THDPModule_registerStream(PyObject *_unused, PyObject *_stream) { @@ -962,11 +872,7 @@ PyObject* THDPModule_initExtension(PyObject *_unused, PyObject *args) { #undef REGISTER_GROUP if (is_master_worker) { - PyObject *module = PyImport_ImportModule("torch.distributed"); - THPUtils_assert(module, "class loader couldn't access torch.distributed module"); - PyObject* module_dict = PyModule_GetDict(module); - if (!THDPModule_loadClasses(module_dict)) return NULL; - if (!THDPModule_assignStateless(module_dict)) return NULL; + throw std::runtime_error("THD master_worker no longer supported"); } Py_RETURN_TRUE; } @@ -976,9 +882,6 @@ static struct PyMethodDef _THDPModule_methods[] = { {"_dist_init_process_group", (PyCFunction)THDPModule_initProcessGroup, METH_VARARGS, NULL}, {"_dist_destroy_process_group", (PyCFunction)THDPModule_destroyProcessGroup, METH_NOARGS, NULL}, {"_dist_clear_group_cache", (PyCFunction)THDPModule_clearGroupCache, METH_VARARGS, NULL}, -#ifdef USE_DISTRIBUTED_MW - {"_dist_init_master_worker", (PyCFunction)THDPModule_initMasterWorker, METH_VARARGS, NULL}, -#endif #ifdef USE_CUDA {"_dist_register_stream", (PyCFunction)THDPModule_registerStream, METH_O, NULL}, #endif diff --git a/torch/csrc/distributed/Storage.cpp b/torch/csrc/distributed/Storage.cpp deleted file mode 100644 index e6b0829e3f0735..00000000000000 --- a/torch/csrc/distributed/Storage.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "torch/csrc/python_headers.h" -#include - -#define THP_HOST_HALF - -#include -#include -#include "THDP.h" -#include "torch/csrc/copy_utils.h" - -#include "override_macros.h" - -#define THD_GENERIC_FILE "torch/csrc/generic/Storage.cpp" -#include - -//#define THD_GENERIC_FILE "torch/csrc/generic/StorageCopy.cpp" -//#include - diff --git a/torch/csrc/distributed/Storage.h b/torch/csrc/distributed/Storage.h deleted file mode 100644 index 5639efba2e3e52..00000000000000 --- a/torch/csrc/distributed/Storage.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef THDP_STORAGE_INC -#define THDP_STORAGE_INC - -#define THDPStorage TH_CONCAT_3(THDP,Real,Storage) -#define THDPStorageStr TH_CONCAT_STRING_3(torch.cuda.,Real,Storage) -#define THDPStorageClass TH_CONCAT_3(THDP,Real,StorageClass) -#define THDPStorage_(NAME) TH_CONCAT_4(THDP,Real,Storage_,NAME) - -#define THDPDoubleStorage_Check(obj) \ - PyObject_IsInstance(obj, THDPDoubleStorageClass) -#define THDPFloatStorage_Check(obj) \ - PyObject_IsInstance(obj, THDPFloatStorageClass) -#define THDPHalfStorage_Check(obj) \ - PyObject_IsInstance(obj, THDPHalfStorageClass) -#define THDPLongStorage_Check(obj) \ - PyObject_IsInstance(obj, 
THDPLongStorageClass) -#define THDPIntStorage_Check(obj) \ - PyObject_IsInstance(obj, THDPIntStorageClass) -#define THDPShortStorage_Check(obj) \ - PyObject_IsInstance(obj, THDPShortStorageClass) -#define THDPCharStorage_Check(obj) \ - PyObject_IsInstance(obj, THDPCharStorageClass) -#define THDPByteStorage_Check(obj) \ - PyObject_IsInstance(obj, THDPByteStorageClass) - -#define THDPDoubleStorage_CData(obj) (obj)->cdata -#define THDPFloatStorage_CData(obj) (obj)->cdata -#define THDPLongStorage_CData(obj) (obj)->cdata -#define THDPIntStorage_CData(obj) (obj)->cdata -#define THDPShortStorage_CData(obj) (obj)->cdata -#define THDPCharStorage_CData(obj) (obj)->cdata -#define THDPByteStorage_CData(obj) (obj)->cdata - -#ifdef _THP_CORE -#define THDPStorageType TH_CONCAT_3(THDP,Real,StorageType) -#define THDPStorageBaseStr TH_CONCAT_STRING_3(Distributed,Real,StorageBase) -#endif - -#include "override_macros.h" - -#define THD_GENERIC_FILE "torch/csrc/generic/Storage.h" -#include - -#endif - diff --git a/torch/csrc/distributed/THDP.h b/torch/csrc/distributed/THDP.h index 911361b22ddf50..ec546f4088a777 100644 --- a/torch/csrc/distributed/THDP.h +++ b/torch/csrc/distributed/THDP.h @@ -5,12 +5,5 @@ #include "torch/csrc/THP.h" #include "Module.h" -#ifdef USE_DISTRIBUTED_MW -#include "Storage.h" -#include "../PtrWrapper.h" -#ifdef _THP_CORE -#include "utils.h" -#endif -#endif #endif diff --git a/torch/lib/THD/CMakeLists.txt b/torch/lib/THD/CMakeLists.txt index e5cf3cb7a7dcdb..b4ed95a46a2d65 100644 --- a/torch/lib/THD/CMakeLists.txt +++ b/torch/lib/THD/CMakeLists.txt @@ -104,8 +104,6 @@ IF(NOT THD_INSTALL_BIN_DIR OR NOT THD_INSTALL_LIB_DIR OR NOT THD_INSTALL_INCLUDE SET(THD_INSTALL_INCLUDE_DIR "include" CACHE PATH "THD install include subdirectory") ENDIF() -FILE(GLOB_RECURSE master_worker_h RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "master_worker/*.h") -FILE(GLOB_RECURSE master_worker_cpp "master_worker/*.cpp") FILE(GLOB_RECURSE process_group_h RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "process_group/*.h") FILE(GLOB_RECURSE process_group_cpp "process_group/*.cpp") FILE(GLOB_RECURSE base_h RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "base/*.h") @@ -128,17 +126,9 @@ IF(NOT DISTRIBUTED_NCCL_FOUND) LIST(REMOVE_ITEM base_cpp "${CMAKE_CURRENT_SOURCE_DIR}/base/data_channels/DataChannelNccl.cpp") ENDIF() -EXCLUDE_DIR(master_worker_cpp ".*/dispatch/.*\\.cpp$") - SET(all_cpp ${base_cpp} ${process_group_cpp}) SET(all_h THD.h ${base_h} ${process_group_h}) -IF(USE_DISTRIBUTED_MW) - ADD_DEFINITIONS(-DUSE_DISTRIBUTED_MW=1) - SET(all_cpp ${all_cpp} ${master_worker_cpp}) - SET(all_h THD.h ${all_h} ${master_worker_h}) -ENDIF() - EXCLUDE_DIR(all_cpp ".*/generic/.*\\.cpp$") # Need to include external NCCL first diff --git a/torch/lib/THD/THD.h b/torch/lib/THD/THD.h index e7f2f8b9f6b3d4..881ce0fab14add 100644 --- a/torch/lib/THD/THD.h +++ b/torch/lib/THD/THD.h @@ -18,13 +18,3 @@ #include "process_group/General.h" #include "process_group/Collectives.h" - -#ifdef USE_DISTRIBUTED_MW -#include "master_worker/master/Master.h" -#include "master_worker/master/State.h" -#include "master_worker/master/THDRandom.h" -#include "master_worker/master/THDStorage.h" -#include "master_worker/master/THDTensor.h" - -#include "master_worker/worker/Worker.h" -#endif diff --git a/torch/lib/THD/master_worker/README.md b/torch/lib/THD/master_worker/README.md deleted file mode 100644 index 6e54181c6d918a..00000000000000 --- a/torch/lib/THD/master_worker/README.md +++ /dev/null @@ -1 +0,0 @@ -This folder is effectively dead at the moment. 
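For orientation before the RPC diffs that follow: the master_worker command layer framed every command as a length-prefixed byte message — a uint64_t payload size followed by the raw bytes — which is what the deleted sendMessage/receiveMessage helpers below implement over sockets. A minimal, self-contained C++ sketch of that same framing pattern over an in-memory buffer; frameMessage/unframeMessage are hypothetical illustration names, not part of the deleted THD API:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>

// Append one length-prefixed message: a uint64_t byte count, then the payload.
void frameMessage(std::vector<char>& out, const std::string& payload) {
  std::uint64_t len = payload.size();
  const char* header = reinterpret_cast<const char*>(&len);
  out.insert(out.end(), header, header + sizeof(len));
  out.insert(out.end(), payload.begin(), payload.end());
}

// Read one framed message starting at `offset`, advancing `offset` past it.
std::string unframeMessage(const std::vector<char>& in, std::size_t& offset) {
  std::uint64_t len = 0;
  if (offset + sizeof(len) > in.size())
    throw std::out_of_range("truncated length header");
  std::memcpy(&len, in.data() + offset, sizeof(len));
  offset += sizeof(len);
  if (offset + len > in.size())
    throw std::out_of_range("truncated payload");
  std::string payload(in.data() + offset, static_cast<std::size_t>(len));
  offset += static_cast<std::size_t>(len);
  return payload;
}

A round trip with a std::vector<char> buffer looks like frameMessage(buf, "hello") followed by unframeMessage(buf, off), which returns "hello"; the deleted code does the equivalent with send_bytes/recv_bytes on a socket.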
diff --git a/torch/lib/THD/master_worker/common/ByteArray.cpp b/torch/lib/THD/master_worker/common/ByteArray.cpp deleted file mode 100644 index bf342572c62830..00000000000000 --- a/torch/lib/THD/master_worker/common/ByteArray.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "ByteArray.hpp" - -#include -#include -#include -#include -#include -#include -#include - -namespace thd { namespace rpc { - -ByteArray::ByteArray() - : _data() -{} - -ByteArray::ByteArray(size_t size) - : ByteArray() -{ - _data.reserve(size); -} - -ByteArray::ByteArray(const char* arr, size_t size) - : _data(arr, arr + size) -{} - -ByteArray::ByteArray(ByteArray&& arr) -{ - std::swap(_data, arr._data); -} - -ByteArray::ByteArray(const ByteArray& arr) - : _data(arr._data) -{} - -ByteArray::~ByteArray() {} - -ByteArray& ByteArray::append(const char* arr, size_t size) { - _data.append(arr, arr + size); - return *this; -} - -const char* ByteArray::data() const { - return _data.data(); -} - -size_t ByteArray::length() const { - return _data.size(); -} - -std::string ByteArray::to_string() const { - return _data; -} - -}} // namespace rpc, thd diff --git a/torch/lib/THD/master_worker/common/ByteArray.hpp b/torch/lib/THD/master_worker/common/ByteArray.hpp deleted file mode 100644 index 209922292e4ad6..00000000000000 --- a/torch/lib/THD/master_worker/common/ByteArray.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include - -namespace thd { namespace rpc { - -struct ByteArray { - using size_type = size_t; - - ByteArray(); - ByteArray(size_t size); - ByteArray(const char* arr, size_t size); - ByteArray(ByteArray&& arr); - ByteArray(const ByteArray& arr); - ~ByteArray(); - - ByteArray& append(const char* arr, size_t size); - const char* data() const; - size_type length() const; - - std::string to_string() const; - -private: - std::string _data; -}; - -}} // namespace rpc, thd - diff --git a/torch/lib/THD/master_worker/common/CommandChannel.cpp b/torch/lib/THD/master_worker/common/CommandChannel.cpp deleted file mode 100644 index 3ebaeba03245bc..00000000000000 --- a/torch/lib/THD/master_worker/common/CommandChannel.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#include "CommandChannel.hpp" -#include "Functions.hpp" -#include "../../base/ChannelUtils.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thd { -namespace { - -void sendMessage(int socket, std::unique_ptr<rpc::RPCMessage> msg) { - auto& bytes = msg.get()->bytes(); - uint64_t msg_length = static_cast<uint64_t>(bytes.length()); - - send_bytes(socket, &msg_length, 1, true); - send_bytes( - socket, - reinterpret_cast<const std::uint8_t*>(bytes.data()), - msg_length - ); -} - -std::unique_ptr<rpc::RPCMessage> receiveMessage(int socket) { - uint64_t msg_length; - recv_bytes(socket, &msg_length, 1); - - std::unique_ptr<std::uint8_t[]> bytes(new std::uint8_t[msg_length]); - recv_bytes(socket, bytes.get(), msg_length); - - return std::unique_ptr<rpc::RPCMessage>( - new rpc::RPCMessage(reinterpret_cast<char*>(bytes.get()), msg_length) - ); -} - -} // anonymous namespace - -MasterCommandChannel::MasterCommandChannel(InitMethod::Config config) - : _rank(0) - , _sockets(config.world_size, -1) - , _poll_events(nullptr) - , _error_pipe(-1) - , _error(nullptr) - , _mutexes(config.world_size) -{ - _sockets[0] = config.master.listen_socket; -} - -MasterCommandChannel::~MasterCommandChannel() { - if (_error_thread.joinable()) { - if (::write(_error_pipe, "exit", 4) != 4) { - std::cerr << "Failed to notify error thread" << std::endl; - } - _error_thread.join(); - - ::close(_error_pipe); - } - - auto world_size =
_sockets.size(); - for (size_t i = 0; i < world_size; ++i) { - auto socket = _sockets[i]; - if (socket == -1) continue; - try { - sendMessage(rpc::packMessage(Functions::exit), i); - } catch(...) {} - ::close(socket); - } - -} - -bool MasterCommandChannel::init() { - int socket; - rank_type rank; - for (size_t i = 1; i < _sockets.size(); ++i) { - std::tie(socket, std::ignore) = accept(_sockets[0]); - recv_bytes(socket, &rank, 1); - _sockets.at(rank) = socket; - } - - /* Sending confirm byte is to test connection and make barrier for workers. - * It allows to block connected workers until all remaining workers connect. - */ - for (size_t i = 1; i < _sockets.size(); ++i) { - std::uint8_t confirm_byte = 1; - send_bytes(_sockets[i], &confirm_byte, 1); - } - - // close listen socket - ::close(_sockets[0]); - - int fd[2]; - SYSCHECK(::pipe(fd)); - _sockets[0] = fd[0]; - _error_pipe = fd[1]; - _error_thread = std::thread(&MasterCommandChannel::errorHandler, this); - return true; -} - -void MasterCommandChannel::errorHandler() { - while (true) { - auto error = recvError(); - if (std::get<0>(error) == 0) { - return; - } - - _error.reset(new std::string( - "error (rank " + std::to_string(std::get<0>(error)) + "): " + std::get<1>(error) - )); - } -} - -void MasterCommandChannel::sendMessage(std::unique_ptr<rpc::RPCMessage> msg, int rank) { - // Throw error received from a worker. - if (_error) { - throw std::runtime_error(*_error); - } - - if ((rank <= 0) || (rank >= _sockets.size())) { - throw std::domain_error("sendMessage received invalid rank as parameter"); - } - - std::lock_guard<std::mutex> guard(_mutexes[rank]); - ::thd::sendMessage(_sockets[rank], std::move(msg)); -} - -std::tuple<rank_type, std::string> MasterCommandChannel::recvError() { - if (!_poll_events) { - // cache poll events array, it will be reused in another `receiveError` calls - _poll_events.reset(new struct pollfd[_sockets.size()]); - for (size_t rank = 0; rank < _sockets.size(); ++rank) { - _poll_events[rank] = { - .fd = _sockets[rank], - .events = POLLIN - }; - } - } - - for (size_t rank = 0; rank < _sockets.size(); ++rank) { - _poll_events[rank].revents = 0; - } - - SYSCHECK(::poll(_poll_events.get(), _sockets.size(), -1)) - for (size_t rank = 0; rank < _sockets.size(); ++rank) { - if (this->_poll_events[rank].revents == 0) - continue; - - if (rank == 0) { // we are notified by master to end - return std::make_tuple(0, ""); - } - - if (_poll_events[rank].revents ^ POLLIN) { - _poll_events[rank].fd = -1; // mark worker as ignored - return std::make_tuple(rank, "connection with worker has been closed"); - } - - try { - // receive error - uint64_t error_length; - recv_bytes(_poll_events[rank].fd, &error_length, 1); - - std::unique_ptr<char[]> error(new char[error_length]); - recv_bytes(_poll_events[rank].fd, error.get(), error_length); - return std::make_tuple(rank, std::string(error.get(), error_length)); - } catch (const std::exception& e) { - return std::make_tuple(rank, "recv: " + std::string(e.what())); - } - } - - // We did not receive error from any worker despite being notified.
- return std::make_tuple(0, "failed to receive error from worker"); -} - - -WorkerCommandChannel::WorkerCommandChannel(InitMethod::Config config) - : _rank(config.rank) - , _socket(-1) - , _master_addr(config.worker.master_addr) - , _master_port(config.worker.master_port) -{} - -WorkerCommandChannel::~WorkerCommandChannel() { - if (_socket != -1) - ::close(_socket); -} - -bool WorkerCommandChannel::init() { - _socket = connect(_master_addr, _master_port); - send_bytes(_socket, &_rank, 1); // send rank - - std::uint8_t confirm_byte; - recv_bytes(_socket, &confirm_byte, 1); - return true; -} - -std::unique_ptr<rpc::RPCMessage> WorkerCommandChannel::recvMessage() { - return ::thd::receiveMessage(_socket); -} - -void WorkerCommandChannel::sendError(const std::string& error) { - uint64_t error_length = static_cast<uint64_t>(error.size()); - send_bytes(_socket, &error_length, 1, true); - send_bytes(_socket, error.data(), error_length); -} - -} // namespace thd diff --git a/torch/lib/THD/master_worker/common/CommandChannel.hpp b/torch/lib/THD/master_worker/common/CommandChannel.hpp deleted file mode 100644 index c169076e2dac73..00000000000000 --- a/torch/lib/THD/master_worker/common/CommandChannel.hpp +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include "RPC.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thd { - -struct MasterCommandChannel { - MasterCommandChannel(InitMethod::Config config); - ~MasterCommandChannel(); - - bool init(); - - void sendMessage(std::unique_ptr<rpc::RPCMessage> msg, int rank); - -private: - std::tuple<rank_type, std::string> recvError(); - void errorHandler(); - - rank_type _rank; - std::vector<int> _sockets; - std::unique_ptr<struct pollfd[]> _poll_events; - - int _error_pipe; // informs error handler thread that we are exiting - std::unique_ptr<std::string> _error; - std::thread _error_thread; - std::vector<std::mutex> _mutexes; -}; - -struct WorkerCommandChannel { - WorkerCommandChannel(InitMethod::Config config); - ~WorkerCommandChannel(); - - bool init(); - - std::unique_ptr<rpc::RPCMessage> recvMessage(); - void sendError(const std::string& error); - -private: - rank_type _rank; - int _socket; - - std::string _master_addr; - port_type _master_port; -}; - -} // namespace thd diff --git a/torch/lib/THD/master_worker/common/Functions.hpp b/torch/lib/THD/master_worker/common/Functions.hpp deleted file mode 100644 index 71f9346780271b..00000000000000 --- a/torch/lib/THD/master_worker/common/Functions.hpp +++ /dev/null @@ -1,253 +0,0 @@ -#pragma once - -#include <cstdint> - -namespace thd { - -enum Functions: std::uint16_t { - // generator functions - generatorNew, - generatorCopy, - generatorFree, - generatorSeed, - generatorManualSeed, - - tensorCopyFromMaster, - tensorCopyFromWorker, - - tensorNew, - tensorNewWithTensor, - tensorNewWithSize, - tensorNewWithSize1d, - tensorNewWithSize2d, - tensorNewWithSize3d, - tensorNewWithSize4d, - tensorNewWithStorage, - tensorNewWithStorage1d, - tensorNewWithStorage2d, - tensorNewWithStorage3d, - tensorNewWithStorage4d, - tensorNewClone, - tensorNewContiguous, - tensorNewSelect, - tensorNewNarrow, - tensorNewTranspose, - tensorNewUnfold, - tensorFree, - tensorResize, - tensorResizeAs, - tensorResize1d, - tensorResize2d, - tensorResize3d, - tensorResize4d, - tensorResize5d, - tensorSet, - tensorSetStorage, - tensorSetStorage1d, - tensorSetStorage2d, - tensorSetStorage3d, - tensorSetStorage4d, - tensorNarrow, - tensorSelect, - tensorTranspose, - tensorUnfold, - tensorSqueeze, - tensorSqueeze1d, - tensorNElement, - - tensorGesv, - tensorTrtrs, - tensorGels, - tensorSyev, - tensorGeev, - tensorGesvd, - tensorGesvd2,
- tensorGetri, - tensorPotrf, - tensorPotrs, - tensorPotri, - tensorQr, - tensorGeqrf, - tensorOrgqr, - tensorOrmqr, - tensorPstrf, - - tensorFill, - tensorMaskedFill, - tensorMaskedCopy, - tensorMaskedSelect, - tensorNonzero, - tensorIndexSelect, - tensorIndexCopy, - tensorIndexAdd, - tensorIndexFill, - tensorGather, - tensorScatter, - tensorScatterFill, - tensorDot, - tensorMinall, - tensorMaxall, - tensorMedianall, - tensorSumall, - tensorProdall, - tensorNeg, - tensorCinv, - tensorAdd, - tensorSub, - tensorMul, - tensorDiv, - tensorFmod, - tensorRemainder, - tensorClamp, - tensorCadd, - tensorCsub, - tensorCmul, - tensorCpow, - tensorCdiv, - tensorCfmod, - tensorCremainder, - tensorAddcmul, - tensorAddcdiv, - tensorAddmv, - tensorAddmm, - tensorAddr, - tensorAddbmm, - tensorBaddbmm, - tensorMatch, - tensorNumel, - tensorMax, - tensorMin, - tensorKthvalue, - tensorMode, - tensorMedian, - tensorSum, - tensorProd, - tensorCumsum, - tensorCumprod, - tensorSign, - tensorTrace, - tensorCross, - tensorCmax, - tensorCmin, - tensorCmaxValue, - tensorCminValue, - tensorDiag, - tensorEye, - tensorRange, - tensorRandperm, - tensorReshape, - tensorSort, - tensorTopk, - tensorTril, - tensorTriu, - tensorCatArray, - tensorEqual, - tensorLtValue, - tensorLeValue, - tensorGtValue, - tensorGeValue, - tensorNeValue, - tensorEqValue, - tensorLtValueT, - tensorLeValueT, - tensorGtValueT, - tensorGeValueT, - tensorNeValueT, - tensorEqValueT, - tensorLtTensor, - tensorLeTensor, - tensorGtTensor, - tensorGeTensor, - tensorNeTensor, - tensorEqTensor, - tensorLtTensorT, - tensorLeTensorT, - tensorGtTensorT, - tensorGeTensorT, - tensorNeTensorT, - tensorEqTensorT, - tensorAbs, - tensorSigmoid, - tensorLog, - tensorLog10, - tensorLog1p, - tensorLog2, - tensorExp, - tensorExpm1, - tensorCos, - tensorAcos, - tensorCosh, - tensorSin, - tensorAsin, - tensorSinh, - tensorTan, - tensorAtan, - tensorAtan2, - tensorTanh, - tensorPow, - tensorTpow, - tensorSqrt, - tensorRsqrt, - tensorCeil, - tensorFloor, - tensorRound, - tensorTrunc, - tensorFrac, - tensorLerp, - tensorMean, - tensorStd, - tensorVar, - tensorNorm, - tensorRenorm, - tensorDist, - tensorHistc, - tensorBhistc, - tensorMeanall, - tensorVarall, - tensorStdall, - tensorNormall, - tensorLinspace, - tensorLogspace, - tensorRand, - tensorRandn, - tensorLogicalAndAll, - tensorLogicalAnd, - tensorLogicalAnyAll, - tensorLogicalAny, - - // th_random - tensorRandom, - tensorGeometric, - tensorBernoulli, - tensorBernoulli_FloatTensor, - tensorBernoulli_DoubleTensor, - tensorUniform, - tensorNormal, - tensorExponential, - tensorCauchy, - tensorLogNormal, - tensorMultinomial, - - // storage functions - storageSet, - storageGet, - - storageNew, - storageNewWithSize, - storageNewWithSize1, - storageNewWithSize2, - storageNewWithSize3, - storageNewWithSize4, - - storageFree, - storageResize, - storageFill, - - // communication requests - sendTensor, - sendStorage, - - exit -}; - -} // namespace thd diff --git a/torch/lib/THD/master_worker/common/RPC-inl.hpp b/torch/lib/THD/master_worker/common/RPC-inl.hpp deleted file mode 100644 index 5885b8350504fc..00000000000000 --- a/torch/lib/THD/master_worker/common/RPC-inl.hpp +++ /dev/null @@ -1,109 +0,0 @@ -#include <type_traits> -#include "TH/THStorageFunctions.h" -#include "Traits.hpp" - -namespace thd { namespace rpc { namespace detail { -//////////////////////////////////////////////////////////////////////////////// - -constexpr size_t INITIAL_BUFFER_SIZE = 256; - -template<typename real, typename = typename std::enable_if<std::is_arithmetic<real>::value>::type> -inline void _appendScalar(ByteArray& str, real
data) { - str.append(reinterpret_cast<char*>(&data), sizeof(data)); -} - -inline void _appendType(ByteArray& str, RPCType _type) { - char type = static_cast<char>(_type); - str.append(&type, sizeof(type)); -} - -template<typename T> -inline void __appendData(ByteArray& str, const T& arg, - std::false_type is_generator, std::false_type is_tensor, std::false_type is_storage) { - _appendType(str, type_traits<T>::type); - _appendScalar(str, arg); -} - -template<typename T> -inline void __appendData(ByteArray& str, const T& arg, - std::true_type is_generator, std::false_type is_tensor, std::false_type is_storage) { - _appendType(str, RPCType::GENERATOR); - _appendScalar(str, arg->generator_id); -} - -template<typename T> -inline void __appendData(ByteArray& str, const T& arg, - std::false_type is_generator, std::true_type is_tensor, std::false_type is_storage) { - _appendType(str, RPCType::TENSOR); - _appendScalar(str, arg->tensor_id); -} - -template<typename T> -inline void __appendData(ByteArray& str, const T& arg, - std::false_type is_generator, std::false_type is_tensor, std::true_type is_storage) { - _appendType(str, RPCType::STORAGE); - _appendScalar(str, arg->storage_id); -} - -template<typename T> -inline void _appendData(ByteArray& str, const T& arg) { - __appendData( - str, - arg, - is_any_of<T, THDGeneratorTypes>(), - is_any_of<T, THDTensorTypes>(), - is_any_of<T, THDStorageTypes>() - ); -} - -inline void _appendData(ByteArray& str, THLongStorage* arg) { - _appendType(str, RPCType::LONG_STORAGE); - _appendScalar(str, arg == NULL); - if (!arg) return; - _appendScalar(str, THLongStorage_size(arg)); - for (ptrdiff_t i = 0; i < THLongStorage_size(arg); i++) - _appendScalar(str, THLongStorage_get(arg, i)); -} - -template<typename T> -inline void _appendData(ByteArray& str, const std::vector<T>& arg) { - int l = arg.size(); - _appendData(str, l); - for (size_t i = 0; i < l; i++) - __appendData( - str, - arg[i], - is_any_of<T, THDGeneratorTypes>(), - is_any_of<T, THDTensorTypes>(), - is_any_of<T, THDStorageTypes>() - ); -} - -inline void _appendData(ByteArray& str, RPCType type) { - _appendType(str, type); -} - -inline void _packIntoString(ByteArray& str) {}; - -template<typename T, typename... Args> -inline void _packIntoString(ByteArray& str, const T& arg, const Args&... args) { - _appendData(str, arg); - _packIntoString(str, args...); -} - -//////////////////////////////////////////////////////////////////////////////// -} // namespace detail - -template<typename... Args> -inline std::unique_ptr<RPCMessage> packMessage( - function_id_type fid, - const Args&...
args -) { - ByteArray msg(detail::INITIAL_BUFFER_SIZE); - detail::_appendScalar(msg, fid); - detail::_packIntoString(msg, args...); - return std::unique_ptr<RPCMessage>(new RPCMessage(std::move(msg))); -} - -}} // namespace rpc, thd diff --git a/torch/lib/THD/master_worker/common/RPC.cpp b/torch/lib/THD/master_worker/common/RPC.cpp deleted file mode 100644 index 9a6a187b0f2243..00000000000000 --- a/torch/lib/THD/master_worker/common/RPC.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include "RPC.hpp" -#include "ByteArray.hpp" - -#include -#include -#include -#include -#include - -namespace thd { -namespace rpc { - -RPCMessage::RPCMessage() - : _msg(0) - , _offset(0) -{} - -RPCMessage::RPCMessage(char* str, size_t size) - : _msg(str, size) - , _offset(0) -{} - -RPCMessage::RPCMessage(const ByteArray& str) - : _msg(str) - , _offset(0) -{} - -RPCMessage::RPCMessage(ByteArray&& str) - : _msg(std::move(str)) - , _offset(0) -{} - -ByteArray& RPCMessage::bytes() { - return _msg; -} - -const char* RPCMessage::data() const { - return _msg.data() + _offset; -} - -bool RPCMessage::isEmpty() const { - return _offset >= _msg.length(); -} - -RPCMessage::size_type RPCMessage::remaining() const { - return _msg.length() - _offset; -} - -const char* RPCMessage::read(size_t num_bytes) { - if (_offset + num_bytes > _msg.length()) - throw std::out_of_range("invalid access: out of bounds"); - const char* ret_val = _msg.data() + _offset; - _offset += num_bytes; - return ret_val; -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -template<typename T> -inline T unpackScalar(RPCMessage& raw_message) { - return *reinterpret_cast<const T*>(raw_message.read(sizeof(T))); -} - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - -static_assert(sizeof(RPCType) == sizeof(char), "RPCType has to be of the " "same size as char"); -RPCType unpackType(RPCMessage& raw_message) { - char _type = *raw_message.read(sizeof(RPCType)); - return static_cast<RPCType>(_type); -} - -RPCType peekType(RPCMessage& raw_message) { - char _type = *raw_message.data(); - return static_cast<RPCType>(_type); -} - -function_id_type unpackFunctionId(RPCMessage& raw_message) { - return unpackScalar<function_id_type>(raw_message); -} - -double unpackFloat(RPCMessage& raw_message) { - RPCType type = unpackType(raw_message); - if (type == RPCType::DOUBLE) - return unpackScalar<double>(raw_message); - else if (type == RPCType::FLOAT) - return unpackScalar<float>(raw_message); - - throw std::invalid_argument("wrong real type in the raw message"); -} - -int64_t unpackInteger(RPCMessage& raw_message) { - RPCType type = unpackType(raw_message); - if (type == RPCType::CHAR) - return unpackScalar<char>(raw_message); - else if (type == RPCType::SHORT) - return unpackScalar<short>(raw_message); - else if (type == RPCType::INT) - return unpackScalar<int>(raw_message); - else if (type == RPCType::LONG) - return unpackScalar<long>(raw_message); - else if (type == RPCType::LONG_LONG) - return unpackScalar<long long>(raw_message); - - throw std::invalid_argument(std::string("wrong integer type in the raw message (") + - std::to_string(static_cast<int>(type)) + ")"); -} - -object_id_type unpackTensor(RPCMessage& raw_message) { - RPCType type = unpackType(raw_message); - if (type == RPCType::TENSOR) - return unpackScalar<object_id_type>(raw_message); - throw std::invalid_argument("expected tensor in the raw message"); -} - -object_id_type unpackStorage(RPCMessage& raw_message) { - RPCType type = unpackType(raw_message); - if (type == RPCType::STORAGE) - return unpackScalar<object_id_type>(raw_message); - throw
std::invalid_argument("expected storage in the raw message"); -} - -object_id_type unpackGenerator(RPCMessage& raw_message) { - RPCType type = unpackType(raw_message); - if (type == RPCType::GENERATOR) { - return unpackScalar(raw_message); - } - throw std::invalid_argument("expected generator in the raw message"); -} - -THLongStorage* unpackTHLongStorage(RPCMessage& raw_message) { - RPCType type = unpackType(raw_message); - if (type != RPCType::LONG_STORAGE) - throw std::invalid_argument("expected THLongStorage in the raw message"); - char is_null = unpackScalar(raw_message); - if (is_null) return NULL; - ptrdiff_t size = unpackScalar(raw_message); - THLongStorage* storage = THLongStorage_newWithSize(size); - int64_t* data = THLongStorage_data(storage); - - try { - for (int i = 0; i < size; i++) { - data[i] = unpackScalar(raw_message); - } - } catch (std::exception& e) { - THLongStorage_free(storage); - throw; - } - - return storage; -} - -}} // namespace rpc, thd diff --git a/torch/lib/THD/master_worker/common/RPC.hpp b/torch/lib/THD/master_worker/common/RPC.hpp deleted file mode 100644 index 99b45942b0a7dc..00000000000000 --- a/torch/lib/THD/master_worker/common/RPC.hpp +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include "../master/THDTensor.h" -#include "ByteArray.hpp" -#include "TH/THStorageFunctions.h" -#include "RPCType.hpp" - -#include -#include -#include - -namespace thd { - -using object_id_type = uint64_t; - -namespace rpc { - -using function_id_type = uint16_t; - -class RPCMessage { -public: - using size_type = ByteArray::size_type; - RPCMessage(); - RPCMessage(char* str, size_t size); - RPCMessage(const ByteArray& str); - RPCMessage(ByteArray&& str); - - ByteArray& bytes(); // Raw data. - const char* data() const; // Offset data. - bool isEmpty() const; - size_type remaining() const; // Length of the msg left to read. - const char* read(size_t num_bytes); - -private: - ByteArray _msg; - size_t _offset; -}; - -template -std::unique_ptr packMessage(function_id_type fid, const Args&... 
args); - -RPCType unpackType(RPCMessage& raw_message); -RPCType peekType(RPCMessage& raw_message); -double unpackFloat(RPCMessage& raw_message); -function_id_type unpackFunctionId(RPCMessage& raw_message); -int64_t unpackInteger(RPCMessage& raw_message); -object_id_type unpackGenerator(RPCMessage& raw_message); -object_id_type unpackTensor(RPCMessage& raw_message); -object_id_type unpackStorage(RPCMessage& raw_message); -THLongStorage* unpackTHLongStorage(RPCMessage& raw_message); - -}} // namespace rpc, thd - -#include "RPC-inl.hpp" diff --git a/torch/lib/THD/master_worker/common/Traits.hpp b/torch/lib/THD/master_worker/common/Traits.hpp deleted file mode 100644 index ada83d242e753c..00000000000000 --- a/torch/lib/THD/master_worker/common/Traits.hpp +++ /dev/null @@ -1,66 +0,0 @@ -#include <tuple> -#include <type_traits> - -#include "master_worker/master/THDTensor.h" -#include "master_worker/master/THDStorage.h" - -namespace thd { - -template<typename...> -struct or_trait : std::false_type {}; - -template<typename T> -struct or_trait<T> : T {}; - -template<typename T, typename... Ts> -struct or_trait<T, Ts...> - : std::conditional<T::value, T, or_trait<Ts...>>::type {}; - -template<typename T, typename U> -struct is_any_of : std::false_type {}; - -template<typename T, typename U> -struct is_any_of<T, std::tuple<U>> : std::is_same<T, U> {}; - -template<typename T, typename U, typename... Us> -struct is_any_of<T, std::tuple<U, Us...>> - : or_trait<std::is_same<T, U>, is_any_of<T, std::tuple<Us...>>> {};

-using THDGeneratorTypes = std::tuple<THDGenerator>;

-using THDTensorTypes = std::tuple< - THDByteTensor, - THDCharTensor, - THDShortTensor, - THDIntTensor, - THDLongTensor, - THDFloatTensor, - THDDoubleTensor ->;

-using THDStorageTypes = std::tuple< - THDByteStorage, - THDCharStorage, - THDShortStorage, - THDIntStorage, - THDLongStorage, - THDFloatStorage, - THDDoubleStorage ->;

-template<template<typename> class Trait, typename U> -struct map_to_ptr {};

-template