diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index 491858a1a25b43..e280db56c42257 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -74,7 +74,7 @@ fi WERROR=1 python setup.py install # Add the test binaries so that they won't be git clean'ed away -git add -f build/bin build/lib +git add -f build/bin # Testing ATen install if [[ "$BUILD_ENVIRONMENT" != *cuda* ]]; then @@ -101,3 +101,11 @@ if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda8-cudnn6-py3* ]]; then make html popd fi + +# Test no-Python build +if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then + echo "Building libtorch" + # NB: Install outside of source directory (at the same level as the root + # pytorch folder) so that it doesn't get cleaned away prior to docker push. + WERROR=1 VERBOSE=1 tools/cpp_build/build_caffe2.sh "$PWD/../cpp-build" +fi diff --git a/.jenkins/pytorch/macos-build.sh b/.jenkins/pytorch/macos-build.sh index af5240f2caa88f..41b272eae63a8f 100755 --- a/.jenkins/pytorch/macos-build.sh +++ b/.jenkins/pytorch/macos-build.sh @@ -61,12 +61,6 @@ export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID} python setup.py install -# this is a bit hacky, but not too bad. Bundle the test binaries into -# the installation directory, so they can catch a free ride on the 7z -# train. -mkdir -p ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch/test_binaries/build -mv build/{bin,lib} ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch/test_binaries/build/ - # Upload torch binaries when the build job is finished 7z a ${IMAGE_COMMIT_TAG}.7z ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch* aws s3 cp ${IMAGE_COMMIT_TAG}.7z s3://ossci-macos-build/pytorch/${IMAGE_COMMIT_TAG}.7z --acl public-read diff --git a/.jenkins/pytorch/macos-test.sh b/.jenkins/pytorch/macos-test.sh index 222fea663a5730..92ef7ad191adb0 100755 --- a/.jenkins/pytorch/macos-test.sh +++ b/.jenkins/pytorch/macos-test.sh @@ -50,13 +50,22 @@ test_python_all() { test_cpp_api() { # C++ API + # NB: Install outside of source directory (at the same level as the root + # pytorch folder) so that it doesn't get cleaned away prior to docker push. + # But still clean it before we perform our own build. 
+ # + CPP_BUILD="$PWD/../cpp-build" + rm -rf $CPP_BUILD + mkdir -p $CPP_BUILD + WERROR=1 VERBOSE=1 tools/cpp_build/build_caffe2.sh "$CPP_BUILD" + python tools/download_mnist.py --quiet -d test/cpp/api/mnist # Unfortunately it seems like the test can't load from miniconda3 # without these paths being set export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib" - ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch/test_binaries/build/bin/test_api + "$CPP_BUILD"/caffe2/bin/test_api } if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index 6527e734490d7f..4530b3f9fa483b 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -108,13 +108,14 @@ test_torchvision() { test_libtorch() { if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then echo "Testing libtorch" + CPP_BUILD="$PWD/../cpp-build" if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then - ./build/bin/test_jit + "$CPP_BUILD"/caffe2/bin/test_jit else - ./build/bin/test_jit "[cpu]" + "$CPP_BUILD"/caffe2/bin/test_jit "[cpu]" fi python tools/download_mnist.py --quiet -d test/cpp/api/mnist - OMP_NUM_THREADS=2 ./build/bin/test_api + OMP_NUM_THREADS=2 "$CPP_BUILD"/caffe2/bin/test_api fi } diff --git a/aten/src/ATen/Backend.h b/aten/src/ATen/Backend.h new file mode 100644 index 00000000000000..ccb96feeed238a --- /dev/null +++ b/aten/src/ATen/Backend.h @@ -0,0 +1,58 @@ +#pragma once +#include + +namespace at { + +enum class Backend { CPU, CUDA, SparseCPU, SparseCUDA, Undefined, NumOptions }; + +constexpr Backend kCPU = Backend::CPU; +constexpr Backend kCUDA = Backend::CUDA; +constexpr Backend kSparseCPU = Backend::SparseCPU; +constexpr Backend kSparseCUDA = Backend::SparseCUDA; + +static inline Backend toSparse(Backend b) { + switch (b) { + case Backend::CPU: + return Backend::SparseCPU; + case Backend::CUDA: + return Backend::SparseCUDA; + case Backend::SparseCPU: + return Backend::SparseCPU; + case Backend::SparseCUDA: + return Backend::SparseCUDA; + default: + throw std::runtime_error("Unknown backend"); + } +} + +static inline Backend toDense(Backend b) { + switch (b) { + case Backend::CPU: + return Backend::CPU; + case Backend::CUDA: + return Backend::CUDA; + case Backend::SparseCPU: + return Backend::CPU; + case Backend::SparseCUDA: + return Backend::CUDA; + default: + throw std::runtime_error("Unknown backend"); + } +} + +static inline const char* toString(Backend b) { + switch (b) { + case Backend::CPU: + return "CPU"; + case Backend::CUDA: + return "CUDA"; + case Backend::SparseCPU: + return "SparseCPU"; + case Backend::SparseCUDA: + return "SparseCUDA"; + default: + return "UNKNOWN_BACKEND"; + } +} + +} // namespace at diff --git a/aten/src/ATen/Device.h b/aten/src/ATen/Device.h index b0a99b6a4c3e50..b777e76cde50ec 100644 --- a/aten/src/ATen/Device.h +++ b/aten/src/ATen/Device.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,8 @@ struct Device { } } - /// Constructs a new `Device` from a `DeviceType` and an optional device index. + /// Constructs a new `Device` from a `DeviceType` and an optional device + /// index. 
/* implicit */ Device(DeviceType type, int32_t index = -1) : type_(type), index_(index) { AT_CHECK( diff --git a/aten/src/ATen/ScalarType.h b/aten/src/ATen/ScalarType.h index 0af1d72d2b6b6b..5b1f7877df268a 100644 --- a/aten/src/ATen/ScalarType.h +++ b/aten/src/ATen/ScalarType.h @@ -1,168 +1,4 @@ #pragma once - -#include "ATen/ATenGeneral.h" -#include "ATen/core/ArrayRef.h" -#include "ATen/core/Half.h" - -#include -#include - -namespace at { - -// NB: Order matters for this macro; it is relied upon in -// _promoteTypesLookup and the serialization format. -#define AT_FORALL_SCALAR_TYPES(_) \ -_(uint8_t,Byte,i) /* 0 */ \ -_(int8_t,Char,i) /* 1 */ \ -_(int16_t,Short,i) /* 2 */ \ -_(int,Int,i) /* 3 */ \ -_(int64_t,Long,i) /* 4 */ \ -_(at::Half,Half,d) /* 5 */ \ -_(float,Float,d) /* 6 */ \ -_(double,Double,d) /* 7 */ - -#define AT_FORALL_SCALAR_TYPES_EXCEPT_HALF(_) \ -_(uint8_t,Byte,i) \ -_(int8_t,Char,i) \ -_(int16_t,Short,i) \ -_(int,Int,i) \ -_(int64_t,Long,i) \ -_(float,Float,d) \ -_(double,Double,d) - -enum class ScalarType { -#define DEFINE_ENUM(_1,n,_2) \ - n, - AT_FORALL_SCALAR_TYPES(DEFINE_ENUM) -#undef DEFINE_ENUM - Undefined, // 8 - NumOptions -}; - -enum class Backend { - CPU, - CUDA, - SparseCPU, - SparseCUDA, - Undefined, - NumOptions -}; - -constexpr Backend kCPU = Backend::CPU; -constexpr Backend kCUDA = Backend::CUDA; -constexpr Backend kSparseCPU = Backend::SparseCPU; -constexpr Backend kSparseCUDA = Backend::SparseCUDA; - -static inline Backend toSparse(Backend b) { - switch (b) { - case Backend::CPU: return Backend::SparseCPU; - case Backend::CUDA: return Backend::SparseCUDA; - case Backend::SparseCPU: return Backend::SparseCPU; - case Backend::SparseCUDA: return Backend::SparseCUDA; - default: throw std::runtime_error("Unknown backend"); - } -} - -static inline Backend toDense(Backend b) { - switch (b) { - case Backend::CPU: return Backend::CPU; - case Backend::CUDA: return Backend::CUDA; - case Backend::SparseCPU: return Backend::CPU; - case Backend::SparseCUDA: return Backend::CUDA; - default: throw std::runtime_error("Unknown backend"); - } -} - -static inline const char * toString(Backend b) { - switch(b) { - case Backend::CPU: return "CPU"; - case Backend::CUDA: return "CUDA"; - case Backend::SparseCPU: return "SparseCPU"; - case Backend::SparseCUDA: return "SparseCUDA"; - default: return "UNKNOWN_BACKEND"; - } -} - -#define DEFINE_CONSTANT(_,name,_2) \ -constexpr ScalarType k##name = ScalarType::name; - -AT_FORALL_SCALAR_TYPES(DEFINE_CONSTANT) -#undef DEFINE_CONSTANT - -static inline const char * toString(ScalarType t) { -#define DEFINE_CASE(_,name,_2) \ - case ScalarType:: name : return #name; - - switch(t) { - AT_FORALL_SCALAR_TYPES(DEFINE_CASE) - default: - return "UNKNOWN_SCALAR"; - } -#undef DEFINE_CASE -} - -static inline size_t elementSize(ScalarType t) { -#define CASE_ELEMENTSIZE_CASE(ctype,name,_2) \ - case ScalarType:: name : return sizeof(ctype); - - switch(t) { - AT_FORALL_SCALAR_TYPES(CASE_ELEMENTSIZE_CASE) - default: - AT_ERROR("Unknown ScalarType"); - } -#undef CASE_ELEMENTSIZE_CASE -} - -static inline bool isIntegralType(ScalarType t) { - return (t == ScalarType::Byte || - t == ScalarType::Char || - t == ScalarType::Int || - t == ScalarType::Long || - t == ScalarType::Short); -} - -static inline bool isFloatingType(ScalarType t) { - return (t == ScalarType::Double || - t == ScalarType::Float || - t == ScalarType::Half); -} - -static inline ScalarType promoteTypes(ScalarType a, ScalarType b) { - // This is generated according to NumPy's promote_types - 
constexpr auto u1 = ScalarType::Byte; - constexpr auto i1 = ScalarType::Char; - constexpr auto i2 = ScalarType::Short; - constexpr auto i4 = ScalarType::Int; - constexpr auto i8 = ScalarType::Long; - constexpr auto f2 = ScalarType::Half; - constexpr auto f4 = ScalarType::Float; - constexpr auto f8 = ScalarType::Double; - constexpr auto ud = ScalarType::Undefined; - static constexpr ScalarType _promoteTypesLookup - [static_cast(ScalarType::NumOptions)] - [static_cast(ScalarType::NumOptions)] = { - /* u1 i1 i2 i4 i8 f2 f4 f8, ud */ - /* u1 */ { u1, i2, i2, i4, i8, f2, f4, f8, ud }, - /* i1 */ { i2, i1, i2, i4, i8, f2, f4, f8, ud }, - /* i2 */ { i2, i2, i2, i4, i8, f4, f4, f8, ud }, - /* i4 */ { i4, i4, i4, i4, i8, f8, f4, f8, ud }, - /* i8 */ { i8, i8, i8, i8, i8, f8, f4, f8, ud }, - /* f2 */ { f2, f2, f4, f8, f8, f2, f4, f8, ud }, - /* f4 */ { f4, f4, f4, f4, f4, f4, f4, f8, ud }, - /* f8 */ { f8, f8, f8, f8, f8, f8, f8, f8, ud }, - /* ud */ { ud, ud, ud, ud, ud, ud, ud, ud, ud }, - }; - return _promoteTypesLookup[static_cast(a)][static_cast(b)]; -} - -struct Tensor; -typedef ArrayRef IntList; -typedef ArrayRef TensorList; - -} // namespace at - -inline std::ostream& operator<<( - std::ostream& stream, - at::ScalarType scalar_type) { - return stream << at::toString(scalar_type); -} +#include // for BC reasons +#include +#include diff --git a/aten/src/ATen/core/ATenCoreTest.h b/aten/src/ATen/core/ATenCoreTest.h index ee8471f66fe258..a6769b10b93eed 100644 --- a/aten/src/ATen/core/ATenCoreTest.h +++ b/aten/src/ATen/core/ATenCoreTest.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace at { diff --git a/aten/src/ATen/core/Backtrace.h b/aten/src/ATen/core/Backtrace.h index ec4c17c6f6a531..9aa3ac826ce78c 100644 --- a/aten/src/ATen/core/Backtrace.h +++ b/aten/src/ATen/core/Backtrace.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace at { /// Utility to demangle a C++ symbol name. diff --git a/aten/src/ATen/core/DeviceType.h b/aten/src/ATen/core/DeviceType.h index f3b3dcb03cde9d..0a3d32bfe14317 100644 --- a/aten/src/ATen/core/DeviceType.h +++ b/aten/src/ATen/core/DeviceType.h @@ -3,7 +3,7 @@ // ATen/core (which would require a lot more build system hacking.) // If you modify me, keep me synchronized with that file. -#include +#include #include @@ -12,19 +12,21 @@ namespace at { // Underlying type declared to be int32_t for consistency with protobufs. enum class DeviceType : int32_t { CPU = 0, - CUDA = 1, // CUDA. - MKLDNN = 2, // Reserved for explicit MKLDNN - OPENGL = 3, // OpenGL - OPENCL = 4, // OpenCL - IDEEP = 5, // IDEEP. - HIP = 6, // AMD HIP + CUDA = 1, // CUDA. + MKLDNN = 2, // Reserved for explicit MKLDNN + OPENGL = 3, // OpenGL + OPENCL = 4, // OpenCL + IDEEP = 5, // IDEEP. + HIP = 6, // AMD HIP // Change the following number if you add more devices in the code. COMPILE_TIME_MAX_DEVICE_TYPES = 7, - ONLY_FOR_TEST = 20901701, // This device type is only for test. + ONLY_FOR_TEST = 20901701, // This device type is only for test. 
}; -AT_CORE_API std::string DeviceTypeName(at::DeviceType d, bool lower_case = false); +AT_CORE_API std::string DeviceTypeName( + at::DeviceType d, + bool lower_case = false); -} +} // namespace at AT_CORE_API std::ostream& operator<<(std::ostream& stream, at::DeviceType type); diff --git a/aten/src/ATen/core/Error.h b/aten/src/ATen/core/Error.h index 5b567dd0de3506..fffc2aef56d035 100644 --- a/aten/src/ATen/core/Error.h +++ b/aten/src/ATen/core/Error.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/aten/src/ATen/core/Half-inl.h b/aten/src/ATen/core/Half-inl.h index d89b496d7083b8..14ceeff51284b1 100644 --- a/aten/src/ATen/core/Half-inl.h +++ b/aten/src/ATen/core/Half-inl.h @@ -2,7 +2,7 @@ #include #include -#include +#include #ifdef __CUDACC__ #include diff --git a/aten/src/ATen/core/Half.h b/aten/src/ATen/core/Half.h index 385f18e78cab02..5b11711519ff58 100644 --- a/aten/src/ATen/core/Half.h +++ b/aten/src/ATen/core/Half.h @@ -9,7 +9,7 @@ /// If you are writing a compute bound kernel, you can use the CUDA half /// intrinsics directly on the Half type from device code. -#include +#include #include #include diff --git a/aten/src/ATen/core/CoreAPI.h b/aten/src/ATen/core/Macros.h similarity index 61% rename from aten/src/ATen/core/CoreAPI.h rename to aten/src/ATen/core/Macros.h index 0ee114d9f4cfdd..dcad67ddb68c8f 100644 --- a/aten/src/ATen/core/CoreAPI.h +++ b/aten/src/ATen/core/Macros.h @@ -1,3 +1,5 @@ +#pragma once + // You can use the definition AT_CORE_STATIC_WINDOWS to control whether // or not we apply __declspec. You will want to set this as // -DAT_CORE_STATIC_WINDOWS=1 when compiling code which links @@ -18,3 +20,11 @@ #else #define AT_CORE_API #endif + +// Disable the copy and assignment operator for a class. Note that this will +// disable the usage of the class in std containers. +#ifndef DISABLE_COPY_AND_ASSIGN +#define DISABLE_COPY_AND_ASSIGN(classname) \ + classname(const classname&) = delete; \ + classname& operator=(const classname&) = delete +#endif diff --git a/aten/src/ATen/core/ScalarType.h b/aten/src/ATen/core/ScalarType.h new file mode 100644 index 00000000000000..804f73d356e972 --- /dev/null +++ b/aten/src/ATen/core/ScalarType.h @@ -0,0 +1,123 @@ +#pragma once + +#include "ATen/core/ArrayRef.h" +#include "ATen/core/Half.h" + +#include +#include + +namespace at { + +// NB: Order matters for this macro; it is relied upon in +// _promoteTypesLookup and the serialization format. 
+#define AT_FORALL_SCALAR_TYPES(_) \ +_(uint8_t,Byte,i) /* 0 */ \ +_(int8_t,Char,i) /* 1 */ \ +_(int16_t,Short,i) /* 2 */ \ +_(int,Int,i) /* 3 */ \ +_(int64_t,Long,i) /* 4 */ \ +_(at::Half,Half,d) /* 5 */ \ +_(float,Float,d) /* 6 */ \ +_(double,Double,d) /* 7 */ + +#define AT_FORALL_SCALAR_TYPES_EXCEPT_HALF(_) \ +_(uint8_t,Byte,i) \ +_(int8_t,Char,i) \ +_(int16_t,Short,i) \ +_(int,Int,i) \ +_(int64_t,Long,i) \ +_(float,Float,d) \ +_(double,Double,d) + +enum class ScalarType { +#define DEFINE_ENUM(_1,n,_2) \ + n, + AT_FORALL_SCALAR_TYPES(DEFINE_ENUM) +#undef DEFINE_ENUM + Undefined, // 8 + NumOptions +}; + +#define DEFINE_CONSTANT(_,name,_2) \ +constexpr ScalarType k##name = ScalarType::name; + +AT_FORALL_SCALAR_TYPES(DEFINE_CONSTANT) +#undef DEFINE_CONSTANT + +static inline const char * toString(ScalarType t) { +#define DEFINE_CASE(_,name,_2) \ + case ScalarType:: name : return #name; + + switch(t) { + AT_FORALL_SCALAR_TYPES(DEFINE_CASE) + default: + return "UNKNOWN_SCALAR"; + } +#undef DEFINE_CASE +} + +static inline size_t elementSize(ScalarType t) { +#define CASE_ELEMENTSIZE_CASE(ctype,name,_2) \ + case ScalarType:: name : return sizeof(ctype); + + switch(t) { + AT_FORALL_SCALAR_TYPES(CASE_ELEMENTSIZE_CASE) + default: + AT_ERROR("Unknown ScalarType"); + } +#undef CASE_ELEMENTSIZE_CASE +} + +static inline bool isIntegralType(ScalarType t) { + return (t == ScalarType::Byte || + t == ScalarType::Char || + t == ScalarType::Int || + t == ScalarType::Long || + t == ScalarType::Short); +} + +static inline bool isFloatingType(ScalarType t) { + return (t == ScalarType::Double || + t == ScalarType::Float || + t == ScalarType::Half); +} + +static inline ScalarType promoteTypes(ScalarType a, ScalarType b) { + // This is generated according to NumPy's promote_types + constexpr auto u1 = ScalarType::Byte; + constexpr auto i1 = ScalarType::Char; + constexpr auto i2 = ScalarType::Short; + constexpr auto i4 = ScalarType::Int; + constexpr auto i8 = ScalarType::Long; + constexpr auto f2 = ScalarType::Half; + constexpr auto f4 = ScalarType::Float; + constexpr auto f8 = ScalarType::Double; + constexpr auto ud = ScalarType::Undefined; + static constexpr ScalarType _promoteTypesLookup + [static_cast(ScalarType::NumOptions)] + [static_cast(ScalarType::NumOptions)] = { + /* u1 i1 i2 i4 i8 f2 f4 f8, ud */ + /* u1 */ { u1, i2, i2, i4, i8, f2, f4, f8, ud }, + /* i1 */ { i2, i1, i2, i4, i8, f2, f4, f8, ud }, + /* i2 */ { i2, i2, i2, i4, i8, f4, f4, f8, ud }, + /* i4 */ { i4, i4, i4, i4, i8, f8, f4, f8, ud }, + /* i8 */ { i8, i8, i8, i8, i8, f8, f4, f8, ud }, + /* f2 */ { f2, f2, f4, f8, f8, f2, f4, f8, ud }, + /* f4 */ { f4, f4, f4, f4, f4, f4, f4, f8, ud }, + /* f8 */ { f8, f8, f8, f8, f8, f8, f8, f8, ud }, + /* ud */ { ud, ud, ud, ud, ud, ud, ud, ud, ud }, + }; + return _promoteTypesLookup[static_cast(a)][static_cast(b)]; +} + +struct Tensor; +typedef ArrayRef IntList; +typedef ArrayRef TensorList; + +} // namespace at + +inline std::ostream& operator<<( + std::ostream& stream, + at::ScalarType scalar_type) { + return stream << at::toString(scalar_type); +} diff --git a/aten/src/ATen/core/SmallVector.h b/aten/src/ATen/core/SmallVector.h index 269b21b0d5cf37..483144794f46e1 100644 --- a/aten/src/ATen/core/SmallVector.h +++ b/aten/src/ATen/core/SmallVector.h @@ -21,7 +21,7 @@ #pragma once #include -#include +#include #include #include diff --git a/aten/src/ATen/core/TensorTypeId.cpp b/aten/src/ATen/core/TensorTypeId.cpp new file mode 100644 index 00000000000000..605d303ad62ee3 --- /dev/null +++ 
b/aten/src/ATen/core/TensorTypeId.cpp @@ -0,0 +1,5 @@ +#include "ATen/core/TensorTypeId.h" + +std::ostream& operator<<(std::ostream& str, at::TensorTypeId rhs) { + return str << rhs.underlyingId(); +} diff --git a/aten/src/ATen/core/TensorTypeId.h b/aten/src/ATen/core/TensorTypeId.h new file mode 100644 index 00000000000000..5fc411137e08b4 --- /dev/null +++ b/aten/src/ATen/core/TensorTypeId.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include +#include "ATen/core/IdWrapper.h" + +namespace at { +class TensorTypeId; +} + +std::ostream& operator<<(std::ostream&, at::TensorTypeId); + +namespace at { + +namespace details { +using _tensorTypeId_underlyingType = uint8_t; +} + +/** + * Dynamic type ID of a Tensor argument. It represents something like + * CPUTensor, etc. + */ +class TensorTypeId final + : public at:: + IdWrapper { + public: + // Don't use this! + // Unfortunately, a default constructor needs to be defined because of + // https://reviews.llvm.org/D41223 + constexpr TensorTypeId() noexcept : IdWrapper(0) {} + + private: + constexpr explicit TensorTypeId( + details::_tensorTypeId_underlyingType id) noexcept + : IdWrapper(id) {} + + friend class TensorTypeIdCreator; + friend std::ostream& ::operator<<(std::ostream&, TensorTypeId); +}; + +} // namespace at + +AT_DEFINE_HASH_FOR_IDWRAPPER(at::TensorTypeId) diff --git a/aten/src/ATen/core/TensorTypeIdRegistration.cpp b/aten/src/ATen/core/TensorTypeIdRegistration.cpp new file mode 100644 index 00000000000000..af0b992e51c6ff --- /dev/null +++ b/aten/src/ATen/core/TensorTypeIdRegistration.cpp @@ -0,0 +1,62 @@ +#include +#include +#include + +namespace at { + +constexpr at::TensorTypeId TensorTypeIdCreator::max_id_; + +TensorTypeIds::TensorTypeIds() : creator_(), registry_() {} + +TensorTypeIds& TensorTypeIds::singleton() { + static TensorTypeIds singleton; + return singleton; +} + +TensorTypeIdCreator::TensorTypeIdCreator() : last_id_(0) {} + +at::TensorTypeId TensorTypeIdCreator::create() { + auto id = TensorTypeId(++last_id_); + + if (id == max_id_) { + // If this happens in prod, we have to change + // details::_tensorTypeId_underlyingType to uint16_t. + AT_ERROR( + "Tried to define more than ", + std::numeric_limits::max() - 1, + " tensor types, which is unsupported"); + } + + return id; +} + +TensorTypeIdRegistry::TensorTypeIdRegistry() : registeredTypeIds_(), mutex_() {} + +void TensorTypeIdRegistry::registerId(at::TensorTypeId id) { + std::lock_guard lock(mutex_); + registeredTypeIds_.emplace(id); +} + +void TensorTypeIdRegistry::deregisterId(at::TensorTypeId id) { + std::lock_guard lock(mutex_); + registeredTypeIds_.erase(id); +} + +at::TensorTypeId TensorTypeIds::createAndRegister() { + at::TensorTypeId id = creator_.create(); + registry_.registerId(id); + return id; +} + +void TensorTypeIds::deregister(at::TensorTypeId id) { + registry_.deregisterId(id); +} + +TensorTypeIdRegistrar::TensorTypeIdRegistrar() + : id_(TensorTypeIds::singleton().createAndRegister()) {} + +TensorTypeIdRegistrar::~TensorTypeIdRegistrar() { + TensorTypeIds::singleton().deregister(id_); +} + +} // namespace at diff --git a/aten/src/ATen/core/TensorTypeIdRegistration.h b/aten/src/ATen/core/TensorTypeIdRegistration.h new file mode 100644 index 00000000000000..a890c7990c4a41 --- /dev/null +++ b/aten/src/ATen/core/TensorTypeIdRegistration.h @@ -0,0 +1,99 @@ +#pragma once + +/** + * To register your own tensor types, do in a header file: + * AT_DECLARE_TENSOR_TYPE(MY_TENSOR) + * and in one (!) 
cpp file: + * AT_DEFINE_TENSOR_TYPE(MY_TENSOR) + * Both must be in the same namespace. + */ + +#include "ATen/core/Macros.h" +#include "ATen/core/TensorTypeId.h" + +#include +#include + +namespace at { + +class TensorTypeIdCreator final { + public: + TensorTypeIdCreator(); + + at::TensorTypeId create(); + + static constexpr at::TensorTypeId undefined() noexcept { + return TensorTypeId(0); + } + + private: + std::atomic last_id_; + + static constexpr at::TensorTypeId max_id_ = TensorTypeId( + std::numeric_limits::max()); + + DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator); +}; + +class TensorTypeIdRegistry final { + public: + TensorTypeIdRegistry(); + + void registerId(at::TensorTypeId id); + void deregisterId(at::TensorTypeId id); + + private: + std::unordered_set registeredTypeIds_; + std::mutex mutex_; + + DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry); +}; + +class TensorTypeIds final { + public: + static TensorTypeIds& singleton(); + + at::TensorTypeId createAndRegister(); + void deregister(at::TensorTypeId id); + + static constexpr at::TensorTypeId undefined() noexcept; + + private: + TensorTypeIds(); + + TensorTypeIdCreator creator_; + TensorTypeIdRegistry registry_; + + DISABLE_COPY_AND_ASSIGN(TensorTypeIds); +}; + +inline constexpr at::TensorTypeId TensorTypeIds::undefined() noexcept { + return TensorTypeIdCreator::undefined(); +} + +class TensorTypeIdRegistrar final { + public: + TensorTypeIdRegistrar(); + ~TensorTypeIdRegistrar(); + + at::TensorTypeId id() const noexcept; + + private: + at::TensorTypeId id_; + + DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar); +}; + +inline at::TensorTypeId TensorTypeIdRegistrar::id() const noexcept { + return id_; +} + +} // namespace at + +#define AT_DECLARE_TENSOR_TYPE(TensorName) at::TensorTypeId TensorName(); + +#define AT_DEFINE_TENSOR_TYPE(TensorName) \ + at::TensorTypeId TensorName() { \ + static TensorTypeIdRegistrar registration_raii; \ + return registration_raii.id(); \ + } diff --git a/aten/src/ATen/core/UniqueVoidPtr.h b/aten/src/ATen/core/UniqueVoidPtr.h index 299c729e125a58..405d286308e087 100644 --- a/aten/src/ATen/core/UniqueVoidPtr.h +++ b/aten/src/ATen/core/UniqueVoidPtr.h @@ -1,6 +1,6 @@ #include -#include +#include namespace at { diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 1e977f0f493f14..123244d220665f 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -223,7 +223,13 @@ target_include_directories(caffe2 PRIVATE ${Caffe2_CPU_INCLUDE}) target_include_directories(caffe2 SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}") # Set standard properties on the target aten_set_target_props(caffe2) + +if (MSVC) target_compile_options(caffe2 INTERFACE "-std=c++11") +else() +target_compile_options(caffe2 INTERFACE "$<$:-std=c++11>") +endif() + target_compile_options(caffe2 PRIVATE "-DCAFFE2_BUILD_MAIN_LIB") if (MSVC AND NOT BUILD_SHARED_LIBS) # Note [Supporting both static and dynamic libraries on Window] diff --git a/caffe2/core/common.h b/caffe2/core/common.h index 32b06b649be258..7d002028b14f36 100644 --- a/caffe2/core/common.h +++ b/caffe2/core/common.h @@ -26,6 +26,8 @@ // is automatically generated by the cmake script during build. #include "caffe2/core/macros.h" +#include "ATen/core/Macros.h" + namespace caffe2 { // Data type for caffe2 Index/Size. We use size_t to be safe here as well as for @@ -62,14 +64,6 @@ using std::vector; #define CAFFE2_USED __attribute__((__used__)) #endif //_MSC_VER -// Disable the copy and assignment operator for a class. 
Note that this will -// disable the usage of the class in std containers. -#ifndef DISABLE_COPY_AND_ASSIGN -#define DISABLE_COPY_AND_ASSIGN(classname) \ - classname(const classname&) = delete; \ - classname& operator=(const classname&) = delete -#endif - // Define enabled when building for iOS or Android devices #if !defined(CAFFE2_MOBILE) #if defined(__ANDROID__) diff --git a/caffe2/core/dispatch/CMakeLists.txt b/caffe2/core/dispatch/CMakeLists.txt index c028bfa2b93070..da3177cb8cc0b0 100644 --- a/caffe2/core/dispatch/CMakeLists.txt +++ b/caffe2/core/dispatch/CMakeLists.txt @@ -8,8 +8,6 @@ set(LIB_SOURCES LeftRight.cpp OpSchema.cpp OpSchemaRegistration.cpp - TensorTypeId.cpp - TensorTypeIdRegistration.cpp ) set(TEST_SOURCES diff --git a/caffe2/core/dispatch/TensorTypeId.cpp b/caffe2/core/dispatch/TensorTypeId.cpp deleted file mode 100644 index fe1ad1b7c88a95..00000000000000 --- a/caffe2/core/dispatch/TensorTypeId.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "caffe2/core/dispatch/TensorTypeId.h" - -std::ostream& operator<<(std::ostream& str, c10::TensorTypeId rhs) { - return str << rhs.underlyingId(); -} diff --git a/caffe2/core/dispatch/TensorTypeId.h b/caffe2/core/dispatch/TensorTypeId.h deleted file mode 100644 index 244817904667b9..00000000000000 --- a/caffe2/core/dispatch/TensorTypeId.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include "ATen/core/IdWrapper.h" -#include -#include -#include -#include - -namespace c10 { -class TensorTypeId; -} - -std::ostream& operator<<(std::ostream&, c10::TensorTypeId); - -namespace c10 { - -namespace details { - using _tensorTypeId_underlyingType = uint8_t; -} - -/** - * Dynamic type ID of a Tensor argument. It represents something like CPUTensor, etc. - */ -class TensorTypeId final : public at::IdWrapper { -public: - // Don't use this! - // Unfortunately, a default constructor needs to be defined because of https://reviews.llvm.org/D41223 - constexpr TensorTypeId() noexcept: IdWrapper(0) {} -private: - constexpr explicit TensorTypeId(details::_tensorTypeId_underlyingType id) noexcept: IdWrapper(id) {} - - friend class TensorTypeIdCreator; - friend std::ostream& ::operator<<(std::ostream&, TensorTypeId); -}; - -} // namespace c10 - -AT_DEFINE_HASH_FOR_IDWRAPPER(c10::TensorTypeId) diff --git a/caffe2/core/dispatch/TensorTypeIdRegistration.cpp b/caffe2/core/dispatch/TensorTypeIdRegistration.cpp deleted file mode 100644 index 31b4c6b671aa29..00000000000000 --- a/caffe2/core/dispatch/TensorTypeIdRegistration.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "caffe2/core/dispatch/TensorTypeIdRegistration.h" -#include - -namespace c10 { - -constexpr TensorTypeId TensorTypeIdCreator::max_id_; - -TensorTypeIds::TensorTypeIds() -: creator_(), registry_() {} - -TensorTypeIds& TensorTypeIds::singleton() { - static TensorTypeIds singleton; - return singleton; -} - -TensorTypeIdCreator::TensorTypeIdCreator() -: last_id_(0) {} - -TensorTypeId TensorTypeIdCreator::create() { - auto id = TensorTypeId(++last_id_); - - if (id == max_id_) { - // If this happens in prod, we have to change details::_tensorTypeId_underlyingType to uint16_t. 
- throw std::logic_error("Tried to define more than " + c10::guts::to_string(std::numeric_limits::max()-1) + " tensor types, which is unsupported"); - } - - return id; -} - -TensorTypeIdRegistry::TensorTypeIdRegistry() -: registeredTypeIds_(), mutex_() {} - -void TensorTypeIdRegistry::registerId(TensorTypeId id) { - std::lock_guard lock(mutex_); - registeredTypeIds_.emplace(id); -} - -void TensorTypeIdRegistry::deregisterId(TensorTypeId id) { - std::lock_guard lock(mutex_); - registeredTypeIds_.erase(id); -} - -TensorTypeId TensorTypeIds::createAndRegister() { - TensorTypeId id = creator_.create(); - registry_.registerId(id); - return id; -} - -void TensorTypeIds::deregister(TensorTypeId id) { - registry_.deregisterId(id); -} - -TensorTypeIdRegistrar::TensorTypeIdRegistrar() -: id_(TensorTypeIds::singleton().createAndRegister()) { -} - -TensorTypeIdRegistrar::~TensorTypeIdRegistrar() { - TensorTypeIds::singleton().deregister(id_); -} - -} // namespace c10 diff --git a/caffe2/core/dispatch/TensorTypeIdRegistration.h b/caffe2/core/dispatch/TensorTypeIdRegistration.h deleted file mode 100644 index a7af6337acb2a5..00000000000000 --- a/caffe2/core/dispatch/TensorTypeIdRegistration.h +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once - -/** - * To register your own tensor types, do in a header file: - * C10_DECLARE_TENSOR_TYPE(MY_TENSOR) - * and in one (!) cpp file: - * C10_DEFINE_TENSOR_TYPE(MY_TENSOR) - * Both must be in the same namespace. - */ - -#include "caffe2/core/dispatch/TensorTypeId.h" -#include "caffe2/core/common.h" -#include -#include "caffe2/utils/flat_hash_map/flat_hash_map.h" - -namespace c10 { - -class TensorTypeIdCreator final { -public: - TensorTypeIdCreator(); - - TensorTypeId create(); - - static constexpr TensorTypeId undefined() noexcept { - return TensorTypeId(0); - } - -private: - std::atomic last_id_; - - static constexpr TensorTypeId max_id_ = TensorTypeId(std::numeric_limits::max()); - - DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator); -}; - -class TensorTypeIdRegistry final { -public: - TensorTypeIdRegistry(); - - void registerId(TensorTypeId id); - void deregisterId(TensorTypeId id); - -private: - ska::flat_hash_set registeredTypeIds_; - std::mutex mutex_; - - DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry); -}; - -class TensorTypeIds final { -public: - static TensorTypeIds& singleton(); - - TensorTypeId createAndRegister(); - void deregister(TensorTypeId id); - - static constexpr TensorTypeId undefined() noexcept; - -private: - TensorTypeIds(); - - TensorTypeIdCreator creator_; - TensorTypeIdRegistry registry_; - - DISABLE_COPY_AND_ASSIGN(TensorTypeIds); -}; - -inline constexpr TensorTypeId TensorTypeIds::undefined() noexcept { - return TensorTypeIdCreator::undefined(); -} - -class TensorTypeIdRegistrar final { -public: - TensorTypeIdRegistrar(); - ~TensorTypeIdRegistrar(); - - TensorTypeId id() const noexcept; - -private: - TensorTypeId id_; - - DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar); -}; - -inline TensorTypeId TensorTypeIdRegistrar::id() const noexcept { - return id_; -} - -} // namespace c10 - -#define C10_DECLARE_TENSOR_TYPE(TensorName) \ - TensorTypeId TensorName(); \ - -#define C10_DEFINE_TENSOR_TYPE(TensorName) \ - TensorTypeId TensorName() { \ - static TensorTypeIdRegistrar registration_raii; \ - return registration_raii.id(); \ - } diff --git a/cmake/public/threads.cmake b/cmake/public/threads.cmake index 44c3f0ed9dc46e..f223f497c76f43 100644 --- a/cmake/public/threads.cmake +++ b/cmake/public/threads.cmake @@ -5,14 +5,12 @@ if(THREADS_FOUND AND NOT TARGET 
Threads::Threads) add_library(Threads::Threads INTERFACE IMPORTED) if(THREADS_HAVE_PTHREAD_ARG) - set_property( - TARGET Threads::Threads - PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") + set_property(TARGET Threads::Threads + PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") endif() if(CMAKE_THREAD_LIBS_INIT) - set_property( - TARGET Threads::Threads - PROPERTY INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + set_property(TARGET Threads::Threads + PROPERTY INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") endif() -endif() \ No newline at end of file +endif() diff --git a/test/test_jit.py b/test/test_jit.py index 4e67cf69b30aab..08e80933b1ea24 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -107,6 +107,20 @@ def get_fn(file_name, script_path): return fn +# Python equivalents for the empty list construction builtins. We need +# these otherwise the tests won't execute in regular Python mode. +def _construct_empty_int_list(): + return [] + + +def _construct_empty_float_list(): + return [] + + +def _construct_empty_tensor_list(): + return [] + + class JitTestCase(TestCase): _do_cuda_memory_leak_check = True @@ -1816,6 +1830,26 @@ def capture_stdout(self): os.close(r) os.close(w) + def checkScriptRaisesRegex(self, script, inputs, exception, regex, + optimize=True, outputs=None, capture_output=False): + """ + Checks that a given function will throw the correct exception, + when executed with normal python, the string frontend, and the AST frontend + """ + # normal python + with self.assertRaisesRegex(exception, regex): + script(*inputs) + # string frontend + with self.assertRaisesRegex(exception, regex): + source = textwrap.dedent(inspect.getsource(script)) + cu = torch.jit.CompilationUnit(source, optimize) + ge = getattr(cu, script.__name__) + ge(*inputs) + # python AST frontend + with self.assertRaisesRegex(exception, regex): + ge = torch.jit.script(script, optimize) + ge(*inputs) + def checkScript(self, script, inputs, optimize=True, outputs=None, name='func', capture_output=False, frames_up=1): if isinstance(script, str): cu = torch.jit.CompilationUnit(script, optimize, _frames_up=frames_up) @@ -1948,6 +1982,11 @@ def func(x): x = torch.rand(10, dtype=torch.float, requires_grad=True) self.checkScript(func, [x], optimize=True) + def func2(x): + return x[5:] + + self.checkScript(func2, [x], optimize=True) + def test_gather(self): def func(x): return x[0] @@ -2059,17 +2098,6 @@ def foo3(x): canonical(foo3.graph)) def test_list_literal(self): - # Python equivalents for the empty list construction builtins. We need - # these otherwise the tests won't execute in regular Python mode. 
- def _construct_empty_int_list(): - return [] - - def _construct_empty_float_list(): - return [] - - def _construct_empty_tensor_list(): - return [] - def reassign(): x = [1] if True: @@ -2123,6 +2151,100 @@ def reassign_nested(): with self.assertRaisesRegex(RuntimeError, "previously has type"): self.checkScript(reassign_nested, (), optimize=True) + def test_list_gather(self): + def index(): + a = [1, 2, 3] + return a[1] + + self.checkScript(index, ()) + + def negative_index(): + a = [1, 2, 3] + return a[-1] + + self.checkScript(negative_index, ()) + + def bad_index(): + a = [1, 2, 3] + return a[4] + + self.checkScriptRaisesRegex(bad_index, (), IndexError, + "list index out of range") + + def bad_negative_index(): + a = [1, 2, 3] + return a[-5] + + self.checkScriptRaisesRegex(bad_negative_index, (), IndexError, + "list index out of range") + + def test_list_len(self): + def func(): + a = [1, 2, 3] + return len(a) == 3 + + self.checkScript(func, ()) + + def func2(): + a = _construct_empty_tensor_list() + return len(a) == 0 + + self.checkScript(func2, ()) + + def test_list_ops(self): + def test_equality(): + a = [1, 2, 3] + b = [1, 2, 3] + return a == b + + self.checkScript(test_equality, (), optimize=True) + + def test_non_equality(): + a = [1, 2, 3] + b = [3] + return a == b + + self.checkScript(test_non_equality, (), optimize=True) + + def test_list_add(): + a = [1, 2, 3] + b = [2] + c = a + b + return c == [1, 2, 3, 2] + + self.checkScript(test_list_add, (), optimize=True) + + def test_list_add_empty(): + a = [1, 2, 3] + b = _construct_empty_int_list() + c = a + b + return c == [1, 2, 3] + + self.checkScript(test_list_add_empty, (), optimize=True) + + def test_tensor_list_equality(): + t1 = torch.ones([1, 1]) + t2 = torch.ones([1, 1]) + x = [t1, t2] + y = [t2, t1] + return x == y + + self.checkScript(test_tensor_list_equality, (), optimize=True) + + def test_invalid_list_equality(): + t1 = torch.ones([2, 2]) + t2 = torch.ones([2, 2]) + x = [t1, t2] + y = [t2, t1] + # will throw since the tensors have more than one element + return x == y + + self.checkScriptRaisesRegex( + test_invalid_list_equality, + (), + RuntimeError, + "bool value of Tensor") + def test_func_call(self): script = ''' def add(a, b): diff --git a/tools/cpp_build/build_caffe2.sh b/tools/cpp_build/build_caffe2.sh new file mode 100755 index 00000000000000..6a50c14e05523e --- /dev/null +++ b/tools/cpp_build/build_caffe2.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +set -ex + +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" + +pushd $SCRIPTPATH +source ./build_common.sh + +echo "Building Caffe2" + +mkdir -p $CAFFE2_BUILDPATH +pushd $CAFFE2_BUILDPATH + +cmake -DUSE_CUDA:BOOL=$USE_CUDA \ + -DBUILD_TORCH=ON \ + -DUSE_OPENMP:BOOL=${USE_OPENMP:ON} \ + -DBUILD_CAFFE2=OFF \ + -DBUILD_ATEN=ON \ + -DBUILD_PYTHON=OFF \ + -DBUILD_BINARY=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DONNX_NAMESPACE=$ONNX_NAMESPACE \ + -DCMAKE_BUILD_TYPE:STRING=$BUILD_TYPE \ + -DCMAKE_INSTALL_PREFIX:STRING=$INSTALL_PREFIX \ + -DCMAKE_INSTALL_MESSAGE=NEVER \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + -G "$GENERATE" \ + $PYTORCHPATH/ +$MAKE -j "$JOBS" install + +popd +popd diff --git a/tools/cpp_build/build_common.sh b/tools/cpp_build/build_common.sh new file mode 100755 index 00000000000000..be9ac2b271743d --- /dev/null +++ b/tools/cpp_build/build_common.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +BUILD_PATH="${1:-$SCRIPTPATH/build}" +INSTALL_PREFIX="$BUILD_PATH/install" +PYTORCHPATH="$SCRIPTPATH/../.." 
+ +USE_CUDA=0 +if [ -x "$(command -v nvcc)" ]; then + USE_CUDA=1 +fi + +CAFFE2_BUILDPATH="$BUILD_PATH/caffe2" +NANOPB_BUILDPATH="$BUILD_PATH/nanopb" + +# Build with Ninja if available. It has much cleaner output. +GENERATE="Unix Makefiles" +MAKE=make +if [ -x "$(command -v ninja)" ]; then + GENERATE=Ninja + MAKE=ninja +fi + +# Code is developed a lot more than released, so default to Debug. +BUILD_TYPE=${BUILD_TYPE:-Debug} + +# Try to build with as many threads as we have cores, default to 4 if the +# command fails. +set +e +if [ -n "$MAX_JOBS" ]; then # Use MAX_JOBS if it is set + JOBS=$MAX_JOBS +elif [[ "$(uname)" == "Linux" ]]; then + # https://stackoverflow.com/questions/6481005/how-to-obtain-the-number-of-cpus-cores-in-linux-from-the-command-line + JOBS="$(grep -c '^processor' /proc/cpuinfo)" +else # if [[ "$(uname)" == "Darwin"]] + # https://stackoverflow.com/questions/1715580/how-to-discover-number-of-logical-cores-on-mac-os-x + JOBS="$(sysctl -n hw.ncpu)" +fi +set -e +if [[ $? -ne 0 ]]; then + JOBS=4 +fi + +# Make sure an ONNX namespace is set +if [ -z "$ONNX_NAMESPACE" ]; then + ONNX_NAMESPACE="onnx_torch" +fi diff --git a/tools/cpp_build/build_libtorch.sh b/tools/cpp_build/build_libtorch.sh new file mode 100755 index 00000000000000..6dd9a589cf1074 --- /dev/null +++ b/tools/cpp_build/build_libtorch.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +set -ex + +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" + +pushd $SCRIPTPATH +source ./build_common.sh + +echo "Building Torch" + +mkdir -p $LIBTORCH_BUILDPATH +pushd $LIBTORCH_BUILDPATH + +cmake -DUSE_CUDA:BOOL=$USE_CUDA \ + -DNO_API:BOOL=${NO_API:-0} \ + -DCAFFE2_PATH=$PYTORCHPATH/ \ + -DCAFFE2_BUILD_PATH=$CAFFE2_BUILDPATH \ + -DONNX_NAMESPACE=$ONNX_NAMESPACE \ + -DNANOPB_BUILD_PATH=$NANOPB_BUILDPATH \ + -DINSTALL_PREFIX=$INSTALL_PREFIX \ + -DCMAKE_BUILD_TYPE:STRING=$BUILD_TYPE \ + -DCMAKE_INSTALL_PREFIX:STRING=$INSTALL_PREFIX \ + -DCMAKE_INSTALL_MESSAGE=NEVER \ + -Dnanopb_BUILD_GENERATOR:BOOL=OFF \ + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON \ + -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ + -DVERBOSE:BOOL=${VERBOSE:-0} \ + -G "$GENERATE" \ + $PYTORCHPATH/torch +$MAKE -j "$JOBS" + +popd +popd diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index fda98bbd24c14f..058712f6b33db0 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -389,7 +389,6 @@ if (BUILD_TORCH_TEST AND NOT MSVC AND NOT APPLE AND NOT USE_ROCM) if (USE_CUDA) target_link_libraries(test_jit ${CUDA_LIBRARIES}) endif() - endif() if (BUILD_TORCH_TEST AND NOT NO_API AND NOT USE_ROCM) @@ -438,5 +437,4 @@ if (BUILD_TORCH_TEST AND NOT NO_API AND NOT USE_ROCM) -Wno-unused-but-set-parameter) endif() endif() - endif() diff --git a/torch/csrc/jit/ivalue.h b/torch/csrc/jit/ivalue.h index 3362d36f11b6c5..9d371ae95a2d92 100644 --- a/torch/csrc/jit/ivalue.h +++ b/torch/csrc/jit/ivalue.h @@ -432,6 +432,7 @@ DEFINE_TO(double, toDouble) DEFINE_TO(int64_t, toInt) DEFINE_TO(Shared, toDoubleList) DEFINE_TO(Shared, toIntList) +DEFINE_TO(Shared, toTensorList) DEFINE_TO(Shared, toString) DEFINE_TO(at::Scalar, toScalar) DEFINE_TO(bool, toInt) diff --git a/torch/csrc/jit/register_prim_ops.cpp b/torch/csrc/jit/register_prim_ops.cpp index 59b91cce49067d..6054811a7e3c6d 100644 --- a/torch/csrc/jit/register_prim_ops.cpp +++ b/torch/csrc/jit/register_prim_ops.cpp @@ -310,7 +310,127 @@ RegisterOperators reg({ }; \ }), +template +Operation listSelect(Node* node) { + return [=](Stack& stack) { + T list; + int64_t idx; + pop(stack, list, idx); + const int64_t list_size = 
list->elements().size(); + if (idx >= list_size) { + throw std::out_of_range("list index out of range"); + } + + if (idx < 0) { + // Handle negative indexing + idx = list_size + idx; + if (idx < 0) { + throw std::out_of_range("list index out of range"); + } + } + + auto element = list->elements().at(idx); + push(stack, std::move(element)); + return 0; + }; +} + +template +Operation listLen(Node* node) { + return [=](Stack& stack) { + T a; + pop(stack, a); + const int64_t size = a->elements().size(); + push(stack, size); + return 0; + }; +} + +template +Operation listEq(Node* node) { + return [=](Stack& stack) { + T a; + T b; + pop(stack, a, b); + if (a->elements() == b->elements()) { + push(stack, 1); + } else { + push(stack, 0); + } + return 0; + }; +} + +// Specialization for at::Tensor, since it doesn't define operator== +template <> +Operation listEq>(Node* node) { + return [=](Stack& stack) { + Shared a; + Shared b; + pop(stack, a, b); + if (a->elements().size() != b->elements().size()) { + push(stack, 0); + return 0; + } + + for (size_t i = 0; i < a->elements().size(); ++i) { + const auto& a_element = a->elements()[i]; + const auto& b_element = b->elements()[i]; + // This preserves Python's semantics, which uses eq() to compare two + // elements, then passes the result to bool(). + // see: https://docs.python.org/3.4/reference/datamodel.html#object.__ge__ + const auto cmp_result = a_element.eq(b_element); + if (!cmp_result.is_nonzero()) { + push(stack, 0); + return 0; + } + } + + push(stack, 1); + return 0; + }; +} + +template +Operation listAdd(Node* node) { + return [=](Stack& stack) { + TList a; + TList b; + pop(stack, a, b); + + std::vector ret; + const auto total_size = a->elements().size() + b->elements().size(); + ret.reserve(total_size); + for (const auto& a_element : a->elements()) { + ret.push_back(a_element); + } + for (const auto& b_element : b->elements()) { + ret.push_back(b_element); + } + + push(stack, ret); + return 0; + }; +} + RegisterOperators reg2({ + Operator("aten::select(int[] a, int b) -> int", listSelect>), + Operator("aten::select(float[] a, int b) -> float", listSelect>), + Operator("aten::select(Tensor[] a, int b) -> Tensor", listSelect>), + + Operator("aten::len(int[] a) -> int", listLen>), + Operator("aten::len(float[] a) -> int", listLen>), + Operator("aten::len(Tensor[] a) -> int", listLen>), + + Operator("aten::eq(int[] a, int[] b) -> int", listEq>), + Operator("aten::eq(float[] a, float[] b) -> int", listEq>), + Operator("aten::eq(Tensor[] a, Tensor[] b) -> int", listEq>), + + Operator("aten::add(int[] a, int[] b) -> int[]", listAdd, int64_t>), + Operator("aten::add(float[] a, float[] b) -> float[]", listAdd, double>), + Operator("aten::add(Tensor[] a, Tensor[] b) -> Tensor[]", listAdd, at::Tensor>), + + DEFINE_BINARY_OP(aten::add, a + b) DEFINE_BINARY_OP(aten::sub, a - b) DEFINE_BINARY_OP(aten::mul, a * b) diff --git a/torch/csrc/jit/script/compiler.cpp b/torch/csrc/jit/script/compiler.cpp index 73808fcb5863d1..b34524ae6b644b 100644 --- a/torch/csrc/jit/script/compiler.cpp +++ b/torch/csrc/jit/script/compiler.cpp @@ -1359,10 +1359,7 @@ struct to_ir { return emitNone(tree->range()); } break; case TK_SLICE: { - const auto slice = Slice(tree); - return emitSlice( - slice.range(), - {slice.value(), slice.startOr(0), slice.endOr(-1)}); + return emitSlice(Slice(tree)); } break; case TK_GATHER: { const auto gather = Gather(tree); @@ -1380,7 +1377,8 @@ struct to_ir { auto values = getValues(ll.inputs(), /*maybe_unpack=*/true, identity); if (values.size() 
== 0) { throw ErrorReport(tree) << "Empty list literals not allowed. " - << "Use _constructEmptyFooList() instead"; + << "Use _construct_empty_foo_list() instead. " + << "`foo` can be `int`, `float` or `tensor`"; } const auto elem_type = values.at(0)->type(); for (auto v : values) { @@ -1424,28 +1422,33 @@ struct to_ir { // Desugars slice syntactic sugar tensor[begin:end] -> tensor.slice(begin, // end). - Value* emitSlice( - const SourceRange& loc, - TreeList&& inputs) { - const auto applyInputs = - Compound::create(TK_LIST, loc, std::move(inputs)); - const auto input_values = getNamedValues(applyInputs->trees(), - /*maybe_unpack*/false, - identity); + Value* emitSlice(const Slice& slice) { + const auto& loc = slice.range(); + TreeList inputs = {slice.value(), slice.startOr(0)}; + const auto applyInputs = Compound::create(TK_LIST, loc, std::move(inputs)); + const auto input_values = getNamedValues( + applyInputs->trees(), + /*maybe_unpack*/ false, + identity); + NamedValue tensor = input_values[0]; NamedValue begin = input_values[1]; - NamedValue end = input_values[2]; - NamedValue dim = NamedValue(loc, "dim", - graph->insertConstant(0, loc)); - NamedValue step = NamedValue(loc, "step", - graph->insertConstant(1, loc)); - - return emitBuiltinCall( - loc, method, "slice", {tensor, dim, begin, end, step}, {}, true) + NamedValue dim = NamedValue(loc, "dim", graph->insertConstant(0, loc)); + NamedValue step = NamedValue(loc, "step", graph->insertConstant(1, loc)); + + std::vector args = {tensor, dim, begin}; + const auto has_end = slice.end().present(); + if (has_end) { + // If the user specified an `end` index, pass it down + args.emplace_back(loc, "end", emitExpr(Expr(slice.end().get()), identity)); + } + + // Otherwise rely on the schema default argument + return emitBuiltinCall(loc, method, "slice", args, {step}, true) ->asValue(loc, method); } - // Desugars gather syntactic sugar tensor[idx] -> tensor.select(idx). + // Desugars gather syntactic sugar foo[i] Value* emitGather( const SourceRange& loc, TreeList&& inputs) { @@ -1454,15 +1457,21 @@ struct to_ir { auto input_values = getNamedValues(applyInputs->trees(), /*maybe_unpack*/false, identity); - NamedValue tensor = input_values[0]; - NamedValue dim = NamedValue( - loc, - "dim", - graph->insertConstant(0, loc)); + NamedValue gatherable = input_values[0]; NamedValue idx = input_values[1]; - - return emitBuiltinCall(loc, method, "select", {tensor, dim, idx}, {}, true) - ->asValue(loc, method); + if (gatherable.value->type()->kind() == TypeKind::ListType) { + // if it's a list, emit a regular index selection op + return emitBuiltinCall( + loc, method, "select", {gatherable, idx}, {}, true) + ->asValue(loc, method); + + } else { + // if it's a single tensor, map tensor[idx] -> tensor.select(0, idx) + NamedValue dim = NamedValue(loc, "dim", graph->insertConstant(0, loc)); + return emitBuiltinCall( + loc, method, "select", {gatherable, dim, idx}, {}, true) + ->asValue(loc, method); + } } };
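
A few self-contained sketches of the patterns this patch introduces follow, for trying them outside the tree.

The promotion table that moved into ATen/core/ScalarType.h is indexed in AT_FORALL_SCALAR_TYPES order (u1 i1 i2 i4 i8 f2 f4 f8, plus Undefined) and mirrors NumPy's promote_types. A usage sketch, assuming an ATen build that ships the header added by this patch (the operator<< overload it defines prints the enum name):

#include <ATen/core/ScalarType.h>
#include <iostream>

int main() {
  // Row/column order is the AT_FORALL_SCALAR_TYPES order above.
  std::cout << at::promoteTypes(at::kByte, at::kChar) << "\n"; // Short: smallest type holding both ranges
  std::cout << at::promoteTypes(at::kInt, at::kFloat) << "\n"; // Float
  std::cout << at::promoteTypes(at::kHalf, at::kInt) << "\n";  // Double, matching NumPy's promote_types
  return 0;
}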
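
The new TensorTypeIdRegistration machinery pairs a Meyers singleton (TensorTypeIds::singleton()) with a function-local RAII registrar, so each AT_DEFINE_TENSOR_TYPE'd name claims a unique id on first call and releases it at process exit. A minimal standard-library-only sketch of the same pattern; every name here (MiniTypeId, MiniRegistry, MINI_DEFINE_TENSOR_TYPE) is hypothetical:

#include <atomic>
#include <cstdint>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <unordered_set>

using MiniTypeId = std::uint8_t;

class MiniRegistry {
 public:
  static MiniRegistry& singleton() {
    static MiniRegistry instance;  // Meyers singleton, like TensorTypeIds
    return instance;
  }
  MiniTypeId createAndRegister() {
    MiniTypeId id = ++last_id_;  // the real creator reserves 0 for undefined()
    if (id == 0) {               // wrapped around: id space exhausted
      throw std::logic_error("too many type ids");
    }
    std::lock_guard<std::mutex> lock(mutex_);
    ids_.insert(id);
    return id;
  }
  void deregister(MiniTypeId id) {
    std::lock_guard<std::mutex> lock(mutex_);
    ids_.erase(id);
  }

 private:
  MiniRegistry() = default;
  // What DISABLE_COPY_AND_ASSIGN expands to:
  MiniRegistry(const MiniRegistry&) = delete;
  MiniRegistry& operator=(const MiniRegistry&) = delete;

  std::atomic<MiniTypeId> last_id_{0};
  std::unordered_set<MiniTypeId> ids_;
  std::mutex mutex_;
};

// The function-local static registers exactly once, on first call,
// mirroring AT_DEFINE_TENSOR_TYPE's registration_raii.
#define MINI_DEFINE_TENSOR_TYPE(TensorName)                          \
  MiniTypeId TensorName() {                                          \
    static struct Raii {                                             \
      MiniTypeId id = MiniRegistry::singleton().createAndRegister(); \
      ~Raii() { MiniRegistry::singleton().deregister(id); }          \
    } raii;                                                          \
    return raii.id;                                                  \
  }

MINI_DEFINE_TENSOR_TYPE(MyCPUTensorId)
MINI_DEFINE_TENSOR_TYPE(MyCUDATensorId)

int main() {
  std::cout << int(MyCPUTensorId()) << " " << int(MyCUDATensorId()) << "\n";  // 1 2
  std::cout << int(MyCPUTensorId()) << "\n";  // still 1: ids are stable per name
  return 0;
}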
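
listSelect in register_prim_ops.cpp gives script lists Python indexing semantics: negative indices count back from the end, and anything still out of range throws, which is exactly what test_list_gather exercises. The same bounds logic, restated as a standalone sketch:

#include <iostream>
#include <stdexcept>
#include <vector>

int64_t select(const std::vector<int64_t>& list, int64_t idx) {
  const int64_t list_size = static_cast<int64_t>(list.size());
  if (idx >= list_size) {
    throw std::out_of_range("list index out of range");
  }
  if (idx < 0) {
    idx += list_size;  // e.g. a[-1] is the last element
    if (idx < 0) {
      throw std::out_of_range("list index out of range");
    }
  }
  return list.at(idx);
}

int main() {
  std::vector<int64_t> a{1, 2, 3};
  std::cout << select(a, 1) << " " << select(a, -1) << "\n";  // 2 3
  try {
    select(a, -5);  // mirrors test_list_gather's bad_negative_index case
  } catch (const std::out_of_range& e) {
    std::cout << e.what() << "\n";
  }
  return 0;
}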
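
listEq is specialized for tensor lists because at::Tensor defines no operator==; each pair is compared with eq() and the mask reduced with is_nonzero(), which throws the "bool value of Tensor" error test_invalid_list_equality expects whenever the mask holds more than one element. A sketch of that comparison, assuming an ATen build:

#include <ATen/ATen.h>
#include <vector>

bool tensor_lists_equal(const std::vector<at::Tensor>& a,
                        const std::vector<at::Tensor>& b) {
  if (a.size() != b.size()) {
    return false;
  }
  for (size_t i = 0; i < a.size(); ++i) {
    // eq() produces an element-wise mask; is_nonzero() turns a
    // one-element mask into a bool and throws for anything larger.
    if (!a[i].eq(b[i]).is_nonzero()) {
      return false;
    }
  }
  return true;
}

int main() {
  auto t = at::ones({1, 1});
  std::vector<at::Tensor> x{t, t}, y{t, t};
  // true: single-element masks reduce cleanly, as in test_tensor_list_equality
  return tensor_lists_equal(x, y) ? 0 : 1;
}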
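
Finally, the emitSlice rewrite forwards `end` only when the user wrote one and otherwise leans on aten::slice's schema default; the new func2 test (x[5:]) passes because the default sentinel is clamped to the dimension size, whereas the previously hard-coded end=-1 wraps around and excludes the last element. In eager ATen terms the lowering is roughly as follows (a sketch, not the emitted IR):

#include <ATen/ATen.h>

int main() {
  auto x = at::ones({10});
  // x[5:] with an explicit end would be x.slice(0, 5, end); with the
  // end omitted, the schema default keeps the last element, where the
  // old end=-1 would have wrapped to size-1 and dropped it.
  auto y = x.slice(/*dim=*/0, /*start=*/5);
  return y.size(0) == 5 ? 0 : 1;
}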