Commit 1e8e84b

Merge pull request ROCm#149 from iotamudelta/ifu
Merge from upstream
2 parents: 4c79e0b + 231f80a; commit: 1e8e84b

File tree

423 files changed: +9241 additions, -13995 deletions


.gitmodules

Lines changed: 0 additions & 3 deletions
@@ -1,9 +1,6 @@
 [submodule "third_party/catch"]
   path = third_party/catch
   url = https://github.com/catchorg/Catch2.git
-[submodule "third_party/nanopb"]
-  path = third_party/nanopb
-  url = https://github.com/nanopb/nanopb.git
 [submodule "third_party/pybind11"]
   path = third_party/pybind11
   url = https://github.com/pybind/pybind11.git

.jenkins/caffe2/build.sh

Lines changed: 5 additions & 4 deletions
@@ -107,10 +107,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
   PROTOBUF_INCDIR=/opt/conda/include pip install -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx"
   report_compile_cache_stats
   exit 0
-elif [[ $BUILD_ENVIRONMENT == *setup* ]]; then
-  rm -rf $INSTALL_PREFIX && mkdir $INSTALL_PREFIX
-  PYTHONPATH=$INSTALL_PREFIX $PYTHON setup_caffe2.py develop --install-dir $INSTALL_PREFIX
-  exit 0
 fi

@@ -156,6 +152,11 @@ if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
   export LC_ALL=C.UTF-8
   export HCC_AMDGPU_TARGET=gfx900

+  # The link time of libcaffe2_hip.so takes 40 minutes. According to
+  # https://github.com/RadeonOpenCompute/hcc#thinlto-phase-1---implemented
+  # using ThinLTO could significantly improve link-time performance.
+  export KMTHINLTO=1
+
   ########## HIPIFY Caffe2 operators
   ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py"
   ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py"

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
@@ -140,7 +140,6 @@ cmake_dependent_option(
     USE_MKLML "Use MKLML interface in MKL BLAS" ON
     "BUILD_CAFFE2" OFF)
 option(USE_DISTRIBUTED "Use THD (distributed)" OFF)
-option(USE_DISTRIBUTED_MW "Use THD (distributed) master worker" OFF)

 # Used when building Caffe2 through setup.py
 option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF)

CONTRIBUTING.md

Lines changed: 2 additions & 0 deletions
@@ -182,6 +182,8 @@ information for the code in `torch/csrc`. More information at:
 Python `setuptools` is pretty dumb, and always rebuilds every C file in a
 project. If you install the ninja build system with `pip install ninja`,
 then PyTorch will use it to track dependencies correctly.
+If PyTorch was already built, you will need to run `python setup.py clean` once
+after installing ninja for builds to succeed.

 #### Use CCache

aten/doc/Functions.h

Lines changed: 4 additions & 0 deletions
@@ -331,6 +331,7 @@ static inline Tensor & _standard_gamma_out(Tensor & output, const Tensor & self,
 static inline Tensor _standard_gamma(const Tensor & self, Generator * generator=nullptr);
 static inline Tensor & _dirichlet_grad_out(Tensor & output, const Tensor & x, const Tensor & alpha, const Tensor & total);
 static inline Tensor _dirichlet_grad(const Tensor & x, const Tensor & alpha, const Tensor & total);
+static inline Tensor sparse_coo_tensor(const Type& dtype, IntList size);
 static inline Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, IntList size);
 static inline Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values);
 static inline Tensor alias(const Tensor & self);

@@ -1764,6 +1765,9 @@ static inline Tensor & _dirichlet_grad_out(Tensor & output, const Tensor & x, co
 static inline Tensor _dirichlet_grad(const Tensor & x, const Tensor & alpha, const Tensor & total) {
   return infer_type(x)._dirichlet_grad(x, alpha, total);
 }
+static inline Tensor sparse_coo_tensor(const Type& dtype, IntList size) {
+  return dtype.sparse_coo_tensor(dtype, size);
+}
 static inline Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, IntList size) {
   return infer_type(values).sparse_coo_tensor(indices, values, size);
 }
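
For orientation, a minimal usage sketch of the overload added above, written against the ATen API of this revision. Obtaining the sparse Type via toBackend(Backend::SparseCPU) and the zeros/ones factory calls are assumptions based on the headers in this commit, not part of the diff itself:

    #include <ATen/ATen.h>

    int main() {
      // Assumption: the new overload expects a sparse Type, obtained
      // here by converting the dense CPU float Type to its sparse
      // counterpart.
      at::Type& sparse_float =
          at::CPU(at::kFloat).toBackend(at::Backend::SparseCPU);

      // New overload: an empty 4x5 sparse COO tensor of that Type.
      at::Tensor empty = at::sparse_coo_tensor(sparse_float, {4, 5});

      // Pre-existing overload: indices are 2 x nnz (one row per sparse
      // dimension), values hold the nnz entries. Here: one entry with
      // value 1.0 at position (0, 0).
      at::Tensor indices = at::CPU(at::kLong).zeros({2, 1});
      at::Tensor values = at::CPU(at::kFloat).ones({1});
      at::Tensor st = at::sparse_coo_tensor(indices, values, {4, 5});
      return 0;
    }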

aten/doc/Tensor.h

Lines changed: 2 additions & 1 deletion
@@ -396,7 +396,8 @@ struct Tensor : public detail::TensorBase {
   Tensor & _copy_ignoring_overlaps_(const Tensor & src);
   Tensor as_strided(IntList size, IntList stride, int64_t storage_offset=-1) const;
   Tensor & as_strided_(IntList size, IntList stride, int64_t storage_offset=-1);
-  Tensor & sparse_raw_resize_(IntList size, int64_t nDimI, int64_t nDimV);
+  Tensor & sparse_resize_(IntList size, int64_t nDimI, int64_t nDimV);
+  Tensor & sparse_resize_and_clear_(IntList size, int64_t nDimI, int64_t nDimV);
   Tensor & reshape_(IntList size, IntList stride);
   Tensor _sparse_mask(SparseTensor mask) const;
   Tensor to_dense() const;
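
The rename replaces sparse_raw_resize_ with a clearer pair. A hedged sketch of how the two might be called; the semantics in the comments are inferred from the names, and creating an empty sparse tensor via the Type's tensor() factory is an assumption:

    #include <ATen/ATen.h>

    int main() {
      at::Type& sparse_float =
          at::CPU(at::kFloat).toBackend(at::Backend::SparseCPU);
      at::Tensor t = sparse_float.tensor();  // empty sparse tensor

      // nDimI: number of sparse (indexed) dimensions; nDimV: number of
      // dense (value) dimensions carried by each stored element.
      t.sparse_resize_({4, 5}, /*nDimI=*/2, /*nDimV=*/0);

      // Presumably also discards the stored indices/values, as the
      // name suggests.
      t.sparse_resize_and_clear_({8, 10}, /*nDimI=*/2, /*nDimV=*/0);
      return 0;
    }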

aten/doc/Type.h

Lines changed: 3 additions & 1 deletion
@@ -656,14 +656,16 @@ struct AT_API Type {
   virtual Tensor tensor(IntList size) const;
   virtual Tensor tensor(IntList size, IntList stride) const;
   virtual Tensor tensor() const;
+  virtual Tensor sparse_coo_tensor(IntList size) const;
   virtual Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values, IntList size) const;
   virtual Tensor sparse_coo_tensor(const Tensor & indices, const Tensor & values) const;
   virtual Tensor alias(const Tensor & self) const;
   virtual Tensor & _copy_ignoring_overlaps_(Tensor & self, const Tensor & src) const;
   virtual Tensor & as_strided_out(Tensor & result, const Tensor & self, IntList size, IntList stride, int64_t storage_offset=-1) const;
   virtual Tensor as_strided(const Tensor & self, IntList size, IntList stride, int64_t storage_offset=-1) const;
   virtual Tensor & as_strided_(Tensor & self, IntList size, IntList stride, int64_t storage_offset=-1) const;
-  virtual Tensor & sparse_raw_resize_(Tensor & self, IntList size, int64_t nDimI, int64_t nDimV) const;
+  virtual Tensor & sparse_resize_(Tensor & self, IntList size, int64_t nDimI, int64_t nDimV) const;
+  virtual Tensor & sparse_resize_and_clear_(Tensor & self, IntList size, int64_t nDimI, int64_t nDimV) const;
   virtual Tensor & _cat_out(Tensor & self, TensorList tensors, int64_t dim=0) const;
   virtual Tensor _cat(TensorList tensors, int64_t dim=0) const;
   virtual Tensor & reshape_(Tensor & self, IntList size, IntList stride) const;

aten/src/ATen/ATen.h

Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,6 @@
 #pragma once

-#include "ATen/ATenGeneral.h"
+#include "ATen/core/ATenGeneral.h"
 #include "ATen/Allocator.h"
 #include "ATen/CPUGeneral.h"
 #include "ATen/CUDAGuard.h"

@@ -11,8 +11,8 @@
 #include "ATen/Dispatch.h"
 #include "ATen/Formatting.h"
 #include "ATen/Functions.h"
-#include "ATen/Generator.h"
-#include "ATen/Layout.h"
+#include "ATen/core/Generator.h"
+#include "ATen/core/Layout.h"
 #include "ATen/OptionsGuard.h"
 #include "ATen/Scalar.h"
 #include "ATen/Storage.h"

aten/src/ATen/ATenGeneral.cpp

Lines changed: 0 additions & 1 deletion
This file was deleted.

aten/src/ATen/Allocator.cpp

Lines changed: 0 additions & 14 deletions
This file was deleted.

aten/src/ATen/Allocator.h

Lines changed: 1 addition & 102 deletions
@@ -1,103 +1,2 @@
 #pragma once
-
-#include <memory>
-#include <stddef.h>
-
-#include <ATen/Device.h>
-#include <ATen/core/Error.h>
-#include <ATen/core/UniqueVoidPtr.h>
-
-namespace at {
-
-// A DataPtr is a unique pointer (with an attached deleter and some
-// context for the deleter) to some memory, which also records what
-// device is for its data.
-//
-// nullptr DataPtrs can still have a nontrivial device; this allows
-// us to treat zero-size allocations uniformly with non-zero allocations.
-//
-class DataPtr {
- private:
-  detail::UniqueVoidPtr ptr_;
-  Device device_;
- public:
-  // Choice of CPU here is arbitrary; if there's an "undefined" device
-  // we could use that too
-  DataPtr() : ptr_(), device_(DeviceType::CPU) {}
-  DataPtr(void* data, Device device)
-    : ptr_(data), device_(device) {}
-  DataPtr(void* data, void* ctx, DeleterFnPtr ctx_deleter, Device device)
-    : ptr_(data, ctx, ctx_deleter), device_(device) {}
-  void* operator->() const { return ptr_.get(); }
-  void clear() {
-    ptr_.clear();
-  }
-  void* get() const { return ptr_.get(); }
-  void* get_context() const { return ptr_.get_context(); }
-  void* release_context() { return ptr_.release_context(); }
-  operator bool() const { return static_cast<bool>(ptr_); }
-  template <typename T>
-  T* cast_context(DeleterFnPtr expected_deleter) const {
-    return ptr_.cast_context<T>(expected_deleter);
-  }
-  Device device() const { return device_; }
-};
-
-// NB: Device is NOT tested for here; a CUDA nullptr is as much a nullptr as a
-// CPU nullptr
-
-inline bool operator==(const at::DataPtr& dp, std::nullptr_t) noexcept { return !dp; }
-inline bool operator==(std::nullptr_t, const at::DataPtr& dp) noexcept { return !dp; }
-inline bool operator!=(const at::DataPtr& dp, std::nullptr_t) noexcept { return dp; }
-inline bool operator!=(std::nullptr_t, const at::DataPtr& dp) noexcept { return dp; }
-
-// Note [raw_allocate/raw_deallocate and Thrust]
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-// Thrust's support for custom allocators requires us to write something
-// like this:
-//
-//   class ThrustAllocator {
-//     char* allocate(size_t);
-//     void deallocate(char*, size_t);
-//   };
-//
-// This is not good for our unique_ptr based allocator interface, as
-// there is no way to get to the context when we free.
-//
-// However, in some cases the context is exactly the same as
-// the data pointer. In this case, we can support the "raw"
-// allocate and deallocate interface. This is what
-// raw_deleter signifies. By default, it returns a nullptr, which means that
-// the raw interface is not implemented. Be sure to implement it whenever
-// possible, or the raw interface will be incorrectly reported as unsupported,
-// when it is actually possible.
-
-struct Allocator {
-  virtual ~Allocator() {}
-  virtual at::DataPtr allocate(size_t n) const = 0;
-
-  // If this returns a non nullptr, it means that allocate()
-  // is guaranteed to return a unique_ptr with this deleter attached;
-  // it means the rawAllocate and rawDeallocate APIs are safe to use.
-  // This function MUST always return the same BoundDeleter.
-  virtual DeleterFnPtr raw_deleter() const { return nullptr; }
-  void* raw_allocate(size_t n) {
-    auto dptr = allocate(n);
-    AT_ASSERT(dptr.get() == dptr.get_context());
-    return dptr.release_context();
-  }
-  void raw_deallocate(void* ptr) {
-    auto d = raw_deleter();
-    AT_ASSERT(d);
-    d(ptr);
-  }
-};
-
-struct AT_API InefficientStdFunctionContext {
-  std::unique_ptr<void, std::function<void(void*)>> ptr_;
-  InefficientStdFunctionContext(std::unique_ptr<void, std::function<void(void*)>>&& ptr)
-    : ptr_(std::move(ptr)) {}
-  static at::DataPtr makeDataPtr(void* ptr, const std::function<void(void*)>& deleter, Device device);
-};
-
-} // namespace at
+#include <ATen/core/Allocator.h>
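
The DataPtr/Allocator machinery now lives in ATen/core/Allocator.h. As an illustration of the raw_allocate/raw_deallocate contract described in the deleted Note above, here is a minimal malloc-backed allocator sketch; the class name and the Device construction are illustrative assumptions, but the virtuals match the interface shown in this diff:

    #include <cstdlib>
    #include <ATen/core/Allocator.h>

    // Deleter with no context beyond the pointer itself, so the data
    // pointer can double as the deleter context.
    static void deleteFn(void* ptr) {
      std::free(ptr);
    }

    // Hypothetical allocator implementing the "raw" interface.
    struct MallocAllocator final : public at::Allocator {
      at::DataPtr allocate(size_t n) const override {
        void* data = std::malloc(n);
        // Pass data as its own context so that get() == get_context(),
        // which is exactly what raw_allocate() asserts.
        return at::DataPtr(data, data, &deleteFn,
                           at::Device(at::DeviceType::CPU));
      }
      // Returning a non-null deleter signals that raw_allocate() /
      // raw_deallocate() are safe to use (e.g. from a Thrust allocator,
      // per Note [raw_allocate/raw_deallocate and Thrust]).
      at::DeleterFnPtr raw_deleter() const override {
        return &deleteFn;
      }
    };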
