
Commit 578f601

Merge remote-tracking branch 'upstream/master'
2 parents 3a0a260 + a38b572 commit 578f601

26 files changed: +566 -285 lines changed

.jenkins/pytorch/build.sh

Lines changed: 1 addition & 9 deletions
@@ -74,7 +74,7 @@ fi
 WERROR=1 python setup.py install
 
 # Add the test binaries so that they won't be git clean'ed away
-git add -f build/bin
+git add -f build/bin build/lib
 
 # Testing ATen install
 if [[ "$BUILD_ENVIRONMENT" != *cuda* ]]; then
@@ -101,11 +101,3 @@ if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda8-cudnn6-py3* ]]; then
 make html
 popd
 fi
-
-# Test no-Python build
-if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then
-  echo "Building libtorch"
-  # NB: Install outside of source directory (at the same level as the root
-  # pytorch folder) so that it doesn't get cleaned away prior to docker push.
-  WERROR=1 VERBOSE=1 tools/cpp_build/build_caffe2.sh "$PWD/../cpp-build"
-fi

.jenkins/pytorch/macos-build.sh

Lines changed: 6 additions & 0 deletions
@@ -61,6 +61,12 @@ export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID}
 
 python setup.py install
 
+# this is a bit hacky, but not too bad. Bundle the test binaries into
+# the installation directory, so they can catch a free ride on the 7z
+# train.
+mkdir -p ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch/test_binaries/build
+mv build/{bin,lib} ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch/test_binaries/build/
+
 # Upload torch binaries when the build job is finished
 7z a ${IMAGE_COMMIT_TAG}.7z ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch*
 aws s3 cp ${IMAGE_COMMIT_TAG}.7z s3://ossci-macos-build/pytorch/${IMAGE_COMMIT_TAG}.7z --acl public-read

.jenkins/pytorch/macos-test.sh

Lines changed: 1 addition & 10 deletions
@@ -50,22 +50,13 @@ test_python_all() {
 test_cpp_api() {
   # C++ API
 
-  # NB: Install outside of source directory (at the same level as the root
-  # pytorch folder) so that it doesn't get cleaned away prior to docker push.
-  # But still clean it before we perform our own build.
-  #
-  CPP_BUILD="$PWD/../cpp-build"
-  rm -rf $CPP_BUILD
-  mkdir -p $CPP_BUILD
-  WERROR=1 VERBOSE=1 tools/cpp_build/build_caffe2.sh "$CPP_BUILD"
-
   python tools/download_mnist.py --quiet -d test/cpp/api/mnist
 
   # Unfortunately it seems like the test can't load from miniconda3
   # without these paths being set
   export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib"
   export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib"
-  "$CPP_BUILD"/caffe2/bin/test_api
+  ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch/test_binaries/build/bin/test_api
 }
 
 if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then

.jenkins/pytorch/test.sh

Lines changed: 3 additions & 4 deletions
@@ -108,14 +108,13 @@ test_torchvision() {
 test_libtorch() {
   if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then
     echo "Testing libtorch"
-    CPP_BUILD="$PWD/../cpp-build"
     if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
-      "$CPP_BUILD"/caffe2/bin/test_jit
+      ./build/bin/test_jit
     else
-      "$CPP_BUILD"/caffe2/bin/test_jit "[cpu]"
+      ./build/bin/test_jit "[cpu]"
    fi
    python tools/download_mnist.py --quiet -d test/cpp/api/mnist
-    OMP_NUM_THREADS=2 "$CPP_BUILD"/caffe2/bin/test_api
+    OMP_NUM_THREADS=2 ./build/bin/test_api
   fi
 }

caffe2/core/tensor.h

Lines changed: 55 additions & 19 deletions
@@ -231,6 +231,17 @@ class Tensor {
 
   virtual ~Tensor() noexcept {}
 
+  /**
+   * @brief Extend the outer-most dimension of this tensor
+   * to dimension of `num`.
+   */
+  void ExtendTo(TIndex num, float growthPct, BaseContext* context) {
+    CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1);
+    CAFFE_ENFORCE_GE_WITH_CALLER(growthPct, 0);
+    CAFFE_ENFORCE(context != nullptr, "Context must be provided.");
+    Extend(num - dims_[0], growthPct, context);
+  }
+
   /**
    * @brief Extends the outer-most dimension of this tensor by num elements,
    * preserving the existing data.
@@ -242,6 +253,8 @@ class Tensor {
    */
   void Extend(TIndex num, float growthPct, BaseContext* context) {
     CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1);
+    CAFFE_ENFORCE_GE_WITH_CALLER(
+        num, 0, "`num` must be non-negative for Extend");
     auto newDims = dims_;
     newDims[0] += num;
     if (!data_) {
@@ -261,30 +274,17 @@ class Tensor {
     auto newCapacity = dims_;
     newCapacity[0] = std::max<size_t>(
         newDims[0], std::ceil(dims_[0] * (growthPct + 100) / 100));
-    Reserve(newCapacity, context);
-    dims_ = newDims;
-    size_ = newSize;
-  }
-
-  template <class T>
-  void Reserve(const std::vector<T>& newCapacity, BaseContext* context) {
-    auto newSize = std::accumulate(
-        newCapacity.begin(),
-        newCapacity.end(),
-        static_cast<TIndex>(1),
-        std::multiplies<TIndex>());
-    if (newSize * meta_.itemsize() <= capacity_) {
-      return;
-    }
     auto oldData = std::move(data_);
     auto oldSize = size_;
     auto oldDims = dims_;
     Resize(newCapacity);
     auto* newData = raw_mutable_data(meta_);
+    CAFFE_ENFORCE(
+        context != nullptr, "Context must be provided to Extend the tensor");
     context->CopyItemsSameDevice(meta_, oldSize, oldData.get(), newData);
-    dims_ = oldDims;
-    size_ = oldSize;
     reserved_ = true;
+    dims_ = newDims;
+    size_ = newSize;
   }
 
   /**
@@ -293,7 +293,7 @@ class Tensor {
    * This method guarantees that no re-allocations are carried out, which means
    * that the extra capacity after the end of the shurnk tensor is maintained.
    */
-  void Shrink(TIndex outer_dim) {
+  void ShrinkTo(TIndex outer_dim) {
     CAFFE_ENFORCE_WITH_CALLER(dims_.size() >= 1, "Tensor must be at least 1D");
     CAFFE_ENFORCE_WITH_CALLER(
         outer_dim <= dims_[0],
@@ -306,6 +306,38 @@ class Tensor {
         std::multiplies<TIndex>());
   }
 
+  /**
+   * @brief Reserve space for the underlying tensor.
+   *
+   * This must be called after Resize(), since we only specify the first
+   * dimension This does not copy over the old data to the newly allocated space
+   */
+  template <class T>
+  void ReserveSpace(const T& outer_dim) {
+    CAFFE_ENFORCE(
+        size_ != -1, "size should be initialized before calling ReserveSpace");
+    auto newCapacity = dims_;
+    newCapacity[0] = outer_dim;
+    auto newSize = std::accumulate(
+        newCapacity.begin(),
+        newCapacity.end(),
+        static_cast<TIndex>(1),
+        std::multiplies<TIndex>());
+    if (newSize * meta_.itemsize() <= capacity_) {
+      return;
+    }
+    // Old data is discarded
+    data_.reset();
+    auto oldSize = size_;
+    auto oldDims = dims_;
+    Resize(newCapacity);
+    // Allocate new memory and don't copy over the data
+    raw_mutable_data(meta_);
+    dims_ = oldDims;
+    size_ = oldSize;
+    reserved_ = true;
+  }
+
   /**
    * @brief Resizes a tensor.
   *
@@ -389,7 +421,7 @@ class Tensor {
     capacity_ = 0;
     // If reserved is true and we changed tensor memory then it is fine
     // to switch it to false, if Resize is called from Reserve and it triggers
-    // FreeMemory() then reserved_ will be set to true at end of Reserve()
+    // FreeMemory() then reserved_ will be set to true at end of ReserveSpace()
     reserved_ = false;
   }
 
@@ -740,6 +772,10 @@ class Tensor {
   TypeMeta meta_;
   std::shared_ptr<void> data_;
   size_t capacity_ = 0;
+  // we decide to keep reserved and it will
+  // live in Tensor after the split
+  // The logic is that if Extend() or ReserveSpace() were ever called,
+  // then subsequent Resize()s will not free up Storage.
   bool reserved_ = false;
   DeviceType device_type_ = CPU;
   // In case of chunk load we store how much data was already loaded
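
For orientation (not part of the diff): the commit renames Shrink() to ShrinkTo(), replaces the old internal Reserve() with a public ReserveSpace(), and adds ExtendTo() on top of Extend(). A minimal sketch of how these members fit together; the helper name and the 50% growth figure are illustrative only, and it assumes CPUContext is usable wherever a BaseContext* is expected:

#include "caffe2/core/context.h"
#include "caffe2/core/tensor.h"

namespace caffe2 {

// Illustrative helper only; assumes `t` has already been Resize()d and
// filled, and that `ctx` is a live CPUContext.
void GrowAndTrimOuterDim(Tensor* t, TIndex extra_rows, CPUContext* ctx) {
  // Extend(): grow the outer dimension by `extra_rows`, copying the old
  // rows over; growthPct = 50 requests 50% slack capacity on reallocation.
  t->Extend(extra_rows, 50.f, ctx);

  // ExtendTo(): the same operation expressed as an absolute target for the
  // outer dimension (must be >= the current one).
  t->ExtendTo(t->dim(0) + extra_rows, 50.f, ctx);

  // ShrinkTo() (renamed from Shrink()): reduce the outer dimension without
  // releasing capacity, so a later Extend() can reuse it.
  t->ShrinkTo(t->dim(0) - extra_rows);

  // ReserveSpace(): pre-allocate capacity for a larger outer dimension.
  // Unlike Extend(), it discards the existing data rather than copying it.
  t->ReserveSpace(t->dim(0) + 2 * extra_rows);
}

}  // namespace caffe2

The reserved_ flag set by Extend() and ReserveSpace() is what keeps subsequent Resize()s from freeing the underlying storage, per the new comment on the member above.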

caffe2/experiments/operators/tt_pad_op.h

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ class TTPadGradientOp final : public Operator<Context> {
     auto dim1 = G.dim(1);
 
     if (old_dim0 < new_dim0) {
-      output->Shrink(old_dim0);
+      output->ShrinkTo(old_dim0);
     }
 
     return true;

caffe2/mobile/contrib/ulp2/ulp_neon.cc

Lines changed: 1 addition & 1 deletion
@@ -537,7 +537,7 @@ void run2b1bConvIm2ColGEMM(QConvState* state,
   } else {
     CAFFE_ENFORCE_EQ(Y->dim32(0), divRoundUp(X.dim32(0) * OH * OW, kGEMMTileSize) * kGEMMTileSize);
     CAFFE_ENFORCE_EQ(Y->dim32(1), OC);
-    Y->Shrink(X.dim32(0) * OH * OW);
+    Y->ShrinkTo(X.dim32(0) * OH * OW);
     Y->Reshape(std::vector<TIndex>{{TIndex(X.dim(0)), TIndex(OH), TIndex(OW), TIndex(OC)}});
   }
 }

caffe2/operators/dataset_ops.cc

Lines changed: 1 addition & 1 deletion
@@ -1004,7 +1004,7 @@ class TrimDatasetOp : public Operator<CPUContext> {
     // trim each column to the offset
     for (int col = 0; col < walker.fields().size(); ++col) {
       auto newOuterSize = walker.fields().at(col).offset();
-      Output(col)->Shrink(newOuterSize);
+      Output(col)->ShrinkTo(newOuterSize);
     }
     return true;
   }

caffe2/operators/group_norm_op.cc

Lines changed: 73 additions & 20 deletions
@@ -18,29 +18,64 @@ namespace caffe2 {
 namespace {
 
 template <typename T>
-inline T Cube(const T& x) {
-  return x * x * x;
+void GroupNormForwardNCHW(
+    const int N,
+    const int G,
+    const int D,
+    const int HxW,
+    const T* X,
+    const T* mu,
+    const T* rsig,
+    const T* gamma,
+    const T* beta,
+    T* Y) {
+  const int C = G * D;
+  EigenArrayMap<T>(Y, D * HxW, N * G) =
+      (ConstEigenArrayMap<T>(X, D * HxW, N * G).rowwise() -
+       ConstEigenVectorArrayMap<T>(mu, N * G).transpose())
+          .rowwise() *
+      ConstEigenVectorArrayMap<T>(rsig, N * G).transpose();
+  T* Y_ptr = Y;
+  const int stride = C * HxW;
+  ConstEigenVectorArrayMap<T> gamma_arr(gamma, C);
+  ConstEigenVectorArrayMap<T> beta_arr(beta, C);
+  for (int i = 0; i < N; ++i) {
+    EigenArrayMap<T> Y_arr(Y_ptr, HxW, C);
+    Y_arr = (Y_arr.rowwise() * gamma_arr.transpose()).rowwise() +
+        beta_arr.transpose();
+    Y_ptr += stride;
+  }
 }
 
-template <typename T, StorageOrder kOrder>
-void GroupNormForward(
-    const std::array<int, 4>& dims,
+template <typename T>
+void GroupNormForwardNHWC(
+    const int N,
+    const int G,
+    const int D,
+    const int HxW,
     const T* X,
     const T* mu,
     const T* rsig,
     const T* gamma,
     const T* beta,
     T* Y) {
-  constexpr int kGDim = kOrder == StorageOrder::NCHW ? 1 : 2;
-  constexpr int kDDim = kOrder == StorageOrder::NCHW ? 2 : 3;
-  const int size = dims[0] * dims[1] * dims[2] * dims[3];
-  std::array<int, 4> index = {0, 0, 0, 0};
-  for (int i = 0; i < size; ++i) {
-    const int i_mu = index[0] * dims[kGDim] + index[kGDim];
-    const int i_gamma = index[kGDim] * dims[kDDim] + index[kDDim];
-    Y[i] = gamma[i_gamma] * (X[i] - mu[i_mu]) * rsig[i_mu] + beta[i_gamma];
-    math::utils::IncreaseIndexInDims(4, dims.data(), index.data());
+  const int C = G * D;
+  const T* X_ptr = X;
+  T* Y_ptr = Y;
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < HxW; ++j) {
+      EigenArrayMap<T>(Y_ptr, D, G) =
+          (ConstEigenArrayMap<T>(X_ptr, D, G).rowwise() -
+           ConstEigenVectorArrayMap<T>(mu + i * G, G).transpose())
+              .rowwise() *
+          ConstEigenVectorArrayMap<T>(rsig + i * G, G).transpose();
+      X_ptr += C;
+      Y_ptr += C;
+    }
   }
+  EigenArrayMap<T> Y_arr(Y, C, N * HxW);
+  Y_arr = (Y_arr.colwise() * ConstEigenVectorArrayMap<T>(gamma, C)).colwise() +
+      ConstEigenVectorArrayMap<T>(beta, C);
 }
 
 template <typename T, StorageOrder kOrder>
@@ -97,8 +132,8 @@ void GroupNormBackward(
   for (int i = 0; i < size; ++i) {
     const int i_mu = index[0] * dims[kGDim] + index[kGDim];
     const int i_gamma = index[kGDim] * dims[kDDim] + index[kDDim];
-    const T u =
-        (db[i_mu] * mu[i_mu] - ds[i_mu]) * (X[i] - mu[i_mu]) * Cube(rsig[i_mu]);
+    const T u = (db[i_mu] * mu[i_mu] - ds[i_mu]) * (X[i] - mu[i_mu]) *
+        math::utils::Cube(rsig[i_mu]);
     const T v = db[i_mu] * rsig[i_mu];
     dX[i] = gamma[i_gamma] * dY[i] * rsig[i_mu] + (u - v) * denom;
     dgamma[i_gamma] += dY[i] * (X[i] - mu[i_mu]) * rsig[i_mu];
@@ -138,11 +173,29 @@ bool GroupNormOp<T, Context>::RunOnDeviceImpl(
 
   // Computes Y = gamma * (X - mu) * rsig + beta.
   if (order_ == StorageOrder::NCHW) {
-    GroupNormForward<T, StorageOrder::NCHW>(
-        dims, X_data, mu_data, rsig_data, gamma_data, beta_data, Y_data);
+    GroupNormForwardNCHW<T>(
+        N,
+        G,
+        D,
+        HxW,
+        X_data,
+        mu_data,
+        rsig_data,
+        gamma_data,
+        beta_data,
+        Y_data);
   } else {
-    GroupNormForward<T, StorageOrder::NHWC>(
-        dims, X_data, mu_data, rsig_data, gamma_data, beta_data, Y_data);
+    GroupNormForwardNHWC<T>(
+        N,
+        G,
+        D,
+        HxW,
+        X_data,
+        mu_data,
+        rsig_data,
+        gamma_data,
+        beta_data,
+        Y_data);
   }
   return true;
 }
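
Context for the change (not part of the diff): the single index-walking GroupNormForward<T, kOrder> is replaced by layout-specific, Eigen-vectorized kernels, and the local Cube() helper is dropped in favor of math::utils::Cube. Both layouts still compute Y = gamma * (X - mu) * rsig + beta element-wise; a naive NCHW reference with hypothetical names, for comparison with the vectorized kernel above:

// Scalar reference for the NCHW kernel: N batches, G groups, D channels per
// group, HxW spatial positions; mu/rsig are per (n, g), gamma/beta per channel.
template <typename T>
void GroupNormForwardNCHWReference(
    int N, int G, int D, int HxW,
    const T* X, const T* mu, const T* rsig,
    const T* gamma, const T* beta, T* Y) {
  for (int n = 0; n < N; ++n) {
    for (int g = 0; g < G; ++g) {
      const T m = mu[n * G + g];    // group mean
      const T r = rsig[n * G + g];  // group inverse standard deviation
      for (int d = 0; d < D; ++d) {
        const int c = g * D + d;    // absolute channel index
        const T* x = X + ((n * G + g) * D + d) * HxW;
        T* y = Y + ((n * G + g) * D + d) * HxW;
        for (int s = 0; s < HxW; ++s) {
          // Same formula the op documents: Y = gamma * (X - mu) * rsig + beta.
          y[s] = gamma[c] * (x[s] - m) * r + beta[c];
        }
      }
    }
  }
}

The NHWC variant applies the same per-group normalization, but because channels are innermost there, gamma and beta can be folded in with a single column-wise Eigen expression at the end.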
