
Commit 619e78a

Merge pull request #29 from iotamudelta/master
Merge from upstream
2 parents: 1876823 + e57fe61


240 files changed: 4853 additions and 2768 deletions

.clang-tidy

Lines changed: 23 additions & 21 deletions
@@ -3,36 +3,38 @@
 Checks: '
     *
     ,modernize-*
-    ,clang-analyzer-*
+    ,-cert-err58-cpp
+    ,-cert-err60-cpp
     ,-clang-diagnostic-*
-    ,-hicpp-no-array-decay
+    ,-cppcoreguidelines-owning-memory
+    ,-cppcoreguidelines-pro-bounds-array-to-pointer-decay
+    ,-cppcoreguidelines-pro-bounds-constant-array-index
+    ,-cppcoreguidelines-pro-type-static-cast-downcast
+    ,-cppcoreguidelines-pro-type-vararg
+    ,-cppcoreguidelines-special-member-functions
     ,-fuchsia-*
+    ,-google-build-using-namespace
+    ,-google-explicit-constructor
+    ,-google-readability-braces-around-statements
     ,-google-readability-namespace-comments
-    ,-llvm-namespace-comment
     ,-google-readability-todo
-    ,-cppcoreguidelines-pro-bounds-array-to-pointer-decay
-    ,-cert-err60-cpp
-    ,-llvm-header-guard
-    ,-cppcoreguidelines-special-member-functions
-    ,-misc-unused-parameters
+    ,-google-runtime-references
+    ,-google-runtime-references
     ,-hicpp-braces-around-statements
+    ,-hicpp-explicit-conversions
+    ,-hicpp-no-array-decay
     ,-hicpp-special-member-functions
-    ,-readability-braces-around-statements
-    ,-modernize-use-default-member-init
-    ,-google-runtime-references
-    ,-cppcoreguidelines-pro-type-vararg
-    ,-google-readability-braces-around-statements
-    ,-google-build-using-namespace
     ,-hicpp-vararg
-    ,-hicpp-explicit-conversions
-    ,-performance-unnecessary-value-param
-    ,-google-runtime-references
-    ,-cppcoreguidelines-pro-type-static-cast-downcast
-    ,-cppcoreguidelines-pro-bounds-constant-array-index
-    ,-cert-err58-cpp
+    ,-llvm-header-guard
+    ,-llvm-namespace-comment
+    ,-misc-unused-parameters
     ,-modernize-make-unique
-    ,-cppcoreguidelines-owning-memory
+    ,-modernize-use-default-member-init
+    ,-performance-unnecessary-value-param
+    ,-readability-braces-around-statements
+    ,-readability-else-after-return
     ,-readability-named-parameter
+    ,clang-analyzer-*
 '
 WarningsAsErrors: ''
 HeaderFilterRegex: 'torch/csrc/'
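
Note: the check list is now alphabetized, clang-analyzer-* is enabled explicitly at the end, and a few new suppressions appear, among them -readability-else-after-return and -google-explicit-constructor. As an illustration only (this snippet is not part of the commit), the following is the kind of code readability-else-after-return used to flag and which now passes under the updated .clang-tidy:

// Hypothetical example, not from the commit: with
// -readability-else-after-return suppressed, clang-tidy no longer warns
// about an `else` that follows a returning branch.
#include <cstdint>

int64_t clamp_nonnegative(int64_t x) {
  if (x < 0) {
    return 0;
  } else {  // previously flagged by readability-else-after-return
    return x;
  }
}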

.jenkins/pytorch/test.sh

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ test_aten() {
   # put the dynamic libraries somewhere were the dynamic linker can find them.
   # This is a bit of a hack.
   ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
+  ln -s "$TORCH_LIB_PATH"/libnccl* build/bin
   ls build/bin
   aten/tools/run_tests.sh build/bin
 fi

aten/src/ATen/CPUApplyUtils.h

Lines changed: 8 additions & 5 deletions
@@ -253,16 +253,15 @@ apply_op(int64_t numel, int64_t offset, const Op& op, Args... iters) {
   }
 }
 
+
 inline void apply_kernel(){};
 
+// TODO: Deal elegantly with 0-dim tensors. iters.strides_ of 0-dim
+// strided_tensor_iter will be of size 0 for dim 0 and iters.strides_[iters.dim_
+// - 1] will index at -1. C++14 integer_sequence could be of use here.
 template <typename Op, typename... Args>
 inline void
 apply_kernel(int64_t numel, int64_t offset, const Op& op, Args... iters) {
-  // For 0-dim tensors
-  if (numel == 1 && max_dim(iters...) == 0) {
-    op(1, iters.data_..., iters.strides_[iters.dim_ - 1]...);
-    return;
-  }
   if (offset > 0)
     forward(offset, iters...);
   int64_t size = std::min(numel, max_iterate_size(iters...));
@@ -284,6 +283,10 @@ inline void
 CPU_tensor_parallel_kernel_apply2(Tensor tensor1, Tensor tensor2, const Op op) {
   if (!_apply_preamble({tensor1, tensor2}))
     return;
+  if (tensor1.numel() == 1) {
+    op(1, tensor1.data<scalar1>(), tensor2.data<scalar2>(), 0, 0);
+    return;
+  }
   if (tensor1.ndimension() < 8 && tensor2.ndimension() < 8) {
     parallel_for(
         0,
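
Note: the single-element (0-dim) special case moves out of apply_kernel and into CPU_tensor_parallel_kernel_apply2, which now invokes the op once with strides of 0, as the new TODO comment describes. A minimal sketch of that calling pattern, using hypothetical names rather than ATen's real templates:

#include <cstdint>

// Sketch only: for a single-element tensor the elementwise kernel is called
// once with n = 1 and both strides set to 0, so the op never advances
// either data pointer.
template <typename Op, typename T1, typename T2>
void apply2_single_element(const Op& op, T1* a, T2* b) {
  op(int64_t{1}, a, b, int64_t{0}, int64_t{0});
}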

aten/src/ATen/Declarations.cwrap

Lines changed: 1 addition & 15 deletions
@@ -1114,24 +1114,10 @@
     - THTensor* self
 ]]
 [[
-  name: sigmoid_
+  name: _th_sigmoid
   types:
     - floating_point
   backends:
-    - CPU
-    - CUDA
-  cname: sigmoid
-  return: self
-  arguments:
-    - THTensor* self
-    - THTensor* self
-]]
-[[
-  name: sigmoid
-  types:
-    - floating_point
-  backends:
-    - CPU
     - CUDA
   cname: sigmoid
   variants:

aten/src/ATen/cpu/vec256/intrinsics.h

Lines changed: 3 additions & 3 deletions
@@ -4,10 +4,10 @@
 /* Microsoft C/C++-compatible compiler */
 #include <intrin.h>
 #if _MSC_VER <= 1900
-#define _mm256_extract_epi64(X, Y) (_mm_extract_epi16(_mm256_extractf128_si256(X, Y >> 1), Y % 2))
-#define _mm256_extract_epi32(X, Y) (_mm_extract_epi16(_mm256_extractf128_si256(X, Y >> 2), Y % 4))
+#define _mm256_extract_epi64(X, Y) (_mm_extract_epi64(_mm256_extractf128_si256(X, Y >> 1), Y % 2))
+#define _mm256_extract_epi32(X, Y) (_mm_extract_epi32(_mm256_extractf128_si256(X, Y >> 2), Y % 4))
 #define _mm256_extract_epi16(X, Y) (_mm_extract_epi16(_mm256_extractf128_si256(X, Y >> 3), Y % 8))
-#define _mm256_extract_epi8(X, Y) (_mm_extract_epi16(_mm256_extractf128_si256(X, Y >> 4), Y % 16))
+#define _mm256_extract_epi8(X, Y) (_mm_extract_epi8(_mm256_extractf128_si256(X, Y >> 4), Y % 16))
 #endif
 #elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
 /* GCC-compatible compiler, targeting x86/x86-64 */
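
Note: the MSVC (<= VS2015) fallback macros previously routed every width through _mm_extract_epi16, so 64-, 32- and 8-bit extractions returned the wrong lane data; the fix dispatches to the width-matched _mm_extract_epi64, _mm_extract_epi32 and _mm_extract_epi8. A small sanity check, not part of the commit and assuming an AVX-capable compiler:

// Hypothetical check: element 5 of an 8 x int32 vector must come back intact.
// Build with AVX enabled (e.g. -mavx on GCC/Clang).
#include <immintrin.h>
#include <cassert>

int main() {
  __m256i v = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
  // The old macro extracted a 16-bit slice of the lane; the corrected
  // macro returns the full 32-bit element, i.e. 15 here.
  assert(_mm256_extract_epi32(v, 5) == 15);
  return 0;
}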

aten/src/ATen/cpu/vec256/vec256_base.h

Lines changed: 26 additions & 12 deletions
@@ -23,8 +23,10 @@ namespace {
 // emulates vectorized types
 template <class T>
 struct Vec256 {
-  static constexpr int size = 32 / sizeof(T);
+private:
   T values[32 / sizeof(T)] = {0};
+public:
+  static constexpr int size = 32 / sizeof(T);
   Vec256() {}
   Vec256(T val) {
     for (int i = 0; i != size; i++) {
@@ -37,9 +39,9 @@ struct Vec256 {
     Vec256 vec;
     for (int64_t i = 0; i < size; i++) {
       if (mask & 0x01) {
-        vec.values[i] = b[i];
+        vec[i] = b[i];
       } else {
-        vec.values[i] = a[i];
+        vec[i] = a[i];
       }
       mask = mask >> 1;
     }
@@ -49,9 +51,9 @@ struct Vec256 {
     Vec256 vec;
     for (int64_t i = 0; i < size; i++) {
       if (i < count) {
-        vec.values[i] = b.values[i];
+        vec[i] = b[i];
       } else {
-        vec.values[i] = a.values[i];
+        vec[i] = a[i];
       }
     }
     return vec;
@@ -69,17 +71,23 @@ struct Vec256 {
   void store(void* ptr, int count = size) const {
     std::memcpy(ptr, values, count * sizeof(T));
   }
+  const T& operator[](int idx) const {
+    return values[idx];
+  }
+  T& operator[](int idx) {
+    return values[idx];
+  }
   Vec256<T> map(T (*f)(T)) const {
     Vec256<T> ret;
     for (int64_t i = 0; i != size; i++) {
-      ret.values[i] = f(values[i]);
+      ret[i] = f(values[i]);
     }
     return ret;
   }
   Vec256<T> abs() const {
     Vec256<T> ret;
     for (int64_t i = 0; i < size; i++) {
-      ret.values[i] = values[i] < 0 ? -values[i] : values[i];
+      ret[i] = values[i] < 0 ? -values[i] : values[i];
     }
     return ret;
   }
@@ -125,6 +133,9 @@ struct Vec256 {
   Vec256<T> floor() const {
     return map(std::floor);
   }
+  Vec256<T> neg() const {
+    return map([](T x) { return -x; });
+  }
   Vec256<T> round() const {
     return map(std::round);
   }
@@ -146,6 +157,9 @@ struct Vec256 {
   Vec256<T> sqrt() const {
     return map(std::sqrt);
   }
+  Vec256<T> reciprocal() const {
+    return map([](T x) { return (T)(1) / x; });
+  }
   Vec256<T> rsqrt() const {
     return map([](T x) { return 1 / std::sqrt(x); });
   }
@@ -154,39 +168,39 @@
 template <class T> Vec256<T> operator+(const Vec256<T> &a, const Vec256<T> &b) {
   Vec256<T> c = Vec256<T>();
   for (int i = 0; i != Vec256<T>::size; i++) {
-    c.values[i] = a.values[i] + b.values[i];
+    c[i] = a[i] + b[i];
   }
   return c;
 }
 
 template <class T> Vec256<T> operator-(const Vec256<T> &a, const Vec256<T> &b) {
   Vec256<T> c = Vec256<T>();
   for (int i = 0; i != Vec256<T>::size; i++) {
-    c.values[i] = a.values[i] - b.values[i];
+    c[i] = a[i] - b[i];
   }
   return c;
 }
 
 template <class T> Vec256<T> operator*(const Vec256<T> &a, const Vec256<T> &b) {
   Vec256<T> c = Vec256<T>();
   for (int i = 0; i != Vec256<T>::size; i++) {
-    c.values[i] = a.values[i] * b.values[i];
+    c[i] = a[i] * b[i];
   }
   return c;
 }
 
 template <class T> Vec256<T> operator/(const Vec256<T> &a, const Vec256<T> &b) __ubsan_ignore_float_divide_by_zero__ {
   Vec256<T> c = Vec256<T>();
   for (int i = 0; i != Vec256<T>::size; i++) {
-    c.values[i] = a.values[i] / b.values[i];
+    c[i] = a[i] / b[i];
   }
   return c;
 }
 
 template <class T> Vec256<T> max(const Vec256<T> &a, const Vec256<T> &b) {
   Vec256<T> c = Vec256<T>();
   for (int i = 0; i != Vec256<T>::size; i++) {
-    c.values[i] = std::max(a.values[i], b.values[i]);
+    c[i] = std::max(a[i], b[i]);
   }
   return c;
 }
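
Note: the fallback Vec256<T> now keeps its storage private and exposes element access through operator[], and it gains neg() and reciprocal(). A self-contained sketch of the same pattern (MiniVec is a hypothetical stand-in, not ATen's class):

// Storage is private and all element access goes through operator[], so the
// scalar fallback and any specialized layout can differ without changing
// call sites.
#include <cstdio>

template <class T>
struct MiniVec {
 private:
  T values[32 / sizeof(T)] = {0};
 public:
  static constexpr int size = 32 / sizeof(T);
  MiniVec() {}
  MiniVec(T val) { for (int i = 0; i != size; i++) values[i] = val; }
  const T& operator[](int idx) const { return values[idx]; }
  T& operator[](int idx) { return values[idx]; }
  MiniVec neg() const {            // mirrors the new Vec256<T>::neg()
    MiniVec ret;
    for (int i = 0; i != size; i++) ret[i] = -values[i];
    return ret;
  }
  MiniVec reciprocal() const {     // mirrors the new Vec256<T>::reciprocal()
    MiniVec ret;
    for (int i = 0; i != size; i++) ret[i] = T(1) / values[i];
    return ret;
  }
};

int main() {
  MiniVec<float> a(2.0f);
  std::printf("%g %g\n", a.reciprocal()[0], a.neg()[0]);  // prints: 0.5 -2
  return 0;
}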

aten/src/ATen/cpu/vec256/vec256_double.h

Lines changed: 10 additions & 1 deletion
@@ -13,9 +13,10 @@ namespace {
 #if defined(__AVX__) && !defined(_MSC_VER)
 
 template <> class Vec256<double> {
+private:
+  __m256d values;
 public:
   static constexpr int size = 4;
-  __m256d values;
   Vec256() {}
   Vec256(__m256d v) : values(v) {}
   Vec256(double val) {
@@ -61,6 +62,8 @@ template <> class Vec256<double> {
       std::memcpy(ptr, tmp_values, count * sizeof(double));
     }
   }
+  const double& operator[](int idx) const = delete;
+  double& operator[](int idx) = delete;
   Vec256<double> map(double (*f)(double)) const {
     __at_align32__ double tmp[4];
     store(tmp);
@@ -121,6 +124,9 @@ template <> class Vec256<double> {
   Vec256<double> floor() const {
     return _mm256_floor_pd(values);
   }
+  Vec256<double> neg() const {
+    return _mm256_xor_pd(_mm256_set1_pd(-0.), values);
+  }
   Vec256<double> round() const {
     return _mm256_round_pd(values, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
   }
@@ -136,6 +142,9 @@ template <> class Vec256<double> {
   Vec256<double> sqrt() const {
     return _mm256_sqrt_pd(values);
   }
+  Vec256<double> reciprocal() const {
+    return _mm256_div_pd(_mm256_set1_pd(1), values);
+  }
   Vec256<double> rsqrt() const {
     return _mm256_div_pd(_mm256_set1_pd(1), _mm256_sqrt_pd(values));
   }
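
Note: in the AVX double specialization, operator[] is deleted because the payload is a __m256d register rather than an array, neg() flips the sign bit by XORing with a broadcast -0.0, and reciprocal() divides 1 by each lane. A scalar illustration of the sign-bit trick that _mm256_xor_pd(_mm256_set1_pd(-0.), values) relies on (this mirrors, but is not, the vectorized code):

#include <cstdint>
#include <cstring>
#include <cstdio>

// XORing a double with -0.0 flips only its sign bit.
double flip_sign(double x) {
  std::uint64_t bits, mask;
  double neg_zero = -0.0;
  std::memcpy(&bits, &x, sizeof bits);
  std::memcpy(&mask, &neg_zero, sizeof mask);  // mask = 0x8000000000000000
  bits ^= mask;                                // flip the sign bit only
  double out;
  std::memcpy(&out, &bits, sizeof out);
  return out;
}

int main() {
  std::printf("%g %g\n", flip_sign(3.5), flip_sign(-1.25));  // -3.5 1.25
  return 0;
}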

aten/src/ATen/cpu/vec256/vec256_float.h

Lines changed: 10 additions & 1 deletion
@@ -13,9 +13,10 @@ namespace {
 #if defined(__AVX__) && !defined(_MSC_VER)
 
 template <> class Vec256<float> {
+private:
+  __m256 values;
 public:
   static constexpr int64_t size = 8;
-  __m256 values;
   Vec256() {}
   Vec256(__m256 v) : values(v) {}
   Vec256(float val) {
@@ -66,6 +67,8 @@ template <> class Vec256<float> {
       std::memcpy(ptr, tmp_values, count * sizeof(float));
     }
   }
+  const float& operator[](int idx) const = delete;
+  float& operator[](int idx) = delete;
   Vec256<float> map(float (*f)(float)) const {
     __at_align32__ float tmp[8];
     store(tmp);
@@ -126,6 +129,9 @@ template <> class Vec256<float> {
   Vec256<float> floor() const {
     return _mm256_floor_ps(values);
   }
+  Vec256<float> neg() const {
+    return _mm256_xor_ps(_mm256_set1_ps(-0.f), values);
+  }
   Vec256<float> round() const {
     return _mm256_round_ps(values, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
   }
@@ -141,6 +147,9 @@ template <> class Vec256<float> {
   Vec256<float> sqrt() const {
     return _mm256_sqrt_ps(values);
   }
+  Vec256<float> reciprocal() const {
+    return _mm256_div_ps(_mm256_set1_ps(1), values);
+  }
   Vec256<float> rsqrt() const {
     return _mm256_div_ps(_mm256_set1_ps(1), _mm256_sqrt_ps(values));
   }
