Skip to content

Commit 67097e9

Browse files
committed
Merge remote-tracking branch 'upstream/master' into ifu
2 parents 0fbc1c0 + a5d7abe commit 67097e9

File tree

89 files changed

+1689
-482
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+1689
-482
lines changed

.jenkins/pytorch/build.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ fi
66

77
# TODO: move this to Docker
88
# TODO: add both NCCL and MPI in CI test by fixing these test first
9-
# sudo apt-get update
10-
# sudo apt-get install libnccl-dev libnccl2
9+
sudo apt-get update
10+
sudo apt-get install libnccl-dev libnccl2
1111
# sudo apt-get install openmpi-bin libopenmpi-dev
1212

1313
# Required environment variable: $BUILD_ENVIRONMENT

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ option(USE_DISTRIBUTED "Use THD (distributed)" OFF)
144144
# Used when building Caffe2 through setup.py
145145
option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF)
146146

147-
if (ANDROID OR IOS)
147+
if (ANDROID OR IOS)
148148
set(BUILD_ATEN_MOBILE ON)
149149
endif()
150150

@@ -213,7 +213,9 @@ if(NOT MSVC)
213213
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-strict-overflow")
214214
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-strict-aliasing")
215215
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations")
216-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
216+
if (CMAKE_COMPILER_IS_GNUCXX AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0))
217+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
218+
endif()
217219
# These flags are not available in GCC-4.8.5. Set only when using clang.
218220
# Compared against https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/Option-Summary.html
219221
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")

aten/src/ATen/Retainable.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
#include <atomic>
44

5+
#include "ATen/core/ATenGeneral.h"
6+
57
namespace at {
68

79
// base class for refcounted things, allows for collects of generic
810
// refcounted objects that include tensors
9-
struct Retainable {
11+
struct AT_API Retainable {
1012
Retainable(): refcount(1), weak_refcount(1) {}
1113
void retain() {
1214
++refcount;

aten/src/ATen/core/ATenGeneral.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@
44

55
// TODO: Merge the *_API macros.
66
#define AT_API AT_CORE_API
7+
#define AT_EXPORT AT_CORE_EXPORT
8+
#define AT_IMPORT AT_CORE_IMPORT

aten/src/ATen/core/Error.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace at {
1919
namespace detail {
2020

2121
// Obtains the base name from a full path.
22-
std::string StripBasename(const std::string& full_path);
22+
AT_CORE_API std::string StripBasename(const std::string& full_path);
2323

2424
inline std::ostream& _str(std::ostream& ss) {
2525
return ss;
@@ -56,7 +56,7 @@ inline std::string str(const char* c_str) {
5656
}
5757

5858
/// Represents a location in source code (for debugging).
59-
struct SourceLocation {
59+
struct AT_CORE_API SourceLocation {
6060
const char* function;
6161
const char* file;
6262
uint32_t line;

aten/src/ATen/core/Macros.h

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,33 @@
1212

1313
#ifdef _WIN32
1414
#if !defined(AT_CORE_STATIC_WINDOWS)
15-
// TODO: unfiy the controlling macros.
16-
#if defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
17-
#define AT_CORE_API __declspec(dllexport)
18-
#else // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
19-
#define AT_CORE_API __declspec(dllimport)
20-
#endif // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
15+
#define AT_CORE_EXPORT __declspec(dllexport)
16+
#define AT_CORE_IMPORT __declspec(dllimport)
2117
#else // !defined(AT_CORE_STATIC_WINDOWS)
22-
#define AT_CORE_API
18+
#define AT_CORE_EXPORT
19+
#define AT_CORE_IMPORT
2320
#endif // !defined(AT_CORE_STATIC_WINDOWS)
2421
#else // _WIN32
2522
#if defined(__GNUC__)
26-
#define AT_CORE_API __attribute__((__visibility__("default")))
23+
#define AT_CORE_EXPORT __attribute__((__visibility__("default")))
24+
#else // defined(__GNUC__)
25+
#define AT_CORE_EXPORT
2726
#endif // defined(__GNUC__)
27+
#define AT_CORE_IMPORT AT_CORE_EXPORT
2828
#endif // _WIN32
2929

30+
// AT_CORE_API is a macro that, depending on whether you are building the
31+
// main library or not, resolves to either AT_CORE_EXPORT or
32+
// AT_CORE_IMPORT.
33+
//
34+
35+
// TODO: unify the controlling macros.
36+
#if defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
37+
#define AT_CORE_API AT_CORE_EXPORT
38+
#else // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
39+
#define AT_CORE_API AT_CORE_IMPORT
40+
#endif // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
41+
3042
// Disable the copy and assignment operator for a class. Note that this will
3143
// disable the usage of the class in std containers.
3244
#define AT_DISABLE_COPY_AND_ASSIGN(classname) \

aten/src/ATen/core/intrusive_ptr.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <ATen/core/ATenGeneral.h>
34
#include <ATen/core/Error.h>
45
#include <atomic>
56
#include <stdexcept>
@@ -32,7 +33,7 @@ namespace c10 {
3233
// tells us if the object was allocated by us. If it wasn't, no
3334
// intrusive_ptr for you!
3435

35-
class intrusive_ptr_target {
36+
class AT_CORE_API intrusive_ptr_target {
3637
// Note [Weak references for intrusive refcounting]
3738
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3839
// Here's the scheme:
@@ -113,7 +114,7 @@ class intrusive_ptr_target {
113114

114115
namespace detail {
115116
template <class TTarget>
116-
struct intrusive_target_default_null_type final {
117+
struct AT_CORE_EXPORT intrusive_target_default_null_type final {
117118
static constexpr TTarget* singleton() noexcept {
118119
return nullptr;
119120
}
@@ -126,7 +127,7 @@ class weak_intrusive_ptr;
126127
template <
127128
class TTarget,
128129
class NullType = detail::intrusive_target_default_null_type<TTarget>>
129-
class intrusive_ptr final {
130+
class AT_CORE_EXPORT intrusive_ptr final {
130131
private:
131132
static_assert(
132133
std::is_base_of<intrusive_ptr_target, TTarget>::value,
@@ -415,7 +416,7 @@ inline bool operator!=(
415416
template <
416417
typename TTarget,
417418
class NullType = detail::intrusive_target_default_null_type<TTarget>>
418-
class weak_intrusive_ptr final {
419+
class AT_CORE_EXPORT weak_intrusive_ptr final {
419420
private:
420421
static_assert(
421422
std::is_base_of<intrusive_ptr_target, TTarget>::value,
@@ -797,13 +798,13 @@ namespace std {
797798
// To allow intrusive_ptr and weak_intrusive_ptr inside std::unordered_map or
798799
// std::unordered_set, we need std::hash
799800
template <class TTarget, class NullType>
800-
struct hash<c10::intrusive_ptr<TTarget, NullType>> {
801+
struct AT_CORE_EXPORT hash<c10::intrusive_ptr<TTarget, NullType>> {
801802
size_t operator()(const c10::intrusive_ptr<TTarget, NullType>& x) const {
802803
return std::hash<TTarget*>()(x.get());
803804
}
804805
};
805806
template <class TTarget, class NullType>
806-
struct hash<c10::weak_intrusive_ptr<TTarget, NullType>> {
807+
struct AT_CORE_EXPORT hash<c10::weak_intrusive_ptr<TTarget, NullType>> {
807808
size_t operator()(const c10::weak_intrusive_ptr<TTarget, NullType>& x) const {
808809
return std::hash<TTarget*>()(x._unsafe_get_target());
809810
}

aten/src/ATen/native/cuda/Embedding.cu

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,18 @@ __global__ void embedding_backward_feature_kernel
4747
if(batch_start + tid < n)
4848
indices_batch[tid] = (int)indices[batch_start + tid];
4949

50+
int batch_end = batch_start + blockDim.x*blockDim.y < n ?
51+
batch_start + blockDim.x*blockDim.y : n;
52+
5053
// Loop over the batch of <= 1024 loaded indices in chunks of blockDim.y = 32
51-
for(int chunk_start = batch_start; chunk_start < n; chunk_start += blockDim.y)
54+
for(int chunk_start = batch_start; chunk_start < batch_end; chunk_start += blockDim.y)
5255
{
5356
// This does double duty: it makes sure indices_batch is ready, and it makes sure match-group
5457
// leaders are done with their accumulates before other warps start loading again.
5558
__syncthreads();
5659

57-
int n_this_chunk = (n - chunk_start) < blockDim.y ? (n - chunk_start) : blockDim.y;
60+
int n_this_chunk = (batch_end - chunk_start) < blockDim.y ?
61+
(batch_end - chunk_start) : blockDim.y;
5862

5963
int src_row = chunk_start + threadIdx.y;
6064
int dst_row = indices_batch[src_row - batch_start]; // This warp's target row in grad_weight

caffe2/CMakeLists.txt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ target_link_libraries(caffe2_protos PUBLIC protobuf::libprotobuf)
207207
# Compile exposed libraries.
208208
list(APPEND Caffe2_CPU_SRCs $<TARGET_OBJECTS:c10>)
209209
add_library(caffe2 ${Caffe2_CPU_SRCS})
210+
target_compile_options(caffe2 PRIVATE "-fvisibility=hidden")
210211
caffe2_interface_library(caffe2_protos caffe2_protos_whole)
211212
target_link_libraries(caffe2 PRIVATE caffe2_protos_whole)
212213
if (${CAFFE2_LINK_LOCAL_PROTOBUF})
@@ -229,13 +230,6 @@ else()
229230
target_compile_options(caffe2 INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-std=c++11>")
230231
endif()
231232

232-
# Note(jiayq): This is not complete yet, but in the end we will need to deal with
233-
# explicit hidden visibility.
234-
# This line is here so that when testing build, we can enable it to properly test
235-
# annotation of public symbols. When finally doing proper build with all symbols
236-
# annotated, we will enable this line and have it wrapped with gcc/clang checks.
237-
# target_compile_options(caffe2 PRIVATE "-fvisibility=hidden")
238-
239233
target_compile_options(caffe2 PRIVATE "-DCAFFE2_BUILD_MAIN_LIB")
240234
if (MSVC AND NOT BUILD_SHARED_LIBS)
241235
# Note [Supporting both static and dynamic libraries on Window]

caffe2/core/nomnigraph/README.md

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,22 @@ Do not create `OperatorDef`s in the transformation itself! This is an anti-patte
6767

6868
Below is a subset of selected API calls that are quite useful. Lower level manipulation calls are omitted.
6969

70+
### Graph transformation API
71+
Nomnigraph provides a ReplaceSubgraph API to perform graph transformation operations without having to write custom subgraph matching logic. The main header file is [SubgraphMatcher.h](include/nomnigraph/Transformations/SubgraphMatcher.h).
72+
73+
The ReplaceSubgraph API takes in
74+
- A subgraph pattern to be matched
75+
- A graph to be scanned for matching patterns
76+
- A ReplaceGraph lambda function that takes in a matched subgraph; callers should implement specific graph transformation operation in the lambda.
77+
78+
The ReplaceSubgraph implementation takes care of the pattern matching part and also provides tools for callers to implement graph transformation logic with less effort.
79+
80+
Example usage of the API can be found in [subgraph_matcher_test.cc](tests/subgraph_matcher_test.cc).
81+
82+
Example usage of the API for NNGraph can be found in [neural_net_test.cc](tests/neural_net_test.cc).
83+
7084
### Graph API
85+
Nomnigraph's core graph APIs provide a generic graph data structure and basic graph manipulation abilities. The main header file is [Graph.h](include/nomnigraph/Graph/Graph.h).
7186

7287
```cpp
7388
auto g = Graph<T>(); // Constructor
@@ -91,6 +106,9 @@ T d = n->data(); // Get the data stored at the node
91106
```
92107

93108
### NN API
109+
NN (NeuralNet) extends core Graph with functionalities specific to neural network computation graph. The main header file is [NeuralNet.h](include/nomnigraph/Representations/NeuralNet.h).
110+
111+
Type checking & data accessing
94112

95113
```cpp
96114
repr::NNModule nn = ...;
@@ -101,13 +119,18 @@ repr::NNGraph::NodeRef n; // Canonical node of the neural network
101119
bool b = repr::nn::is<repr::Tensor>(n); // Checks the type stored on the node. (Works with parent types.)
102120

103121
repr::Conv* c = repr::nn::get<repr::Conv>(n); // Returns a pointer to the NeuralNetOperator or NeuralNetData in the node
122+
```
104123

124+
Iterate through nodes in an NNGraph.
125+
```cpp
105126
auto pairs = dataIterator(nn); // A useful paradigm for iterating through nodes and corresponding data in no particular order.
106127
auto nodeRefs = nodeIterator(nn); // Iterate through nodes in no particular order.
107128
// See https://github.com/pytorch/pytorch/blob/master/caffe2/opt/mobile.cc#L106-L109
129+
```
108130

109131

110-
///// These functions make it easy to check attributes on nodes. /////
132+
These functions make it easy to check attributes on nodes.
133+
```cpp
111134
// -- Tensor node functions --
112135
bool b = hasProducer(tensorNode); // Checks for producers.
113136
auto n = getProducer(tensorNode); // Returns the producer of the tensor
@@ -118,8 +141,10 @@ std::vector<NNGraph::NodeRef> consumers = getConsumers(tensorNode); // Returns a
118141
bool b = hasInputs(n); // Checks if there are any input tensors.
119142
std::vector<NNGraph::NodeRef> getInputs(n); // Returns a vector of all the input tensor nodes.
120143
std::vector<NNGraph::NodeRef> getOutputs(n); // Returns a vector of all the output tensor nodes.
144+
```
121145
122-
///// These functions are less commonly useful /////
146+
These functions are less commonly useful.
147+
```cpp
123148
coalesceInsertedDataDependencies(&nn); // Fixes up all the inserted dependencies in the dataflow graph.
124149
125150
insertOp<repr::Relu>(nn.dataFlow, n1, n2); // Inserts an operator into the dataflow graph and creates a new blob to do so.

caffe2/core/nomnigraph/include/nomnigraph/Generated/OpClasses.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,36 @@ class BatchNormalization : public NeuralNetOperator {
531531
bool IsTest;
532532
};
533533

534+
class Clip : public NeuralNetOperator {
535+
public:
536+
Clip(float min, float max)
537+
: NeuralNetOperator(NNKind::Clip), Min(min), Max(max) {}
538+
539+
~Clip() {}
540+
541+
NOMNIGRAPH_DEFINE_NN_RTTI(Clip);
542+
543+
float getMin() const {
544+
return Min;
545+
}
546+
547+
float getMax() const {
548+
return Max;
549+
}
550+
551+
void setMin(float min) {
552+
Min = min;
553+
}
554+
555+
void setMax(float max) {
556+
Max = max;
557+
}
558+
559+
private:
560+
float Min;
561+
float Max;
562+
};
563+
534564
class FC : public NeuralNetOperator {
535565
public:
536566
FC() : NeuralNetOperator(NNKind::FC) {}
@@ -638,6 +668,28 @@ class Flatten : public NeuralNetOperator {
638668
private:
639669
};
640670

671+
class CopyToOpenCL : public NeuralNetOperator {
672+
public:
673+
CopyToOpenCL() : NeuralNetOperator(NNKind::CopyToOpenCL) {}
674+
675+
~CopyToOpenCL() {}
676+
677+
NOMNIGRAPH_DEFINE_NN_RTTI(CopyToOpenCL);
678+
679+
private:
680+
};
681+
682+
class CopyFromOpenCL : public NeuralNetOperator {
683+
public:
684+
CopyFromOpenCL() : NeuralNetOperator(NNKind::CopyFromOpenCL) {}
685+
686+
~CopyFromOpenCL() {}
687+
688+
NOMNIGRAPH_DEFINE_NN_RTTI(CopyFromOpenCL);
689+
690+
private:
691+
};
692+
641693
class NCHW2NHWC : public NeuralNetOperator {
642694
public:
643695
NCHW2NHWC() : NeuralNetOperator(NNKind::NCHW2NHWC) {}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
Relu, Conv, ConvRelu, ConvTranspose, AveragePool, AveragePoolRelu, MaxPool,
2-
MaxPoolRelu, Sum, SumRelu, Send, Receive, BatchNormalization, FC,
2+
MaxPoolRelu, Sum, SumRelu, Send, Receive, BatchNormalization, Clip, FC,
33
GivenTensorFill, Concat, Softmax, ChannelShuffle, Add, Reshape, Flatten,
4-
NCHW2NHWC, NHWC2NCHW
4+
CopyToOpenCL, CopyFromOpenCL, NCHW2NHWC, NHWC2NCHW

caffe2/core/nomnigraph/include/nomnigraph/Generated/OpNames.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ case NNKind::Receive:
3737
case NNKind::BatchNormalization:
3838
return "BatchNormalization";
3939

40+
case NNKind::Clip:
41+
return "Clip";
42+
4043
case NNKind::FC:
4144
return "FC";
4245

@@ -61,6 +64,12 @@ case NNKind::Reshape:
6164
case NNKind::Flatten:
6265
return "Flatten";
6366

67+
case NNKind::CopyToOpenCL:
68+
return "CopyToOpenCL";
69+
70+
case NNKind::CopyFromOpenCL:
71+
return "CopyFromOpenCL";
72+
6473
case NNKind::NCHW2NHWC:
6574
return "NCHW2NHWC";
6675

0 commit comments

Comments
 (0)