Skip to content

Commit 5b2b998

Browse files
authored
Merge pull request #254 from iotamudelta/ifu
Integrate from upstream
2 parents ac85abc + ca71c11 commit 5b2b998

File tree

181 files changed

+2465
-10768
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

181 files changed

+2465
-10768
lines changed

.circleci/config.yml

Lines changed: 377 additions & 316 deletions
Large diffs are not rendered by default.

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,6 @@
7676
[submodule "third_party/ideep"]
7777
path = third_party/ideep
7878
url = https://github.com/intel/ideep
79+
[submodule "third_party/nccl/nccl"]
80+
path = third_party/nccl/nccl
81+
url = https://github.com/NVIDIA/nccl

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ You get the best of speed and flexibility for your crazy research.
8888

8989
PyTorch is not a Python binding into a monolithic C++ framework.
9090
It is built to be deeply integrated into Python.
91-
You can use it naturally like you would use NumPy / SciPy / scikit-learn etc.
91+
You can use it naturally like you would use [NumPy](http://www.numpy.org/) / [SciPy](https://www.scipy.org/) / [scikit-learn](http://scikit-learn.org) etc.
9292
You can write your new neural network layers in Python itself, using your favorite libraries
9393
and use packages such as Cython and Numba.
9494
Our goal is to not reinvent the wheel where appropriate.
@@ -104,7 +104,7 @@ We hope you never spend hours debugging your code because of bad stack traces or
104104
### Fast and Lean
105105

106106
PyTorch has minimal framework overhead. We integrate acceleration libraries
107-
such as Intel MKL and NVIDIA (cuDNN, NCCL) to maximize speed.
107+
such as [Intel MKL](https://software.intel.com/mkl) and NVIDIA (cuDNN, NCCL) to maximize speed.
108108
At the core, its CPU and GPU Tensor and neural network backends
109109
(TH, THC, THNN, THCUNN) are mature and have been tested for years.
110110

@@ -226,7 +226,7 @@ should increase shared memory size either with `--ipc=host` or `--shm-size` comm
226226

227227
### Building the Documentation
228228

229-
To build documentation in various formats, you will need Sphinx and the
229+
To build documentation in various formats, you will need [Sphinx](http://www.sphinx-doc.org) and the
230230
readthedocs theme.
231231

232232
```

aten/src/ATen/function_wrapper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,7 @@ def emit_nn_body(option):
780780
# _out variants must create buffers and insert them in the
781781
# arguments list between output and input arguments
782782
for buffer in option['buffers']:
783-
body.append('Tensor {} = tensor();'.format(buffer['name']))
783+
body.append('Tensor {} = at::empty({{0}}, this->options());'.format(buffer['name']))
784784
actuals = [arg['name'] for arg in option['arguments'] if arg.get('output')]
785785
actuals += [buffer['name'] for buffer in option['buffers']]
786786
actuals += [arg['name'] for arg in option['arguments'] if not arg.get('output')]

aten/src/ATen/native/native_functions.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,9 +1901,6 @@
19011901
SparseCPU: new_with_size_sparse
19021902
SparseCUDA: new_with_size_sparse
19031903

1904-
- func: tensor(Type dtype) -> Tensor
1905-
variants: []
1906-
19071904
- func: tensor(Type dtype, IntList size) -> Tensor
19081905
variants: []
19091906

binaries/bench_gen/bench_gen.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from __future__ import unicode_literals
77

88
import argparse
9+
import ast
910

1011
from caffe2.python.model_helper import ModelHelper
1112
from caffe2.python.predictor import mobile_exporter
@@ -15,18 +16,15 @@
1516
def parse_kwarg(kwarg_str):
1617
key, value = kwarg_str.split('=')
1718
try:
18-
value = int(value)
19+
value = ast.literal_eval(value)
1920
except ValueError:
20-
try:
21-
value = float(value)
22-
except ValueError:
23-
pass
21+
pass
2422
return key, value
2523

2624

2725
def main(args):
2826
# User defined keyword arguments
29-
kwargs = {"order": "NCHW"}
27+
kwargs = {"order": "NCHW", "use_cudnn": False}
3028
kwargs.update(dict(args.kwargs))
3129

3230
model = ModelHelper(name=args.benchmark_name)

c10/test/registry_test.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class Foo {
1313
explicit Foo(int x) {
1414
// LOG(INFO) << "Foo " << x;
1515
}
16+
virtual ~Foo() {}
1617
};
1718

1819
C10_DECLARE_REGISTRY(FooRegistry, Foo, int);
@@ -46,4 +47,45 @@ TEST(RegistryTest, ReturnNullOnNonExistingCreator) {
4647
EXPECT_EQ(FooRegistry()->Create("Non-existing bar", 1), nullptr);
4748
}
4849

50+
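// C10_REGISTER_CLASS_WITH_PRIORITY defines a static variable; each use below is wrapped in a function so the registration happens when that function is first called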
51+
void RegisterFooDefault() {
52+
C10_REGISTER_CLASS_WITH_PRIORITY(
53+
FooRegistry, FooWithPriority, c10::REGISTRY_DEFAULT, Foo);
54+
}
55+
56+
void RegisterFooDefaultAgain() {
57+
C10_REGISTER_CLASS_WITH_PRIORITY(
58+
FooRegistry, FooWithPriority, c10::REGISTRY_DEFAULT, Foo);
59+
}
60+
61+
void RegisterFooBarFallback() {
62+
C10_REGISTER_CLASS_WITH_PRIORITY(
63+
FooRegistry, FooWithPriority, c10::REGISTRY_FALLBACK, Bar);
64+
}
65+
66+
void RegisterFooBarPreferred() {
67+
C10_REGISTER_CLASS_WITH_PRIORITY(
68+
FooRegistry, FooWithPriority, c10::REGISTRY_PREFERRED, Bar);
69+
}
70+
71+
TEST(RegistryTest, RegistryPriorities) {
72+
FooRegistry()->SetTerminate(false);
73+
RegisterFooDefault();
74+
75+
// throws because Foo is already registered with default priority
76+
EXPECT_THROW(RegisterFooDefaultAgain(), std::runtime_error);
77+
78+
#ifdef __GXX_RTTI
79+
// Bar is not registered here: Foo already holds this key with default priority, which outranks fallback
80+
RegisterFooBarFallback();
81+
std::unique_ptr<Foo> bar1(FooRegistry()->Create("FooWithPriority", 1));
82+
EXPECT_EQ(dynamic_cast<Bar*>(bar1.get()), nullptr);
83+
84+
// Bar replaces Foo for this key because preferred priority outranks default
85+
RegisterFooBarPreferred();
86+
std::unique_ptr<Foo> bar2(FooRegistry()->Create("FooWithPriority", 1));
87+
EXPECT_NE(dynamic_cast<Bar*>(bar2.get()), nullptr);
88+
#endif
89+
}
90+
4991
} // namespace c10_test

c10/util/Registry.h

Lines changed: 83 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,21 @@
2424
namespace c10 {
2525

2626
template <typename KeyType>
27-
inline void PrintOffendingKey(const KeyType& /*key*/) {
28-
printf("[key type printing not supported]\n");
27+
inline std::string KeyStrRepr(const KeyType& /*key*/) {
28+
return "[key type printing not supported]";
2929
}
3030

3131
template <>
32-
inline void PrintOffendingKey(const std::string& key) {
33-
printf("Offending key: %s.\n", key.c_str());
32+
inline std::string KeyStrRepr(const std::string& key) {
33+
return key;
3434
}
3535

36+
enum RegistryPriority {
37+
REGISTRY_FALLBACK = 1,
38+
REGISTRY_DEFAULT = 2,
39+
REGISTRY_PREFERRED = 3,
40+
};
41+
3642
/**
3743
* @brief A template class that allows one to register classes by keys.
3844
*
@@ -48,9 +54,12 @@ class Registry {
4854
public:
4955
typedef std::function<ObjectPtrType(Args...)> Creator;
5056

51-
Registry() : registry_() {}
57+
Registry() : registry_(), priority_(), terminate_(true) {}
5258

53-
void Register(const SrcType& key, Creator creator) {
59+
void Register(
60+
const SrcType& key,
61+
Creator creator,
62+
const RegistryPriority priority = REGISTRY_DEFAULT) {
5463
std::lock_guard<std::mutex> lock(register_mutex_);
5564
// The if statement below is essentially the same as the following line:
5665
// CHECK_EQ(registry_.count(key), 0) << "Key " << key
@@ -59,18 +68,40 @@ class Registry {
5968
// carried out at static initialization time, we do not want to have an
6069
// explicit dependency on glog's initialization function.
6170
if (registry_.count(key) != 0) {
62-
printf("Key already registered.\n");
63-
PrintOffendingKey(key);
64-
std::exit(1);
71+
auto cur_priority = priority_[key];
72+
if (priority > cur_priority) {
73+
std::string warn_msg =
74+
"Overwriting already registered item for key " + KeyStrRepr(key);
75+
fprintf(stderr, "%s\n", warn_msg.c_str());
76+
registry_[key] = creator;
77+
priority_[key] = priority;
78+
} else if (priority == cur_priority) {
79+
std::string err_msg =
80+
"Key already registered with the same priority: " + KeyStrRepr(key);
81+
fprintf(stderr, "%s\n", err_msg.c_str());
82+
if (terminate_) {
83+
std::exit(1);
84+
} else {
85+
throw std::runtime_error(err_msg);
86+
}
87+
} else {
88+
std::string warn_msg =
89+
"Higher priority item already registered, skipping registration of " +
90+
KeyStrRepr(key);
91+
fprintf(stderr, "%s\n", warn_msg.c_str());
92+
}
93+
} else {
94+
registry_[key] = creator;
95+
priority_[key] = priority;
6596
}
66-
registry_[key] = creator;
6797
}
6898

6999
void Register(
70100
const SrcType& key,
71101
Creator creator,
72-
const std::string& help_msg) {
73-
Register(key, creator);
102+
const std::string& help_msg,
103+
const RegistryPriority priority = REGISTRY_DEFAULT) {
104+
Register(key, creator, priority);
74105
help_message_[key] = help_msg;
75106
}
76107

@@ -109,8 +140,16 @@ class Registry {
109140
return it->second.c_str();
110141
}
111142

143+
// Used for testing: when terminate is set to false, Registry throws
144+
// std::runtime_error instead of calling std::exit.
145+
void SetTerminate(bool terminate) {
146+
terminate_ = terminate;
147+
}
148+
112149
private:
113150
std::unordered_map<SrcType, Creator> registry_;
151+
std::unordered_map<SrcType, RegistryPriority> priority_;
152+
bool terminate_;
114153
std::unordered_map<SrcType, std::string> help_message_;
115154
std::mutex register_mutex_;
116155

@@ -120,14 +159,23 @@ class Registry {
120159
template <class SrcType, class ObjectPtrType, class... Args>
121160
class Registerer {
122161
public:
123-
Registerer(
162+
explicit Registerer(
124163
const SrcType& key,
125164
Registry<SrcType, ObjectPtrType, Args...>* registry,
126165
typename Registry<SrcType, ObjectPtrType, Args...>::Creator creator,
127166
const std::string& help_msg = "") {
128167
registry->Register(key, creator, help_msg);
129168
}
130169

170+
explicit Registerer(
171+
const SrcType& key,
172+
const RegistryPriority priority,
173+
Registry<SrcType, ObjectPtrType, Args...>* registry,
174+
typename Registry<SrcType, ObjectPtrType, Args...>::Creator creator,
175+
const std::string& help_msg = "") {
176+
registry->Register(key, creator, help_msg, priority);
177+
}
178+
131179
template <class DerivedType>
132180
static ObjectPtrType DefaultCreator(Args... args) {
133181
return ObjectPtrType(new DerivedType(args...));
@@ -187,13 +235,27 @@ class Registerer {
187235
static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \
188236
key, RegistryName(), ##__VA_ARGS__);
189237

238+
#define C10_REGISTER_TYPED_CREATOR_WITH_PRIORITY( \
239+
RegistryName, key, priority, ...) \
240+
static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \
241+
key, priority, RegistryName(), ##__VA_ARGS__);
242+
190243
#define C10_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
191244
static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \
192245
key, \
193246
RegistryName(), \
194247
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>, \
195248
::c10::demangle_type<__VA_ARGS__>());
196249

250+
#define C10_REGISTER_TYPED_CLASS_WITH_PRIORITY( \
251+
RegistryName, key, priority, ...) \
252+
static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \
253+
key, \
254+
priority, \
255+
RegistryName(), \
256+
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>, \
257+
::c10::demangle_type<__VA_ARGS__>());
258+
197259
// C10_DECLARE_REGISTRY and C10_DEFINE_REGISTRY are hard-wired to use
198260
// std::string as the key type, because that is the most commonly used case.
199261
#define C10_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
@@ -218,9 +280,17 @@ class Registerer {
218280
#define C10_REGISTER_CREATOR(RegistryName, key, ...) \
219281
C10_REGISTER_TYPED_CREATOR(RegistryName, #key, __VA_ARGS__)
220282

283+
#define C10_REGISTER_CREATOR_WITH_PRIORITY(RegistryName, key, priority, ...) \
284+
C10_REGISTER_TYPED_CREATOR_WITH_PRIORITY( \
285+
RegistryName, #key, priority, __VA_ARGS__)
286+
221287
#define C10_REGISTER_CLASS(RegistryName, key, ...) \
222288
C10_REGISTER_TYPED_CLASS(RegistryName, #key, __VA_ARGS__)
223289

290+
#define C10_REGISTER_CLASS_WITH_PRIORITY(RegistryName, key, priority, ...) \
291+
C10_REGISTER_TYPED_CLASS_WITH_PRIORITY( \
292+
RegistryName, #key, priority, __VA_ARGS__)
293+
224294
} // namespace c10
225295

226296
#endif // C10_UTIL_REGISTRY_H_

caffe2/contrib/aten/gen_op.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,7 @@ def find_factory_methods(decls):
237237
}
238238
defined_inferred_type = False
239239

240-
if 'Tensor' in o['method_of']:
241-
# make sure 'self' is the first argument. currently Declarations.yaml
242-
# does not always do this. Instead it keeps the argument list the same order
243-
# as the Type method.
244-
o['arguments'] = self_as_first_argument(o['arguments'])
245-
elif 'namespace' not in o['method_of']:
240+
if 'namespace' not in o['method_of'] and 'Tensor' not in o['method_of']:
246241
# methods on type like 'ones' or 'zeros' always take a
247242
# string attribute that is translated into the at::Type object
248243
# e.g. "Float" is at::kFloat
@@ -289,11 +284,11 @@ def find_factory_methods(decls):
289284
assignment = CT(t).substitute(env, offset=i, output=get_output(o, i))
290285
env['assignments'].append(assignment)
291286

292-
if 'Tensor' in o['method_of']:
287+
if 'namespace' in o['method_of']:
288+
env['invocation'] = CT("at::${name}(${arguments})").substitute(env)
289+
elif 'Tensor' in o['method_of']:
293290
env['invocation'] = "self.{}({})".format(
294291
o['name'], ', '.join(env['arguments'][1:]))
295-
elif 'namespace' in o['method_of']:
296-
env['invocation'] = CT("at::${name}(${arguments})").substitute(env)
297292
else:
298293
assert('Type' in o['method_of'])
299294
env['invocation'] = CT(

caffe2/core/hip/net_async_hip_thread_pool_hip.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ C10_DEFINE_int(
2323

2424
namespace caffe2 {
2525

26-
std::shared_ptr<TaskThreadPool>
26+
std::shared_ptr<TaskThreadPoolBase>
2727
GetAsyncNetHIPThreadPool(int hip_gpu_id, int pool_size, bool create_new) {
2828
// For GPU, use per device thread pools of predefined constant size
2929
if (pool_size != c10::FLAGS_caffe2_threads_per_hip_gpu) {

caffe2/core/net.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ unique_ptr<NetBase> CreateNet(
173173
return net;
174174
}
175175

176-
TaskThreadPool* ExecutorHelper::GetPool(
176+
TaskThreadPoolBase* ExecutorHelper::GetPool(
177177
const DeviceOption& /* unused */) const {
178178
CAFFE_THROW("Not implemented");
179179
}

caffe2/core/net.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class CAFFE2_API NetBase : public Observable<NetBase> {
130130
class CAFFE2_API ExecutorHelper {
131131
public:
132132
ExecutorHelper() {}
133-
virtual TaskThreadPool* GetPool(const DeviceOption& option) const;
133+
virtual TaskThreadPoolBase* GetPool(const DeviceOption& option) const;
134134
virtual ~ExecutorHelper() {}
135135
};
136136

0 commit comments

Comments
 (0)