Skip to content

Commit e9e3101

Browse files
authored
Merge pull request #79 from iotamudelta/master
Merge from upstream
2 parents cc253d9 + dbff1f3 commit e9e3101

File tree

130 files changed

+1759
-1225
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

130 files changed

+1759
-1225
lines changed

aten/src/ATen/Dispatch.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
1818
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
1919
default: \
20-
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
20+
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
2121
} \
2222
}()
2323

@@ -27,9 +27,9 @@
2727
switch (the_type.scalarType()) { \
2828
AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__) \
2929
AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__) \
30-
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, Half, __VA_ARGS__) \
30+
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
3131
default: \
32-
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
32+
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
3333
} \
3434
}()
3535

@@ -43,7 +43,7 @@
4343
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
4444
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
4545
default: \
46-
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
46+
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
4747
} \
4848
}()
4949

@@ -59,7 +59,7 @@
5959
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
6060
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
6161
default: \
62-
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
62+
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
6363
} \
6464
}()
6565

@@ -74,8 +74,8 @@
7474
AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__) \
7575
AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__) \
7676
AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
77-
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, Half, __VA_ARGS__) \
77+
AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__) \
7878
default: \
79-
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
79+
AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
8080
} \
8181
}()

aten/src/ATen/native/TensorFactories.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,9 @@ Tensor hamming_window(
581581
double beta,
582582
const TensorOptions& options) {
583583
window_function_checks("hamming_window", options, window_length);
584+
if (window_length == 0) {
585+
return native::empty({0}, options);
586+
}
584587
if (window_length == 1) {
585588
return native::ones({1}, options);
586589
}

binaries/benchmark_helper.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ void runNetwork(
215215
const bool wipe_cache,
216216
const bool run_individual,
217217
const int warmup,
218-
const int iter) {
218+
const int iter,
219+
const int sleep_before_run) {
219220
if (!net_def.has_name()) {
220221
net_def.set_name("benchmark");
221222
}
@@ -234,6 +235,9 @@ void runNetwork(
234235
if (wipe_cache) {
235236
caffe2::wipe_cache();
236237
}
238+
if (sleep_before_run > 0) {
239+
sleep(sleep_before_run);
240+
}
237241
LOG(INFO) << "Main runs.";
238242
CAFFE_ENFORCE(
239243
iter >= 0,

binaries/benchmark_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,5 @@ void runNetwork(
9696
const bool,
9797
const bool,
9898
const int,
99+
const int,
99100
const int);

binaries/caffe2_benchmark.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ CAFFE2_DEFINE_bool(
6262
run_individual,
6363
false,
6464
"Whether to benchmark individual operators.");
65+
CAFFE2_DEFINE_int(
66+
sleep_before_run,
67+
0,
68+
"The seconds to sleep before starting the benchmarking.");
6569
CAFFE2_DEFINE_bool(
6670
text_output,
6771
false,
@@ -115,7 +119,8 @@ int main(int argc, char** argv) {
115119
caffe2::FLAGS_wipe_cache,
116120
caffe2::FLAGS_run_individual,
117121
caffe2::FLAGS_warmup,
118-
caffe2::FLAGS_iter);
122+
caffe2::FLAGS_iter,
123+
caffe2::FLAGS_sleep_before_run);
119124

120125
writeOutput(
121126
workspace,

binaries/predictor_verifier.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
#include "caffe2/core/flags.h"
1818
#include "caffe2/core/init.h"
19-
#include "caffe2/core/predictor.h"
19+
#include "caffe2/predictor/predictor.h"
2020
#include "caffe2/utils/proto_utils.h"
2121

2222
CAFFE2_DEFINE_string(init_net, "", "The given path to the init protobuffer.");

caffe2/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ if(BUILD_CAFFE2)
6565
add_subdirectory(proto)
6666
add_subdirectory(contrib)
6767
add_subdirectory(core)
68+
add_subdirectory(predictor)
6869
add_subdirectory(core/nomnigraph)
6970
add_subdirectory(core/dispatch)
7071
if (USE_NVRTC)

caffe2/core/operator.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -533,10 +533,11 @@ class Operator : public OperatorBase {
533533
return fillers;
534534
}
535535

536-
#define DISABLE_INPUT_FILLERS(Context) \
537-
std::vector<TensorFiller<Context>> InputFillers( \
538-
const std::vector<std::vector<TIndex>>& /* unused */) override { \
539-
throw UnsupportedOperatorFeature("Op does not have input fillers"); \
536+
#define DISABLE_INPUT_FILLERS(Context) \
537+
std::vector<TensorFiller<Context>> InputFillers( \
538+
const std::vector<std::vector<TIndex>>& /* unused */) override { \
539+
throw UnsupportedOperatorFeature( \
540+
OperatorBase::type() + " does not have input fillers"); \
540541
}
541542

542543
void SparseLengthsFillerHelper(
@@ -554,7 +555,8 @@ class Operator : public OperatorBase {
554555
size_t segment_index,
555556
std::vector<TensorFiller<Context>>* fillers) {
556557
CAFFE_ENFORCE_EQ(shapes[segment_index].size(), 1);
557-
// TODO: what would be a proper #segments
558+
// TODO (mnaumov): distribution of value
559+
(*fillers)[value_index].Min(0).Max(shapes[value_index].front() * 2);
558560
(*fillers)[segment_index].SparseSegments(shapes[value_index].front() - 1);
559561
}
560562

caffe2/ideep/operators/operator_fallback_ideep.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include <caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.h>
88
#include <caffe2/operators/conv_transpose_op.h>
99
#include <caffe2/operators/cross_entropy_op.h>
10+
#include <caffe2/operators/ctc_beam_search_decoder_op.h>
11+
#include <caffe2/operators/ctc_greedy_decoder_op.h>
1012
#include <caffe2/operators/dropout_op.h>
1113
#include <caffe2/operators/elementwise_ops.h>
1214
#include <caffe2/operators/filler_op.h>
@@ -112,4 +114,12 @@ REGISTER_IDEEP_OPERATOR(
112114
PRelu,
113115
IDEEPFallbackOp<PReluOp<float, CPUContext>>);
114116

117+
// ctc decoder operators
118+
REGISTER_IDEEP_OPERATOR(
119+
CTCGreedyDecoder,
120+
IDEEPFallbackOp<CTCGreedyDecoderOp<CPUContext>>);
121+
REGISTER_IDEEP_OPERATOR(
122+
CTCBeamSearchDecoder,
123+
IDEEPFallbackOp<CTCBeamSearchDecoderOp<CPUContext>>);
124+
115125
} // namespace caffe2

caffe2/image/image_input_op.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -658,8 +658,16 @@ bool ImageInputOp<Context>::GetImageAndLabelAndInfoFromDBValue(
658658
for (int j = 0; j < additional_output_proto.int64_data_size(); ++j) {
659659
additional_output[j] = additional_output_proto.int64_data(j);
660660
}
661-
}
662-
else {
661+
} else if (additional_output_proto.data_type() == TensorProto::UINT8) {
662+
uint8_t* additional_output =
663+
prefetched_additional_outputs_[i].template mutable_data<uint8_t>() +
664+
item_id * additional_output_proto.int32_data_size();
665+
666+
for (int j = 0; j < additional_output_proto.int32_data_size(); ++j) {
667+
additional_output[j] =
668+
static_cast<uint8_t>(additional_output_proto.int32_data(j));
669+
}
670+
} else {
663671
LOG(FATAL) << "Unsupported output type.";
664672
}
665673
}
@@ -1148,6 +1156,9 @@ bool ImageInputOp<Context>::Prefetch() {
11481156
} else if (
11491157
additional_output_proto.data_type() == TensorProto::INT64) {
11501158
prefetched_additional_outputs_[i].template mutable_data<int64_t>();
1159+
} else if (
1160+
additional_output_proto.data_type() == TensorProto::UINT8) {
1161+
prefetched_additional_outputs_[i].template mutable_data<uint8_t>();
11511162
} else {
11521163
LOG(FATAL) << "Unsupported output type.";
11531164
}

caffe2/mobile/contrib/ios/ios_caffe.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
#include "ios_caffe.h"
3-
#include "caffe2/core/predictor.h"
43
#include "caffe2/core/tensor.h"
54
#include "caffe2/mobile/contrib/ios/ios_caffe_predictor.h"
5+
#include "caffe2/predictor/predictor.h"
66

77
Caffe2IOSPredictor* MakeCaffe2Predictor(const std::string& init_net_str,
88
const std::string& predict_net_str,

caffe2/mobile/contrib/ios/ios_caffe.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
#include <string>
55
#include <vector>
6-
#include "caffe2/core/predictor.h"
76
#include "caffe2/mobile/contrib/ios/ios_caffe_defines.h"
87
#include "caffe2/mobile/contrib/ios/ios_caffe_predictor.h"
8+
#include "caffe2/predictor/predictor.h"
99

1010
extern "C" {
1111

caffe2/mobile/contrib/ios/ios_caffe_predictor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
#include <string>
55
#include "caffe2/core/net.h"
6-
#include "caffe2/core/predictor.h"
76
#include "caffe2/mobile/contrib/ios/ios_caffe_defines.h"
7+
#include "caffe2/predictor/predictor.h"
88

99
struct Tensor {
1010
std::vector<int64_t> dims;

caffe2/mobile/contrib/opengl/core/GLPredictor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
#include "GLImage.h"
55
#include "caffe2/core/net.h"
6-
#include "caffe2/core/predictor.h"
6+
#include "caffe2/predictor/predictor.h"
77

88
namespace caffe2 {
99
class GLPredictor : public Predictor {

caffe2/mobile/contrib/opengl/core/rewrite_net.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
#pragma once
33
#include "GLPredictor.h"
4-
#include "caffe2/core/predictor.h"
4+
#include "caffe2/predictor/predictor.h"
55

66
namespace caffe2 {
77
bool tryConvertToOpenGL(const NetDef& initNet,

caffe2/onnx/backend_rep.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#pragma once
22

3-
#include "caffe2/core/predictor.h"
3+
#include "caffe2/predictor/predictor.h"
44
#include "caffe2/proto/caffe2.pb.h"
55

66
#include <memory>

caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,21 @@ class SparseLengthsFused8BitRowwiseOp : public Operator<Context> {
6868
return true;
6969
}
7070

71-
USE_VALUE_KEY_LENGTH_INPUT_FILLERS(Context, DATA, INDICES, LENGTHS)
71+
std::vector<TensorFiller<Context>> InputFillers(
72+
const std::vector<std::vector<TIndex>>& shapes) override {
73+
CAFFE_ENFORCE_EQ(shapes.size(), Operator<Context>::Inputs().size());
74+
auto fillers = Operator<Context>::InputFillers(shapes);
75+
if (with_weights) {
76+
// TODO: enable the fillers
77+
throw UnsupportedOperatorFeature(
78+
OperatorBase::type() + " does not have input fillers");
79+
}
80+
Operator<Context>::SparseLengthsFillerHelper(
81+
shapes, INDICES, LENGTHS, &fillers);
82+
Operator<Context>::SparseSegmentsFillerHelper(
83+
shapes, DATA, INDICES, &fillers);
84+
return fillers;
85+
}
7286

7387
private:
7488
enum {

caffe2/operators/lengths_reducer_ops.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,21 @@ class CPUSparseLengthsReductionOp : public Operator<CPUContext> {
9292
return true;
9393
}
9494

95-
USE_VALUE_KEY_LENGTH_INPUT_FILLERS(CPUContext, DATA, INDICES, LENGTHS)
95+
std::vector<TensorFiller<CPUContext>> InputFillers(
96+
const std::vector<std::vector<TIndex>>& shapes) override {
97+
CAFFE_ENFORCE_EQ(shapes.size(), Operator<CPUContext>::Inputs().size());
98+
auto fillers = Operator<CPUContext>::InputFillers(shapes);
99+
if (USE_WEIGHT) {
100+
// TODO: enable the fillers
101+
throw UnsupportedOperatorFeature(
102+
OperatorBase::type() + " does not have input fillers");
103+
}
104+
Operator<CPUContext>::SparseLengthsFillerHelper(
105+
shapes, INDICES, LENGTHS, &fillers);
106+
Operator<CPUContext>::SparseSegmentsFillerHelper(
107+
shapes, DATA, INDICES, &fillers);
108+
return fillers;
109+
}
96110

97111
private:
98112
enum {

caffe2/operators/one_hot_ops.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,9 @@ class SegmentOneHotOp : public Operator<CPUContext> {
172172
SegmentOneHotOp(const OperatorDef& operator_def, Workspace* ws)
173173
: Operator(operator_def, ws) {}
174174

175+
// TODO: enable input filler
176+
DISABLE_INPUT_FILLERS(CPUContext)
177+
175178
bool RunOnDevice() override {
176179
auto& lengths = Input(0);
177180
auto& indices = Input(1);

caffe2/operators/one_hot_ops.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ class OneHotOp final : public Operator<Context> {
1313
public:
1414
USE_OPERATOR_CONTEXT_FUNCTIONS;
1515

16+
// TODO: enable input filler
17+
DISABLE_INPUT_FILLERS(Context)
18+
1619
OneHotOp(const OperatorDef& operator_def, Workspace* ws)
1720
: Operator<Context>(operator_def, ws) {}
1821

@@ -58,6 +61,8 @@ class BatchOneHotOp final : public Operator<Context> {
5861
BatchOneHotOp(const OperatorDef& operator_def, Workspace* ws)
5962
: Operator<Context>(operator_def, ws) {}
6063

64+
USE_VALUE_KEY_LENGTH_INPUT_FILLERS(Context, X, VALS, LENS)
65+
6166
bool RunOnDevice() override {
6267
return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(X));
6368
}
@@ -83,6 +88,9 @@ class BatchBucketOneHotOp final : public Operator<Context> {
8388

8489
bool RunOnDevice() override;
8590

91+
// TODO: enable input filler
92+
DISABLE_INPUT_FILLERS(Context)
93+
8694
protected:
8795
INPUT_TAGS(X, LENS, BOUNDARIES);
8896
OUTPUT_TAGS(ONE_HOT);

caffe2/operators/order_switch_ops.cc

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,10 @@ bool NHWC2NCHWOp<float, CPUContext>::RunOnDevice() {
1010
const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), C = X.dim32(3);
1111
Y->Resize(N, C, H, W);
1212
const float* Xdata = X.data<float>();
13-
float* Ydata = Y->mutable_data<float>();
14-
for (int n = 0; n < N; ++n) {
15-
for (int h = 0; h < H; ++h) {
16-
for (int w = 0; w < W; ++w) {
17-
for (int c = 0; c < C; ++c) {
18-
Ydata[((n * C + c) * H + h) * W + w] = *(Xdata++);
19-
}
20-
}
21-
}
22-
}
13+
float* Ydata = Y->template mutable_data<float>();
14+
std::array<int, 4> dims = {N, H, W, C};
15+
std::array<int, 4> axes = {0, 3, 1, 2};
16+
math::Transpose(4, dims.data(), axes.data(), Xdata, Ydata, &context_);
2317
return true;
2418
}
2519

@@ -31,20 +25,13 @@ bool NCHW2NHWCOp<float, CPUContext>::RunOnDevice() {
3125
const int N = X.dim32(0), C = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
3226
Y->Resize(N, H, W, C);
3327
const float* Xdata = X.data<float>();
34-
float* Ydata = Y->mutable_data<float>();
35-
for (int n = 0; n < N; ++n) {
36-
for (int c = 0; c < C; ++c) {
37-
for (int h = 0; h < H; ++h) {
38-
for (int w = 0; w < W; ++w) {
39-
Ydata[((n * H + h) * W + w) * C + c] = *(Xdata++);
40-
}
41-
}
42-
}
43-
}
28+
float* Ydata = Y->template mutable_data<float>();
29+
std::array<int, 4> dims = {N, C, H, W};
30+
std::array<int, 4> axes = {0, 2, 3, 1};
31+
math::Transpose(4, dims.data(), axes.data(), Xdata, Ydata, &context_);
4432
return true;
4533
}
4634

47-
4835
REGISTER_CPU_OPERATOR(NHWC2NCHW, NHWC2NCHWOp<float, CPUContext>);
4936
REGISTER_CPU_OPERATOR(NCHW2NHWC, NCHW2NHWCOp<float, CPUContext>);
5037

@@ -102,4 +89,4 @@ class GetNCHW2NHWCGradient : public GradientMakerBase {
10289
}
10390
};
10491
REGISTER_GRADIENT(NCHW2NHWC, GetNCHW2NHWCGradient);
105-
} // namespace caffe2
92+
} // namespace caffe2

0 commit comments

Comments
 (0)