
Commit 3e00c95

smessmer authored and Rob Kunkle committed
Implement c10 ops needed for benchmark (pytorch#9360)
Summary:
Pull Request resolved: pytorch#9360

This implements a first set of c10 operators, namely the ones needed for the multithread predictor benchmark. All implementations are CPU-only and experimental. They're not meant to be used in production. They can be used, however, to test calling simple c10 MLPs from Caffe2 or PyTorch when working on these integration paths.

Reviewed By: dzhulgakov

Differential Revision: D8811698

fbshipit-source-id: 826789c38b2bfdb125a5c0d03c5aebf627785482
1 parent 454dfc3 commit 3e00c95


56 files changed: +2943 −24 lines (only a subset of files is shown below)

caffe2/core/dispatch/DispatchTable.h

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ class ThreadsafeOperatorTable_ final {
     });
     if (!res) {
       std::ostringstream msg;
+      using ::operator<<;
       msg << "Tried to register conflicting kernels to the dispatcher: " << key;
       throw std::logic_error(msg.str());
     }
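
The only functional change in this file is the added using ::operator<<; inside the error path. A plausible reading (not stated in the commit message) is that the stream operator used to print the dispatch key is declared at global scope, where neither argument-dependent lookup on the key type nor ordinary lookup from inside the dispatcher's namespace will find it once a closer operator<< overload exists. A minimal, self-contained sketch of that situation, with hypothetical names (keylib::Key, throw_conflict), not taken from the commit:

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

namespace keylib {
struct Key { std::string repr; };  // hypothetical stand-in for a dispatch key
}

// Printer for Key declared at global scope, so ADL (which only searches
// namespace keylib) cannot find it.
inline std::ostream& operator<<(std::ostream& out, const keylib::Key& key) {
  return out << key.repr;
}

namespace dispatch_sketch {
// An unrelated operator<< in this namespace hides the global one during
// ordinary unqualified lookup.
struct Unrelated {};
inline std::ostream& operator<<(std::ostream& out, const Unrelated&) { return out; }

inline void throw_conflict(const keylib::Key& key) {
  std::ostringstream msg;
  using ::operator<<;  // re-expose the hidden global overload, as in the diff
  msg << "Tried to register conflicting kernels to the dispatcher: " << key;
  throw std::logic_error(msg.str());
}
} // namespace dispatch_sketch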

caffe2/core/dispatch/KernelRegistration.h

Lines changed: 5 additions & 5 deletions
@@ -2,7 +2,7 @@
 
 #include "caffe2/core/dispatch/OpSchema.h"
 #include "caffe2/core/dispatch/Dispatcher.h"
-#include "caffe2/utils/Optional.h"
+#include <ATen/core/optional.h>
 
 /**
  * To register your own kernel for an operator, do in one (!) cpp file:
@@ -89,13 +89,13 @@ class KernelRegistrationBuilder final {
   static constexpr uint64_t KERNEL_PRESENT = 0x01 << 0;
   static constexpr uint64_t DISPATCH_KEY_PRESENT = 0x01 << 1;
 
-  optional<typename Schema::signature::func_type*> kernel_;
-  optional<typename Schema::dispatch::dispatch_key_type> dispatch_key_;
+  at::optional<typename Schema::signature::func_type*> kernel_;
+  at::optional<typename Schema::dispatch::dispatch_key_type> dispatch_key_;
 
  public:
-  constexpr KernelRegistrationBuilder(): KernelRegistrationBuilder(nullopt, nullopt) {}
+  constexpr KernelRegistrationBuilder(): KernelRegistrationBuilder(at::nullopt, at::nullopt) {}
 
-  constexpr KernelRegistrationBuilder(optional<typename Schema::signature::func_type*> kernel, optional<typename Schema::dispatch::dispatch_key_type> dispatch_key)
+  constexpr KernelRegistrationBuilder(at::optional<typename Schema::signature::func_type*> kernel, at::optional<typename Schema::dispatch::dispatch_key_type> dispatch_key)
   : kernel_(std::move(kernel)), dispatch_key_(std::move(dispatch_key)) {}
 
   /**
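
The edits in this file are mechanical: caffe2's own Optional.h is replaced by ATen's <ATen/core/optional.h>, so the builder's kernel and dispatch-key slots become at::optional values that default to at::nullopt. For readers unfamiliar with it, at::optional follows the std::optional-style interface; a small illustrative sketch with hypothetical stand-in types (KernelFn, DispatchKey, RegistrationSlots), not taken from the commit:

#include <ATen/core/optional.h>

// Hypothetical stand-ins for Schema::signature::func_type* and the dispatch key.
using KernelFn = void (*)();
using DispatchKey = int;

struct RegistrationSlots {
  // Both slots start out empty, mirroring the builder's default constructor.
  at::optional<KernelFn> kernel_ = at::nullopt;
  at::optional<DispatchKey> dispatch_key_ = at::nullopt;
};

inline bool ready(const RegistrationSlots& slots) {
  // has_value()/value() follow the usual std::optional-style interface.
  return slots.kernel_.has_value() && slots.dispatch_key_.has_value();
}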

caffe2/core/operator_c10wrapper.h

Lines changed: 288 additions & 10 deletions
@@ -2,9 +2,29 @@
 
 #include "caffe2/core/dispatch/Dispatcher.h"
 #include "caffe2/core/operator.h"
+#include <ATen/core/ArrayRef.h>
+#include "caffe2/utils/Metaprogramming.h"
 
 namespace caffe2 {
 
+namespace details {
+template <size_t...>
+struct true_t : std::true_type {};
+template <class State>
+inline std::shared_ptr<State> init_state() {
+  return std::make_shared<State>();
+}
+template <>
+inline std::shared_ptr<void> init_state<void>() {
+  return std::shared_ptr<void>();
+}
+template <class T>
+using is_output_arg = std::is_same<Tensor*, T>;
+template <class ParameterDef>
+using extract_type_t =
+    c10::guts::result_of_t<decltype (&ParameterDef::parse)(ArgumentHelper)>;
+} // namespace details
+
 /**
  * To make a c10 operator "C10Add" callable from caffe2 as "C2MyAddOpName", just
  * write
@@ -16,26 +36,251 @@ namespace caffe2 {
  * TODO: Figure out a better way to handle output parameters
  */
 
-template <class OpSchemaDef, class Context>
+template <
+    class OpSchemaDef,
+    class Context,
+    class State,
+    bool use_array_input,
+    class ParameterDefTuple>
 class C10OperatorWrapper final : public Operator<Context> {
   using Schema = c10::OpSchema<OpSchemaDef>;
 
  public:
-  C10OperatorWrapper(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<Context>(operator_def, ws) {}
+  static_assert(
+      c10::guts::is_instantiation_of<std::tuple, ParameterDefTuple>::value,
+      "");
+  using ParameterTuple =
+      c10::guts::typelist::to_tuple_t<c10::guts::typelist::map_t<
+          details::extract_type_t,
+          c10::guts::typelist::from_tuple_t<ParameterDefTuple>>>;
 
   USE_OPERATOR_CONTEXT_FUNCTIONS;
 
+  static constexpr bool op_has_context_argument = std::is_same<
+      BaseContext*,
+      c10::guts::typelist::last_t<
+          typename Schema::signature::parameter_types>>::value;
+  static constexpr bool op_has_state_argument =
+      !std::is_same<void, State>::value;
+
+  C10OperatorWrapper(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws),
+        state_(details::init_state<State>()),
+        parameters_(parse_parameters_(
+            operator_def,
+            c10::guts::make_index_sequence<num_parameters()>())) {}
+
+  static constexpr size_t num_inputs() {
+    return Schema::signature::num_args - num_outputs() - num_parameters() -
+        (op_has_context_argument ? 1 : 0) - (op_has_state_argument ? 1 : 0);
+  }
+
+  static constexpr size_t num_parameters() {
+    return std::tuple_size<ParameterDefTuple>::value;
+  }
+
+  static constexpr size_t num_outputs() {
+    return c10::guts::typelist::count_if<
+        details::is_output_arg,
+        typename Schema::signature::parameter_types>::value;
+  }
+
   bool RunOnDevice() override {
     RunOnDevice_(
-        c10::guts::make_index_sequence<Schema::signature::num_args - 1>());
+        c10::guts::make_index_sequence<num_inputs()>(),
+        c10::guts::make_index_sequence<num_outputs()>(),
+        c10::guts::make_index_sequence<num_parameters()>());
     return true;
   }
 
  private:
-  template <size_t... InputIndex>
-  void RunOnDevice_(c10::guts::index_sequence<InputIndex...>) {
-    c10::Dispatcher<OpSchemaDef>::call(Input(InputIndex)..., Output(0));
+  template <size_t... ParameterIndex>
+  ParameterTuple parse_parameters_(
+      const OperatorDef& operator_def,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    return ParameterTuple{Parameter<ParameterIndex>(operator_def)...};
+  }
+
+  template <size_t Index>
+  details::extract_type_t<
+      typename std::tuple_element<Index, ParameterDefTuple>::type>
+  Parameter(const OperatorDef& operator_def) {
+    using Parameter =
+        typename std::tuple_element<Index, ParameterDefTuple>::type;
+    return Parameter::parse(ArgumentHelper(operator_def));
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && op_has_context_argument &&
+          op_has_state_argument && !use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        Input(InputIndex)...,
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...,
+        state_.get(),
+        static_cast<BaseContext*>(&context_));
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && op_has_context_argument &&
+          !op_has_state_argument && !use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        Input(InputIndex)...,
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...,
+        static_cast<BaseContext*>(&context_));
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && !op_has_context_argument &&
+          op_has_state_argument && !use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        Input(InputIndex)...,
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...,
+        state_.get());
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && !op_has_context_argument &&
+          !op_has_state_argument && !use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        Input(InputIndex)...,
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...);
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && op_has_context_argument &&
+          op_has_state_argument && use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        at::ArrayRef<const Tensor*>(array_inputs_()),
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...,
+        state_.get(),
+        static_cast<BaseContext*>(&context_));
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && op_has_context_argument &&
+          !op_has_state_argument && use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        at::ArrayRef<const Tensor*>(array_inputs_()),
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...,
+        static_cast<BaseContext*>(&context_));
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && !op_has_context_argument &&
+          op_has_state_argument && use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        at::ArrayRef<const Tensor*>(array_inputs_()),
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...,
+        state_.get());
+  }
+
+  template <
+      size_t... InputIndex,
+      size_t... OutputIndex,
+      size_t... ParameterIndex>
+  c10::guts::enable_if_t<
+      details::true_t<InputIndex...>::value && !op_has_context_argument &&
+          !op_has_state_argument && use_array_input,
+      void>
+  RunOnDevice_(
+      c10::guts::index_sequence<InputIndex...>,
+      c10::guts::index_sequence<OutputIndex...>,
+      c10::guts::index_sequence<ParameterIndex...>) {
+    c10::Dispatcher<OpSchemaDef>::call(
+        at::ArrayRef<const Tensor*>(array_inputs_()),
+        Output(OutputIndex)...,
+        std::get<ParameterIndex>(parameters_)...);
+  }
+
+  std::vector<const Tensor*> array_inputs_() {
+    std::vector<const Tensor*> result;
+    result.reserve(InputSize());
+    for (size_t i = 0; i < InputSize(); ++i) {
+      result.push_back(&Input(i));
+    }
+    return result;
+  }
+
+  std::shared_ptr<State> state_;
+
+  ParameterTuple parameters_;
+};
+
+template <class ParameterDef>
+struct ParameterHelper final {
+  static typename ParameterDef::type parse(const ArgumentHelper& helper) {
+    return helper.GetSingleArgument<typename ParameterDef::type>(
+        ParameterDef::name(), ParameterDef::default_value());
   }
 };
 
@@ -47,8 +292,41 @@ CAFFE_DECLARE_REGISTRY(
 
 // TODO Currently we only register the CPU variant. This is going to be fixed
 // once the tensor detemplatization lands.
-#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH(OpSchemaDef, Name) \
-  CAFFE_REGISTER_CLASS( \
-      C10OperatorRegistry, Name, C10OperatorWrapper<OpSchemaDef, CPUContext>)
+#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH(OpSchemaDef, State, Name) \
+  CAFFE_REGISTER_CLASS( \
+      C10OperatorRegistry, \
+      Name, \
+      C10OperatorWrapper<OpSchemaDef, CPUContext, State, false, std::tuple<>>)
+
+#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_WITH_PARAMETERS( \
+    OpSchemaDef, State, Name, ...) \
+  CAFFE_REGISTER_CLASS( \
+      C10OperatorRegistry, \
+      Name, \
+      C10OperatorWrapper< \
+          OpSchemaDef, \
+          CPUContext, \
+          State, \
+          false, \
+          std::tuple<__VA_ARGS__>>)
+
+#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_WITH_ARRAY_INPUT( \
+    OpSchemaDef, State, Name) \
+  CAFFE_REGISTER_CLASS( \
+      C10OperatorRegistry, \
+      Name, \
+      C10OperatorWrapper<OpSchemaDef, CPUContext, State, true, std::tuple<>>)
+
+#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_WITH_ARRAY_INPUT_AND_PARAMETERS( \
+    OpSchemaDef, State, Name, ...) \
+  CAFFE_REGISTER_CLASS( \
+      C10OperatorRegistry, \
+      Name, \
+      C10OperatorWrapper< \
+          OpSchemaDef, \
+          CPUContext, \
+          State, \
+          true, \
+          std::tuple<__VA_ARGS__>>)
 
 } // namespace caffe2
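
Taken together, the wrapper makes a c10 operator usable from caffe2 with a single registration line: the class deduces input, output, and parameter counts from the c10 schema, parses caffe2 arguments once in the constructor via the ParameterDef tuple, and the eight enable_if'd RunOnDevice_ overloads cover every combination of context argument, state argument, and array input. An illustrative usage sketch follows; C10Add and C2MyAddOpName come from the header's own doc comment, while C10BatchNorm, C2MyBatchNormOpName, and EpsilonParameter are hypothetical names invented here, and the exact shape of a real OpSchemaDef is not shown in this commit:

// Illustrative registration file (sketch only, not part of the commit).
#include "caffe2/core/operator_c10wrapper.h"

namespace caffe2 {

// Stateless op with no extra caffe2 arguments: State = void means no state
// pointer is passed to the kernel, and the wrapper forwards Input(i)/Output(i)
// straight to c10::Dispatcher<C10Add>::call.
REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH(C10Add, void, C2MyAddOpName)

// Op with a caffe2 argument: a parameter definition says how to pull the value
// out of the OperatorDef, and ParameterHelper supplies the boilerplate parse().
struct EpsilonParameter { // hypothetical parameter definition
  using type = float;
  static constexpr const char* name() { return "epsilon"; }
  static constexpr float default_value() { return 1e-5f; }
};

REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_WITH_PARAMETERS(
    C10BatchNorm,          // hypothetical OpSchemaDef
    void,                  // no per-instance state
    C2MyBatchNormOpName,   // hypothetical caffe2-visible operator name
    ParameterHelper<EpsilonParameter>)

} // namespace caffe2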

caffe2/operators/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -40,6 +40,8 @@ file(GLOB tmp *.cc)
 file(GLOB tmp_cudnn *_cudnn.cc)
 exclude(tmp "${tmp}" ${tmp_cudnn})
 set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
+file(GLOB_RECURSE tmp c10/*.cc)
+set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
 # exclude test files and gpu files
 file(GLOB tmp *_test.cc)
 exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})
