
Commit e66ad2e

zdevito authored and Rob Kunkle committed
Add a tagged union type that replaces tensor in the interpreter. (pytorch#9368)
Summary: IValue is short for "interpreter value". It is used frequently, so a short name is important. It will allow us to implement more non-tensor types efficiently and to remove many hacks from the compiler. This PR is deliberately limited: it only introduces IValue and changes the interpreter to use it. Follow-up PRs will:
* Change the way aten_ops consume non-tensor types, so that integer lists are no longer represented as Tensors.
* Introduce TensorList as a fundamental type and remove all vararg handling in gen_jit_dispatch.
* Change the compiler to implement math on primitive numbers rather than converting to tensors.

jamesr66a apaszke
Pull Request resolved: pytorch#9368
Reviewed By: ezyang
Differential Revision: D8817598
Pulled By: zdevito
fbshipit-source-id: 29dce80611ce5f6384234de9d12a67861d2b112f
1 parent bbfe77e commit e66ad2e

16 files changed: +422 -87 lines
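For context, here is a minimal sketch of the tagged-union idea the summary describes. The names (SketchValue, Tag, and its members) are hypothetical illustrations, not the real API; the actual IValue lives in torch/csrc/jit/ivalue.h and additionally handles Tensor payloads, reference counting, and more kinds.

// Minimal illustrative tagged union, assuming only two payload kinds for brevity.
#include <cassert>
#include <cstdint>
#include <stdexcept>

struct SketchValue {
  enum class Tag { None, Int, Double };

  SketchValue() : tag(Tag::None) {}
  /* implicit */ SketchValue(int64_t i) : tag(Tag::Int) { payload.i = i; }
  /* implicit */ SketchValue(double d) : tag(Tag::Double) { payload.d = d; }

  bool isInt() const { return tag == Tag::Int; }
  bool isDouble() const { return tag == Tag::Double; }

  int64_t toInt() const {
    if (tag != Tag::Int) throw std::runtime_error("expected Int");
    return payload.i;
  }
  double toDouble() const {
    if (tag != Tag::Double) throw std::runtime_error("expected Double");
    return payload.d;
  }

 private:
  Tag tag;                 // which alternative is active
  union {                  // storage shared by all alternatives
    int64_t i;
    double d;
  } payload;
};

int main() {
  SketchValue v(int64_t(42));
  assert(v.isInt() && v.toInt() == 42);  // the tag guards the accessor
}

The design point is that the interpreter stack can then hold ints, lists, and tensors uniformly, with cheap checked accessors like toTensor().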

tools/jit/gen_jit_dispatch.py

Lines changed: 5 additions & 4 deletions

@@ -88,9 +88,10 @@ def attr_of(jit_type):
 # map from aten 'simple_type' to the function that will turn a tensor into
 # that type
 FROM_TENSOR = {
-    'Device': 'tensor_as<IntList>',
+    'Device': 'tensor_as<std::vector<int64_t>>',
     'ScalarType': 'tensor_as<int64_t>',
     'Layout': 'tensor_as<int64_t>',
+    'IntList': 'tensor_as<std::vector<int64_t>>',
 }


@@ -107,7 +108,7 @@ def from_tensor(arg):
 """)

 POS_ASSIGNMENT = CodeTemplate("""\
-auto ${name} = ${from_tensor}(std::move(peek(stack, ${i}, ${N})));\
+auto ${name} = ${from_tensor}(std::move(peek(stack, ${i}, ${N})).toTensor());\
 """)

 CALL_NAMESPACE = CodeTemplate("""\
@@ -261,12 +262,12 @@ def emit_decl_variant(decl, is_positional_arg, has_tensorlist):
             # NOTE: don't advance real_inputs here. After this we are going
             # to switch over to indexing from the end as if we only had
             # the static arguments.
-            arguments.append('peekSlice(stack, {}, varargs_length - {}, varargs_length)'
+            arguments.append('toTensors(peekSlice(stack, {}, varargs_length - {}, varargs_length))'
                              .format(real_inputs, static_inputs))
        elif arg['simple_type'] in default_only_types:
            arguments.append(arg['default'])
        elif is_tensor_arg(arg):
-            arguments.append('std::move(peek(stack, {}, {}))'.format(real_inputs, view_length))
+            arguments.append('std::move(peek(stack, {}, {})).toTensor()'.format(real_inputs, view_length))
            real_inputs += 1
        elif is_positional_arg[i]:
            template_kwargs = dict(from_tensor=from_tensor(arg),
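The new FROM_TENSOR entry means IntList arguments are converted by reading an integer tensor back into a std::vector<int64_t>. The following is a rough, hypothetical illustration of what that conversion accomplishes; int_list_from_tensor is an invented name, and the real helper is tensor_as in torch/csrc/jit/tensor_conversions.h.

// Illustrative only: turn a 1-D integer tensor back into a plain vector.
#include <ATen/ATen.h>
#include <cstdint>
#include <vector>

std::vector<int64_t> int_list_from_tensor(const at::Tensor& t) {
  auto longs = t.contiguous().to(at::kLong);         // ensure dense int64 storage
  const int64_t* data = longs.data_ptr<int64_t>();   // raw pointer to the elements
  return std::vector<int64_t>(data, data + longs.numel());
}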

tools/jit/templates/register_aten_ops.cpp

Lines changed: 7 additions & 2 deletions

@@ -29,7 +29,6 @@ using autograd::Variable;
 using autograd::variable_list;
 using at::Scalar;
 using at::Tensor;
-using at::IntList;
 using at::TensorList;
 using at::TensorOptions;
 using at::DeviceGuard;
@@ -39,10 +38,16 @@ namespace {
 int deviceForInputs(Stack & stack, size_t N) {
   if(N == 0)
     return -1;
-  auto & t = *(stack.end() - N);
+  auto t = (stack.end() - N)->toTensor();
   return t.type().is_cuda() ? (int) t.get_device() : -1;
 }

+std::vector<at::Tensor> toTensors(at::ArrayRef<IValue> ivalues) {
+  return fmap(ivalues, [](const IValue& v) {
+    return v.toTensor();
+  });
+}
+
 template<size_t N>
 std::array<bool, N> as_bool_array(const std::vector<int64_t>& vec) {
   std::array<bool, N> res;
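For readers unfamiliar with torch's fmap utility, the toTensors helper above is equivalent to a plain std::transform over the slice. A hedged sketch, assuming the JIT headers as of this commit; to_tensors_sketch is an illustrative name, not part of the PR.

// Illustrative equivalent of the toTensors helper using std::transform.
#include <ATen/ATen.h>
#include <algorithm>
#include <vector>
#include "torch/csrc/jit/ivalue.h"

std::vector<at::Tensor> to_tensors_sketch(at::ArrayRef<torch::jit::IValue> ivalues) {
  std::vector<at::Tensor> out(ivalues.size());
  std::transform(ivalues.begin(), ivalues.end(), out.begin(),
                 [](const torch::jit::IValue& v) { return v.toTensor(); });
  return out;
}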

torch/csrc/jit/autodiff.cpp

Lines changed: 1 addition & 3 deletions

@@ -17,7 +17,7 @@ bool hasOneValuedAttribute(Node *n, torch::jit::Symbol name) {

 bool isDifferentiable(Node * n) {
   static std::unordered_set<Symbol> differentiable_kinds = {
-    aten::add, aten::sub, aten::mul, prim::Constant, prim::ReplaceIfUndef,
+    aten::add, aten::sub, aten::mul, prim::Constant,
     aten::sigmoid, aten::tanh, aten::mm, aten::chunk, aten::split, aten::t, aten::neg,
     aten::unsqueeze, aten::expand, aten::addmm, aten::gt, aten::lt, aten::eq, aten::ne, aten::ge, aten::le, aten::type_as,
     aten::relu, aten::exp, prim::AutogradAdd
@@ -99,8 +99,6 @@ static std::vector<Value*> gradientForNode(Node* node, ArrayRef<Value*> grad_val
       return {grads.at(0) * inputs.at(1), grads.at(0) * inputs.at(0)};
     case prim::Constant:
       return {};
-    case prim::ReplaceIfUndef:
-      return {grads.at(0), grads.at(0)};
     case aten::sigmoid:
       return {grads.at(0) * outputs.at(0) * (1 - outputs.at(0))};
     case aten::tanh:
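Aside from the prim::ReplaceIfUndef removal, the gradient rules in this switch are unchanged; the aten::sigmoid case encodes d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)). A small self-contained numeric check of that rule, with no torch dependency:

// Verify the sigmoid gradient rule against a central finite difference.
#include <cassert>
#include <cmath>

int main() {
  double x = 0.3;
  double y = 1.0 / (1.0 + std::exp(-x));           // forward: sigmoid(x)
  double analytic = y * (1.0 - y);                 // rule used in gradientForNode
  double eps = 1e-6;
  double numeric = ((1.0 / (1.0 + std::exp(-(x + eps)))) -
                    (1.0 / (1.0 + std::exp(-(x - eps))))) / (2 * eps);
  assert(std::fabs(analytic - numeric) < 1e-6);    // central difference agrees
}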

torch/csrc/jit/graph_executor.cpp

Lines changed: 23 additions & 15 deletions

@@ -21,6 +21,7 @@
 #include "torch/csrc/jit/passes/loop_unrolling.h"
 #include "torch/csrc/jit/passes/lower_grad_of.h"
 #include "torch/csrc/jit/symbolic_variable.h"
+#include "torch/csrc/jit/ivalue.h"

 #include "torch/csrc/autograd/edge.h"
 #include "torch/csrc/autograd/function.h"
@@ -72,6 +73,16 @@ struct ExecutionPlanAutogradFunction : public autograd::Function {
 };


+// helper to run interpreter on variables until we switch
+// everything to IValue
+inline variable_tensor_list runOneStage(const Code & code, variable_tensor_list inputs) {
+  std::vector<IValue> stack(inputs.begin(), inputs.end());
+  InterpreterState(code).runOneStage(stack);
+  return variable_tensor_list(fmap(stack, [](IValue& v) {
+    return std::move(v).toTensor();
+  }));
+}
+
 // an optimized way of executing the subgraph computed directly on
 // tensors rather than Variables.
 // This will unwrap Variables, run the plan, and re-wrap them.
@@ -90,8 +101,7 @@
     if(grad) {
       return runWithGrad(std::move(stack));
     }
-    InterpreterState(f).runOneStage(stack);
-    return stack;
+    return runOneStage(f, std::move(stack));
   }
   std::shared_ptr<Graph> get_graph() const {
     return graph;
@@ -113,14 +123,15 @@
   }

 private:
-  // inplace to avoid allocations
-  variable_tensor_list unwrapVariables(variable_tensor_list && list) const {
-    for(auto & v : list) {
-      v = v.defined() ? autograd::as_variable_ref(v).detach() : at::Tensor();
-    }
-    return std::move(list);
+  // note: should be inplace to avoid allocations, but we have to switch from
+  // a list of tensor to a list of ivalues
+  std::vector<IValue> unwrapVariables(variable_tensor_list && list) const {
+    return fmap(list, [](const Variable& v) -> IValue {
+      return v.defined() ? autograd::as_variable_ref(v).detach() : at::Tensor();
+    });
   }
-  // inplace to avoid allocations
+  // note: should be inplace to avoid allocations, but we have to switch from
+  // a list of tensor to a list of ivalues
   variable_tensor_list wrapTensors(tensor_list && list) const {
     for(auto & v : list) {
       v = autograd::make_variable(v, /*requires_grad=*/false);
@@ -152,7 +163,8 @@

     auto stack = unwrapVariables(std::move(inputs));
     InterpreterState(f).runOneStage(stack);
-    variable_tensor_list outputs = std::move(stack);
+    variable_tensor_list outputs(
+        fmap(stack, [](IValue& v) { return std::move(v).toTensor(); }));

     // hookup the gradients for the output tensors that require gradients
     // to the inputs to our gradient function df
@@ -311,11 +323,7 @@

   variable_tensor_list runFallback(variable_tensor_list inputs) {
     auto & fb = getOrCreateAutogradFallback();
-    InterpreterState state(fb);
-    auto stack = std::move(inputs);
-    state.runOneStage(stack);
-    // note: we never unwrapped inputs, because we want autograd to record the trace
-    return stack;
+    return runOneStage(fb, std::move(inputs));
   }

   static bool calcMayIntroduceGradient(Block* b) {
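The runOneStage helper added above bridges the old tensor-list interface to the new IValue stack: wrap the inputs, run the interpreter, unwrap the outputs. A toy, self-contained rendering of that wrap/run/unwrap shape, using std::variant and doubles as hypothetical stand-ins for IValue and Tensor (all names here are invented for illustration):

#include <cassert>
#include <variant>
#include <vector>

using Value = std::variant<double, long>;     // stand-in for the tagged IValue
using Stack = std::vector<Value>;

void run_one_stage(Stack& stack) {            // stand-in interpreter: sums the stack
  double sum = 0;
  for (auto& v : stack) sum += std::get<double>(v);
  stack.assign(1, Value(sum));
}

std::vector<double> run_on_doubles(std::vector<double> inputs) {
  Stack stack(inputs.begin(), inputs.end());  // wrap raw values into tagged values
  run_one_stage(stack);                       // run the interpreter on the stack
  std::vector<double> out;
  for (auto& v : stack) out.push_back(std::get<double>(v));  // unwrap results
  return out;
}

int main() {
  auto out = run_on_doubles({1.0, 2.0, 3.0});
  assert(out.size() == 1 && out[0] == 6.0);
}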

torch/csrc/jit/interned_strings.h

Lines changed: 0 additions & 1 deletion

@@ -35,7 +35,6 @@ _(prim, PadPacked) /* onnx */ \
 _(prim, Placeholder) /* debug */ \
 _(prim, Print) \
 _(prim, PythonOp) \
-_(prim, ReplaceIfUndef) \
 _(prim, Reverse) \
 _(prim, Return) \
 _(prim, Store) \

torch/csrc/jit/interpreter.cpp

Lines changed: 7 additions & 5 deletions

@@ -9,6 +9,7 @@
 #include "torch/csrc/jit/graph_executor.h"
 #include "torch/csrc/jit/ir.h"
 #include "torch/csrc/jit/tensor_conversions.h"
+#include "torch/csrc/jit/ivalue.h"
 #include "torch/csrc/variable_tensor_functions.h"
 #include "torch/csrc/autograd/generated/variable_factories.h"

@@ -410,7 +411,7 @@ struct CodeImpl {
    JIT_ASSERT(inst.debug_name == prim::Placeholder);
    auto offset = relativeJump(from_inst, to_inst);
    inst.callback = [offset](Stack & stack) {
-      auto t = tensor_as<int64_t>(pop(stack));
+      auto t = tensor_as<int64_t>(pop(stack).toTensor());
      return (t == 0) ? offset : 0;
    };
    inst.debug_name = prim::JumpZ;
@@ -422,7 +423,7 @@ struct CodeImpl {
    JIT_ASSERT(inst.debug_name == prim::Placeholder);
    auto offset = relativeJump(from_inst, to_inst);
    inst.callback = [offset](Stack & stack) {
-      auto t = tensor_as<int64_t>(pop(stack));
+      auto t = tensor_as<int64_t>(pop(stack).toTensor());
      return (t != 0) ? offset : 0;
    };
    inst.debug_name = prim::JumpNZ;
@@ -629,7 +630,8 @@ struct CodeImpl {
    return [=](Stack& stack) mutable {
      autograd::profiler::RecordFunction record("GraphExecutor");
      auto inputs = last(stack, num_inputs);
-      variable_tensor_list tinputs(inputs.begin(), inputs.end());
+      variable_tensor_list tinputs(
+          fmap(inputs, [](const IValue& v) { return v.toTensor(); }));
      drop(stack, num_inputs);
      //TODO: has graph executor work from a stack as well
      variable_tensor_list toutputs = executor->run(variable_tensor_list(std::move(tinputs)));
@@ -774,7 +776,7 @@ struct InterpreterStateImpl {
  // in the case where it is true, then the interpreter and this array get copied
  // if this every becomes a bottleneck then we _should_ consider minimizing the
  // total number or register
-  std::vector<at::Tensor> registers;
+  std::vector<IValue> registers;

  // single buffer for input/output calls to ATen functions, so that we do not reallocate
  Stack stack;
@@ -799,7 +801,7 @@
 InterpreterState::~InterpreterState() {}

 void InterpreterState::runOneStage(Stack & stack) {
-  return pImpl->runOneStage(stack);
+  return pImpl->runOneStage(stack);
 }

 const TensorType & InterpreterState::tensorTypeForInput(size_t i) const {
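The JumpZ/JumpNZ callbacks keep their old shape; the only change is that the condition is first unwrapped from an IValue. A minimal stand-alone sketch of that jump-callback pattern, with MiniStack, make_jump_z, and pop_int as hypothetical stand-ins (the real callback pops an IValue and goes through tensor_as<int64_t> as shown above):

// A callback inspects the popped condition and returns a relative jump offset;
// 0 means fall through to the next instruction.
#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

using MiniStack = std::vector<int64_t>;
using Callback = std::function<int64_t(MiniStack&)>;

int64_t pop_int(MiniStack& stack) {
  int64_t v = stack.back();
  stack.pop_back();
  return v;
}

// Build a JumpZ-style callback: jump by `offset` when the popped value is zero.
Callback make_jump_z(int64_t offset) {
  return [offset](MiniStack& stack) {
    int64_t t = pop_int(stack);   // real interpreter: tensor_as<int64_t>(pop(stack).toTensor())
    return (t == 0) ? offset : 0;
  };
}

int main() {
  MiniStack stack{0};
  auto jump = make_jump_z(5);
  assert(jump(stack) == 5);       // condition was zero, take the jump
  stack.push_back(1);
  assert(jump(stack) == 0);       // non-zero, fall through
}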

torch/csrc/jit/interpreter.h

Lines changed: 3 additions & 1 deletion

@@ -19,6 +19,8 @@ struct InterpreterStateImpl;
 struct Graph;
 struct Node;
 struct TensorType;
+struct IValue;
+using Stack = std::vector<IValue>;

 struct Code {
   Code()
@@ -44,7 +46,7 @@ struct InterpreterState {
   // advance the interpreter state by running one stage. Returning the
   // outputs for that stage, suspending the computation.
   // Call this function again continues computation where it left off.
-  void runOneStage(std::vector<at::Tensor> & stack);
+  void runOneStage(Stack & stack);
   const TensorType & tensorTypeForInput(size_t i) const;
   ~InterpreterState();
   // create a copy of InterpreterState with its current state
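A hedged usage sketch of the new Stack-based entry point declared above: Code, InterpreterState, and Stack come from interpreter.h, and IValue from ivalue.h; the function name, the two-tensor calling convention, and the single-output assumption are illustrative only, not part of the PR.

#include <ATen/ATen.h>
#include "torch/csrc/jit/interpreter.h"
#include "torch/csrc/jit/ivalue.h"

namespace torch { namespace jit {

at::Tensor run_one_output(const Code& code, at::Tensor a, at::Tensor b) {
  Stack stack;                        // Stack is std::vector<IValue> after this PR
  stack.emplace_back(std::move(a));   // tensors are wrapped into IValues
  stack.emplace_back(std::move(b));
  InterpreterState(code).runOneStage(stack);
  return stack.back().toTensor();     // unwrap the (assumed single) output
}

}} // namespace torch::jit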
