
Commit e66ad2e

zdevito authored and Rob Kunkle committed
Add a tagged union type that replaces tensor in the interpreter. (pytorch#9368)
Summary: IValue is short for "interpreter value". It is used frequently, so a short name is important. It will allow us to implement more non-tensor types efficiently and to remove many hacks from the compiler. This PR is deliberately limited: it only introduces IValue and changes the interpreter to use it. Follow-up PRs will:
* Change the way aten_ops consume non-tensor types, so that integer lists are no longer represented as Tensors.
* Introduce TensorList as a fundamental type and remove all vararg handling in gen_jit_dispatch.
* Change the compiler to implement math on primitive numbers rather than converting to tensors.

jamesr66a apaszke
Pull Request resolved: pytorch#9368
Reviewed By: ezyang
Differential Revision: D8817598
Pulled By: zdevito
fbshipit-source-id: 29dce80611ce5f6384234de9d12a67861d2b112f
1 parent bbfe77e commit e66ad2e

16 files changed: +422 -87 lines
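For context, here is a minimal sketch of the tagged-union idea the summary describes. The names (SketchValue, Tag, and its members) are hypothetical illustrations, not the real API; the actual IValue lives in torch/csrc/jit/ivalue.h and additionally handles Tensor payloads, reference counting, and more kinds.

// Minimal illustrative tagged union, assuming only two payload kinds for brevity.
#include <cassert>
#include <cstdint>
#include <stdexcept>

struct SketchValue {
  enum class Tag { None, Int, Double };

  SketchValue() : tag(Tag::None) {}
  /* implicit */ SketchValue(int64_t i) : tag(Tag::Int) { payload.i = i; }
  /* implicit */ SketchValue(double d) : tag(Tag::Double) { payload.d = d; }

  bool isInt() const { return tag == Tag::Int; }
  bool isDouble() const { return tag == Tag::Double; }

  int64_t toInt() const {
    if (tag != Tag::Int) throw std::runtime_error("expected Int");
    return payload.i;
  }
  double toDouble() const {
    if (tag != Tag::Double) throw std::runtime_error("expected Double");
    return payload.d;
  }

 private:
  Tag tag;                 // which alternative is active
  union {                  // storage shared by all alternatives
    int64_t i;
    double d;
  } payload;
};

int main() {
  SketchValue v(int64_t(42));
  assert(v.isInt() && v.toInt() == 42);  // the tag guards the accessor
}

The design point is that the interpreter stack can then hold ints, lists, and tensors uniformly, with cheap checked accessors like toTensor().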

tools/jit/gen_jit_dispatch.py

Lines changed: 5 additions & 4 deletions

@@ -88,9 +88,10 @@ def attr_of(jit_type):
 # map from aten 'simple_type' to the function that will turn a tensor into
 # that type
 FROM_TENSOR = {
-    'Device': 'tensor_as<IntList>',
+    'Device': 'tensor_as<std::vector<int64_t>>',
     'ScalarType': 'tensor_as<int64_t>',
     'Layout': 'tensor_as<int64_t>',
+    'IntList': 'tensor_as<std::vector<int64_t>>',
 }


@@ -107,7 +108,7 @@ def from_tensor(arg):
 """)

 POS_ASSIGNMENT = CodeTemplate("""\
-auto ${name} = ${from_tensor}(std::move(peek(stack, ${i}, ${N})));\
+auto ${name} = ${from_tensor}(std::move(peek(stack, ${i}, ${N})).toTensor());\
 """)

 CALL_NAMESPACE = CodeTemplate("""\
@@ -261,12 +262,12 @@ def emit_decl_variant(decl, is_positional_arg, has_tensorlist):
             # NOTE: don't advance real_inputs here. After this we are going
             # to switch over to indexing from the end as if we only had
             # the static arguments.
-            arguments.append('peekSlice(stack, {}, varargs_length - {}, varargs_length)'
+            arguments.append('toTensors(peekSlice(stack, {}, varargs_length - {}, varargs_length))'
                              .format(real_inputs, static_inputs))
        elif arg['simple_type'] in default_only_types:
            arguments.append(arg['default'])
        elif is_tensor_arg(arg):
-            arguments.append('std::move(peek(stack, {}, {}))'.format(real_inputs, view_length))
+            arguments.append('std::move(peek(stack, {}, {})).toTensor()'.format(real_inputs, view_length))
            real_inputs += 1
        elif is_positional_arg[i]:
            template_kwargs = dict(from_tensor=from_tensor(arg),
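The new FROM_TENSOR entry means IntList arguments are converted by reading an integer tensor back into a std::vector<int64_t>. The following is a rough, hypothetical illustration of what that conversion accomplishes; int_list_from_tensor is an invented name, and the real helper is tensor_as in torch/csrc/jit/tensor_conversions.h.

// Illustrative only: turn a 1-D integer tensor back into a plain vector.
#include <ATen/ATen.h>
#include <cstdint>
#include <vector>

std::vector<int64_t> int_list_from_tensor(const at::Tensor& t) {
  auto longs = t.contiguous().to(at::kLong);         // ensure dense int64 storage
  const int64_t* data = longs.data_ptr<int64_t>();   // raw pointer to the elements
  return std::vector<int64_t>(data, data + longs.numel());
}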

tools/jit/templates/register_aten_ops.cpp

Lines changed: 7 additions & 2 deletions

@@ -29,7 +29,6 @@ using autograd::Variable;
 using autograd::variable_list;
 using at::Scalar;
 using at::Tensor;
-using at::IntList;
 using at::TensorList;
 using at::TensorOptions;
 using at::DeviceGuard;
@@ -39,10 +38,16 @@ namespace {
 int deviceForInputs(Stack & stack, size_t N) {
   if(N == 0)
     return -1;
-  auto & t = *(stack.end() - N);
+  auto t = (stack.end() - N)->toTensor();
   return t.type().is_cuda() ? (int) t.get_device() : -1;
 }

+std::vector<at::Tensor> toTensors(at::ArrayRef<IValue> ivalues) {
+  return fmap(ivalues, [](const IValue& v) {
+    return v.toTensor();
+  });
+}
+
 template<size_t N>
 std::array<bool, N> as_bool_array(const std::vector<int64_t>& vec) {
   std::array<bool, N> res;
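For readers unfamiliar with torch's fmap utility, the toTensors helper above is equivalent to a plain std::transform over the slice. A hedged sketch, assuming the JIT headers as of this commit; to_tensors_sketch is an illustrative name, not part of the PR.

// Illustrative equivalent of the toTensors helper using std::transform.
#include <ATen/ATen.h>
#include <algorithm>
#include <vector>
#include "torch/csrc/jit/ivalue.h"

std::vector<at::Tensor> to_tensors_sketch(at::ArrayRef<torch::jit::IValue> ivalues) {
  std::vector<at::Tensor> out(ivalues.size());
  std::transform(ivalues.begin(), ivalues.end(), out.begin(),
                 [](const torch::jit::IValue& v) { return v.toTensor(); });
  return out;
}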

torch/csrc/jit/autodiff.cpp

Lines changed: 1 addition & 3 deletions

@@ -17,7 +17,7 @@ bool hasOneValuedAttribute(Node *n, torch::jit::Symbol name) {

 bool isDifferentiable(Node * n) {
   static std::unordered_set<Symbol> differentiable_kinds = {
-    aten::add, aten::sub, aten::mul, prim::Constant, prim::ReplaceIfUndef,
+    aten::add, aten::sub, aten::mul, prim::Constant,
     aten::sigmoid, aten::tanh, aten::mm, aten::chunk, aten::split, aten::t, aten::neg,
     aten::unsqueeze, aten::expand, aten::addmm, aten::gt, aten::lt, aten::eq, aten::ne, aten::ge, aten::le, aten::type_as,
     aten::relu, aten::exp, prim::AutogradAdd
@@ -99,8 +99,6 @@ static std::vector<Value*> gradientForNode(Node* node, ArrayRef<Value*> grad_val
       return {grads.at(0) * inputs.at(1), grads.at(0) * inputs.at(0)};
     case prim::Constant:
       return {};
-    case prim::ReplaceIfUndef:
-      return {grads.at(0), grads.at(0)};
     case aten::sigmoid:
       return {grads.at(0) * outputs.at(0) * (1 - outputs.at(0))};
     case aten::tanh:
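Aside from the prim::ReplaceIfUndef removal, the gradient rules in this switch are unchanged; the aten::sigmoid case encodes d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)). A small self-contained numeric check of that rule, with no torch dependency:

// Verify the sigmoid gradient rule against a central finite difference.
#include <cassert>
#include <cmath>

int main() {
  double x = 0.3;
  double y = 1.0 / (1.0 + std::exp(-x));           // forward: sigmoid(x)
  double analytic = y * (1.0 - y);                 // rule used in gradientForNode
  double eps = 1e-6;
  double numeric = ((1.0 / (1.0 + std::exp(-(x + eps)))) -
                    (1.0 / (1.0 + std::exp(-(x - eps))))) / (2 * eps);
  assert(std::fabs(analytic - numeric) < 1e-6);    // central difference agrees
}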

torch/csrc/jit/graph_executor.cpp

Lines changed: 23 additions & 15 deletions

@@ -21,6 +21,7 @@
 #include "torch/csrc/jit/passes/loop_unrolling.h"
 #include "torch/csrc/jit/passes/lower_grad_of.h"
 #include "torch/csrc/jit/symbolic_variable.h"
+#include "torch/csrc/jit/ivalue.h"

 #include "torch/csrc/autograd/edge.h"
 #include "torch/csrc/autograd/function.h"
@@ -72,6 +73,16 @@ struct ExecutionPlanAutogradFunction : public autograd::Function {
 };


+// helper to run interpreter on variables until we switch
+// everything to IValue
+inline variable_tensor_list runOneStage(const Code & code, variable_tensor_list inputs) {
+  std::vector<IValue> stack(inputs.begin(), inputs.end());
+  InterpreterState(code).runOneStage(stack);
+  return variable_tensor_list(fmap(stack, [](IValue& v) {
+    return std::move(v).toTensor();
+  }));
+}
+
 // an optimized way of executing the subgraph computed directly on
 // tensors rather than Variables.
 // This will unwrap Variables, run the plan, and re-wrap them.
@@ -90,8 +101,7 @@
     if(grad) {
       return runWithGrad(std::move(stack));
     }
-    InterpreterState(f).runOneStage(stack);
-    return stack;
+    return runOneStage(f, std::move(stack));
   }
   std::shared_ptr<Graph> get_graph() const {
     return graph;
@@ -113,14 +123,15 @@
   }

 private:
-  // inplace to avoid allocations
-  variable_tensor_list unwrapVariables(variable_tensor_list && list) const {
-    for(auto & v : list) {
-      v = v.defined() ? autograd::as_variable_ref(v).detach() : at::Tensor();
-    }
-    return std::move(list);
+  // note: should be inplace to avoid allocations, but we have to switch from
+  // a list of tensor to a list of ivalues
+  std::vector<IValue> unwrapVariables(variable_tensor_list && list) const {
+    return fmap(list, [](const Variable& v) -> IValue {
+      return v.defined() ? autograd::as_variable_ref(v).detach() : at::Tensor();
+    });
   }
-  // inplace to avoid allocations
+  // note: should be inplace to avoid allocations, but we have to switch from
+  // a list of tensor to a list of ivalues
   variable_tensor_list wrapTensors(tensor_list && list) const {
     for(auto & v : list) {
       v = autograd::make_variable(v, /*requires_grad=*/false);
@@ -152,7 +163,8 @@

     auto stack = unwrapVariables(std::move(inputs));
     InterpreterState(f).runOneStage(stack);
-    variable_tensor_list outputs = std::move(stack);
+    variable_tensor_list outputs(
+        fmap(stack, [](IValue& v) { return std::move(v).toTensor(); }));

     // hookup the gradients for the output tensors that require gradients
     // to the inputs to our gradient function df
@@ -311,11 +323,7 @@

   variable_tensor_list runFallback(variable_tensor_list inputs) {
     auto & fb = getOrCreateAutogradFallback();
-    InterpreterState state(fb);
-    auto stack = std::move(inputs);
-    state.runOneStage(stack);
-    // note: we never unwrapped inputs, because we want autograd to record the trace
-    return stack;
+    return runOneStage(fb, std::move(inputs));
   }

   static bool calcMayIntroduceGradient(Block* b) {
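The runOneStage helper added above bridges the old tensor-list interface to the new IValue stack: wrap the inputs, run the interpreter, unwrap the outputs. A toy, self-contained rendering of that wrap/run/unwrap shape, using std::variant and doubles as hypothetical stand-ins for IValue and Tensor (all names here are invented for illustration):

#include <cassert>
#include <variant>
#include <vector>

using Value = std::variant<double, long>;     // stand-in for the tagged IValue
using Stack = std::vector<Value>;

void run_one_stage(Stack& stack) {            // stand-in interpreter: sums the stack
  double sum = 0;
  for (auto& v : stack) sum += std::get<double>(v);
  stack.assign(1, Value(sum));
}

std::vector<double> run_on_doubles(std::vector<double> inputs) {
  Stack stack(inputs.begin(), inputs.end());  // wrap raw values into tagged values
  run_one_stage(stack);                       // run the interpreter on the stack
  std::vector<double> out;
  for (auto& v : stack) out.push_back(std::get<double>(v));  // unwrap results
  return out;
}

int main() {
  auto out = run_on_doubles({1.0, 2.0, 3.0});
  assert(out.size() == 1 && out[0] == 6.0);
}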

torch/csrc/jit/interned_strings.h

Lines changed: 0 additions & 1 deletion

@@ -35,7 +35,6 @@ _(prim, PadPacked) /* onnx */ \
 _(prim, Placeholder) /* debug */ \
 _(prim, Print) \
 _(prim, PythonOp) \
-_(prim, ReplaceIfUndef) \
 _(prim, Reverse) \
 _(prim, Return) \
 _(prim, Store) \

torch/csrc/jit/interpreter.cpp

Lines changed: 7 additions & 5 deletions

@@ -9,6 +9,7 @@
 #include "torch/csrc/jit/graph_executor.h"
 #include "torch/csrc/jit/ir.h"
 #include "torch/csrc/jit/tensor_conversions.h"
+#include "torch/csrc/jit/ivalue.h"
 #include "torch/csrc/variable_tensor_functions.h"
 #include "torch/csrc/autograd/generated/variable_factories.h"

@@ -410,7 +411,7 @@ struct CodeImpl {
    JIT_ASSERT(inst.debug_name == prim::Placeholder);
    auto offset = relativeJump(from_inst, to_inst);
    inst.callback = [offset](Stack & stack) {
-      auto t = tensor_as<int64_t>(pop(stack));
+      auto t = tensor_as<int64_t>(pop(stack).toTensor());
      return (t == 0) ? offset : 0;
    };
    inst.debug_name = prim::JumpZ;
@@ -422,7 +423,7 @@ struct CodeImpl {
    JIT_ASSERT(inst.debug_name == prim::Placeholder);
    auto offset = relativeJump(from_inst, to_inst);
    inst.callback = [offset](Stack & stack) {
-      auto t = tensor_as<int64_t>(pop(stack));
+      auto t = tensor_as<int64_t>(pop(stack).toTensor());
      return (t != 0) ? offset : 0;
    };
    inst.debug_name = prim::JumpNZ;
@@ -629,7 +630,8 @@ struct CodeImpl {
    return [=](Stack& stack) mutable {
      autograd::profiler::RecordFunction record("GraphExecutor");
      auto inputs = last(stack, num_inputs);
-      variable_tensor_list tinputs(inputs.begin(), inputs.end());
+      variable_tensor_list tinputs(
+          fmap(inputs, [](const IValue& v) { return v.toTensor(); }));
      drop(stack, num_inputs);
      //TODO: has graph executor work from a stack as well
      variable_tensor_list toutputs = executor->run(variable_tensor_list(std::move(tinputs)));
@@ -774,7 +776,7 @@ struct InterpreterStateImpl {
  // in the case where it is true, then the interpreter and this array get copied
  // if this every becomes a bottleneck then we _should_ consider minimizing the
  // total number or register
-  std::vector<at::Tensor> registers;
+  std::vector<IValue> registers;

  // single buffer for input/output calls to ATen functions, so that we do not reallocate
  Stack stack;
@@ -799,7 +801,7 @@
 InterpreterState::~InterpreterState() {}

 void InterpreterState::runOneStage(Stack & stack) {
-  return pImpl->runOneStage(stack);
+  return pImpl->runOneStage(stack);
 }

 const TensorType & InterpreterState::tensorTypeForInput(size_t i) const {
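The JumpZ/JumpNZ callbacks keep their old shape; the only change is that the condition is first unwrapped from an IValue. A minimal stand-alone sketch of that jump-callback pattern, with MiniStack, make_jump_z, and pop_int as hypothetical stand-ins (the real callback pops an IValue and goes through tensor_as<int64_t> as shown above):

// A callback inspects the popped condition and returns a relative jump offset;
// 0 means fall through to the next instruction.
#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

using MiniStack = std::vector<int64_t>;
using Callback = std::function<int64_t(MiniStack&)>;

int64_t pop_int(MiniStack& stack) {
  int64_t v = stack.back();
  stack.pop_back();
  return v;
}

// Build a JumpZ-style callback: jump by `offset` when the popped value is zero.
Callback make_jump_z(int64_t offset) {
  return [offset](MiniStack& stack) {
    int64_t t = pop_int(stack);   // real interpreter: tensor_as<int64_t>(pop(stack).toTensor())
    return (t == 0) ? offset : 0;
  };
}

int main() {
  MiniStack stack{0};
  auto jump = make_jump_z(5);
  assert(jump(stack) == 5);       // condition was zero, take the jump
  stack.push_back(1);
  assert(jump(stack) == 0);       // non-zero, fall through
}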

torch/csrc/jit/interpreter.h

Lines changed: 3 additions & 1 deletion

@@ -19,6 +19,8 @@ struct InterpreterStateImpl;
 struct Graph;
 struct Node;
 struct TensorType;
+struct IValue;
+using Stack = std::vector<IValue>;

 struct Code {
   Code()
@@ -44,7 +46,7 @@ struct InterpreterState {
   // advance the interpreter state by running one stage. Returning the
   // outputs for that stage, suspending the computation.
   // Call this function again continues computation where it left off.
-  void runOneStage(std::vector<at::Tensor> & stack);
+  void runOneStage(Stack & stack);
   const TensorType & tensorTypeForInput(size_t i) const;
   ~InterpreterState();
   // create a copy of InterpreterState with its current state
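A hedged usage sketch of the new Stack-based entry point declared above: Code, InterpreterState, and Stack come from interpreter.h, and IValue from ivalue.h; the function name, the two-tensor calling convention, and the single-output assumption are illustrative only, not part of the PR.

#include <ATen/ATen.h>
#include "torch/csrc/jit/interpreter.h"
#include "torch/csrc/jit/ivalue.h"

namespace torch { namespace jit {

at::Tensor run_one_output(const Code& code, at::Tensor a, at::Tensor b) {
  Stack stack;                        // Stack is std::vector<IValue> after this PR
  stack.emplace_back(std::move(a));   // tensors are wrapped into IValues
  stack.emplace_back(std::move(b));
  InterpreterState(code).runOneStage(stack);
  return stack.back().toTensor();     // unwrap the (assumed single) output
}

}} // namespace torch::jit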
