
Commit 6d6e80e

Merge pull request #158 from opti-mix/tensor_view2
Implement a new tensorview instruction
2 parents 141ad48 + 34dd961 commit 6d6e80e

12 files changed: 161 additions, 30 deletions

include/glow/IR/IRBuilder.h

Lines changed: 4 additions & 0 deletions
@@ -59,6 +59,10 @@ class IRBuilder {
 
   ReshapeInst *createReshapeOp(Value *input, llvm::ArrayRef<size_t> shape);
 
+  TensorViewInst *createTensorView(ElemKind elemKind,
+                                   llvm::ArrayRef<size_t> dims, Value *src,
+                                   llvm::StringRef name);
+
   TransposeInst *createTransposeOp(Value *input,
                                    llvm::ArrayRef<unsigned> shuffle);

src/glow/Backends/Interpreter/Interpreter.cpp

Lines changed: 19 additions & 0 deletions
@@ -10,6 +10,7 @@
 #include "llvm/Support/Casting.h"
 
 using namespace glow;
+using llvm::isa;
 
 Interpreter::~Interpreter() { clear(); }
 
@@ -70,6 +71,24 @@ Tensor *Interpreter::getOrCreateTensor(const Value *v) {
   return it->second;
 }
 
+Tensor *Interpreter::getOrCreateUnownedTensor(const Value *v,
+                                              const Value *src) {
+  assert(isa<TensorViewInst>(v) && "Expected a tensor view");
+
+  // Pick the tensor.
+  auto it = tensors_.find(v);
+
+  // Release unowned tensors before re-creating them.
+  if (it != tensors_.end()) {
+    deleteTensor(v);
+  }
+
+  auto *T = new Tensor();
+  *T = getTensor(src)->getUnowned(v->dims());
+  tensors_[v] = T;
+  return T;
+}
+
 void Interpreter::deleteTensor(const Value *v) {
   auto it = tensors_.find(v);
   if (it == tensors_.end()) {
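Note: the view returned by getOrCreateUnownedTensor shares the storage of its source tensor instead of copying it, so writes through the view are visible through the source. A minimal standalone sketch of that aliasing idea, assuming a simplified stand-in class (this is not Glow's Tensor):

// Hypothetical sketch of an "unowned" tensor view; not Glow's Tensor class.
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

struct SimpleTensor {
  float *data = nullptr;       // element storage this tensor reads/writes
  std::vector<size_t> dims;    // shape of the tensor or of the view
  std::vector<float> storage;  // non-empty only when the tensor owns memory

  static size_t numElements(const std::vector<size_t> &d) {
    return std::accumulate(d.begin(), d.end(), size_t(1),
                           std::multiplies<size_t>());
  }

  SimpleTensor() = default;

  // Owning constructor: allocates zero-initialized storage.
  explicit SimpleTensor(std::vector<size_t> d)
      : dims(std::move(d)), storage(numElements(dims), 0.0f) {
    data = storage.data();
  }

  // A view with a different shape over the same elements: no copy is made.
  SimpleTensor getUnowned(std::vector<size_t> newDims) const {
    assert(numElements(newDims) == numElements(dims) &&
           "A view must cover exactly the elements of its source");
    SimpleTensor view;
    view.dims = std::move(newDims);
    view.data = data;  // alias the source storage
    return view;
  }
};

int main() {
  SimpleTensor src({2, 3, 4});                 // 24 owned elements
  SimpleTensor view = src.getUnowned({6, 4});  // 24 elements, no copy
  view.data[0] = 1.0f;                         // a write through the view...
  std::printf("%f\n", src.data[0]);            // ...is visible in the source
}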

src/glow/Backends/Interpreter/Interpreter.h

Lines changed: 5 additions & 0 deletions
@@ -61,6 +61,11 @@ class Interpreter final : public Backend {
   /// \returns a tensor for \p v.
   Tensor *getOrCreateTensor(const Value *v);
 
+  /// Allocate an unowned tensor to back the value \p v. The source tensor of
+  /// the unowned tensor is provided by \p src.
+  /// \returns a tensor for \p v.
+  Tensor *getOrCreateUnownedTensor(const Value *v, const Value *src);
+
   /// If a tensor is allocated for \p v then delete it.
   void deleteTensor(const Value *v);

src/glow/Backends/Interpreter/InterpreterNodes.cpp

Lines changed: 31 additions & 18 deletions
@@ -384,23 +384,27 @@ void Interpreter::fwdFullyConnectedInst(bool isTrain,
   auto inW = getWeightHandle(I->getSrc());
   auto outW = getWeightHandle(I->getDest());
 
-  auto odim = flattenCdr(outW.dims());
-  auto idim = flattenCdr(inW.dims());
-  assert(odim.first == idim.first && "Mismatch batch size");
+  // outW and inW are 2-dimensional.
+  // Dimensions are depth and width.
+  auto OutWidth = outW.dims()[0];
+  auto OutDepth = outW.dims()[1];
+  auto InDepth = inW.dims()[0];
+  auto InWidth = inW.dims()[1];
+
+  assert(OutWidth == InDepth && "Mismatch batch size");
 
   auto filterW = getWeightHandle(I->getFilter());
   auto biasW = getWeightHandle(I->getBias());
 
-  size_t inputSize = idim.second;
+  size_t inputSize = InWidth;
 
-  for (size_t n = 0; n < odim.first; n++) {
-    size_t base = inW.getElementPtr({n});
+  for (size_t n = 0; n < OutWidth; n++) {
 
-    for (size_t i = 0; i < odim.second; i++) {
+    for (size_t i = 0; i < OutDepth; i++) {
 
       float sum = 0;
       for (size_t j = 0; j < inputSize; j++) {
-        sum += inW.raw(base + j) * filterW.at({i, j});
+        sum += inW.at({n, j}) * filterW.at({i, j});
       }
 
       sum += biasW.at({i});

@@ -415,8 +419,14 @@ void Interpreter::fwdFullyConnectedGradInst(bool isTrain,
   auto inG = getWeightHandle(I->getSrcGrad());
   auto outG = getWeightHandle(I->getDestGrad());
 
-  auto odim = flattenCdr(outG.dims());
-  auto idim = flattenCdr(inW.dims());
+  assert(inW.dims().size() == 2);
+  assert(inG.dims().size() == 2);
+
+  // outG and inW are 2-dimensional.
+  // Dimensions are depth and width.
+  auto OutWidth = outG.dims()[0];
+  auto OutDepth = outG.dims()[1];
+  auto InWidth = inW.dims()[1];
 
   auto filterW = getWeightHandle(I->getFilter());
   auto filterG = getWeightHandle(I->getFilterGrad());

@@ -426,20 +436,19 @@ void Interpreter::fwdFullyConnectedGradInst(bool isTrain,
   filterG.clear();
   inG.clear();
 
-  size_t inSize = idim.second;
+  size_t inSize = InWidth;
 
-  for (size_t n = 0; n < odim.first; n++) {
-    size_t base = inW.getElementPtr({n});
+  for (size_t n = 0; n < OutWidth; n++) {
 
     // Compute the gradient:
-    for (size_t i = 0; i < odim.second; i++) {
+    for (size_t i = 0; i < OutDepth; i++) {
       float chainGrad = outG.at({n, i});
 
       for (size_t j = 0, e = inSize; j < e; j++) {
         // Input gradient:
-        inG.raw(base + j) += filterW.at({i, j}) * chainGrad;
+        inG.at({n, j}) += filterW.at({i, j}) * chainGrad;
         // Param gradient:
-        filterG.at({i, j}) += inW.raw(base + j) * chainGrad;
+        filterG.at({i, j}) += inW.at({n, j}) * chainGrad;
       }
 
       biasG.at({i}) += chainGrad;

@@ -586,6 +595,10 @@ void Interpreter::fwdReshapeInst(bool isTrain, const ReshapeInst *I) {
   outT->copyRawFrom(inT);
 }
 
+void Interpreter::fwdTensorViewInst(bool isTrain, const TensorViewInst *I) {
+  getOrCreateUnownedTensor(I, I->getSrc());
+}
+
 void Interpreter::fwdZeroInst(bool isTrain, const glow::ZeroInst *I) {
   auto *T = getTensor(I->getDest());
   T->zero();

@@ -1197,10 +1210,10 @@ void Interpreter::fwdDeallocActivationInst(bool isTrain,
 /// tensor.
 void Interpreter::fwdDebugPrintInst(bool isTrain, const DebugPrintInst *I) {
   auto *V = I->getSrc();
+  llvm::outs() << I->getName() << ": ";
   // Dump the content of a value.
   V->dump();
   llvm::outs() << "\n";
-  auto WH = getWeightHandle(V);
-  WH.dump();
+  dumpImpl(getTensor(V));
   llvm::outs() << "\n";
 }
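Note: the rewritten forward loop assumes a 2-D (batch x width) input and indexes it directly instead of computing raw element offsets. A standalone sketch of the same computation on plain arrays (not Glow's Handle API; the shapes and values here are made up for illustration):

// Standalone sketch of a 2-D fully connected forward pass (not Glow code).
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const size_t N = 2, inWidth = 3, outDepth = 4;
  std::vector<float> in(N * inWidth, 1.0f);             // in[n * inWidth + j]
  std::vector<float> filter(outDepth * inWidth, 0.5f);  // filter[i * inWidth + j]
  std::vector<float> bias(outDepth, 0.25f);
  std::vector<float> out(N * outDepth, 0.0f);           // out[n * outDepth + i]

  for (size_t n = 0; n < N; n++) {
    for (size_t i = 0; i < outDepth; i++) {
      float sum = 0;
      for (size_t j = 0; j < inWidth; j++) {
        // Same indexing as inW.at({n, j}) * filterW.at({i, j}) in the diff.
        sum += in[n * inWidth + j] * filter[i * inWidth + j];
      }
      out[n * outDepth + i] = sum + bias[i];
    }
  }
  std::printf("out[0][0] = %f\n", out[0]);  // 3 * (1.0 * 0.5) + 0.25 = 1.75
}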

src/glow/Backends/OpenCL/OpenCL.cpp

Lines changed: 13 additions & 0 deletions
@@ -365,6 +365,13 @@ void OCLBackend::doForwardPass(bool isTrain) {
       continue;
     }
 
+    if (auto *TV = dyn_cast<TensorViewInst>(I)) {
+      assert(tensors_[TV] == tensors_[TV->getSrc()] &&
+             "Memory address for a tensor_view should be the same as the "
+             "address of its origin");
+      continue;
+    }
+
     if (isa<CopyInst>(I) || isa<ReshapeInst>(I)) {
       auto *dest = I->getOperand(0).first;
       auto *src = I->getOperand(1).first;

@@ -457,6 +464,12 @@ void OCLBackend::init() {
       continue;
     }
 
+    if (auto *TV = llvm::dyn_cast<TensorViewInst>(I)) {
+      assert(!tensors_.count(TV) && "Allocation already made!");
+      tensors_[TV] = tensors_[TV->getSrc()];
+      continue;
+    }
+
     if (auto *D = llvm::dyn_cast<DeallocActivationInst>(I)) {
       auto *A = D->getAlloc();
       assert(tensors_.count(A) && "Invalid deallocation!");

src/glow/IR/IR.cpp

Lines changed: 2 additions & 1 deletion
@@ -394,7 +394,8 @@ static void nameInstr(std::unordered_set<std::string> &usedNames, Named *named,
 Module::Module(Graph *G) : G_(G), name_(G->getName()) {}
 
 static bool hasResultValue(Instruction *I) {
-  return I->getKind() == Instruction::Kind::AllocActivationInstKind;
+  return I->getKind() == Instruction::Kind::AllocActivationInstKind ||
+         I->getKind() == Instruction::Kind::TensorViewInstKind;
 }
 
 void Module::nameInstructions() {

src/glow/IR/IRBuilder.cpp

Lines changed: 22 additions & 1 deletion
@@ -101,12 +101,20 @@ FullyConnectedInst *IRBuilder::createFullyConnectedOp(Value *input,
                                                       Value *bias,
                                                       size_t outDepth) {
   TypeRef T = input->getType();
+
   auto idim = flattenCdr(input->dims());
 
+  Value *inputview = input;
+  // Create a tensor view only if the dimensionality needs to be changed.
+  if (input->dims().size() != 2)
+    inputview =
+        createTensorView(input->getElementType(), {idim.first, idim.second},
+                         input, "fctensorview");
   auto *dest = createAllocActivationInst("fcres", T->getElementType(),
                                          {idim.first, outDepth});
 
-  return createFullyConnectedInst("fc", dest, input, filter, bias, outDepth);
+  return createFullyConnectedInst("fc", dest, inputview, filter, bias,
+                                  outDepth);
 }
 
 ReluInst *IRBuilder::createRELUOp(Value *input) {

@@ -137,6 +145,19 @@ ReshapeInst *IRBuilder::createReshapeOp(Value *input,
   return createReshapeInst("reshape", res, input, shape);
 }
 
+/// Creates a tensorview instruction with the following parameters:
+/// \param elemKind the type of elements in a tensor
+/// \param dims dimensions of the view, such that the number of elements
+/// in the view is the same as the number of elements in the source tensor
+/// \p src
+/// \param src the source tensor used to create the unowned tensor.
+TensorViewInst *IRBuilder::createTensorView(ElemKind elemKind,
+                                            llvm::ArrayRef<size_t> dims,
+                                            Value *src, llvm::StringRef name) {
+  auto ty = getModule().getGraph()->uniqueType(Type(elemKind, dims));
+  return createTensorViewInst(name, src, ty);
+}
+
 TransposeInst *IRBuilder::createTransposeOp(Value *input,
                                             llvm::ArrayRef<unsigned> shuffle) {
   llvm::SmallVector<size_t, 6> shape;
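Note: createFullyConnectedOp now flattens any input with more than two dimensions into a {batch, rest} tensor view before emitting the instruction. A standalone sketch of that shape computation (flattenCdr here is a hypothetical re-implementation for illustration, not the Glow helper itself):

// Sketch of computing the 2-D view shape used by createFullyConnectedOp.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Keep the first dimension, multiply the remaining ones together.
static std::pair<size_t, size_t> flattenCdr(const std::vector<size_t> &dims) {
  size_t rest = 1;
  for (size_t i = 1; i < dims.size(); i++)
    rest *= dims[i];
  return {dims[0], rest};
}

int main() {
  std::vector<size_t> dims = {8, 12, 12, 64};  // e.g. a conv activation
  auto idim = flattenCdr(dims);
  // A tensor_view with shape {idim.first, idim.second} covers exactly the
  // same number of elements, so no data is copied when it is created.
  std::printf("view shape: {%zu, %zu}\n", idim.first, idim.second);  // {8, 9216}
}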

src/glow/IR/IRGen.cpp

Lines changed: 30 additions & 5 deletions
@@ -70,7 +70,10 @@ struct IRGenVisitor : NodeWalker {
     }
     assert(!generatedNodeDest_.count(N) &&
            "Already generated code for this node");
-    assert(isa<AllocActivationInst>(v) && "The value must be an activation");
+    auto *dest = v;
+    if (auto *zn = dyn_cast<ZeroInst>(v))
+      dest = zn->getDest();
+    assert(isa<AllocActivationInst>(dest) && "The value must be an activation");
     generatedNodeDest_[N] = v;
   }
 
@@ -180,19 +183,41 @@ struct IRGenVisitor : NodeWalker {
 
   case glow::Kinded::Kind::FullyConnectedGradNodeKind: {
     auto *FCG = cast<FullyConnectedGradNode>(N);
-    auto *inW = valueForNode(FCG->getInput());
+    auto *InW = valueForNode(FCG->getInput());
+    // FullyConnected works with tensor views, so create them.
+    auto *InWview = InW;
+    // Create a tensor view only if the dimensionality needs to be changed.
+    if (InWview->dims().size() != 2) {
+      auto idim = flattenCdr(InW->dims());
+      InWview = builder_.createTensorView(InWview->getElementType(),
+                                          {idim.first, idim.second}, InW,
+                                          "inWtensorview");
+    }
     auto *filterW = valueForNode(FCG->getFilter());
     auto *outW = valueForNode(FCG->getGradOfOriginalOutputNamedOutput());
     auto biasX = FCG->getBias();
 
-    auto *InG = builder_.createAllocActivationInst("inG", inW->getType());
+    Value *InG = builder_.createAllocActivationInst("inG", InW->getType());
+    // Create a tensor view for the @out parameter G.
+    Value *InGview = InG;
+    if (InGview->dims().size() != 2) {
+      auto idim = flattenCdr(InG->dims());
+      // tensorview is the first use of InG and takes inG an @in parameter.
+      // But InG is not initialized yet to be used as an @in parameter and
+      // the IR verifier would complain about it. Therefore, we initialize
+      // InG as zero to make the verifier happy.
+      builder_.createZeroInst("zero", InG);
+      InGview = builder_.createTensorView(InGview->getElementType(),
+                                          {idim.first, idim.second}, InG,
+                                          "inGtensorview");
+    }
     auto *FilterG =
         builder_.createAllocActivationInst("filterG", filterW->getType());
     auto *BiasG =
        builder_.createAllocActivationInst("biasG", biasX.getType());
 
-    builder_.createFullyConnectedGradInst(N->getName(), inW, filterW, outW,
-                                          InG, FilterG, BiasG,
+    builder_.createFullyConnectedGradInst(N->getName(), InWview, filterW,
+                                          outW, InGview, FilterG, BiasG,
                                           FCG->getDepth());
 
     registerIR(FCG->getGradOfInputNamedInput(), InG);

src/glow/IR/Instrs.cpp

Lines changed: 12 additions & 0 deletions
@@ -134,6 +134,11 @@ void FullyConnectedInst::verify() const {
   llvm::ArrayRef<size_t> expB = {Depth_};
   assert(B->dims() == expB && "Invalid output shape");
   (void)expB;
+
+  assert(src->dims().size() == 2 &&
+         "Src of a FullyConnectedInst should be 2-dimensional");
+  assert(dest->dims().size() == 2 &&
+         "Dest of a FullyConnectedInst should be 2-dimensional");
 }
 
 void BatchedMatMulInst::verify() const {

@@ -178,6 +183,13 @@ void ReshapeInst::verify() const {
          "Reshape into a different size");
 }
 
+void TensorViewInst::verify() const {
+  assert(getOperand(0).first->getType()->size() == getType()->size() &&
+         "TensorView view size should be the same as Src size");
+  assert(getOperand(0).first->getElementType() == getType()->getElementType() &&
+         "TensorView view element type should be the same as Src size");
+}
+
 void TransposeInst::verify() const {
   auto *dest = getOperand(0).first;
   auto *src = getOperand(1).first;
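Note: TensorViewInst::verify only requires the view and its source to have the same element count and element type; the shapes themselves may differ. A standalone illustration of that invariant, using hypothetical stand-in types rather than Glow's Type class:

// Standalone illustration of the invariant TensorViewInst::verify enforces.
#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

enum class ElemKind { FloatTy, Int8Ty };  // hypothetical stand-in

struct SimpleType {
  ElemKind elemKind;
  std::vector<size_t> dims;
  size_t size() const {
    return std::accumulate(dims.begin(), dims.end(), size_t(1),
                           std::multiplies<size_t>());
  }
};

int main() {
  SimpleType src{ElemKind::FloatTy, {1, 24, 3, 24}};
  SimpleType view{ElemKind::FloatTy, {1, 1728}};
  // The shapes differ, but the element count and element type match, so a
  // view like this would pass verification.
  assert(view.size() == src.size());
  assert(view.elemKind == src.elemKind);
}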

src/glow/Optimizer/IROptimizer.cpp

Lines changed: 16 additions & 3 deletions
@@ -69,7 +69,7 @@ static void calculateLiveness(Module &M, LivenessMap &liveness) {
 /// Hoists Dealloc instructions right after their last use.
 static void hoistDealloc(Module &M) {
   // Maps activation instructions to their last non-dealloc user.
-  std::unordered_map<AllocActivationInst *, InstrIterator> lastUser;
+  std::unordered_map<Value *, InstrIterator> lastUser;
   auto &instrs = M.getInstrs();
 
   // Record the last use of each dealloc.

@@ -82,6 +82,14 @@ static void hoistDealloc(Module &M) {
       if (auto alloc = dyn_cast<AllocActivationInst>(op)) {
        lastUser[alloc] = it;
       }
+
+      if (auto tensorView = dyn_cast<TensorViewInst>(op)) {
+        // Consider any use of a tensor_view to be also a use
+        // of its source tensor. This is required to make
+        // sure that a lifetime of a tensor_view is always
+        // enclosed inside the lifetime of its source tensor.
+        lastUser[tensorView->getSrc()] = it;
+      }
     }
   }
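Note: the comment in this hunk explains why a use of a tensor_view is also recorded as a use of its source: the dealloc for the source must not be hoisted above the last use of any view into it. A small standalone sketch of that last-user bookkeeping (the instruction records below are made up for illustration, not Glow's Module or InstrIterator types):

// Hypothetical sketch of the "last user" bookkeeping in hoistDealloc.
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct FakeInstr {
  std::string name;     // name of the instruction
  std::string usesBuf;  // buffer (or view) this instruction reads/writes
  std::string viewOf;   // non-empty if usesBuf is a view of another buffer
};

int main() {
  std::vector<FakeInstr> instrs = {
      {"alloc", "A", ""},
      {"view", "A.view", "A"},
      {"conv", "A", ""},
      {"fc", "A.view", "A"},  // last use of A happens through the view
  };

  // Index of the last instruction that used each buffer.
  std::unordered_map<std::string, size_t> lastUser;
  for (size_t i = 0; i < instrs.size(); i++) {
    lastUser[instrs[i].usesBuf] = i;
    // Any use of a view also counts as a use of its source buffer, so the
    // dealloc of the source stays below the view's last use.
    if (!instrs[i].viewOf.empty())
      lastUser[instrs[i].viewOf] = i;
  }

  // Without the view rule lastUser["A"] would be 2 (conv); with it, 3 (fc).
  std::printf("dealloc of A goes after instruction %zu\n", lastUser["A"]);
}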

@@ -884,19 +892,24 @@ static void performDebugInstrumentation(Module &M) {
       it = next;
       continue;
     }
+    auto instrName = (*it)->getName();
     for (auto const &Op : (*it)->getOperands()) {
       // Dump inputs of the current instruction before the instruction.
       if (Op.second != OperandKind::Out) {
-        std::string name = "print_input_";
+        std::string name = "debug_print.before.";
         name += Op.first->getName();
+        name += ".";
+        name += instrName;
         auto *dumpInstr = new DebugPrintInst(&M, name, Op.first);
         M.insertInstruction(it, dumpInstr);
       }
 
       // Dump outputs of the current instruction after the instruction.
       if (Op.second != OperandKind::In) {
-        std::string name = "print_output_";
+        std::string name = "debug_print.after.";
         name += Op.first->getName();
+        name += ".";
+        name += instrName;
         auto *dumpInstr = new DebugPrintInst(&M, name, Op.first);
         M.insertInstruction(next, dumpInstr);
       }

tests/unittests/basicIRTest.cpp

Lines changed: 2 additions & 2 deletions
@@ -85,7 +85,6 @@ TEST(IR, allInstrs) {
 
   auto *I3 = builder.createWeightVar(ElemKind::FloatTy, {1, 12, 12, 64});
   auto *I4 = builder.createWeightVar(ElemKind::FloatTy, {1, 12, 12, 3});
-  auto *I5 = builder.createWeightVar(ElemKind::FloatTy, {1, 32});
   auto *I6 = builder.createWeightVar(ElemKind::FloatTy, {2, 12, 12, 64});
   auto *I7 = builder.createWeightVar(T1, "I7");
   auto *I8 = builder.createWeightVar(ElemKind::FloatTy, {1, 24, 3, 24}, "I8");

@@ -109,12 +108,13 @@ TEST(IR, allInstrs) {
   builder.createCopyInst("", I1, I0);
   builder.createConvolutionInst("", I3, I1, F0, B0, 7, 2, 3, 64);
   builder.createPoolMaxInst("", I4, I0, XY, 7, 2, 3);
-  builder.createFullyConnectedInst("", I5, I0, F1, B1, 32);
+  builder.createFullyConnectedOp(I0, F1, B1, 32);
   builder.createReluInst("", I1, I0);
   builder.createSigmoidInst("", I1, I0);
   builder.createTanhInst("", I1, I0);
   builder.createSoftMaxInst("", I1, I0, I7, E0);
   builder.createTransposeInst("", I8, I2, {0, 3, 1, 2});
+  builder.createTensorView(ElemKind::FloatTy, {1, 24, 3, 24}, I2, "I2_view");
   builder.createInsertTensorInst("", I6, I3, {0, 0, 0, 0});
   builder.createBatchNormalizationInst("", I1, I0, S0, S0, S0, S0, 3, 0.01,
                                        0.9);
