From ae1c13e2f36110af8304562d1c69000dcf7ff7cf Mon Sep 17 00:00:00 2001
From: Roman Levenstein
Date: Mon, 18 Dec 2017 21:14:49 -0800
Subject: [PATCH 1/2] Fix debug printing

---
 src/glow/Backends/Interpreter/InterpreterNodes.cpp | 4 ++--
 src/glow/Optimizer/IROptimizer.cpp                 | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/glow/Backends/Interpreter/InterpreterNodes.cpp b/src/glow/Backends/Interpreter/InterpreterNodes.cpp
index 22c8757c6e..6050e21f8f 100644
--- a/src/glow/Backends/Interpreter/InterpreterNodes.cpp
+++ b/src/glow/Backends/Interpreter/InterpreterNodes.cpp
@@ -1197,10 +1197,10 @@ void Interpreter::fwdDeallocActivationInst(bool isTrain,
 /// tensor.
 void Interpreter::fwdDebugPrintInst(bool isTrain, const DebugPrintInst *I) {
   auto *V = I->getSrc();
+  llvm::outs() << I->getName() << ": ";
   // Dump the content of a value.
   V->dump();
   llvm::outs() << "\n";
-  auto WH = getWeightHandle(V);
-  WH.dump();
+  dumpImpl(getTensor(V));
   llvm::outs() << "\n";
 }

diff --git a/src/glow/Optimizer/IROptimizer.cpp b/src/glow/Optimizer/IROptimizer.cpp
index da7f960609..ab91c84f36 100644
--- a/src/glow/Optimizer/IROptimizer.cpp
+++ b/src/glow/Optimizer/IROptimizer.cpp
@@ -884,19 +884,24 @@ static void performDebugInstrumentation(Module &M) {
       it = next;
       continue;
     }
+    auto instrName = (*it)->getName();
     for (auto const &Op : (*it)->getOperands()) {
       // Dump inputs of the current instruction before the instruction.
       if (Op.second != OperandKind::Out) {
-        std::string name = "print_input_";
+        std::string name = "debug_print.before.";
         name += Op.first->getName();
+        name += ".";
+        name += instrName;
         auto *dumpInstr = new DebugPrintInst(&M, name, Op.first);
         M.insertInstruction(it, dumpInstr);
       }

       // Dump outputs of the current instruction after the instruction.
       if (Op.second != OperandKind::In) {
-        std::string name = "print_output_";
+        std::string name = "debug_print.after.";
         name += Op.first->getName();
+        name += ".";
+        name += instrName;
         auto *dumpInstr = new DebugPrintInst(&M, name, Op.first);
         M.insertInstruction(next, dumpInstr);
       }

From 34dd9614edfe74658ed4369723d180f81bbc1e2e Mon Sep 17 00:00:00 2001
From: Roman Levenstein
Date: Mon, 18 Dec 2017 21:18:46 -0800
Subject: [PATCH 2/2] Implement a new tensorview instruction

This instruction is used to create an unowned tensor with a required
dimensionality from an existing tensor. The result is just a view of the
existing tensor and does not allocate any new memory. A tensorview is
essentially a typecast applied to a tensor.
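Conceptually, a tensor view behaves like the sketch below: a second shape descriptor over the same storage. This is a minimal stand-alone model for illustration only; OwnedTensor, TensorView and makeView are hypothetical names, not Glow classes.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

struct OwnedTensor {
  std::vector<float> data;   // owns the element storage
  std::vector<size_t> dims;
};

struct TensorView {
  float *data;               // aliases OwnedTensor::data, owns nothing
  std::vector<size_t> dims;  // the new shape
};

// A view is a "typecast": same elements, different dimensionality.
TensorView makeView(OwnedTensor &src, std::vector<size_t> dims) {
  size_t n = 1;
  for (size_t d : dims)
    n *= d;
  assert(n == src.data.size() && "a view must not change the element count");
  return TensorView{src.data.data(), std::move(dims)};
}

int main() {
  OwnedTensor t{std::vector<float>(2 * 3 * 4), {2, 3, 4}};
  TensorView v = makeView(t, {2, 12}); // no copy, no new element storage
  v.data[0] = 1.0f;
  assert(t.data[0] == 1.0f);           // writes through the view alias t
  return 0;
}
```

The element-count check in makeView corresponds to the TensorViewInst::verify assertions added by this patch.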
---
 include/glow/IR/IRBuilder.h                        |  4 ++
 src/glow/Backends/Interpreter/Interpreter.cpp      | 19 ++++++++
 src/glow/Backends/Interpreter/Interpreter.h        |  5 +++
 .../Backends/Interpreter/InterpreterNodes.cpp      | 45 ++++++++++++-------
 src/glow/Backends/OpenCL/OpenCL.cpp                | 13 ++++++
 src/glow/IR/IR.cpp                                 |  3 +-
 src/glow/IR/IRBuilder.cpp                          | 23 +++++++++-
 src/glow/IR/IRGen.cpp                              | 35 ++++++++++++---
 src/glow/IR/Instrs.cpp                             | 12 +++++
 src/glow/Optimizer/IROptimizer.cpp                 | 10 ++++-
 tests/unittests/basicIRTest.cpp                    |  4 +-
 tools/ClassGen/InstrGen.cpp                        |  5 +++
 12 files changed, 152 insertions(+), 26 deletions(-)

diff --git a/include/glow/IR/IRBuilder.h b/include/glow/IR/IRBuilder.h
index b56ce6e183..cffb69f406 100644
--- a/include/glow/IR/IRBuilder.h
+++ b/include/glow/IR/IRBuilder.h
@@ -59,6 +59,10 @@ class IRBuilder {
   ReshapeInst *createReshapeOp(Value *input, llvm::ArrayRef<size_t> shape);

+  TensorViewInst *createTensorView(ElemKind elemKind,
+                                   llvm::ArrayRef<size_t> dims, Value *src,
+                                   llvm::StringRef name);
+
   TransposeInst *createTransposeOp(Value *input,
                                    llvm::ArrayRef<unsigned> shuffle);

diff --git a/src/glow/Backends/Interpreter/Interpreter.cpp b/src/glow/Backends/Interpreter/Interpreter.cpp
index 882ccb79de..ca395f0faa 100644
--- a/src/glow/Backends/Interpreter/Interpreter.cpp
+++ b/src/glow/Backends/Interpreter/Interpreter.cpp
@@ -10,6 +10,7 @@
 #include "llvm/Support/Casting.h"

 using namespace glow;
+using llvm::isa;

 Interpreter::~Interpreter() { clear(); }

@@ -70,6 +71,24 @@ Tensor *Interpreter::getOrCreateTensor(const Value *v) {
   return it->second;
 }

+Tensor *Interpreter::getOrCreateUnownedTensor(const Value *v,
+                                              const Value *src) {
+  assert(isa<TensorViewInst>(v) && "Expected a tensor view");
+
+  // Pick the tensor.
+  auto it = tensors_.find(v);
+
+  // Release unowned tensors before re-creating them.
+  if (it != tensors_.end()) {
+    deleteTensor(v);
+  }
+
+  auto *T = new Tensor();
+  *T = getTensor(src)->getUnowned(v->dims());
+  tensors_[v] = T;
+  return T;
+}
+
 void Interpreter::deleteTensor(const Value *v) {
   auto it = tensors_.find(v);
   if (it == tensors_.end()) {

diff --git a/src/glow/Backends/Interpreter/Interpreter.h b/src/glow/Backends/Interpreter/Interpreter.h
index 24df2dcfd8..55c04c1587 100644
--- a/src/glow/Backends/Interpreter/Interpreter.h
+++ b/src/glow/Backends/Interpreter/Interpreter.h
@@ -61,6 +61,11 @@ class Interpreter final : public Backend {
   /// \returns a tensor for \p v.
   Tensor *getOrCreateTensor(const Value *v);

+  /// Allocate an unowned tensor to back the value \p v. The source tensor of
+  /// the unowned tensor is provided by \p src.
+  /// \returns a tensor for \p v.
+  Tensor *getOrCreateUnownedTensor(const Value *v, const Value *src);
+
   /// If a tensor is allocated for \p v then delete it.
   void deleteTensor(const Value *v);
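The delete-then-recreate step in getOrCreateUnownedTensor matters because the interpreter may execute the same instruction repeatedly and the source tensor can be re-created between runs. A rough stand-alone model of this bookkeeping; Tensor, tensors and getOrCreateUnowned here are stand-ins, not the real Interpreter members:

```cpp
#include <map>
#include <memory>

struct Tensor {
  float *alias = nullptr;            // unowned case: points into another tensor
  std::unique_ptr<float[]> storage;  // owned case: holds the elements
};

std::map<const void *, std::unique_ptr<Tensor>> tensors;

Tensor *getOrCreateUnowned(const void *view, const void *src) {
  // Drop a stale view first: the map owns the wrapper object, but the wrapper
  // does not own the elements, so erasing it never frees the source storage.
  tensors.erase(view);
  auto t = std::make_unique<Tensor>();
  t->alias = tensors.at(src)->storage.get();
  Tensor *raw = t.get();
  tensors[view] = std::move(t);
  return raw;
}

int main() {
  int srcKey, viewKey; // addresses stand in for IR Value* keys
  auto src = std::make_unique<Tensor>();
  src->storage = std::make_unique<float[]>(8);
  tensors[&srcKey] = std::move(src);
  Tensor *v = getOrCreateUnowned(&viewKey, &srcKey); // safe to call repeatedly
  v->alias[0] = 1.0f; // the write is visible through the source's storage
  return 0;
}
```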
diff --git a/src/glow/Backends/Interpreter/InterpreterNodes.cpp b/src/glow/Backends/Interpreter/InterpreterNodes.cpp
index 6050e21f8f..fe65f7b5d2 100644
--- a/src/glow/Backends/Interpreter/InterpreterNodes.cpp
+++ b/src/glow/Backends/Interpreter/InterpreterNodes.cpp
@@ -384,23 +384,27 @@ void Interpreter::fwdFullyConnectedInst(bool isTrain,
   auto inW = getWeightHandle(I->getSrc());
   auto outW = getWeightHandle(I->getDest());

-  auto odim = flattenCdr(outW.dims());
-  auto idim = flattenCdr(inW.dims());
-  assert(odim.first == idim.first && "Mismatch batch size");
+  // outW and inW are 2-dimensional: the first dimension is the batch size
+  // and the second one is the depth.
+  auto OutWidth = outW.dims()[0];
+  auto OutDepth = outW.dims()[1];
+  auto InDepth = inW.dims()[0];
+  auto InWidth = inW.dims()[1];
+
+  assert(OutWidth == InDepth && "Mismatch batch size");

   auto filterW = getWeightHandle(I->getFilter());
   auto biasW = getWeightHandle(I->getBias());

-  size_t inputSize = idim.second;
+  size_t inputSize = InWidth;

-  for (size_t n = 0; n < odim.first; n++) {
-    size_t base = inW.getElementPtr({n});
+  for (size_t n = 0; n < OutWidth; n++) {

-    for (size_t i = 0; i < odim.second; i++) {
+    for (size_t i = 0; i < OutDepth; i++) {
       float sum = 0;
       for (size_t j = 0; j < inputSize; j++) {
-        sum += inW.raw(base + j) * filterW.at({i, j});
+        sum += inW.at({n, j}) * filterW.at({i, j});
       }

       sum += biasW.at({i});
@@ -415,8 +419,14 @@ void Interpreter::fwdFullyConnectedGradInst(bool isTrain,
   auto inG = getWeightHandle(I->getSrcGrad());
   auto outG = getWeightHandle(I->getDestGrad());

-  auto odim = flattenCdr(outG.dims());
-  auto idim = flattenCdr(inW.dims());
+  assert(inW.dims().size() == 2);
+  assert(inG.dims().size() == 2);
+
+  // outG and inW are 2-dimensional: the first dimension is the batch size
+  // and the second one is the depth.
+  auto OutWidth = outG.dims()[0];
+  auto OutDepth = outG.dims()[1];
+  auto InWidth = inW.dims()[1];

   auto filterW = getWeightHandle(I->getFilter());
   auto filterG = getWeightHandle(I->getFilterGrad());
@@ -426,20 +436,19 @@ void Interpreter::fwdFullyConnectedGradInst(bool isTrain,
   filterG.clear();
   inG.clear();

-  size_t inSize = idim.second;
+  size_t inSize = InWidth;

-  for (size_t n = 0; n < odim.first; n++) {
-    size_t base = inW.getElementPtr({n});
+  for (size_t n = 0; n < OutWidth; n++) {

     // Compute the gradient:
-    for (size_t i = 0; i < odim.second; i++) {
+    for (size_t i = 0; i < OutDepth; i++) {
       float chainGrad = outG.at({n, i});

       for (size_t j = 0, e = inSize; j < e; j++) {
         // Input gradient:
-        inG.raw(base + j) += filterW.at({i, j}) * chainGrad;
+        inG.at({n, j}) += filterW.at({i, j}) * chainGrad;
         // Param gradient:
-        filterG.at({i, j}) += inW.raw(base + j) * chainGrad;
+        filterG.at({i, j}) += inW.at({n, j}) * chainGrad;
       }

       biasG.at({i}) += chainGrad;
@@ -586,6 +595,10 @@ void Interpreter::fwdReshapeInst(bool isTrain, const ReshapeInst *I) {
   outT->copyRawFrom(inT);
 }

+void Interpreter::fwdTensorViewInst(bool isTrain, const TensorViewInst *I) {
+  getOrCreateUnownedTensor(I, I->getSrc());
+}
+
 void Interpreter::fwdZeroInst(bool isTrain, const glow::ZeroInst *I) {
   auto *T = getTensor(I->getDest());
   T->zero();
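With the source guaranteed to be 2-D, the forward pass reduces to a plain per-row dot product: out[n][i] = sum_j in[n][j] * filter[i][j] + bias[i]. A self-contained sketch of the same loop structure, as a hypothetical free function rather than the Interpreter's Handle API:

```cpp
#include <cstddef>
#include <vector>

// out[n][i] = sum_j in[n][j] * filter[i][j] + bias[i]
void fullyConnectedFwd(const std::vector<std::vector<float>> &in,
                       const std::vector<std::vector<float>> &filter,
                       const std::vector<float> &bias,
                       std::vector<std::vector<float>> &out) {
  for (size_t n = 0; n < out.size(); n++) {       // batch rows
    for (size_t i = 0; i < out[n].size(); i++) {  // output depth
      float sum = 0;
      for (size_t j = 0; j < in[n].size(); j++) { // input width
        sum += in[n][j] * filter[i][j];
      }
      out[n][i] = sum + bias[i];
    }
  }
}
```

Dropping the raw-offset arithmetic (getElementPtr/raw) in favor of 2-D at({n, j}) indexing is what the new tensor view makes possible.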
diff --git a/src/glow/Backends/OpenCL/OpenCL.cpp b/src/glow/Backends/OpenCL/OpenCL.cpp
index f859ae2792..500ac9cb39 100644
--- a/src/glow/Backends/OpenCL/OpenCL.cpp
+++ b/src/glow/Backends/OpenCL/OpenCL.cpp
@@ -365,6 +365,13 @@ void OCLBackend::doForwardPass(bool isTrain) {
       continue;
     }

+    if (auto *TV = dyn_cast<TensorViewInst>(I)) {
+      assert(tensors_[TV] == tensors_[TV->getSrc()] &&
+             "Memory address for a tensor_view should be the same as the "
+             "address of its origin");
+      continue;
+    }
+
     if (isa<CopyInst>(I) || isa<ReshapeInst>(I)) {
       auto *dest = I->getOperand(0).first;
       auto *src = I->getOperand(1).first;
@@ -457,6 +464,12 @@ void OCLBackend::init() {
       continue;
     }

+    if (auto *TV = llvm::dyn_cast<TensorViewInst>(I)) {
+      assert(!tensors_.count(TV) && "Allocation already made!");
+      tensors_[TV] = tensors_[TV->getSrc()];
+      continue;
+    }
+
     if (auto *D = llvm::dyn_cast<DeallocActivationInst>(I)) {
       auto *A = D->getAlloc();
       assert(tensors_.count(A) && "Invalid deallocation!");

diff --git a/src/glow/IR/IR.cpp b/src/glow/IR/IR.cpp
index a8a8295536..cd5173ea3e 100644
--- a/src/glow/IR/IR.cpp
+++ b/src/glow/IR/IR.cpp
@@ -394,7 +394,8 @@ static void nameInstr(std::unordered_set<std::string> &usedNames, Named *named,
 Module::Module(Graph *G) : G_(G), name_(G->getName()) {}

 static bool hasResultValue(Instruction *I) {
-  return I->getKind() == Instruction::Kind::AllocActivationInstKind;
+  return I->getKind() == Instruction::Kind::AllocActivationInstKind ||
+         I->getKind() == Instruction::Kind::TensorViewInstKind;
 }

 void Module::nameInstructions() {

diff --git a/src/glow/IR/IRBuilder.cpp b/src/glow/IR/IRBuilder.cpp
index 6318957217..70432348f9 100644
--- a/src/glow/IR/IRBuilder.cpp
+++ b/src/glow/IR/IRBuilder.cpp
@@ -101,12 +101,20 @@ FullyConnectedInst *IRBuilder::createFullyConnectedOp(Value *input,
                                                       Value *bias,
                                                       size_t outDepth) {
   TypeRef T = input->getType();
   auto idim = flattenCdr(input->dims());

+  Value *inputview = input;
+  // Create a tensor view only if the dimensionality needs to be changed.
+  if (input->dims().size() != 2)
+    inputview =
+        createTensorView(input->getElementType(), {idim.first, idim.second},
+                         input, "fctensorview");
+
   auto *dest = createAllocActivationInst("fcres", T->getElementType(),
                                          {idim.first, outDepth});

-  return createFullyConnectedInst("fc", dest, input, filter, bias, outDepth);
+  return createFullyConnectedInst("fc", dest, inputview, filter, bias,
+                                  outDepth);
 }

 ReluInst *IRBuilder::createRELUOp(Value *input) {
@@ -137,6 +145,19 @@ ReshapeInst *IRBuilder::createReshapeOp(Value *input,
   return createReshapeInst("reshape", res, input, shape);
 }

+/// Creates a tensorview instruction with the following parameters:
+/// \param elemKind the type of elements in a tensor
+/// \param dims dimensions of the view, such that the number of elements
+/// in the view is the same as the number of elements in the source tensor
+/// \p src
+/// \param src the source tensor used to create the unowned tensor.
+TensorViewInst *IRBuilder::createTensorView(ElemKind elemKind,
+                                            llvm::ArrayRef<size_t> dims,
+                                            Value *src, llvm::StringRef name) {
+  auto ty = getModule().getGraph()->uniqueType(Type(elemKind, dims));
+  return createTensorViewInst(name, src, ty);
+}
+
 TransposeInst *IRBuilder::createTransposeOp(Value *input,
                                             llvm::ArrayRef<unsigned> shuffle) {
   llvm::SmallVector<size_t, 6> shape;
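createFullyConnectedOp relies on flattenCdr to pick the 2-D view shape. Here is a stand-alone model of it, assuming the semantics implied by the call sites above, namely {N, d1, d2, ...} collapsing to {N, d1 * d2 * ...}; the real helper lives elsewhere in Glow and may differ in signature:

```cpp
#include <cstddef>
#include <utility>
#include <vector>

std::pair<size_t, size_t> flattenCdr(const std::vector<size_t> &dims) {
  size_t rest = 1;
  for (size_t i = 1; i < dims.size(); i++)
    rest *= dims[i];
  return {dims[0], rest};
}

int main() {
  auto idim = flattenCdr({8, 12, 12, 64}); // {8, 9216}
  // A tensor view with dims {8, 9216} aliases the same 8 * 9216 floats as
  // the original {8, 12, 12, 64} tensor, so no data is moved.
  return idim.first == 8 && idim.second == 9216 ? 0 : 1;
}
```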
diff --git a/src/glow/IR/IRGen.cpp b/src/glow/IR/IRGen.cpp
index f9a4b46910..dca901a314 100644
--- a/src/glow/IR/IRGen.cpp
+++ b/src/glow/IR/IRGen.cpp
@@ -70,7 +70,10 @@ struct IRGenVisitor : NodeWalker {
     }
     assert(!generatedNodeDest_.count(N) &&
            "Already generated code for this node");
-    assert(isa<AllocActivationInst>(v) && "The value must be an activation");
+    auto *dest = v;
+    if (auto *zn = dyn_cast<ZeroInst>(v))
+      dest = zn->getDest();
+    assert(isa<AllocActivationInst>(dest) && "The value must be an activation");
     generatedNodeDest_[N] = v;
   }

@@ -180,19 +183,41 @@ struct IRGenVisitor : NodeWalker {
     case glow::Kinded::Kind::FullyConnectedGradNodeKind: {
       auto *FCG = cast<FullyConnectedGradNode>(N);
-      auto *inW = valueForNode(FCG->getInput());
+      auto *InW = valueForNode(FCG->getInput());
+      // FullyConnected works with tensor views, so create them.
+      auto *InWview = InW;
+      // Create a tensor view only if the dimensionality needs to be changed.
+      if (InWview->dims().size() != 2) {
+        auto idim = flattenCdr(InW->dims());
+        InWview = builder_.createTensorView(InWview->getElementType(),
+                                            {idim.first, idim.second}, InW,
+                                            "inWtensorview");
+      }
       auto *filterW = valueForNode(FCG->getFilter());
       auto *outW = valueForNode(FCG->getGradOfOriginalOutputNamedOutput());
       auto biasX = FCG->getBias();

-      auto *InG = builder_.createAllocActivationInst("inG", inW->getType());
+      Value *InG = builder_.createAllocActivationInst("inG", InW->getType());
+      // Create a tensor view for the @out parameter InG.
+      Value *InGview = InG;
+      if (InGview->dims().size() != 2) {
+        auto idim = flattenCdr(InG->dims());
+        // The tensor view is the first use of InG and takes InG as an @in
+        // parameter. But InG is not initialized yet to be used as an @in
+        // parameter, and the IR verifier would complain about it. Therefore,
+        // we initialize InG with zeros to make the verifier happy.
+        builder_.createZeroInst("zero", InG);
+        InGview = builder_.createTensorView(InGview->getElementType(),
+                                            {idim.first, idim.second}, InG,
+                                            "inGtensorview");
+      }
       auto *FilterG =
           builder_.createAllocActivationInst("filterG", filterW->getType());
       auto *BiasG = builder_.createAllocActivationInst("biasG", biasX.getType());

-      builder_.createFullyConnectedGradInst(N->getName(), inW, filterW, outW,
-                                            InG, FilterG, BiasG,
+      builder_.createFullyConnectedGradInst(N->getName(), InWview, filterW,
+                                            outW, InGview, FilterG, BiasG,
                                             FCG->getDepth());

       registerIR(FCG->getGradOfInputNamedInput(), InG);

diff --git a/src/glow/IR/Instrs.cpp b/src/glow/IR/Instrs.cpp
index 3cc608f507..08f20ebabe 100644
--- a/src/glow/IR/Instrs.cpp
+++ b/src/glow/IR/Instrs.cpp
@@ -134,6 +134,11 @@ void FullyConnectedInst::verify() const {
   llvm::ArrayRef<size_t> expB = {Depth_};
   assert(B->dims() == expB && "Invalid output shape");
   (void)expB;
+
+  assert(src->dims().size() == 2 &&
+         "Src of a FullyConnectedInst should be 2-dimensional");
+  assert(dest->dims().size() == 2 &&
+         "Dest of a FullyConnectedInst should be 2-dimensional");
 }

 void BatchedMatMulInst::verify() const {
@@ -178,6 +183,13 @@ void ReshapeInst::verify() const {
          "Reshape into a different size");
 }

+void TensorViewInst::verify() const {
+  assert(getOperand(0).first->getType()->size() == getType()->size() &&
+         "TensorView view size should be the same as Src size");
+  assert(getOperand(0).first->getElementType() == getType()->getElementType() &&
+         "TensorView view element type should be the same as Src element type");
+}
+
 void TransposeInst::verify() const {
   auto *dest = getOperand(0).first;
   auto *src = getOperand(1).first;

diff --git a/src/glow/Optimizer/IROptimizer.cpp b/src/glow/Optimizer/IROptimizer.cpp
index ab91c84f36..d20f3f39eb 100644
--- a/src/glow/Optimizer/IROptimizer.cpp
+++ b/src/glow/Optimizer/IROptimizer.cpp
@@ -69,7 +69,7 @@ static void calculateLiveness(Module &M, LivenessMap &liveness) {
 /// Hoists Dealloc instructions right after their last use.
 static void hoistDealloc(Module &M) {
   // Maps activation instructions to their last non-dealloc user.
-  std::unordered_map<AllocActivationInst *, InstrIterator> lastUser;
+  std::unordered_map<Value *, InstrIterator> lastUser;
   auto &instrs = M.getInstrs();

   // Record the last use of each dealloc.
@@ -82,6 +82,14 @@ static void hoistDealloc(Module &M) {
       if (auto alloc = dyn_cast<AllocActivationInst>(op)) {
         lastUser[alloc] = it;
       }
+
+      if (auto tensorView = dyn_cast<TensorViewInst>(op)) {
+        // Consider any use of a tensor_view to also be a use of its source
+        // tensor. This is required to make sure that the lifetime of a
+        // tensor_view is always enclosed inside the lifetime of its source
+        // tensor.
+        lastUser[tensorView->getSrc()] = it;
+      }
     }
   }
diff --git a/tests/unittests/basicIRTest.cpp b/tests/unittests/basicIRTest.cpp
index 34ebdc6315..dd298f7375 100644
--- a/tests/unittests/basicIRTest.cpp
+++ b/tests/unittests/basicIRTest.cpp
@@ -85,7 +85,6 @@ TEST(IR, allInstrs) {

   auto *I3 = builder.createWeightVar(ElemKind::FloatTy, {1, 12, 12, 64});
   auto *I4 = builder.createWeightVar(ElemKind::FloatTy, {1, 12, 12, 3});
-  auto *I5 = builder.createWeightVar(ElemKind::FloatTy, {1, 32});
   auto *I6 = builder.createWeightVar(ElemKind::FloatTy, {2, 12, 12, 64});
   auto *I7 = builder.createWeightVar(T1, "I7");
   auto *I8 = builder.createWeightVar(ElemKind::FloatTy, {1, 24, 3, 24}, "I8");
@@ -109,12 +108,13 @@ TEST(IR, allInstrs) {
   builder.createCopyInst("", I1, I0);
   builder.createConvolutionInst("", I3, I1, F0, B0, 7, 2, 3, 64);
   builder.createPoolMaxInst("", I4, I0, XY, 7, 2, 3);
-  builder.createFullyConnectedInst("", I5, I0, F1, B1, 32);
+  builder.createFullyConnectedOp(I0, F1, B1, 32);
   builder.createReluInst("", I1, I0);
   builder.createSigmoidInst("", I1, I0);
   builder.createTanhInst("", I1, I0);
   builder.createSoftMaxInst("", I1, I0, I7, E0);
   builder.createTransposeInst("", I8, I2, {0, 3, 1, 2});
+  builder.createTensorView(ElemKind::FloatTy, {1, 24, 3, 24}, I2, "I2_view");
   builder.createInsertTensorInst("", I6, I3, {0, 0, 0, 0});
   builder.createBatchNormalizationInst("", I1, I0, S0, S0, S0, S0, 3, 0.01,
                                        0.9);

diff --git a/tools/ClassGen/InstrGen.cpp b/tools/ClassGen/InstrGen.cpp
index 344a467437..75a3e67823 100644
--- a/tools/ClassGen/InstrGen.cpp
+++ b/tools/ClassGen/InstrGen.cpp
@@ -31,6 +31,11 @@ int main(int argc, char **argv) {
       .addMember(MemberType::TypeRef, "Ty")
       .setType("Ty");

+  BB.newInstr("TensorView")
+      .addOperand("Src", OperandKind::In)
+      .addMember(MemberType::TypeRef, "Ty")
+      .setType("Ty");
+
   BB.newInstr("DeallocActivation")
       .addOperand("Src", OperandKind::Out)
       .addExtraMethod("AllocActivationInst *getAlloc() const { return