Implement a new tensorview instruction #158

Merged: 2 commits, Dec 20, 2017
Changes from all commits
4 changes: 4 additions & 0 deletions include/glow/IR/IRBuilder.h
@@ -59,6 +59,10 @@ class IRBuilder {

ReshapeInst *createReshapeOp(Value *input, llvm::ArrayRef<size_t> shape);

TensorViewInst *createTensorView(ElemKind elemKind,
                                 llvm::ArrayRef<size_t> dims, Value *src,
                                 llvm::StringRef name);

TransposeInst *createTransposeOp(Value *input,
llvm::ArrayRef<unsigned> shuffle);

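For orientation, here is a minimal sketch of how this new builder entry point might be called; the weight variable, the shapes, and the "view2d" name are illustrative assumptions, not code from this PR.

// A sketch: reinterpret a 4-D activation as a 2-D matrix without copying
// data. Assumes an IRBuilder named `builder` is in scope.
auto *input = builder.createWeightVar(ElemKind::FloatTy, {8, 12, 12, 64});
auto *view = builder.createTensorView(ElemKind::FloatTy, {8, 12 * 12 * 64},
                                      input, "view2d");
// `view` names the same storage as `input`; only the reported shape
// differs, so the element counts must match (8 * 9216 on both sides).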
19 changes: 19 additions & 0 deletions src/glow/Backends/Interpreter/Interpreter.cpp
@@ -10,6 +10,7 @@
#include "llvm/Support/Casting.h"

using namespace glow;
using llvm::isa;

Interpreter::~Interpreter() { clear(); }

@@ -70,6 +71,24 @@ Tensor *Interpreter::getOrCreateTensor(const Value *v) {
return it->second;
}

Tensor *Interpreter::getOrCreateUnownedTensor(const Value *v,
const Value *src) {
assert(isa<TensorViewInst>(v) && "Expected a tensor view");

// Pick the tensor.
auto it = tensors_.find(v);

// Release unowned tensors before re-creating them.
if (it != tensors_.end()) {
deleteTensor(v);
}

auto *T = new Tensor();
*T = getTensor(src)->getUnowned(v->dims());
tensors_[v] = T;
return T;
}

void Interpreter::deleteTensor(const Value *v) {
auto it = tensors_.find(v);
if (it == tensors_.end()) {
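getOrCreateUnownedTensor backs a tensor_view with an "unowned" tensor: one that aliases the source tensor's payload and never frees it. Below is a standalone sketch of that ownership rule; it is illustrative only and deliberately not Glow's Tensor class.

#include <cstddef>
#include <utility>
#include <vector>

// A toy tensor that can either own its payload or borrow another tensor's
// payload. Views never free the storage they alias, so they must not
// outlive the owning tensor.
class SimpleTensor {
  float *data_ = nullptr;     // payload
  std::vector<size_t> dims_;  // logical shape
  bool owned_ = false;        // true if this object allocated data_

public:
  SimpleTensor(std::vector<size_t> dims, size_t count)
      : data_(new float[count]()), dims_(std::move(dims)), owned_(true) {}

  // An "unowned" view over the same payload with a different shape.
  static SimpleTensor makeView(const SimpleTensor &src,
                               std::vector<size_t> dims) {
    SimpleTensor v;
    v.data_ = src.data_;
    v.dims_ = std::move(dims);
    v.owned_ = false; // the source keeps ownership
    return v;
  }

  ~SimpleTensor() {
    if (owned_)
      delete[] data_;
  }

  SimpleTensor(SimpleTensor &&other) noexcept
      : data_(other.data_), dims_(std::move(other.dims_)),
        owned_(other.owned_) {
    other.data_ = nullptr;
    other.owned_ = false;
  }
  SimpleTensor(const SimpleTensor &) = delete;

private:
  SimpleTensor() = default; // only used by makeView
};

The interpreter follows the same discipline: deleteTensor drops a stale view before a new unowned tensor is created, and a view is always released no later than its source.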
5 changes: 5 additions & 0 deletions src/glow/Backends/Interpreter/Interpreter.h
@@ -61,6 +61,11 @@ class Interpreter final : public Backend {
/// \returns a tensor for \p v.
Tensor *getOrCreateTensor(const Value *v);

/// Allocate an unowned tensor to back the value \p v. The source tensor of
/// the unowned tensor is provided by \p src.
/// \returns a tensor for \p v.
Tensor *getOrCreateUnownedTensor(const Value *v, const Value *src);

/// If a tensor is allocated for \p v then delete it.
void deleteTensor(const Value *v);

49 changes: 31 additions & 18 deletions src/glow/Backends/Interpreter/InterpreterNodes.cpp
@@ -384,23 +384,27 @@ void Interpreter::fwdFullyConnectedInst(bool isTrain,
auto inW = getWeightHandle(I->getSrc());
auto outW = getWeightHandle(I->getDest());

auto odim = flattenCdr(outW.dims());
auto idim = flattenCdr(inW.dims());
assert(odim.first == idim.first && "Mismatch batch size");
// outW and inW are 2-dimensional.
// Dimensions are depth and width.
auto OutWidth = outW.dims()[0];
auto OutDepth = outW.dims()[1];
auto InDepth = inW.dims()[0];
auto InWidth = inW.dims()[1];

assert(OutWidth == InDepth && "Mismatch batch size");

auto filterW = getWeightHandle(I->getFilter());
auto biasW = getWeightHandle(I->getBias());

size_t inputSize = idim.second;
size_t inputSize = InWidth;

for (size_t n = 0; n < odim.first; n++) {
size_t base = inW.getElementPtr({n});
for (size_t n = 0; n < OutWidth; n++) {

for (size_t i = 0; i < odim.second; i++) {
for (size_t i = 0; i < OutDepth; i++) {

float sum = 0;
for (size_t j = 0; j < inputSize; j++) {
sum += inW.raw(base + j) * filterW.at({i, j});
sum += inW.at({n, j}) * filterW.at({i, j});
}

sum += biasW.at({i});
@@ -415,8 +419,14 @@ void Interpreter::fwdFullyConnectedGradInst(bool isTrain,
auto inG = getWeightHandle(I->getSrcGrad());
auto outG = getWeightHandle(I->getDestGrad());

auto odim = flattenCdr(outG.dims());
auto idim = flattenCdr(inW.dims());
assert(inW.dims().size() == 2);
assert(inG.dims().size() == 2);

// outG and inW are 2-dimensional.
// Dimensions are depth and width.
auto OutWidth = outG.dims()[0];
auto OutDepth = outG.dims()[1];
auto InWidth = inW.dims()[1];

auto filterW = getWeightHandle(I->getFilter());
auto filterG = getWeightHandle(I->getFilterGrad());
Expand All @@ -426,20 +436,19 @@ void Interpreter::fwdFullyConnectedGradInst(bool isTrain,
filterG.clear();
inG.clear();

size_t inSize = idim.second;
size_t inSize = InWidth;

for (size_t n = 0; n < odim.first; n++) {
size_t base = inW.getElementPtr({n});
for (size_t n = 0; n < OutWidth; n++) {

// Compute the gradient:
for (size_t i = 0; i < odim.second; i++) {
for (size_t i = 0; i < OutDepth; i++) {
float chainGrad = outG.at({n, i});

for (size_t j = 0, e = inSize; j < e; j++) {
// Input gradient:
inG.raw(base + j) += filterW.at({i, j}) * chainGrad;
inG.at({n, j}) += filterW.at({i, j}) * chainGrad;
// Param gradient:
filterG.at({i, j}) += inW.raw(base + j) * chainGrad;
filterG.at({i, j}) += inW.at({n, j}) * chainGrad;
}

biasG.at({i}) += chainGrad;
@@ -586,6 +595,10 @@ void Interpreter::fwdReshapeInst(bool isTrain, const ReshapeInst *I) {
outT->copyRawFrom(inT);
}

void Interpreter::fwdTensorViewInst(bool isTrain, const TensorViewInst *I) {
getOrCreateUnownedTensor(I, I->getSrc());
}

void Interpreter::fwdZeroInst(bool isTrain, const glow::ZeroInst *I) {
auto *T = getTensor(I->getDest());
T->zero();
@@ -1197,10 +1210,10 @@ void Interpreter::fwdDeallocActivationInst(bool isTrain,
/// tensor.
void Interpreter::fwdDebugPrintInst(bool isTrain, const DebugPrintInst *I) {
auto *V = I->getSrc();
llvm::outs() << I->getName() << ": ";
// Dump the content of a value.
V->dump();
llvm::outs() << "\n";
auto WH = getWeightHandle(V);
WH.dump();
dumpImpl(getTensor(V));
llvm::outs() << "\n";
}
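The rewritten fwdFullyConnectedInst above indexes the input and output as plain 2-D matrices instead of flattening offsets by hand. The same arithmetic as a standalone sketch over raw row-major arrays; the parameter names and layout here are assumptions made for illustration:

#include <cstddef>

// out[n][i] = sum_j in[n][j] * filter[i][j] + bias[i]
// Shapes assumed: in is N x IN, filter is OUT x IN, bias is OUT,
// out is N x OUT, all row-major.
void fullyConnected2D(const float *in, const float *filter, const float *bias,
                      float *out, size_t N, size_t IN, size_t OUT) {
  for (size_t n = 0; n < N; n++) {
    for (size_t i = 0; i < OUT; i++) {
      float sum = 0;
      for (size_t j = 0; j < IN; j++) {
        sum += in[n * IN + j] * filter[i * IN + j];
      }
      out[n * OUT + i] = sum + bias[i];
    }
  }
}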
13 changes: 13 additions & 0 deletions src/glow/Backends/OpenCL/OpenCL.cpp
@@ -365,6 +365,13 @@ void OCLBackend::doForwardPass(bool isTrain) {
continue;
}

if (auto *TV = dyn_cast<TensorViewInst>(I)) {
assert(tensors_[TV] == tensors_[TV->getSrc()] &&
"Memory address for a tensor_view should be the same as the "
"address of its origin");
continue;
}

if (isa<CopyInst>(I) || isa<ReshapeInst>(I)) {
auto *dest = I->getOperand(0).first;
auto *src = I->getOperand(1).first;
@@ -457,6 +464,12 @@ void OCLBackend::init() {
continue;
}

if (auto *TV = llvm::dyn_cast<TensorViewInst>(I)) {
assert(!tensors_.count(TV) && "Allocation already made!");
tensors_[TV] = tensors_[TV->getSrc()];
continue;
}

if (auto *D = llvm::dyn_cast<DeallocActivationInst>(I)) {
auto *A = D->getAlloc();
assert(tensors_.count(A) && "Invalid deallocation!");
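On the OpenCL backend a tensor_view gets no device allocation of its own: init() copies the source's entry so both values resolve to the same device address, and doForwardPass() only asserts that this aliasing invariant holds. A toy sketch of that bookkeeping, using a made-up offsets table rather than the backend's real tensors_ map:

#include <cassert>
#include <cstddef>
#include <map>
#include <string>

// Hypothetical allocation table: value name -> byte offset into one shared
// device buffer. A tensor_view simply aliases its source's entry, so no new
// device memory is reserved for it.
void recordTensorView(std::map<std::string, size_t> &offsets,
                      const std::string &view, const std::string &src) {
  assert(offsets.count(src) && "Source must already be allocated");
  assert(!offsets.count(view) && "Allocation already made!");
  offsets[view] = offsets[src]; // same address as the origin
}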
3 changes: 2 additions & 1 deletion src/glow/IR/IR.cpp
@@ -394,7 +394,8 @@ static void nameInstr(std::unordered_set<std::string> &usedNames, Named *named,
Module::Module(Graph *G) : G_(G), name_(G->getName()) {}

static bool hasResultValue(Instruction *I) {
return I->getKind() == Instruction::Kind::AllocActivationInstKind;
return I->getKind() == Instruction::Kind::AllocActivationInstKind ||
I->getKind() == Instruction::Kind::TensorViewInstKind;
}

void Module::nameInstructions() {
23 changes: 22 additions & 1 deletion src/glow/IR/IRBuilder.cpp
@@ -101,12 +101,20 @@ FullyConnectedInst *IRBuilder::createFullyConnectedOp(Value *input,
Value *bias,
size_t outDepth) {
TypeRef T = input->getType();

auto idim = flattenCdr(input->dims());

Value *inputview = input;
// Create a tensor view only if the dimensionality needs to be changed.
if (input->dims().size() != 2)
inputview =
createTensorView(input->getElementType(), {idim.first, idim.second},
input, "fctensorview");
auto *dest = createAllocActivationInst("fcres", T->getElementType(),
{idim.first, outDepth});

return createFullyConnectedInst("fc", dest, input, filter, bias, outDepth);
return createFullyConnectedInst("fc", dest, inputview, filter, bias,
outDepth);
}

ReluInst *IRBuilder::createRELUOp(Value *input) {
@@ -137,6 +145,19 @@ ReshapeInst *IRBuilder::createReshapeOp(Value *input,
return createReshapeInst("reshape", res, input, shape);
}

/// Creates a tensorview instruction with the following parameters:
/// \param elemKind the type of elements in a tensor
/// \param dims dimensions of the view, such that the number of elements
/// in the view is the same as the number of elements in the source tensor
/// \p src
/// \param src the source tensor used to create the unowned tensor.
TensorViewInst *IRBuilder::createTensorView(ElemKind elemKind,
llvm::ArrayRef<size_t> dims,
Value *src, llvm::StringRef name) {
auto ty = getModule().getGraph()->uniqueType(Type(elemKind, dims));
return createTensorViewInst(name, src, ty);
}

TransposeInst *IRBuilder::createTransposeOp(Value *input,
llvm::ArrayRef<unsigned> shuffle) {
llvm::SmallVector<size_t, 6> shape;
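createFullyConnectedOp relies on flattenCdr to collapse everything after the batch dimension into a single width, and the new tensor view reinterprets the input with that 2-D shape. A small sketch of the flattening rule, assuming the usual keep-dim-0, multiply-the-rest semantics:

#include <cstddef>
#include <utility>
#include <vector>

// {N, d1, d2, ..., dk} -> {N, d1 * d2 * ... * dk}
// e.g. {8, 12, 12, 64} -> {8, 9216}, the 2-D shape handed to
// createTensorView for a fully-connected input.
std::pair<size_t, size_t> flattenCdrSketch(const std::vector<size_t> &dims) {
  size_t rest = 1;
  for (size_t i = 1; i < dims.size(); i++) {
    rest *= dims[i];
  }
  return {dims[0], rest};
}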
35 changes: 30 additions & 5 deletions src/glow/IR/IRGen.cpp
@@ -70,7 +70,10 @@ struct IRGenVisitor : NodeWalker {
}
assert(!generatedNodeDest_.count(N) &&
"Already generated code for this node");
assert(isa<AllocActivationInst>(v) && "The value must be an activation");
auto *dest = v;
if (auto *zn = dyn_cast<ZeroInst>(v))
  dest = zn->getDest();
assert(isa<AllocActivationInst>(dest) && "The value must be an activation");
generatedNodeDest_[N] = v;
}

@@ -180,19 +183,41 @@

case glow::Kinded::Kind::FullyConnectedGradNodeKind: {
auto *FCG = cast<FullyConnectedGradNode>(N);
auto *inW = valueForNode(FCG->getInput());
auto *InW = valueForNode(FCG->getInput());
// FullyConnected works with tensor views, so create them.
auto *InWview = InW;
// Create a tensor view only if the dimensionality needs to be changed.
if (InWview->dims().size() != 2) {
auto idim = flattenCdr(InW->dims());
InWview = builder_.createTensorView(InWview->getElementType(),
{idim.first, idim.second}, InW,
"inWtensorview");
}
auto *filterW = valueForNode(FCG->getFilter());
auto *outW = valueForNode(FCG->getGradOfOriginalOutputNamedOutput());
auto biasX = FCG->getBias();

auto *InG = builder_.createAllocActivationInst("inG", inW->getType());
Value *InG = builder_.createAllocActivationInst("inG", InW->getType());
// Create a tensor view for the @out parameter G.
Value *InGview = InG;
if (InGview->dims().size() != 2) {
auto idim = flattenCdr(InG->dims());
// The tensor_view is the first use of InG and takes InG as an @in
// parameter. But InG is not initialized yet, so the IR verifier would
// complain about it. Therefore, we initialize InG to zero to keep the
// verifier happy.
builder_.createZeroInst("zero", InG);
InGview = builder_.createTensorView(InGview->getElementType(),
{idim.first, idim.second}, InG,
"inGtensorview");
}
auto *FilterG =
builder_.createAllocActivationInst("filterG", filterW->getType());
auto *BiasG =
builder_.createAllocActivationInst("biasG", biasX.getType());

builder_.createFullyConnectedGradInst(N->getName(), inW, filterW, outW,
InG, FilterG, BiasG,
builder_.createFullyConnectedGradInst(N->getName(), InWview, filterW,
outW, InGview, FilterG, BiasG,
FCG->getDepth());

registerIR(FCG->getGradOfInputNamedInput(), InG);
12 changes: 12 additions & 0 deletions src/glow/IR/Instrs.cpp
@@ -134,6 +134,11 @@ void FullyConnectedInst::verify() const {
llvm::ArrayRef<size_t> expB = {Depth_};
assert(B->dims() == expB && "Invalid output shape");
(void)expB;

assert(src->dims().size() == 2 &&
"Src of a FullyConnectedInst should be 2-dimensional");
assert(dest->dims().size() == 2 &&
"Dest of a FullyConnectedInst should be 2-dimensional");
}

void BatchedMatMulInst::verify() const {
@@ -178,6 +183,13 @@ void ReshapeInst::verify() const {
"Reshape into a different size");
}

void TensorViewInst::verify() const {
assert(getOperand(0).first->getType()->size() == getType()->size() &&
"TensorView view size should be the same as Src size");
assert(getOperand(0).first->getElementType() == getType()->getElementType() &&
       "TensorView view element type should be the same as Src element type");
}

void TransposeInst::verify() const {
auto *dest = getOperand(0).first;
auto *src = getOperand(1).first;
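The two assertions only require that a view preserves the element count and the element type; the shape itself is free to change. A short standalone version of the size check, with hypothetical shapes in the comment:

#include <cstddef>
#include <vector>

// Returns true if viewDims is a legal reinterpretation of srcDims, i.e. it
// describes the same number of elements. For example, a {1, 24, 3, 24}
// tensor can be viewed as {24, 72} (1728 elements either way), but not as
// {24, 24}.
bool isLegalView(const std::vector<size_t> &srcDims,
                 const std::vector<size_t> &viewDims) {
  size_t srcCount = 1, viewCount = 1;
  for (size_t d : srcDims)
    srcCount *= d;
  for (size_t d : viewDims)
    viewCount *= d;
  return srcCount == viewCount;
}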
19 changes: 16 additions & 3 deletions src/glow/Optimizer/IROptimizer.cpp
@@ -69,7 +69,7 @@ static void calculateLiveness(Module &M, LivenessMap &liveness) {
/// Hoists Dealloc instructions right after their last use.
static void hoistDealloc(Module &M) {
// Maps activation instructions to their last non-dealloc user.
std::unordered_map<AllocActivationInst *, InstrIterator> lastUser;
std::unordered_map<Value *, InstrIterator> lastUser;
auto &instrs = M.getInstrs();

// Record the last use of each dealloc.
@@ -82,6 +82,14 @@
if (auto alloc = dyn_cast<AllocActivationInst>(op)) {
lastUser[alloc] = it;
}

if (auto tensorView = dyn_cast<TensorViewInst>(op)) {
// Consider any use of a tensor_view to also be a use of its source
// tensor. This is required to make sure that the lifetime of a
// tensor_view is always enclosed inside the lifetime of its source
// tensor.
lastUser[tensorView->getSrc()] = it;
}
}
}

@@ -884,19 +892,24 @@ static void performDebugInstrumentation(Module &M) {
it = next;
continue;
}
auto instrName = (*it)->getName();
for (auto const &Op : (*it)->getOperands()) {
// Dump inputs of the current instruction before the instruction.
if (Op.second != OperandKind::Out) {
std::string name = "print_input_";
std::string name = "debug_print.before.";
name += Op.first->getName();
name += ".";
name += instrName;
auto *dumpInstr = new DebugPrintInst(&M, name, Op.first);
M.insertInstruction(it, dumpInstr);
}

// Dump outputs of the current instruction after the instruction.
if (Op.second != OperandKind::In) {
std::string name = "print_output_";
std::string name = "debug_print.after.";
name += Op.first->getName();
name += ".";
name += instrName;
auto *dumpInstr = new DebugPrintInst(&M, name, Op.first);
M.insertInstruction(next, dumpInstr);
}
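Without the extra lastUser entry above, hoistDealloc could move the dealloc of a source tensor above a later read through one of its tensor_views, freeing storage that is still aliased. A simplified model of that bookkeeping; the Use struct, the viewSource map, and the value names are invented for illustration:

#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Each instruction is modeled as the list of values it touches. Views map
// back to their source allocation. A dealloc may only be hoisted to just
// after the latest index recorded here for its allocation.
struct Use {
  std::string value;
};

std::map<std::string, size_t>
lastUsers(const std::vector<std::vector<Use>> &instrs,
          const std::map<std::string, std::string> &viewSource) {
  std::map<std::string, size_t> last;
  for (size_t i = 0; i < instrs.size(); i++) {
    for (const Use &u : instrs[i]) {
      last[u.value] = i;
      // A use of a tensor_view also counts as a use of its source, which
      // keeps the source alive at least as long as the view is used.
      auto it = viewSource.find(u.value);
      if (it != viewSource.end())
        last[it->second] = i;
    }
  }
  return last;
}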
4 changes: 2 additions & 2 deletions tests/unittests/basicIRTest.cpp
@@ -85,7 +85,6 @@ TEST(IR, allInstrs) {

auto *I3 = builder.createWeightVar(ElemKind::FloatTy, {1, 12, 12, 64});
auto *I4 = builder.createWeightVar(ElemKind::FloatTy, {1, 12, 12, 3});
auto *I5 = builder.createWeightVar(ElemKind::FloatTy, {1, 32});
auto *I6 = builder.createWeightVar(ElemKind::FloatTy, {2, 12, 12, 64});
auto *I7 = builder.createWeightVar(T1, "I7");
auto *I8 = builder.createWeightVar(ElemKind::FloatTy, {1, 24, 3, 24}, "I8");
@@ -109,12 +108,13 @@
builder.createCopyInst("", I1, I0);
builder.createConvolutionInst("", I3, I1, F0, B0, 7, 2, 3, 64);
builder.createPoolMaxInst("", I4, I0, XY, 7, 2, 3);
builder.createFullyConnectedInst("", I5, I0, F1, B1, 32);
builder.createFullyConnectedOp(I0, F1, B1, 32);
builder.createReluInst("", I1, I0);
builder.createSigmoidInst("", I1, I0);
builder.createTanhInst("", I1, I0);
builder.createSoftMaxInst("", I1, I0, I7, E0);
builder.createTransposeInst("", I8, I2, {0, 3, 1, 2});
builder.createTensorView(ElemKind::FloatTy, {1, 24, 3, 24}, I2, "I2_view");
builder.createInsertTensorInst("", I6, I3, {0, 0, 0, 0});
builder.createBatchNormalizationInst("", I1, I0, S0, S0, S0, S0, 3, 0.01,
0.9);