Commit deaaa59

IR: Move the last layer into the new IR implementation.
Summary:
IR: Move the last layer into the new IR implementation.

This commit moves the LRN node, which was implemented in the old network, into the new IR. The code and grad checks are just copied as-is into the new IR with minor interface changes. The tests seem to pass.

Test Plan: Updated the tests to use the IR interface.

Reviewers: #glow, meghanl, abdulras

Reviewed By: abdulras

Subscribers: #glow

Differential Revision: https://phabricator.intern.facebook.com/D5940118

Tags: none

Signature: 5940118:1506703345:242e78cb21da6bb6a7ee5ca96c8eec7439978608
1 parent 919e904 commit deaaa59

File tree: 7 files changed, +250 -0 lines changed

  include/glow/IR/IRBuilder.h
  include/glow/IR/Instrs.def
  include/glow/IR/Instrs.h
  src/glow/IR/IRBuilder.cpp
  src/glow/IR/Instrs.cpp
  src/glow/Interpreter/InterpreterNodes.cpp
  tests/unittests/IRGradCheck.cpp


include/glow/IR/IRBuilder.h

Lines changed: 10 additions & 0 deletions
@@ -55,6 +55,11 @@ class IRBuilder {
                                                 float epsilon = 1e-5,
                                                 float momentum = 0.9);
 
+  LocalResponseNormalizationInst *
+  createLocalResponseNormalizationOp(Value *input, size_t halfWindowSize = 2,
+                                     float alpha = 1e-4, float beta = 0.75,
+                                     float k = 2.0);
+
   ArithmeticInst *createArithmeticOp(Value *LHS, Value *RHS,
                                      ArithmeticInst::OpKind op);

@@ -101,6 +106,11 @@ class IRBuilder {
       Value *dest, Value *src, Value *scale, Value *bias, Value *mean,
       Value *var, size_t channelIdx, float epsilon, float momentum);
 
+  LocalResponseNormalizationInst *
+  createLocalResponseNormalizationInst(Value *dest, Value *src, Value *scale,
+                                       size_t halfWindowSize, float alpha,
+                                       float beta, float k);
+
   ArithmeticInst *createArithmeticInst(Value *dest, Value *LHS, Value *RHS,
                                        ArithmeticInst::OpKind kind);
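
The builder gets the same two-level interface that the other layers in the new IR already use: createLocalResponseNormalizationOp allocates the destination and the "scale" cache activation itself, while createLocalResponseNormalizationInst takes pre-allocated operands. A minimal usage sketch, assuming an existing IRBuilder bb, an input activation in, and that createAllocActivationInst is reachable from the call site (none of these appear in this diff):

  // High-level form: the builder allocates the output and the scale tensor,
  // both with the same shape as the input.
  auto *lrn = bb.createLocalResponseNormalizationOp(in, /* halfWindowSize */ 2,
                                                    /* alpha */ 1e-4,
                                                    /* beta */ 0.75,
                                                    /* k */ 2.0);

  // Low-level form: the caller provides the operand buffers explicitly.
  auto *dest = bb.createAllocActivationInst(in->getType());
  auto *scale = bb.createAllocActivationInst(in->getType(), "scale");
  auto *lrn2 = bb.createLocalResponseNormalizationInst(
      dest, in, scale, /* halfWindowSize */ 2, /* alpha */ 1e-4,
      /* beta */ 0.75, /* k */ 2.0);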

include/glow/IR/Instrs.def

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ DEF_INSTR(TransposeInst, transpose)
 DEF_INSTR(ReshapeInst, reshape)
 DEF_INSTR(ConcatInst, concat)
 DEF_INSTR(BatchNormalizationInst, batchnormalization)
+DEF_INSTR(LocalResponseNormalizationInst, localresponsenormalization)
 DEF_INSTR(ArithmeticInst, arithmetic)
 DEF_VALUE(WeightVar, weight)
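
Instrs.def is an X-macro file: each place that needs the full list of instruction kinds defines DEF_INSTR/DEF_VALUE before including it, so the single line added above is what registers the new kind at every expansion site. A rough illustration of the pattern (hypothetical consumer; the real expansion sites elsewhere in Glow are not shown in this diff):

  // Hypothetical example: expand Instrs.def into a kind-to-name mapping.
  const char *getKindName(Kinded::Kind kind) {
    switch (kind) {
  #define DEF_INSTR(CLASS, NAME)                                               \
    case Kinded::Kind::CLASS##Kind:                                            \
      return #NAME;
  #define DEF_VALUE(CLASS, NAME)                                               \
    case Kinded::Kind::CLASS##Kind:                                            \
      return #NAME;
  #include "glow/IR/Instrs.def"
    }
    return "<unknown>";
  }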

include/glow/IR/Instrs.h

Lines changed: 39 additions & 0 deletions
@@ -355,6 +355,45 @@ class ArithmeticInst : public Instruction {
   void verify() const;
 };
 
+class LocalResponseNormalizationInst : public Instruction {
+  /// The number of neighbouring channels on each side to sum over
+  size_t halfWindowSize_;
+
+  /// The scaling parameter
+  float alpha_;
+
+  /// The exponent parameter
+  float beta_;
+
+  /// The offset parameter
+  float k_;
+
+public:
+  LocalResponseNormalizationInst(Value *dest, Value *src, Value *scale,
+                                 size_t halfWindowSize, float alpha, float beta,
+                                 float k)
+      : Instruction(Kinded::Kind::LocalResponseNormalizationInstKind,
+                    dest->getType(),
+                    {{dest, OperandKind::kOut},
+                     {src, OperandKind::kIn},
+                     {scale, OperandKind::kInOut}}),
+        halfWindowSize_(halfWindowSize), alpha_(alpha), beta_(beta), k_(k) {}
+
+  static bool classof(const Kinded *k) {
+    return k->getKind() == Kinded::Kind::LocalResponseNormalizationInstKind;
+  }
+  std::string getExtraDesc() const;
+  Value *getDest() const { return getOperand(0).first; }
+  Value *getSrc() const { return getOperand(1).first; }
+  Value *getScale() const { return getOperand(2).first; }
+
+  size_t gethalfWindowSize() const { return halfWindowSize_; }
+  float getAlpha() const { return alpha_; }
+  float getBeta() const { return beta_; }
+  float getK() const { return k_; }
+  void verify() const;
+};
+
 class WeightVar : public Value {
 public:
   enum class InitKind {

src/glow/IR/IRBuilder.cpp

Lines changed: 20 additions & 0 deletions
@@ -174,6 +174,17 @@ BatchNormalizationInst *IRBuilder::createBatchNormalizationOp(Value *input,
                                        channelIdx, epsilon, momentum);
 }
 
+LocalResponseNormalizationInst *IRBuilder::createLocalResponseNormalizationOp(
+    Value *input, size_t halfWindowSize, float alpha, float beta, float k) {
+  auto Ty = input->getType();
+  auto *scale = createAllocActivationInst(Ty, "scale");
+
+  // The output tensor is of the same shape as the input tensor.
+  auto *res = createAllocActivationInst(Ty);
+  return createLocalResponseNormalizationInst(input, res, scale, halfWindowSize,
+                                              alpha, beta, k);
+}
+
 ArithmeticInst *IRBuilder::createArithmeticOp(Value *LHS, Value *RHS,
                                               ArithmeticInst::OpKind op) {
   assert(LHS->dims() == RHS->dims() && "Invalid operand shapes");

@@ -281,6 +292,15 @@ BatchNormalizationInst *IRBuilder::createBatchNormalizationInst(
   return A;
 }
 
+LocalResponseNormalizationInst *IRBuilder::createLocalResponseNormalizationInst(
+    Value *dest, Value *src, Value *scale, size_t halfWindowSize, float alpha,
+    float beta, float k) {
+  auto *A = new LocalResponseNormalizationInst(dest, src, scale, halfWindowSize,
+                                               alpha, beta, k);
+  M_.pushInstr(A);
+  return A;
+}
+
 ArithmeticInst *IRBuilder::createArithmeticInst(Value *dest, Value *LHS,
                                                 Value *RHS,
                                                 ArithmeticInst::OpKind kind) {

src/glow/IR/Instrs.cpp

Lines changed: 9 additions & 0 deletions
@@ -69,6 +69,10 @@ std::string BatchNormalizationInst::getExtraDesc() const {
   return listToString(channelIdx_, epsilon_, momentum_);
 }
 
+std::string LocalResponseNormalizationInst::getExtraDesc() const {
+  return listToString(halfWindowSize_, alpha_, beta_, k_);
+}
+
 const char *ArithmeticInst::getKindStr() const {
   const char *names[] = {"add", "mul", nullptr};
   return names[static_cast<int>(kind_)];

@@ -189,6 +193,7 @@ void SoftMaxInst::verify() const {
 }
 void RegressionInst::verify() const {
   checkSameType(getOperand(0), getOperand(1));
+  checkSameType(getOperand(0), getOperand(2));
 }
 
 void ReshapeInst::verify() const {

@@ -240,6 +245,10 @@ void BatchNormalizationInst::verify() const {
   assert(getOperand(4).first->getType()->dims() == exp && "Invalid mean dim");
   assert(getOperand(5).first->getType()->dims() == exp && "Invalid var dim");
 }
+void LocalResponseNormalizationInst::verify() const {
+  checkSameType(getOperand(0), getOperand(1));
+  checkSameType(getOperand(0), getOperand(2));
+}
 void ArithmeticInst::verify() const {
   checkSameType(getOperand(0), getOperand(1));
   checkSameType(getOperand(0), getOperand(2));

src/glow/Interpreter/InterpreterNodes.cpp

Lines changed: 135 additions & 0 deletions
@@ -845,6 +845,141 @@ void Interpreter::bwdBatchNormalizationInst(Context *ctx,
   }
 }
 
+void Interpreter::fwdLocalResponseNormalizationInst(
+    glow::Context *ctx, bool isTrain,
+    const glow::LocalResponseNormalizationInst *I) {
+  auto inW = getWeightHandle(ctx, I->getSrc());
+  auto outW = getWeightHandle(ctx, I->getDest());
+  auto scaleCache = getWeightHandle(ctx, I->getScale());
+
+  ShapeNHWC odim = outW.dims();
+  ShapeNHWC idim = inW.dims();
+  (void)odim;
+
+  // LRN node does not change the shape of the input.
+  assert(odim == idim && "Output of LRN node must be same shape as input");
+
+  // LRN node normalizes across channels, so the input must have a minimum
+  // depth of 1.
+  assert(idim.c > 0 && "Input of LRN node must have a minimum depth of 1");
+
+  auto halfWindowSize = I->gethalfWindowSize();
+  auto k = I->getK();
+  auto beta = I->getBeta();
+  auto windowSize = 2 * halfWindowSize + 1;
+  auto normedAlpha = I->getAlpha() / windowSize;
+
+  // For every input in the batch:
+  for (size_t n = 0; n < idim.n; n++) {
+
+    // For every row:
+    for (size_t h = 0; h < idim.h; h++) {
+
+      // For every column:
+      for (size_t w = 0; w < idim.w; w++) {
+
+        FloatTy squareSum = 0.0;
+
+        // Compute squareSum for first channel.
+        for (size_t c = 1; c <= halfWindowSize && c < idim.c; c++) {
+          auto val = inW.at({n, h, w, c});
+          squareSum += (val * val);
+        }
+
+        // For every channel:
+        for (size_t c = 0; c < idim.c; c++) {
+          auto scale = k + normedAlpha * squareSum;
+
+          // This will be used to accelerate the backward pass.
+          scaleCache.at({n, h, w, c}) = scale;
+
+          auto normFactor = std::pow(scale, -beta);
+          outW.at({n, h, w, c}) = inW.at({n, h, w, c}) * normFactor;
+
+          // Modify squareSum for next channel.
+          auto subIndex = c - halfWindowSize;
+          auto addIndex = c + halfWindowSize + 1;
+          auto sub = (c >= halfWindowSize) ? inW.at({n, h, w, subIndex}) : 0;
+          auto add = (addIndex < idim.c) ? inW.at({n, h, w, addIndex}) : 0;
+
+          // Subtract out "rear" end of this window, add "front" end of next.
+          squareSum = squareSum - (sub * sub) + (add * add);
+        }
+      }
+    }
+  }
+}
+
+void Interpreter::bwdLocalResponseNormalizationInst(
+    glow::Context *ctx, const glow::LocalResponseNormalizationInst *I) {
+  auto inW = getWeightHandle(ctx, I->getSrc());
+  auto inG = getGradHandle(ctx, I->getSrc());
+  auto outW = getWeightHandle(ctx, I->getDest());
+  auto outG = getGradHandle(ctx, I->getDest());
+  auto scaleCache = getWeightHandle(ctx, I->getScale());
+
+  ShapeNHWC odim = outW.dims();
+
+  auto halfWindowSize = I->gethalfWindowSize();
+  auto beta = I->getBeta();
+  auto windowSize = 2 * halfWindowSize + 1;
+  auto normedAlpha = I->getAlpha() / windowSize;
+
+  // For every input in the batch:
+  for (size_t n = 0; n < odim.n; n++) {
+
+    // For every row:
+    for (size_t h = 0; h < odim.h; h++) {
+
+      // For every column:
+      for (size_t w = 0; w < odim.w; w++) {
+
+        FloatTy sum = 0.0;
+
+        // Compute sum for first channel.
+        for (size_t c = 1; c <= halfWindowSize && c < odim.c; c++) {
+          auto outw = outW.at({n, h, w, c});
+          auto scale = scaleCache.at({n, h, w, c});
+          auto outg = outG.at({n, h, w, c});
+          sum += (outg * (outw / scale));
+        }
+
+        // For every channel:
+        for (size_t c = 0; c < odim.c; c++) {
+          auto outg = outG.at({n, h, w, c});
+          auto scale = scaleCache.at({n, h, w, c});
+          auto inw = inW.at({n, h, w, c});
+
+          inG.at({n, h, w, c}) = outg * std::pow(scale, -beta) -
+                                 2 * normedAlpha * beta * inw * sum;
+
+          // Modify sum for next channel.
+          auto subIndex = c - halfWindowSize;
+          auto addIndex = c + halfWindowSize + 1;
+
+          if (c >= halfWindowSize) {
+            auto outw = outW.at({n, h, w, subIndex});
+            auto scale = scaleCache.at({n, h, w, subIndex});
+            auto outg = outG.at({n, h, w, subIndex});
+
+            // Subtract "rear" end of this window.
+            sum -= (outg * (outw / scale));
+          }
+
+          if (addIndex < odim.c) {
+            auto outw = outW.at({n, h, w, addIndex});
+            auto scale = scaleCache.at({n, h, w, addIndex});
+            auto outg = outG.at({n, h, w, addIndex});
+
+            // Add "front" end of next window.
+            sum += (outg * (outw / scale));
+          }
+        }
+      }
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Arithmetic operations
 //===----------------------------------------------------------------------===//

tests/unittests/IRGradCheck.cpp

Lines changed: 36 additions & 0 deletions
@@ -232,6 +232,42 @@ TEST(Network, gradientCheck_batchNorm) {
   performGradCheck(IP, RN, A, Ex, &inputs, &outputs, 0.001, 0.004);
 }
 
+TEST(Network, gradientCheck_LRN) {
+  Interpreter IP;
+  IP.getConfig().maxNumThreads = 1;
+
+  size_t numDim = 8;
+  size_t numOutputElem = numDim;
+
+  Value *A;
+  Value *Ex;
+  Instruction *RN;
+  {
+    IRBuilder bb(IP.getModule());
+
+    A = bb.createWeightVar(ElemKind::FloatTy, {1, numDim, numDim, 3});
+    Ex = bb.createWeightVar(ElemKind::FloatTy, {1, numOutputElem});
+
+    Instruction *O = bb.createLocalResponseNormalizationOp(A, 3, 0.0001, 0.9);
+    O = bb.createFullyConnectedOp(*O, numOutputElem);
+    RN = bb.createRegressionOp(*O, Ex);
+  }
+
+  IP.getModule().verify();
+  IP.initVars();
+
+  Tensor inputs(ElemKind::FloatTy, {1, numDim, numDim, 3});
+  Tensor outputs(ElemKind::FloatTy, {1, numOutputElem});
+
+  auto inputsH = inputs.getHandle<FloatTy>();
+  auto outputsH = outputs.getHandle<FloatTy>();
+
+  inputsH.randomize(1);
+  outputsH.randomize(1);
+
+  performGradCheck(IP, RN, A, Ex, &inputs, &outputs, 0.001, 0.004);
+}
+
 TEST(Network, gradientCheck_Arithmetic) {
   Interpreter IP;
   IP.getConfig().maxNumThreads = 1;
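
The new test mirrors gradientCheck_batchNorm: it places the LRN op in front of a fully connected layer and a regression loss, then hands the graph to performGradCheck. Presumably (the helper itself is not part of this diff) the check perturbs each input element and compares a finite-difference estimate of the loss gradient against the analytic gradient from bwdLocalResponseNormalizationInst, along the lines of:

  \frac{L(x_i + \varepsilon) - L(x_i - \varepsilon)}{2\varepsilon} \approx \frac{\partial L}{\partial x_i}

with the last two arguments (0.001 and 0.004) controlling the perturbation size and the allowed error, the same values used by the neighbouring tests.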
