
Commit 1b7ecf2

Tensor Layouts: update PR based on Jordan's review
1 parent 284d165 commit 1b7ecf2

7 files changed: +20 additions, −17 deletions

docs/TensorLayout.md

Lines changed: 3 additions & 2 deletions
```diff
@@ -25,7 +25,7 @@ Glow's string-based layout format is encoded as follows:
 
 1. A mandatory one character representing the current dimension. Either an alphabetic letter or `*` (any layout).
 2. An optional token for the start of the current dimension's information: `[`.
-3. An optional namespace identifier for non-standard information, such as tiling, followed by `:`. Must have `[` from 2. in place. following said identifier, all subsequent data is considered as a "black box" until `]` is encountered.
+3. An optional namespace identifier for non-standard information, such as tiling, followed by `:`. Must have `[` from 2. in place. Following said identifier, all subsequent data is considered as a "black box" until `]` is encountered.
 4. Given that we have `[` from 2. in place, the closing bracket `]` for it.
 5. Optionally go back to 2.
 
```
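A few illustrative strings may make the grammar above concrete. These exact strings, including the "tile" namespace and its payload, are made up for illustration; only the rules themselves come from the doc:

```cpp
// Illustrative layout strings under the grammar above (hypothetical values).
const char *image4D = "NHWC";         // Rule 1 only: one letter per dimension.
const char *any4D = "****";           // Four dimensions, each accepting any layout.
const char *tiled = "N[tile:8x8]HWC"; // Rules 2-4: '[', namespace "tile" plus ':',
                                      // black-box payload "8x8", then closing ']'.
```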

````diff
@@ -70,9 +70,10 @@ Which includes the following virtual methods they can override:
   virtual bool isSatisfiedBy(TypeRef ty,
                              const TensorLayoutDescription &destLayout,
                              const TensorLayoutDescription *srcLayout) const
+```
 - This function checks if `ty` satisfies `destLayout` layout requirements, if `srcLayout` is provided for `ty`, take that into account.
 
-- `virtual std::array<TensorLayoutDescription, max_tensor_dimensions + 1> &getLayoutsForDims() const`
+- `virtual llvm::ArrayRef<TensorLayoutDescription> getLayoutsForDims() const`
 
 - This helper function returns an array of predefined layouts for all dimensions from `0-D` to Glow's max tensor layout dimension.
 
````
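A minimal sketch of what the new `getLayoutsForDims()` signature looks like in a backend subclass, assuming `TensorLayoutCommon` (seen in `lib/Graph/TensorLayout.cpp` below) is the overridable base class and using a hypothetical `layoutsForDims_` member; none of this code is from the commit:

```cpp
#include "glow/Graph/TensorLayout.h" // Assumed header for TensorLayoutCommon.
#include "llvm/ADT/ArrayRef.h"
#include <vector>

using namespace glow;

// Sketch only: returning llvm::ArrayRef lets a backend keep its predefined
// layouts in any contiguous container, instead of the fixed-size std::array
// the old signature required.
class MyBackendTensorLayout : public TensorLayoutCommon {
public:
  llvm::ArrayRef<TensorLayoutDescription> getLayoutsForDims() const override {
    return layoutsForDims_; // std::vector converts implicitly to ArrayRef.
  }

private:
  // Hypothetical storage: one entry per rank, 0-D up to max_tensor_dimensions.
  std::vector<TensorLayoutDescription> layoutsForDims_;
};
```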

examples/fr2en.cpp

Lines changed: 5 additions & 4 deletions
```diff
@@ -277,14 +277,15 @@ void Model::loadEncoder() {
                             {0, step, 0}, {batchSize_, step + 1, EMBEDDING_SIZE});
     Node *reshape =
         F_->createReshape("encoder." + std::to_string(step) + ".reshape",
-                          inputSlice, {batchSize_, EMBEDDING_SIZE}, "*");
+                          inputSlice, {batchSize_, EMBEDDING_SIZE}, ANY_LAYOUT);
     hidden = createPyTorchGRUCell(F_, reshape, hidden, wIh, bIh, wHh, bHh);
     outputs.push_back(hidden);
   }
 
   Node *output = F_->createConcat("encoder.output", outputs, 1);
-  Node *r2 = F_->createReshape("encoder.output.r2", output,
-                               {MAX_LENGTH * batchSize_, EMBEDDING_SIZE}, "*");
+  Node *r2 =
+      F_->createReshape("encoder.output.r2", output,
+                        {MAX_LENGTH * batchSize_, EMBEDDING_SIZE}, ANY_LAYOUT);
 
   encoderHiddenOutput_ = F_->createGather("encoder.outputNth", r2, seqLength_);
 }
@@ -346,7 +347,7 @@ void Model::loadDecoder() {
 
   Node *concat = F_->createConcat("decoder.output.concat", outputs, 0);
   Node *reshape = F_->createReshape("decoder.output.reshape", concat,
-                                    {MAX_LENGTH, batchSize_}, "*");
+                                    {MAX_LENGTH, batchSize_}, ANY_LAYOUT);
   auto *save = F_->createSave("decoder.output", reshape);
   output_ = save->getPlaceholder();
   bindings.allocate(output_);
```
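These hunks swap the bare `"*"` literal for `ANY_LAYOUT` in `createReshape` calls. Given the layout grammar above, where `*` means "any layout", the constant presumably just names that wildcard; a sketch of the presumed definition (the real one lives in Glow's headers and may be spelled differently):

```cpp
// Presumed shape of the constant: a named wildcard reads better at call
// sites than a bare "*" literal.
constexpr const char *ANY_LAYOUT = "*";
```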

include/glow/Graph/Graph.h

Lines changed: 2 additions & 2 deletions
```diff
@@ -1388,6 +1388,8 @@ Node *recursiveClone(Function *newF, Node *node, NodeMap &currToNew);
   { 0u, 3u, 1u, 2u }
 #define HWCN2NHWC \
   { 3u, 0u, 1u, 2u }
+#define NHWC2HWNC \
+  { 1u, 2u, 0u, 3u }
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Module &mod);
 
@@ -1397,8 +1399,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Function &F);
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Function *F);
 
-#define NHWC2HWNC \
-  { 1u, 2u, 0u, 3u }
 } // namespace glow
 
 #endif // GLOW_GRAPH_GRAPH_H
```
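Besides defining `NHWC2HWNC`, the second hunk moves it from after the stream operators up to the block of sibling shuffle macros. A sketch of how such a shuffle is typically consumed, assuming the usual `createTranspose(name, input, shuffle)` builder (not part of this commit):

```cpp
// Sketch: permute an NHWC tensor to HWNC; `F` is assumed to be a
// glow::Function* and `input` an NHWC-laid-out NodeValue.
Node *toHWNC = F->createTranspose("toHWNC", input, NHWC2HWNC);
```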

lib/Backends/OpenCL/OpenCLTensorLayout.cpp

Lines changed: 4 additions & 4 deletions
```diff
@@ -63,8 +63,8 @@ static const TensorLayoutDescription *getLayoutFromEnum(const N &node) {
 /// if it has one. Else returns nullptr. This will be removed and refactored
 /// if/when we move to using strings for all layout specifications and get rid
 /// of the enum.
-static const TensorLayoutDescription *getLayouForTempEnumRep(size_t n,
-                                                             const Node *node) {
+static const TensorLayoutDescription *
+getLayoutForTempEnumRep(size_t n, const Node *node) {
   if (const auto MP = llvm::dyn_cast<MaxPoolNode>(node)) {
     return getLayoutFromEnum(MP);
   }
@@ -96,7 +96,7 @@ std::string OpenCLTensorLayout::getNthInputLayoutRequirements(const Node *node,
   DCHECK_LE(dims.size(), max_tensor_dimensions) << "Too many dimensions";
   // TODO: Remove ->getLayout() enum and take a string like transpose. Refactor
   // the following after doing so.
-  const auto *layout = getLayouForTempEnumRep(n, node);
+  const auto *layout = getLayoutForTempEnumRep(n, node);
   if (layout) {
     return layout->getSerializedLayout();
   }
@@ -112,7 +112,7 @@ std::string OpenCLTensorLayout::getNthResultLayoutRequirements(const Node *node,
   DCHECK_LE(dims.size(), max_tensor_dimensions) << "Too many dimensions";
   // TODO: Remove ->getLayout() enum and take a string like transpose. Refactor
   // the following after doing so.
-  const auto *layout = getLayouForTempEnumRep(n, node);
+  const auto *layout = getLayoutForTempEnumRep(n, node);
   if (layout) {
     return layout->getSerializedLayout();
   }
```

lib/Graph/TensorLayout.cpp

Lines changed: 2 additions & 1 deletion
```diff
@@ -431,7 +431,8 @@ std::string TensorLayoutCommon::getNthResultLayoutRequirements(const Node *node,
   }
   // Dynamically form the layout description for transposes.
   auto input = TN->getInput();
-  auto inputLayout = getNthInputLayoutRequirements(node, 0);
+  auto inputLayout =
+      getNthInputLayoutRequirements(node, TransposeNode::InputIdx);
   auto inputLayoutHelper = TensorLayoutDescription(inputLayout);
   llvm::SmallVector<std::string, max_tensor_dimensions> dims(
       input.dims().size());
```

lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp

Lines changed: 2 additions & 2 deletions
```diff
@@ -1610,8 +1610,8 @@ static NodeValue tryToOptimizeConcatOfRehapes(Function *F, ConcatNode *CN) {
   return F->createReshape(
       CN->getInputs().front().getNode()->getName(), newCN,
       CN->getResult().dims(),
-      CanonicalTensorLayout::getInstance().getNthResultLayoutRequirements(CN,
-                                                                          0));
+      CanonicalTensorLayout::getInstance().getNthResultLayoutRequirements(
+          CN, ConcatNode::ResultIdx));
 }
 
 /// Simplify concat node.
```

lib/Optimizer/GraphOptimizer/Lower.cpp

Lines changed: 2 additions & 2 deletions
```diff
@@ -174,8 +174,8 @@ static void lowerFullyConnectedGradNode(Function *F, CompilationContext &cctx,
   auto *dx2 = F->createMatMul("fcg.dot", dout, wT);
   auto *dx = F->createReshape(
       "fcg.inG", dx2, FCG.getInput().getType()->dims(),
-      CanonicalTensorLayout::getInstance().getNthInputLayoutRequirements(&FCG,
-                                                                         0));
+      CanonicalTensorLayout::getInstance().getNthInputLayoutRequirements(
+          &FCG, FullyConnectedGradNode::InputIdx));
   replaceAllUsesOfWith(cctx.loweredInfoMap, FCG.getGradOfInputNamedInput(), dx);
 
   // dw = xT * dout.
```
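The last three hunks (here, in `GraphOptimizer.cpp`, and in `TensorLayout.cpp`) share one pattern: a bare `0` index is replaced by the named constant the node class exposes, such as `TransposeNode::InputIdx`, `ConcatNode::ResultIdx`, and `FullyConnectedGradNode::InputIdx`. A sketch of the difference at a call site, where `CN` is a `ConcatNode *` and `canonical` stands for `CanonicalTensorLayout::getInstance()` as in the hunks above:

```cpp
// Both calls resolve to index 0 today, but only the second documents which
// result is meant and stays correct if the node's indexing ever changes.
auto before = canonical.getNthResultLayoutRequirements(CN, 0);
auto after = canonical.getNthResultLayoutRequirements(CN, ConcatNode::ResultIdx);
```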
