[TensorLayout] Propagate the input layout requirements for convertTo nodes #3831

Status: Closed. Wants to merge 1 commit.
6 changes: 6 additions & 0 deletions docs/Backends.md
@@ -195,6 +195,12 @@ BB.newBackendSpecificNode("CPUMaxSplat")
.setDocstring("A Max node with one splat input; CPU specific.");
```

If tensor layout requirements are enabled for the backend, one should take
special care to update the layout verifier when adding a new node.
See `TensorLayout.md` for more information.
To extend the example above, if the new node is data parallel, a `.dataParallel()`
line should be added.
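For example, extending the `CPUMaxSplat` declaration above, marking the node as data parallel is one extra builder call (a sketch; the exact chain of builder methods depends on the node's full definition):

```cpp
BB.newBackendSpecificNode("CPUMaxSplat")
    .dataParallel()
    .setDocstring("A Max node with one splat input; CPU specific.");
```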

During `transformPostLowering()`, this `CPUMaxSplat` node replaces the
aforementioned pattern. However, there must be a corresponding instruction for
this Node to be lowered to during the IRGen phase. Thus, we need a corresponding
2 changes: 2 additions & 0 deletions docs/NewOperators.md
@@ -8,6 +8,8 @@
#### High level IR
* Create a new Glow high level IR node in `ClassGen/NodeGen.cpp`. Run `ninja all` to generate the node. In the build directory, check `glow/AutoGenNodes.h` to ensure the node has been generated.
* Implement the `verify()` method for the new node in `Graph/Nodes.cpp`.
* Implement the node's tensor layout requirements, if any; see `TensorLayout.md` for details,
specifically the notes section under `Canonical Tensor Layout`.
* Implement a node creation method in `Graph/Graph.cpp`.
* Implement logic to load model that contains the operator in `Importer/Caffe2ModelLoader.cpp` or `Importer/ONNXModelLoader.cpp` depending on which type of model the operator comes from. Add the operator to `Importer/CommonOperatorLoader.h` instead if the loading logic can be shared between Caffe2 and ONNX. Add as much validation logic as possible here in the loader for the operator because it's crucial to catch errors at this stage. Once the operator is loaded, it is assumed that Glow will be able to successfully run the operator so any issues must be caught here.
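As an illustration of the kind of check the `verify()` step above typically performs, here is a standalone sketch with a hypothetical mock type (`MockEltwiseNode` is not Glow's real API):

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a node: holds the shapes of its single input
// and single result.
struct MockEltwiseNode {
  std::vector<std::size_t> inputDims;
  std::vector<std::size_t> resultDims;

  // A typical element-wise verify(): the result shape must match the
  // input shape.
  bool verify() const { return inputDims == resultDims; }
};
```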
#### Low level IR
10 changes: 10 additions & 0 deletions docs/TensorLayout.md
@@ -108,6 +108,16 @@ derives from `TensorLayoutCommon` and overrides the following functions:
- This function takes an operator `Node *node` and returns the layout requirements of the Nth result `n`.
- It returns Common layout constraints, for example, `ConvolutionNode` should be in `NHWC` format.

Notes:

1. Some nodes can accept any layout as input: they are either data parallel, e.g. `Add`,
or, while not data parallel, do not care about the order of dimensions for their operation,
e.g. `ReshapeNodeKind`. When adding such nodes to Glow, this behavior should be explicitly
specified, for example by adding `.dataParallel()` in NodeGen.
2. Some nodes propagate the layout information of their input, e.g. the `ConvertTo` node.
When adding such nodes to Glow, the canonical layout verifier must be made aware of them.
We currently do that in `getNthInputLayoutRequirements`.
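The propagation described in note 2 can be sketched as follows, using hypothetical mock types (`MockLayoutNode` and `getLayoutRequirement` are illustrations, not Glow's real API; the actual logic lives in `lib/Graph/TensorLayout.cpp`):

```cpp
#include <string>

// Hypothetical stand-in for a node: it either states its own layout
// requirement, or defers to its input, as ConvertTo and Quantize do.
struct MockLayoutNode {
  std::string layout;          // empty means "same layout as my input"
  const MockLayoutNode *input; // nullptr for graph inputs
};

// Walk through layout-agnostic nodes until a concrete requirement is
// found; "*" stands for "any layout is acceptable".
std::string getLayoutRequirement(const MockLayoutNode *n) {
  while (n->layout.empty() && n->input) {
    n = n->input;
  }
  return n->layout.empty() ? "*" : n->layout;
}
```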

## Placeholders and Constants

An important thing to note is that some operators may have a `Placeholder` or
23 changes: 22 additions & 1 deletion lib/Graph/TensorLayout.cpp
@@ -455,6 +455,10 @@ std::string TensorLayoutCommon::getNthInputLayoutRequirements(const Node *node,
auto input = QN->getInput();
return getNthResultLayoutRequirements(input.getNode(), input.getResNo());
}
if (const auto *CTN = llvm::dyn_cast<ConvertToNode>(node)) {
auto input = CTN->getInput();
return getNthResultLayoutRequirements(input.getNode(), input.getResNo());
}
if (const auto *QPN = llvm::dyn_cast<QuantizationProfileNode>(node)) {
switch (n) {
case QuantizationProfileNode::InputIndices::InputIdx: {
@@ -478,6 +482,19 @@ static unsigned getInputIdx(const Node *N, NodeValue in) {
return N->getNumInputs();
}

/// \returns true if getting the input's layout would cause an infinite loop.
static bool inputDoesNotKnowRequirements(const Node *node) {
switch (node->getKind()) {
case Kinded::Kind::TransposeNodeKind:
case Kinded::Kind::QuantizeNodeKind:
case Kinded::Kind::QuantizationProfileNodeKind:
case Kinded::Kind::ConvertToNodeKind:
return true;
default:
return false;
}
}

std::string TensorLayoutCommon::getNthResultLayoutRequirements(const Node *node,
size_t n) {
DCHECK_LT(n, node->getNumResults()) << "Wrong output number";
@@ -492,6 +509,9 @@ std::string TensorLayoutCommon::getNthResultLayoutRequirements(const Node *node,
}
// Dynamically form the layout description for transposes.
auto input = TN->getInput();
while (inputDoesNotKnowRequirements(input)) {
input = input.getNode()->getNthInput(0);
}
auto inputLayout =
getNthInputLayoutRequirements(node, TransposeNode::InputIdx);
auto inputLayoutHelper = TensorLayoutDescription(inputLayout);
@@ -524,7 +544,8 @@ std::string TensorLayoutCommon::getNthResultLayoutRequirements(const Node *node,
auto result = node->getNthResult(n);
auto *user = (*result.getUsers().begin()).getUser();
int inputIdx = getInputIdx(user, result);
if (inputIdx >= user->getNumInputs() || llvm::isa<TransposeNode>(user)) {
if (inputDoesNotKnowRequirements(user) ||
inputIdx >= user->getNumInputs() || llvm::isa<TransposeNode>(user)) {
return getLayoutsForDims()[dims.size()].getSerializedLayout();
}
auto layout = getNthInputLayoutRequirements(user, inputIdx);
22 changes: 22 additions & 0 deletions tests/unittests/TensorLayoutTest.cpp
@@ -16,6 +16,8 @@
#include "BackendTestUtils.h"

#include "glow/Backend/Backend.h"
#include "glow/Converter/Float16Converter.h"
#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
#include "glow/Graph/Graph.h"
#include "glow/Graph/TensorLayout.h"
#include "llvm/Support/raw_ostream.h"
@@ -91,6 +93,26 @@ TEST_P(TensorLayoutTest, convBadLayout) {
EXPECT_FALSE(verifyLayouts(*F_, CanonicalTensorLayout::getInstance(), false));
}

// Check that we propagate the layout information for convertTo nodes:
TEST_P(TensorLayoutTest, convertTo) {
CHECK_IF_ENABLED();

auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input",
false, "NWCH");
auto *resultNCHW = F_->createTranspose("transposeInput", input, NHWC2NCHW);
auto *save = F_->createSave("save", resultNCHW);
bindings_.allocate(save->getPlaceholder());

EXPECT_TRUE(verifyLayouts(*F_, CanonicalTensorLayout::getInstance()));

PrecisionConfiguration precConfig;
TypeAToTypeBFunctionConverter converter(*F_, ElemKind::FloatTy,
ElemKind::Float16Ty, precConfig);
converter.convert();

EXPECT_TRUE(verifyLayouts(*F_, CanonicalTensorLayout::getInstance()));
}

// Check TensorLayoutDescription's parser with simple input.
TEST_P(TensorLayoutTest, parseTestSimple) {
CHECK_IF_ENABLED();