diff --git a/docs/Backends.md b/docs/Backends.md index 39a62e0df0..e0c86d4a95 100644 --- a/docs/Backends.md +++ b/docs/Backends.md @@ -195,6 +195,12 @@ BB.newBackendSpecificNode("CPUMaxSplat") .setDocstring("A Max node with one splat input; CPU specific."); ``` +If tensor layout requirements are enabled for the backend, one should take +special care to update the layout verifier when adding a new node. +See `TensorLayout.md` for more information. +To extend the example above, if the new node is data parallel, a `.dataParallel()` +line should be added. + During `transformPostLowering()`, this `CPUMaxSplat` node replaces the aforementioned pattern. However, there must be a corresponding instruction for this Node to be lowered to during the IRGen phase. Thus, we need a corresponding diff --git a/docs/NewOperators.md b/docs/NewOperators.md index d52b8dbb08..569568d555 100644 --- a/docs/NewOperators.md +++ b/docs/NewOperators.md @@ -8,6 +8,8 @@ #### High level IR * Create a new Glow high level IR node in `ClassGen/NodeGen.cpp`. Run `ninja all` to generate the node. In the build directory, check `glow/AutoGenNodes.h` to ensure the node has been generated. * Implement the `verify()` method for the new node in `Graph/Nodes.cpp`. +* Implement node layout requirements, if any; see `TensorLayout.md` for details. +Specifically, see the notes section under `Canonical Tensor Layout`. * Implement a node creation method in `Graph/Graph.cpp`. * Implement logic to load model that contains the operator in `Importer/Caffe2ModelLoader.cpp` or `Importer/ONNXModelLoader.cpp` depending on which type of model the operator comes from. Add the operator to `Importer/CommonOperatorLoader.h` instead if the loading logic can be shared between Caffe2 and ONNX. Add as much validation logic as possible here in the loader for the operator because it's crucial to catch errors at this stage. 
Once the operator is loaded, it is assumed that Glow will be able to successfully run the operator, so any issues must be caught here. #### Low level IR diff --git a/docs/TensorLayout.md b/docs/TensorLayout.md index ac96b1c2d7..14e3594ab3 100644 --- a/docs/TensorLayout.md +++ b/docs/TensorLayout.md @@ -108,6 +108,16 @@ derives from `TensorLayoutCommon` and overrides the following functions: - This function takes an operator `Node *node` and returns the layout requirements of the Nth result `n`. - It returns Common layout constraints, for example, `ConvolutionNode` should be in `NHWC` format. +Notes: + +1. Some nodes can accept any layout as input: they are either data parallel, e.g. `Add`, +or, while not data parallel, do not care about the order of dimensions for their operation, +e.g. `ReshapeNodeKind`. When adding new nodes to Glow, such behavior should be explicitly +specified, e.g. by adding `.dataParallel()` in NodeGen. +2. Some nodes propagate the layout information of their input, e.g. the `convertTo` node; +when adding such nodes to Glow, the canonical layout verifier should be made aware of them. +We currently do that in `getNthInputLayoutRequirements`. 
+ ## Placeholders and Constants An important thing to note is that some operators may have a `Placeholder` or diff --git a/lib/Graph/TensorLayout.cpp b/lib/Graph/TensorLayout.cpp index e1e3b6496a..c2b6e420f4 100644 --- a/lib/Graph/TensorLayout.cpp +++ b/lib/Graph/TensorLayout.cpp @@ -455,6 +455,10 @@ std::string TensorLayoutCommon::getNthInputLayoutRequirements(const Node *node, auto input = QN->getInput(); return getNthResultLayoutRequirements(input.getNode(), input.getResNo()); } + if (const auto *CTN = llvm::dyn_cast<ConvertToNode>(node)) { + auto input = CTN->getInput(); + return getNthResultLayoutRequirements(input.getNode(), input.getResNo()); + } if (const auto *QPN = llvm::dyn_cast<QuantizationProfileNode>(node)) { switch (n) { case QuantizationProfileNode::InputIndices::InputIdx: { @@ -478,6 +482,19 @@ static unsigned getInputIdx(const Node *N, NodeValue in) { return N->getNumInputs(); } +/// \returns true if getting the input's layout would cause an infinite loop. +static bool inputDoesNotKnowRequirements(const Node *node) { + switch (node->getKind()) { + case Kinded::Kind::TransposeNodeKind: + case Kinded::Kind::QuantizeNodeKind: + case Kinded::Kind::QuantizationProfileNodeKind: + case Kinded::Kind::ConvertToNodeKind: + return true; + default: + return false; + } +} + std::string TensorLayoutCommon::getNthResultLayoutRequirements(const Node *node, size_t n) { DCHECK_LT(n, node->getNumResults()) << "Wrong output number"; @@ -492,6 +509,9 @@ std::string TensorLayoutCommon::getNthResultLayoutRequirements(const Node *node, } // Dynamically form the layout description for transposes. 
auto input = TN->getInput(); + while (inputDoesNotKnowRequirements(input.getNode())) { + input = input.getNode()->getNthInput(0); + } auto inputLayout = getNthInputLayoutRequirements(node, TransposeNode::InputIdx); auto inputLayoutHelper = TensorLayoutDescription(inputLayout); @@ -524,7 +544,8 @@ std::string TensorLayoutCommon::getNthResultLayoutRequirements(const Node *node, auto result = node->getNthResult(n); auto *user = (*result.getUsers().begin()).getUser(); int inputIdx = getInputIdx(user, result); - if (inputIdx >= user->getNumInputs() || llvm::isa<SaveNode>(user)) { + if (inputDoesNotKnowRequirements(user) || + inputIdx >= user->getNumInputs() || llvm::isa<SaveNode>(user)) { return getLayoutsForDims()[dims.size()].getSerializedLayout(); } auto layout = getNthInputLayoutRequirements(user, inputIdx); diff --git a/tests/unittests/TensorLayoutTest.cpp b/tests/unittests/TensorLayoutTest.cpp index 2306380274..ec344d3a1b 100644 --- a/tests/unittests/TensorLayoutTest.cpp +++ b/tests/unittests/TensorLayoutTest.cpp @@ -16,6 +16,8 @@ #include "BackendTestUtils.h" #include "glow/Backend/Backend.h" +#include "glow/Converter/Float16Converter.h" +#include "glow/Converter/TypeAToTypeBFunctionConverter.h" #include "glow/Graph/Graph.h" #include "glow/Graph/TensorLayout.h" #include "llvm/Support/raw_ostream.h" @@ -91,6 +93,26 @@ TEST_P(TensorLayoutTest, convBadLayout) { EXPECT_FALSE(verifyLayouts(*F_, CanonicalTensorLayout::getInstance(), false)); } +// Check that we propagate the layout information for convertTo nodes: +TEST_P(TensorLayoutTest, convertTo) { + CHECK_IF_ENABLED(); + + auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input", + false, "NWCH"); + auto *resultNCHW = F_->createTranspose("transposeInput", input, NHWC2NCHW); + auto *save = F_->createSave("save", resultNCHW); + bindings_.allocate(save->getPlaceholder()); + + EXPECT_TRUE(verifyLayouts(*F_, CanonicalTensorLayout::getInstance())); + + PrecisionConfiguration precConfig; + TypeAToTypeBFunctionConverter 
converter(*F_, ElemKind::FloatTy, + ElemKind::Float16Ty, precConfig); + converter.convert(); + + EXPECT_TRUE(verifyLayouts(*F_, CanonicalTensorLayout::getInstance())); +} + // Check TensorLayoutDescription's parser with simple input. TEST_P(TensorLayoutTest, parseTestSimple) { CHECK_IF_ENABLED();