Add comments for adding shape function and linting

Elias Ellison · Elias Ellison · commit f7c94ddc5fef · 2022-03-29T14:15:29.000-07:00
ghstack-source-id: 71302d6 Pull Request resolved: #73570
diff --git a/test/cpp/jit/test_misc.cpp b/test/cpp/jit/test_misc.cpp
@@ -2973,6 +2973,15 @@ TEST(TestFunctionExecutor, RunDecompositionTest) {
   }
 }
 
+TEST(TestShapeGraphLinting, Basic) {
+  auto schemas = RegisteredShapeComputeSchemas();
+  for (const auto& schema : schemas) {
+    auto g = shapeComputeGraphForSchema(*schema);
+    TORCH_INTERNAL_ASSERT(g);
+    LintShapeComputeGraph(schema, *g);
+  }
+}
+
 // TODO: move to test_kernel when global settings are explicit
 // fusion parameters
 class Composed : public ::testing::Test {
diff --git a/test/jit/test_symbolic_shape_analysis.py b/test/jit/test_symbolic_shape_analysis.py
@@ -12,6 +12,7 @@
 )
 from torch.testing._internal.common_utils import make_tensor
 from torch.testing._internal.jit_utils import JitTestCase, execWrapper
+from typing import List, Any
 
 if __name__ == '__main__':
     raise RuntimeError("This test file is not meant to be run directly, use:\n\n"
@@ -498,3 +499,37 @@ def test_shape_function_includes(self):
         m2_shape = [20, 10]
         res = torch.jit._shapes.matmul(m1_shape, m2_shape)
         self.assertEqual(res, [10, 10])
+
+    def test_register_function_error_checking(self):
+        # Each registration below fails linting before the graph is stored in
+        # the global schema map, so no existing schema mapping is overwritten.
+        @torch.jit.script
+        def foo(x, y):
+            return x + y
+
+        node = foo.graph.findNode("aten::add")
+
+        @torch.jit.script
+        def wrong_input_types(x, y):
+            x: List[int] = []
+            return x
+        with self.assertRaisesRegex(RuntimeError, "Expected supertype of int"):
+            torch._C._jit_register_shape_compute_graph_for_node(node, wrong_input_types.graph)
+
+        @torch.jit.script
+        def wrong_output_types(x: List[int], y: List[int]):
+            x: List[Tensor] = []
+            return x
+
+        with self.assertRaisesRegex(RuntimeError, "but got graph_type"):
+            torch._C._jit_register_shape_compute_graph_for_node(node, wrong_output_types.graph)
+
+        @torch.jit.script
+        def too_many_inputs(x: List[int], y: List[int], z: Any, z2: Any):
+            x: List[int] = []
+            return x
+
+        with self.assertRaises(RuntimeError) as error:
+            torch._C._jit_register_shape_compute_graph_for_node(node, too_many_inputs.graph)
+
+        self.assertTrue("fewer arguments than schema" in str(error.exception))
diff --git a/torch/csrc/jit/python/init.cpp b/torch/csrc/jit/python/init.cpp
@@ -170,6 +170,19 @@ void initJITBindings(PyObject* module) {
             return DecompositionGraphForSchema(n->schema());
           })
       .def("_jit_pass_run_decompositions", RunDecompositions)
+      // Takes a Node* rather than a FunctionSchema: a schema looked up in
+      // Python and passed in would be a different pointer than the globally
+      // registered schema that is used as the cache key.
+      .def(
+          "_jit_register_shape_compute_graph_for_node",
+          [](Node* n, std::shared_ptr<Graph>& graph) {
+            if (n->maybeSchema()) {
+              const FunctionSchema& schema = n->schema();
+              RegisterShapeComputeGraphForSchema(schema, graph);
+            } else {
+              TORCH_INTERNAL_ASSERT(false, "Expected schema", n);
+            }
+          })
       .def("_jit_pass_propagate_shapes_on_graph", PropagateShapesOnGraph)
       .def(
           "_jit_pass_propagate_shapes_on_graph_and_build_compute",
diff --git a/torch/csrc/jit/runtime/shape_functions.h b/torch/csrc/jit/runtime/shape_functions.h
@@ -343,7 +343,7 @@ def conv2d(
 
 def batch_norm(
     input: List[int],
-    weight: List[int],
+    weight: Optional[List[int]],
     bias: Optional[List[int]],
     running_mean: Optional[List[int]],
     running_var: Optional[List[int]],
diff --git a/torch/csrc/jit/runtime/symbolic_shape_registry.cpp b/torch/csrc/jit/runtime/symbolic_shape_registry.cpp
@@ -1,6 +1,9 @@
+#include <c10/util/Exception.h>
 #include <torch/csrc/jit/frontend/ir_emitter.h>
+#include <torch/csrc/jit/ir/ir_views.h>
 #include <torch/csrc/jit/jit_log.h>
 #include <torch/csrc/jit/passes/inliner.h>
+#include <torch/csrc/jit/runtime/graph_iterator.h>
 #include <torch/csrc/jit/runtime/operator.h>
 #include <torch/csrc/jit/runtime/symbolic_shape_registry.h>
 #include <torch/csrc/jit/runtime/symbolic_shape_registry_util.h>
@@ -160,26 +163,121 @@ const at::optional<const FunctionSchema*> getInplaceVariant(
   return at::nullopt;
 }
 
-void registerSchema(
-    const FunctionSchema* schema_string,
-    const std::string& shape_compute_function_name,
-    std::unordered_map<std::string, std::shared_ptr<Graph>>& reused_functions,
-    const CompilationUnit& module) {
-  if (reused_functions.count(shape_compute_function_name)) {
-    auto graph = reused_functions[shape_compute_function_name];
+TypePtr mapTensorToListOfInts(TypePtr type) {
+  if (type->cast<TensorType>()) {
+    return ListType::ofInts();
+  }
+  at::ArrayRef<TypePtr> contained = type->containedTypes();
+  if (contained.empty()) {
+    return type;
+  }
+  return type->withContained(
+      fmap(type->containedTypes(), mapTensorToListOfInts));
+}
 
-    // allow extra unused arguments to map multiple functions to e.g. unary
+void checkForWhileLoop(
+    const FunctionSchema* schema,
+    std::shared_ptr<Graph> graph) {
+  DepthFirstGraphNodeIterator graph_it(graph);
+  for (auto* node = graph_it.next(); node != nullptr; node = graph_it.next()) {
+    if (node->kind() != prim::Loop) {
+      continue;
+    }
+    LoopView loop(node);
+    if (loop.loopType() != LoopView::For) {
+      TORCH_WARN(
+          "While loops are not yet implemented in unrolling which may make this shape function difficult to partially evaluate: ",
+          *node,
+          " for schema ",
+          *schema);
+    }
+  }
+}
+
+void checkInputReturnedAsOutput(
+    const FunctionSchema* schema,
+    const std::shared_ptr<Graph>& graph) {
+  // The alias db could be used here as well, but it is imprecise, so its
+  // results could only be reported as a warning rather than an error.
+  for (size_t i : c10::irange(graph->inputs().size())) {
+    Value* input = graph->inputs().at(i);
+    for (size_t j : c10::irange(graph->outputs().size())) {
+      Value* output = graph->outputs().at(j);
+      TORCH_CHECK(
+          input != output,
+          "For schema: ",
+          *schema,
+          " input index ",
+          i,
+          " is returned as output index ",
+          j,
+          ". Shape functions must return new unaliased lists");
+    }
+  }
+}
+
+void checkInputAndOutputTypes(
+    const FunctionSchema* schema,
+    const std::shared_ptr<Graph>& graph) {
+  // Trailing schema arguments may be omitted, so one graph can serve many ops
+  TORCH_CHECK(
+      graph->inputs().size() <= schema->arguments().size(),
+      "Shape function must have fewer arguments than schema. Got ",
+      graph->inputs().size(),
+      " graph arguments and ",
+      schema->arguments().size(),
+      " schema arguments of schema: ",
+      *schema);
+
+  for (auto i : c10::irange(graph->inputs().size())) {
+    auto inp_type = schema->arguments().at(i).type();
+    auto mapped_type = mapTensorToListOfInts(inp_type);
+    auto graph_type = graph->inputs().at(i)->type();
     TORCH_INTERNAL_ASSERT(
-        graph->inputs().size() <= schema_string->arguments().size());
+        mapped_type->isSubtypeOf(graph->inputs().at(i)->type()),
+        "For schema type: ",
+        inp_type->str(),
+        " Expected supertype of ",
+        mapped_type->str(),
+        " but got graph_type ",
+        graph_type->str(),
+        " at index ",
+        i,
+        " of schema: ",
+        *schema);
+  }
 
-    cached_schema_to_graph[schema_string] = graph;
-    return;
+  TORCH_CHECK(
+      graph->outputs().size() == schema->returns().size(),
+      "Shape function equal number of outputs as schema. Got ",
+      graph->outputs().size(),
+      " graph outputs and ",
+      schema->returns().size(),
+      " schema returns of schema: ",
+      *schema);
+
+  for (auto i : c10::irange(schema->returns().size())) {
+    auto out_type = schema->returns().at(i).type();
+    auto mapped_type = mapTensorToListOfInts(out_type);
+    auto graph_type = graph->outputs().at(i)->type();
+    TORCH_INTERNAL_ASSERT(
+        mapped_type->isSubtypeOf(graph->outputs().at(i)->type()),
+        "For schema type: ",
+        out_type->str(),
+        " Expected supertype of ",
+        mapped_type->str(),
+        " but got graph_type ",
+        graph_type->str(),
+        " at output index ",
+        i,
+        " of schema: ",
+        *schema);
   }
+}
 
-  Function& shape_compute_function =
-      module.get_function(shape_compute_function_name);
-  std::shared_ptr<Graph> graph =
-      toGraphFunction(shape_compute_function).graph();
+void transformShapeFunction(
+    const FunctionSchema* schema_string,
+    std::shared_ptr<Graph> graph) {
   Inline(*graph);
 
   // ATEN operators can return multiple unboxed values, this in contrast to
@@ -197,9 +295,33 @@ void registerSchema(
       graph->registerOutput(v);
     }
   }
-  // allow extra unused arguments to map multiple functions to e.g. unary
-  TORCH_INTERNAL_ASSERT(
-      graph->inputs().size() <= schema_string->arguments().size());
+}
+
+void registerSchema(
+    const FunctionSchema* schema_string,
+    const std::string& shape_compute_function_name,
+    std::unordered_map<std::string, std::shared_ptr<Graph>>& reused_functions,
+    const CompilationUnit& module) {
+  if (reused_functions.count(shape_compute_function_name)) {
+    auto graph = reused_functions[shape_compute_function_name];
+
+    // Trailing schema arguments may be omitted, so one graph can serve many ops
+    TORCH_INTERNAL_ASSERT(
+        graph->inputs().size() <= schema_string->arguments().size());
+
+    cached_schema_to_graph[schema_string] = graph;
+    return;
+  }
+
+  Function& shape_compute_function =
+      module.get_function(shape_compute_function_name);
+  std::shared_ptr<Graph> graph =
+      toGraphFunction(shape_compute_function).graph();
+
+  transformShapeFunction(schema_string, graph);
+  // NB: shape functions registered in source are linted from a test
+  // (TestShapeGraphLinting in test/cpp/jit/test_misc.cpp) rather than here:
+  // LintShapeComputeGraph(schema_string, graph);
 
   cached_schema_to_graph[schema_string] = graph;
   reused_functions[shape_compute_function_name] = graph;
@@ -299,8 +421,34 @@ void RegisterShapeComputeGraphForSchema(
   if (cached_schema_to_graph.size() == 0) {
     loadFunctions();
   }
+  transformShapeFunction(&schema, g);
+  LintShapeComputeGraph(&schema, g);
+
   cached_schema_to_graph[&schema] = g;
 }
 
+std::vector<const FunctionSchema*> RegisteredShapeComputeSchemas() {
+  std::lock_guard<std::mutex> guard(lock);
+  if (cached_schema_to_graph.size() == 0) {
+    loadFunctions();
+  }
+
+  std::vector<const FunctionSchema*> schemas;
+  schemas.reserve(cached_schema_to_graph.size());
+  for (const auto& pair : cached_schema_to_graph) {
+    schemas.push_back(pair.first);
+  }
+  return schemas;
+}
+
+void LintShapeComputeGraph(
+    const FunctionSchema* schema,
+    const std::shared_ptr<Graph>& graph) {
+  checkInputAndOutputTypes(schema, graph);
+  checkForWhileLoop(schema, graph);
+  checkInputReturnedAsOutput(schema, graph);
+  // TODO: other checks? E.g. list ops which we don't symbolically optimize.
+}
+
 } // namespace jit
 } // namespace torch
diff --git a/torch/csrc/jit/runtime/symbolic_shape_registry.h b/torch/csrc/jit/runtime/symbolic_shape_registry.h
@@ -8,12 +8,56 @@
 namespace torch {
 namespace jit {
 
+/*
+ADDING A NEW SHAPE GRAPH:
+- Each node schema has exactly one registered shape compute graph. The graph's
+signature should match the operator schema, except that every Tensor input in
+the operator schema is replaced by a List[int] holding that Tensor's shape.
+For example: "aten::linear(Tensor input, Tensor weight, Tensor? bias=None) ->
+Tensor" ==> def linear(input: List[int], weight: List[int],
+bias: Optional[List[int]])
+
+Additionally, trailing schema arguments that the shape function does not use
+may be left off. This allows sharing a single graph for multiple function
+schemas, such as unary operators with different trailing arguments that do not
+affect the output shape.
+
+The shape graph should return a new, unaliased List[int] (or tuple of lists for
+multiple returns) and should not modify any input lists. This allows the shape
+graphs to be composed and executed.
+
+The shape analysis (particularly for non-complete, or symbolic, shapes) works
+by partially evaluating the JIT IR. It is possible to register a Graph that we
+cannot currently partially evaluate. If this happens, please file an issue.
+There are lints registered to catch particular known patterns (continue, break,
+or early return in a loop). These may be improved in the future; please file an
+issue if necessary.
+
+To write and debug a shape function, the recommended flow is to define it in
+Python and iterate there. Functions in `shape_functions.h` and
+`shape_functions_1.h` should be executable in Python.
+
+To test operators, the preferred flow is through OpInfos, with
+`assert_jit_shape_analysis=True`. If this is not feasible, you can look at tests
+in `test_symbolic_shape_analysis.py` such as `test_adaptive_avg_pool2d`.
+
+Operators which take in a list of tensors, such as concat, are not yet
+supported. Concat has been special cased and could be generalized as needed.
+Please file an issue.
+*/
+
 TORCH_API void RegisterShapeComputeGraphForSchema(
     const FunctionSchema& schema,
     std::shared_ptr<Graph> g);
 
 TORCH_API c10::optional<std::shared_ptr<Graph>> shapeComputeGraphForSchema(
     const FunctionSchema& schema);
 
+TORCH_API std::vector<const FunctionSchema*> RegisteredShapeComputeSchemas();
+
+TORCH_API void LintShapeComputeGraph(
+    const FunctionSchema* schema,
+    const std::shared_ptr<Graph>& graph);
+
 } // namespace jit
 } // namespace torch
diff --git a/torch/csrc/jit/runtime/symbolic_shape_registry_util.cpp b/torch/csrc/jit/runtime/symbolic_shape_registry_util.cpp
@@ -118,7 +118,8 @@ const OperatorMap<std::string>& get_tensorexpr_elementwise_set() {
       {"aten::where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor", "broadcast_one_three"},
       // TODO: enable slice, shape inference is not implemented for this op yet
   };
-  return tensorexpr_elementwise_set;
+ // clang-format on
+ return tensorexpr_elementwise_set;
 }
 
 }

Original file line number	Diff line number	Diff line change
`@@ -118,7 +118,8 @@ const OperatorMap<std::string>& get_tensorexpr_elementwise_set() {`
`118`	`118`	`{"aten::where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor", "broadcast_one_three"},`
`119`	`119`	`// TODO: enable slice, shape inference is not implemented for this op yet`
`120`	`120`	`};`
`121`		`- return tensorexpr_elementwise_set;`
	`121`	`+ // clang-format on`
	`122`	`+ return tensorexpr_elementwise_set;`
`122`	`123`	`}`
`123`	`124`
`124`	`125`	`}`