Skip to content

Commit de9cbaf

Browse files
committed
fix: Bug in aten::where with differing-shape inputs
- Behavior of Torch-TRT differing from that of Torch in the case where the input tensors to `aten::where` have different rank - Torch automatically broadcasts tensors to the highest-rank variant whereas the TRT Select layer requires tensors of the same rank and throws an error - Add dimension checking and unsqueeze operator to ensure broadcasting is enabled - Add test case to catch error
1 parent 063be0d commit de9cbaf

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

core/conversion/converters/impl/select.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,8 +736,41 @@ auto select_registrations TORCHTRT_UNUSED =
736736
{"aten::where.self(Tensor condition, Tensor self, Tensor other) -> (Tensor)",
737737
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
738738
auto condition = args[0].ITensorOrFreeze(ctx);
739+
auto c_nbDims = condition->getDimensions().nbDims;
739740
auto x = args[1].ITensorOrFreeze(ctx);
741+
auto x_nbDims = x->getDimensions().nbDims;
740742
auto y = args[2].ITensorOrFreeze(ctx);
743+
auto y_nbDims = y->getDimensions().nbDims;
744+
745+
// Get maximum rank of all input tensors
746+
auto max_nbDims = std::max(c_nbDims, std::max(x_nbDims, y_nbDims));
747+
748+
// TensorRT requires all inputs to Select layers to have the same rank, so for each
749+
// tensor input, ensure that its rank is equal to the maximum number of dimensions
750+
// If not, left-pad the tensor dimension with 1s until the max rank is achieved
751+
auto add_reshape = [&ctx, &max_nbDims](nvinfer1::ITensor*& tensor) {
752+
nvinfer1::Dims dimensions = tensor->getDimensions();
753+
754+
// If the rank of this tensor is smaller than the max rank, use reshape
755+
if (dimensions.nbDims < max_nbDims) {
756+
auto shuffle_layer = ctx->net->addShuffle(*tensor);
757+
758+
// For each dimension from the rank of the smaller tensor to the max rank,
759+
// unsqueeze dimensions by 1
760+
for (auto i = dimensions.nbDims; i < max_nbDims; i++) {
761+
dimensions = util::unsqueezeDims(dimensions, 0, 1, false);
762+
}
763+
764+
// Reshape to the unsqueezed dimensions
765+
shuffle_layer->setReshapeDimensions(dimensions);
766+
tensor = shuffle_layer->getOutput(0);
767+
}
768+
};
769+
770+
// Apply reshape to each tensor input
771+
add_reshape(condition);
772+
add_reshape(x);
773+
add_reshape(y);
741774

742775
auto layer = ctx->net->addSelect(*condition, *x, *y);
743776

tests/core/conversion/converters/test_select.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,3 +1224,35 @@ TEST(Converters, WhereConvertsCorrectly) {
12241224

12251225
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
12261226
}
1227+
1228+
TEST(Converters, WhereConvertsMismatchedShapesCorrectly) {
  // Regression test: aten::where must broadcast inputs of differing rank,
  // matching Torch semantics, rather than failing in the TRT Select layer.
  const auto graph = R"IR(
    graph(%condition : Tensor,
          %x : Tensor,
          %y : Tensor):
      %out : Tensor = aten::where(%condition, %x, %y)
      return (%out))IR";

  auto g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, g.get());

  // Inputs deliberately use three different ranks; Torch broadcasts each
  // against the largest-rank tensor ({2, 7, 5}), and TRT must match.
  auto cond_in = at::randint(0, 2, {7, 5}, {at::kCUDA}).to(torch::kBool);
  auto x_in = at::randn({2, 7, 5}, {at::kCUDA});
  auto y_in = at::randn({5}, {at::kCUDA});

  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});

  // Reference result from TorchScript execution.
  auto jit_results = torch_tensorrt::tests::util::RunGraph(
      g, params, {at::clone(cond_in), at::clone(x_in), at::clone(y_in)});

  // Result from the converted TensorRT engine.
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(
      g, params, {at::clone(cond_in), at::clone(x_in), at::clone(y_in)});

  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

0 commit comments

Comments
 (0)