From 8e60a54e0dd5e57b56f9bcc4e11d2a725c629cc3 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Tue, 18 Oct 2022 10:46:35 -0700 Subject: [PATCH 01/11] feat: Initial implementation of dynamic shapes + fallback Signed-off-by: Dheeraj Peri --- core/compiler.cpp | 26 ++++++------ core/partitioning/partitioning.cpp | 14 ++++++- core/partitioning/partitioning.h | 8 ++-- .../partitioningctx/PartitioningCtx.h | 1 + .../segmentedblock/SegmentedBlock.cpp | 21 ++++++++++ .../segmentedblock/SegmentedBlock.h | 22 +++++++--- core/partitioning/shape_analysis.cpp | 41 +++++++++++-------- tests/cpp/BUILD | 17 ++++++++ 8 files changed, 110 insertions(+), 40 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index 118ca7aa1c..efba438be0 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -137,10 +137,10 @@ partitioning::GraphAndMapping BuildHybridGraph( auto partitioning_info = cfg.partitioning_info; auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info); - auto collection_input_ivalues_map = - partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types); - - partitioning::partition(&partitioning_ctx, collection_input_ivalues_map); + // auto collection_input_ivalues_map = + // partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types); + partitioning_ctx.input_types_map = first_use_types; + partitioning::partition(&partitioning_ctx); for (auto& partitioned_block : partitioning_ctx.partitioned_blocks) { partitioning::PartitionedGraph& segmented_blocks = partitioned_block.second; @@ -151,14 +151,16 @@ partitioning::GraphAndMapping BuildHybridGraph( trt_engine_id << reinterpret_cast(&seg_block); if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) { - auto shapes = seg_block.in_shapes(); - auto types = seg_block.in_types(); - std::vector inputs; - for (size_t i = 0; i < shapes.size(); i++) { - auto in = ir::Input(shapes[i]); - in.dtype = util::ScalarTypeToTRTDataType(types[i]); - inputs.push_back(in); - } + // auto shapes = seg_block.in_shapes(); + // auto types = seg_block.in_types(); + // std::vector inputs; + // for (size_t i = 0; i < shapes.size(); i++) { + // auto in = ir::Input(shapes[i]); + // in.dtype = util::ScalarTypeToTRTDataType(types[i]); + // inputs.push_back(in); + // } + auto inputs = seg_block.construct_inputs_spec(); + LOG_DEBUG("============ INPUTS: " << inputs); // update the input ranges for each segments convert_info.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params); diff --git a/core/partitioning/partitioning.cpp b/core/partitioning/partitioning.cpp index eb8c86de50..f97963d5d2 100644 --- a/core/partitioning/partitioning.cpp +++ b/core/partitioning/partitioning.cpp @@ -436,7 +436,7 @@ void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block) { return; } -void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map) { +void partition(PartitioningCtx* ctx) { LOG_DEBUG(ctx->settings); // Go through all the blocks to do the partitioning @@ -453,7 +453,17 @@ void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map) { registerSegmentsOutputs(ctx, block); // run shape analysis on each segmented block - runShapeAnalysis(ctx, block, example_tensor_map); + auto min_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "min"); + auto opt_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, 
ctx->input_types_map, "opt"); + auto max_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "max"); + + runShapeAnalysis(ctx, block, min_input_ivalues_map, "min"); + runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt"); + runShapeAnalysis(ctx, block, max_input_ivalues_map, "max"); + } } diff --git a/core/partitioning/partitioning.h b/core/partitioning/partitioning.h index 3038f6c52f..714bd9f030 100644 --- a/core/partitioning/partitioning.h +++ b/core/partitioning/partitioning.h @@ -13,20 +13,20 @@ namespace torch_tensorrt { namespace core { namespace partitioning { -typedef std::unordered_map ExampleIValues; +typedef std::unordered_map ExampleIValues; typedef std::pair, std::unordered_map> GraphAndMapping; -ExampleIValues generateRandomInputs(ir::CollectionInputSpecMap& input_ranges, ir::CollectionTypeMap& input_types); +ExampleIValues generateRandomInputs(ir::CollectionInputSpecMap& input_ranges, ir::CollectionTypeMap& input_types, const std::string& shape_mode = std::string("opt")); -void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& ivalues_maps); +void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& ivalues_maps, const std::string& shape_mode); void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block); GraphAndMapping stitch(PartitioningCtx* ctx, torch::jit::Block* block); -void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map); +void partition(PartitioningCtx* ctx); } // namespace partitioning } // namespace core diff --git a/core/partitioning/partitioningctx/PartitioningCtx.h b/core/partitioning/partitioningctx/PartitioningCtx.h index ed8e705be5..6f47015372 100644 --- a/core/partitioning/partitioningctx/PartitioningCtx.h +++ b/core/partitioning/partitioningctx/PartitioningCtx.h @@ -60,6 +60,7 @@ struct PartitioningCtx { bool shouldNodeRunInTorch(torch::jit::Node* n); bool shouldNodeRunInTensorRT(torch::jit::Node* n); std::vector getNodesRunInTorch(); + std::unordered_map>> input_types_map; private: void _load_nodes_into_decision_map(torch::jit::Block* b); diff --git a/core/partitioning/segmentedblock/SegmentedBlock.cpp b/core/partitioning/segmentedblock/SegmentedBlock.cpp index 6a370c83ad..cbcdbc7faa 100644 --- a/core/partitioning/segmentedblock/SegmentedBlock.cpp +++ b/core/partitioning/segmentedblock/SegmentedBlock.cpp @@ -1,4 +1,5 @@ #include "SegmentedBlock.h" +#include "core/util/prelude.h" namespace torch_tensorrt { namespace core { @@ -56,6 +57,26 @@ torch::jit::Value* SegmentedBlock::getOrAddInputForValue(torch::jit::Value* old_ } } +std::vector SegmentedBlock::construct_inputs_spec() const { + std::vector inputs; + if (min_shapes_.size() == opt_shapes_.size() && opt_shapes_.size() == max_shapes_.size()){ + LOG_DEBUG("====== IS DYNAMIC ===="); + for (uint64_t i=0; i < opt_shapes_.size(); i++){ + auto in = ir::Input(min_shapes_[i], opt_shapes_[i], max_shapes_[i]); + in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]); + inputs.push_back(in); + } + } else { + LOG_DEBUG("====== IS STATIC ===="); + for (uint64_t i=0; i < opt_shapes_.size(); i++){ + auto in = ir::Input(opt_shapes_[i]); + in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]); + inputs.push_back(in); + } + } + return inputs; +} + torch::jit::Node* SegmentedBlock::cloneNode(torch::jit::Node* node) { auto* block = g_->block(); auto env = [&](torch::jit::Value* v) { return getOrAddInputForValue(v); }; diff --git 
a/core/partitioning/segmentedblock/SegmentedBlock.h b/core/partitioning/segmentedblock/SegmentedBlock.h index 0e04237f63..0138cc0059 100644 --- a/core/partitioning/segmentedblock/SegmentedBlock.h +++ b/core/partitioning/segmentedblock/SegmentedBlock.h @@ -35,6 +35,7 @@ struct SegmentedBlock { SegmentedBlock(BlockID id, SegmentedBlockTarget blk_target, const std::vector& nodes); torch::jit::Value* getOrAddInputForValue(torch::jit::Value* v); + std::vector construct_inputs_spec() const; torch::jit::Node* cloneNode(torch::jit::Node* node); void appendNode(torch::jit::Node* n) { cloneNode(n); @@ -72,18 +73,25 @@ struct SegmentedBlock { bool contain_raw_value(torch::jit::Value* input) const { return old_to_new_.count(input); } - void register_inshapes(std::vector& in_shapes) { - in_shapes_ = in_shapes; - } - const std::vector& in_shapes() const { - return in_shapes_; + void register_inshapes(std::vector>& in_shapes, const std::string& shape_mode) { + if (shape_mode.compare("min") == 0){ + min_shapes_ = in_shapes; + } else if(shape_mode.compare("opt") == 0){ + opt_shapes_ = in_shapes; + } else{ + max_shapes_ = in_shapes; + } } + // const std::vector& in_shapes() const { + // return in_shapes_; + // } void register_intypes(std::vector& in_types) { in_types_ = in_types; } const std::vector& in_types() const { return in_types_; } + void update_id(BlockID new_id) { id_ = new_id; } @@ -99,7 +107,9 @@ struct SegmentedBlock { private: BlockID id_; SegmentedBlockTarget target_; - std::vector in_shapes_; + std::vector> min_shapes_; + std::vector> opt_shapes_; + std::vector> max_shapes_; std::vector in_types_; std::vector inputs_; std::vector outputs_; diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index 21696e8204..de2c754748 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -9,25 +9,33 @@ namespace torch_tensorrt { namespace core { namespace partitioning { -at::Tensor generateSingleInput(ir::Input& input, c10::optional& type_opt) { - auto cur_shape = input.input_shape; - std::vector shape; - shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims); - // auto type_opt = types[input.first][i]; +at::Tensor generateSingleInput(ir::Input& input, c10::optional& type_opt, const std::string& shape_mode) { + nvinfer1::Dims input_shape = input.input_shape; + if (input.input_is_dynamic){ + if (shape_mode.compare("min") == 0){ + input_shape = input.min; + } else if(shape_mode.compare("opt") == 0){ + input_shape = input.opt; + } else { + input_shape = input.max; + } + } + auto type = at::kFloat; if (type_opt) { type = type_opt.value(); } else { LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32"); } - auto in = at::randint(5, shape, {at::kCUDA}).to(type); - // ivalue_map[input.first] = in.clone(); + auto in = at::randint(5, util::toVec(input_shape), {at::kCUDA}).to(type); + return in; } std::unordered_map generateRandomInputs( std::unordered_map>& inputs, - std::unordered_map>>& types) { + std::unordered_map>>& types, + const std::string& shape_mode) { // generate random inputs for running pytorch segments std::unordered_map ivalue_map; @@ -36,7 +44,7 @@ std::unordered_map generateRandomI c10::TypePtr elementType = c10::TensorType::get(); auto generic_list = c10::impl::GenericList(elementType); for (size_t i = 0; i < input.second.size(); i++) { - auto in = generateSingleInput(input.second[i], types[input.first][i]); + auto in = 
generateSingleInput(input.second[i], types[input.first][i], shape_mode); generic_list.push_back(in.clone()); } ivalue_map[input.first] = c10::IValue(generic_list); @@ -44,13 +52,13 @@ std::unordered_map generateRandomI // create tuple std::vector list; for (size_t i = 0; i < input.second.size(); i++) { - auto in = generateSingleInput(input.second[i], types[input.first][i]); + auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode); list.push_back(in.clone()); } auto tuple = c10::ivalue::Tuple::create(list); // create tuple ptr ivalue_map[input.first] = c10::IValue(tuple); } else { - auto in = generateSingleInput(input.second[0], types[input.first][0]); + auto in = generateSingleInput(input.second[0], types[input.first][0], shape_mode); ivalue_map[input.first] = in.clone(); } } @@ -60,7 +68,8 @@ std::unordered_map generateRandomI void getSegmentsOutputByRunning( SegmentedBlock& seg_block, std::unordered_map& ivalues_maps, - const PartitioningInfo& partitioning_info) { + const PartitioningInfo& partitioning_info, + const std::string& shape_mode) { // create a module to run the graph auto g = seg_block.g(); auto copy_g = g->copy(); @@ -141,7 +150,7 @@ void getSegmentsOutputByRunning( } // set input shape for each segmented block so we wil use it in conversion process - std::vector input_shapes; + std::vector> input_shapes; std::vector input_types; for (auto& i : seg_block.raw_inputs()) { if (ivalues_maps[i].isTensor()) { @@ -175,15 +184,15 @@ void getSegmentsOutputByRunning( // TODO: tuple and list inputs in subgraph } - seg_block.register_inshapes(input_shapes); + seg_block.register_inshapes(input_shapes, shape_mode); seg_block.register_intypes(input_types); } -void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map) { +void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map, const std::string& shape_mode) { // register every segment's input shape, and it's running output IValues for (auto& seg_block : ctx->partitioned_blocks[block]) { torch::jit::ConstantPooling(seg_block.g()); - getSegmentsOutputByRunning(seg_block, example_tensor_map, ctx->settings); + getSegmentsOutputByRunning(seg_block, example_tensor_map, ctx->settings, shape_mode); } return; } diff --git a/tests/cpp/BUILD b/tests/cpp/BUILD index 3d56682189..6c75ed3d59 100644 --- a/tests/cpp/BUILD +++ b/tests/cpp/BUILD @@ -17,6 +17,7 @@ test_suite( ":test_default_input_types", ":test_example_tensors", ":test_module_fallback", + ":test_dynamic_fallback", ":test_modules_as_engines", ":test_multiple_registered_engines", ":test_runtime_thread_safety", @@ -32,6 +33,7 @@ test_suite( ":test_default_input_types", ":test_example_tensors", ":test_module_fallback", + ":test_dynamic_fallback", ":test_modules_as_engines", ":test_multiple_registered_engines", ":test_runtime_thread_safety", @@ -125,6 +127,21 @@ cc_test( }), ) +cc_test( + name = "test_dynamic_fallback", + srcs = ["test_dynamic_fallback.cpp"], + data = [ + "//tests/modules:jit_models", + ], + deps = [ + "//tests/util", + "@googletest//:gtest_main", + ] + select({ + ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"], + "//conditions:default": ["@libtorch//:libtorch"], + }), +) + cc_test( name = "test_collections", srcs = ["test_collections.cpp"], From 86982e1a53aad82b3bfd561beccdc0d2f6041779 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 19 Oct 2022 10:56:19 -0700 Subject: [PATCH 02/11] chore: Add test case Signed-off-by: Dheeraj Peri --- 
tests/cpp/test_dynamic_fallback.cpp | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/cpp/test_dynamic_fallback.cpp diff --git a/tests/cpp/test_dynamic_fallback.cpp b/tests/cpp/test_dynamic_fallback.cpp new file mode 100644 index 0000000000..77f5fa9b75 --- /dev/null +++ b/tests/cpp/test_dynamic_fallback.cpp @@ -0,0 +1,32 @@ +#include <string> +#include "gtest/gtest.h" +#include "tests/util/util.h" +#include "torch/script.h" +#include "torch_tensorrt/torch_tensorrt.h" + +TEST(CppAPITest, ResNet50DynamicFallbackGraphCorrectly) { + torch::jit::script::Module mod; + try { + mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + ASSERT_TRUE(false); + } + + const std::vector<std::vector<int64_t>> input_shapes = {{1, 3, 224, 224}, {4, 3, 224, 224}, {8, 3, 224, 224}}; + std::vector<torch::jit::IValue> jit_inputs_ivalues; + std::vector<torch::jit::IValue> trt_inputs_ivalues; + auto in = at::randint(5, input_shapes[0], {at::kCUDA}); + jit_inputs_ivalues.push_back(in.clone()); + trt_inputs_ivalues.push_back(in.clone()); + + std::vector<torch_tensorrt::Input> inputs; + inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2])); + torch_tensorrt::ts::CompileSpec cfg(inputs); + cfg.torch_executed_ops.push_back("aten::add"); + + auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); + auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results)); +} From 375bdfc3dee1ae37b520e662a6f59ecf308abd9c Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 19 Oct 2022 14:23:44 -0700 Subject: [PATCH 03/11] chore: Clean up and refactor code Signed-off-by: Dheeraj Peri --- core/compiler.cpp | 11 ----- core/partitioning/partitioning.cpp | 47 ++++++++++++++----- core/partitioning/partitioning.h | 13 +++-- .../segmentedblock/SegmentedBlock.cpp | 8 ++-- .../segmentedblock/SegmentedBlock.h | 6 +-- core/partitioning/shape_analysis.cpp | 17 +++++-- tests/cpp/BUILD | 4 +- tests/cpp/test_dynamic_fallback.cpp | 37 ++++++++++++--- 8 files changed, 95 insertions(+), 48 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index efba438be0..558cb82af6 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -137,8 +137,6 @@ partitioning::GraphAndMapping BuildHybridGraph( auto partitioning_info = cfg.partitioning_info; auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info); - // auto collection_input_ivalues_map = - // partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types); partitioning_ctx.input_types_map = first_use_types; partitioning::partition(&partitioning_ctx); @@ -151,16 +149,7 @@ partitioning::GraphAndMapping BuildHybridGraph( trt_engine_id << reinterpret_cast<const int*>(&seg_block); if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) { - // auto shapes = seg_block.in_shapes(); - // auto types = seg_block.in_types(); - // std::vector<ir::Input> inputs; - // for (size_t i = 0; i < shapes.size(); i++) { - // auto in = ir::Input(shapes[i]); - // in.dtype = util::ScalarTypeToTRTDataType(types[i]); - // inputs.push_back(in); - // } auto inputs = seg_block.construct_inputs_spec(); - LOG_DEBUG("============ INPUTS: " << inputs); // update the input ranges for each segments convert_info.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params); diff --git a/core/partitioning/partitioning.cpp 
b/core/partitioning/partitioning.cpp index f97963d5d2..7cd273889d 100644 --- a/core/partitioning/partitioning.cpp +++ b/core/partitioning/partitioning.cpp @@ -436,6 +436,20 @@ void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block) { return; } +bool isInputDynamic(PartitioningCtx* ctx) { + // Check if inputs have dynamic shapes + bool input_is_dynamic = true; + auto inputs_map = ctx->settings.collection_input_spec_map; + for (auto inputs : inputs_map) { + for (auto input : inputs.second) { + if (!input.input_is_dynamic) { + input_is_dynamic = false; + } + } + } + return input_is_dynamic; +} + void partition(PartitioningCtx* ctx) { LOG_DEBUG(ctx->settings); @@ -446,24 +460,33 @@ void partition(PartitioningCtx* ctx) { // It's possible that some TensorRT blocks have nonTensor inputs/output because they are interleaved by Torch blocks // resolve nonTensor inputs/outputs + LOG_DEBUG("Resolving non-tensor inputs for segmented blocks"); resolveTRTNonTensorInputs(ctx, block); // register input/output torch::jit::Value for segmented graphs LOG_DEBUG("Registering input/output torch::jit::Value for segmented graphs"); registerSegmentsOutputs(ctx, block); - // run shape analysis on each segmented block - auto min_input_ivalues_map = - partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "min"); - auto opt_input_ivalues_map = - partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt"); - auto max_input_ivalues_map = - partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "max"); - - runShapeAnalysis(ctx, block, min_input_ivalues_map, "min"); - runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt"); - runShapeAnalysis(ctx, block, max_input_ivalues_map, "max"); - + // In case of dynamic shape inputs, run shape analysis on each segmented block for min/opt/max ranges and register + // output shapes for each block accordingly + if (isInputDynamic(ctx)) { + LOG_DEBUG("Performing shape analysis for segmented blocks using min/opt/max shapes for inputs"); + auto min_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "min"); + auto opt_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt"); + auto max_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "max"); + + runShapeAnalysis(ctx, block, min_input_ivalues_map, "min"); + runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt"); + runShapeAnalysis(ctx, block, max_input_ivalues_map, "max"); + } else { + LOG_DEBUG("Performing shape analysis for segmented blocks using static shapes for inputs"); + auto opt_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt"); + runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt"); + } } } diff --git a/core/partitioning/partitioning.h b/core/partitioning/partitioning.h index 714bd9f030..acdfeaf39b 100644 --- a/core/partitioning/partitioning.h +++ b/core/partitioning/partitioning.h @@ -18,9 +18,16 @@ typedef std::unordered_map ExampleIValues typedef std::pair<std::shared_ptr<torch::jit::Graph>, std::unordered_map<torch::jit::Value*, torch::jit::Value*>> GraphAndMapping; -ExampleIValues generateRandomInputs(ir::CollectionInputSpecMap& input_ranges, ir::CollectionTypeMap& input_types, const std::string& shape_mode = std::string("opt")); - -void 
runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& ivalues_maps, const std::string& shape_mode); +ExampleIValues generateRandomInputs( + ir::CollectionInputSpecMap& input_ranges, + ir::CollectionTypeMap& input_types, + const std::string& shape_mode = std::string("opt")); + +void runShapeAnalysis( + PartitioningCtx* ctx, + torch::jit::Block* block, + ExampleIValues& ivalues_maps, + const std::string& shape_mode); void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block); diff --git a/core/partitioning/segmentedblock/SegmentedBlock.cpp b/core/partitioning/segmentedblock/SegmentedBlock.cpp index cbcdbc7faa..583e67ca4d 100644 --- a/core/partitioning/segmentedblock/SegmentedBlock.cpp +++ b/core/partitioning/segmentedblock/SegmentedBlock.cpp @@ -59,16 +59,14 @@ torch::jit::Value* SegmentedBlock::getOrAddInputForValue(torch::jit::Value* old_ std::vector SegmentedBlock::construct_inputs_spec() const { std::vector inputs; - if (min_shapes_.size() == opt_shapes_.size() && opt_shapes_.size() == max_shapes_.size()){ - LOG_DEBUG("====== IS DYNAMIC ===="); - for (uint64_t i=0; i < opt_shapes_.size(); i++){ + if (min_shapes_.size() == opt_shapes_.size() && opt_shapes_.size() == max_shapes_.size()) { + for (uint64_t i = 0; i < opt_shapes_.size(); i++) { auto in = ir::Input(min_shapes_[i], opt_shapes_[i], max_shapes_[i]); in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]); inputs.push_back(in); } } else { - LOG_DEBUG("====== IS STATIC ===="); - for (uint64_t i=0; i < opt_shapes_.size(); i++){ + for (uint64_t i = 0; i < opt_shapes_.size(); i++) { auto in = ir::Input(opt_shapes_[i]); in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]); inputs.push_back(in); diff --git a/core/partitioning/segmentedblock/SegmentedBlock.h b/core/partitioning/segmentedblock/SegmentedBlock.h index 0138cc0059..db3db87740 100644 --- a/core/partitioning/segmentedblock/SegmentedBlock.h +++ b/core/partitioning/segmentedblock/SegmentedBlock.h @@ -74,11 +74,11 @@ struct SegmentedBlock { return old_to_new_.count(input); } void register_inshapes(std::vector>& in_shapes, const std::string& shape_mode) { - if (shape_mode.compare("min") == 0){ + if (shape_mode.compare("min") == 0) { min_shapes_ = in_shapes; - } else if(shape_mode.compare("opt") == 0){ + } else if (shape_mode.compare("opt") == 0) { opt_shapes_ = in_shapes; - } else{ + } else { max_shapes_ = in_shapes; } } diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index de2c754748..798198c65d 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -9,12 +9,15 @@ namespace torch_tensorrt { namespace core { namespace partitioning { -at::Tensor generateSingleInput(ir::Input& input, c10::optional& type_opt, const std::string& shape_mode) { +at::Tensor generateSingleInput( + ir::Input& input, + c10::optional& type_opt, + const std::string& shape_mode) { nvinfer1::Dims input_shape = input.input_shape; - if (input.input_is_dynamic){ - if (shape_mode.compare("min") == 0){ + if (input.input_is_dynamic) { + if (shape_mode.compare("min") == 0) { input_shape = input.min; - } else if(shape_mode.compare("opt") == 0){ + } else if (shape_mode.compare("opt") == 0) { input_shape = input.opt; } else { input_shape = input.max; @@ -188,7 +191,11 @@ void getSegmentsOutputByRunning( seg_block.register_intypes(input_types); } -void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map, const std::string& shape_mode) { +void runShapeAnalysis( 
+ PartitioningCtx* ctx, + torch::jit::Block* block, + ExampleIValues& example_tensor_map, + const std::string& shape_mode) { // register every segment's input shape, and it's running output IValues for (auto& seg_block : ctx->partitioned_blocks[block]) { torch::jit::ConstantPooling(seg_block.g()); diff --git a/tests/cpp/BUILD b/tests/cpp/BUILD index 6c75ed3d59..41d31f5275 100644 --- a/tests/cpp/BUILD +++ b/tests/cpp/BUILD @@ -15,9 +15,9 @@ test_suite( ":test_collections", ":test_compiled_modules", ":test_default_input_types", + ":test_dynamic_fallback", ":test_example_tensors", ":test_module_fallback", - ":test_dynamic_fallback", ":test_modules_as_engines", ":test_multiple_registered_engines", ":test_runtime_thread_safety", @@ -31,9 +31,9 @@ test_suite( ":test_collections", ":test_compiled_modules", ":test_default_input_types", + ":test_dynamic_fallback", ":test_example_tensors", ":test_module_fallback", - ":test_dynamic_fallback", ":test_modules_as_engines", ":test_multiple_registered_engines", ":test_runtime_thread_safety", diff --git a/tests/cpp/test_dynamic_fallback.cpp b/tests/cpp/test_dynamic_fallback.cpp index 77f5fa9b75..bad852e738 100644 --- a/tests/cpp/test_dynamic_fallback.cpp +++ b/tests/cpp/test_dynamic_fallback.cpp @@ -4,7 +4,7 @@ #include "torch/script.h" #include "torch_tensorrt/torch_tensorrt.h" -TEST(CppAPITest, ResNet50DynamicFallbackGraphCorrectly) { +TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) { torch::jit::script::Module mod; try { mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); @@ -16,17 +16,40 @@ TEST(CppAPITest, ResNet50DynamicFallbackGraphCorrectly) { const std::vector> input_shapes = {{1, 3, 224, 224}, {4, 3, 224, 224}, {8, 3, 224, 224}}; std::vector jit_inputs_ivalues; std::vector trt_inputs_ivalues; - auto in = at::randint(5, input_shapes[0], {at::kCUDA}); - jit_inputs_ivalues.push_back(in.clone()); - trt_inputs_ivalues.push_back(in.clone()); + auto in_bs1 = at::randint(5, input_shapes[0], {at::kCUDA}); + jit_inputs_ivalues.push_back(in_bs1.clone()); + trt_inputs_ivalues.push_back(in_bs1.clone()); std::vector inputs; inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2])); torch_tensorrt::ts::CompileSpec cfg(inputs); cfg.torch_executed_ops.push_back("aten::add"); - auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); + auto jit_results_bs1 = mod.forward(jit_inputs_ivalues).toTensor(); + // Compile and build the hybrid graph with dynamic shapes auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); - auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); - ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results)); + auto trt_results_bs1 = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs1, trt_results_bs1)); + jit_inputs_ivalues.clear(); + trt_inputs_ivalues.clear(); + + // Run with batch size of 4 + auto in_bs4 = at::randint(5, input_shapes[1], {at::kCUDA}); + jit_inputs_ivalues.push_back(in_bs4.clone()); + trt_inputs_ivalues.push_back(in_bs4.clone()); + + auto jit_results_bs4 = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_results_bs4 = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs4, trt_results_bs4)); + jit_inputs_ivalues.clear(); + trt_inputs_ivalues.clear(); + + // Run with batch size of 8 + auto in_bs8 = at::randint(5, input_shapes[2], {at::kCUDA}); + 
jit_inputs_ivalues.push_back(in_bs8.clone()); + trt_inputs_ivalues.push_back(in_bs8.clone()); + + auto jit_results_bs8 = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_results_bs8 = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs8, trt_results_bs8)); } From 3e5e9c9494e671f85aa5f5ed96259af599b1ac15 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 19 Oct 2022 14:28:43 -0700 Subject: [PATCH 04/11] chore: Minor change Signed-off-by: Dheeraj Peri --- core/partitioning/partitioning.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/partitioning/partitioning.h b/core/partitioning/partitioning.h index acdfeaf39b..e45a4899c9 100644 --- a/core/partitioning/partitioning.h +++ b/core/partitioning/partitioning.h @@ -13,7 +13,7 @@ namespace torch_tensorrt { namespace core { namespace partitioning { -typedef std::unordered_map ExampleIValues; +typedef std::unordered_map ExampleIValues; typedef std::pair, std::unordered_map> GraphAndMapping; From 38dc7d51cb549609fa298b528d8a758e82f0af58 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 19 Oct 2022 14:36:26 -0700 Subject: [PATCH 05/11] chore: add in_shapes call Signed-off-by: Dheeraj Peri --- core/partitioning/segmentedblock/SegmentedBlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/partitioning/segmentedblock/SegmentedBlock.h b/core/partitioning/segmentedblock/SegmentedBlock.h index db3db87740..703510f839 100644 --- a/core/partitioning/segmentedblock/SegmentedBlock.h +++ b/core/partitioning/segmentedblock/SegmentedBlock.h @@ -82,9 +82,9 @@ struct SegmentedBlock { max_shapes_ = in_shapes; } } - // const std::vector& in_shapes() const { - // return in_shapes_; - // } + const std::vector& in_shapes() const { + return opt_shapes_; + } void register_intypes(std::vector& in_types) { in_types_ = in_types; } From c879fdf0b3c4dcdac598c3887961250e7eeb3656 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 19 Oct 2022 16:22:46 -0700 Subject: [PATCH 06/11] chore: Minor fixes Signed-off-by: Dheeraj Peri --- .../segmentedblock/SegmentedBlock.h | 2 +- tests/cpp/test_dynamic_fallback.cpp | 56 ++++++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/core/partitioning/segmentedblock/SegmentedBlock.h b/core/partitioning/segmentedblock/SegmentedBlock.h index 703510f839..d1ee77dc94 100644 --- a/core/partitioning/segmentedblock/SegmentedBlock.h +++ b/core/partitioning/segmentedblock/SegmentedBlock.h @@ -82,7 +82,7 @@ struct SegmentedBlock { max_shapes_ = in_shapes; } } - const std::vector& in_shapes() const { + const std::vector> in_shapes() const { return opt_shapes_; } void register_intypes(std::vector& in_types) { diff --git a/tests/cpp/test_dynamic_fallback.cpp b/tests/cpp/test_dynamic_fallback.cpp index bad852e738..54800e2dca 100644 --- a/tests/cpp/test_dynamic_fallback.cpp +++ b/tests/cpp/test_dynamic_fallback.cpp @@ -4,10 +4,60 @@ #include "torch/script.h" #include "torch_tensorrt/torch_tensorrt.h" -TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) { +// TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) { +// torch::jit::script::Module mod; +// try { +// mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); +// } catch (const c10::Error& e) { +// std::cerr << "error loading the model\n"; +// ASSERT_TRUE(false); +// } +// +// const std::vector> input_shapes = {{1, 3, 224, 224}, {4, 3, 224, 224}, {8, 3, 224, 224}}; +// std::vector jit_inputs_ivalues; +// std::vector 
trt_inputs_ivalues; +// auto in_bs1 = at::randint(5, input_shapes[0], {at::kCUDA}); +// jit_inputs_ivalues.push_back(in_bs1.clone()); +// trt_inputs_ivalues.push_back(in_bs1.clone()); +// +// std::vector inputs; +// inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2])); +// torch_tensorrt::ts::CompileSpec cfg(inputs); +// cfg.torch_executed_ops.push_back("aten::add"); +// +// auto jit_results_bs1 = mod.forward(jit_inputs_ivalues).toTensor(); +// // Compile and build the hybrid graph with dynamic shapes +// auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); +// auto trt_results_bs1 = trt_mod.forward(trt_inputs_ivalues).toTensor(); +// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs1, trt_results_bs1)); +// jit_inputs_ivalues.clear(); +// trt_inputs_ivalues.clear(); +// +// // Run with batch size of 4 +// auto in_bs4 = at::randint(5, input_shapes[1], {at::kCUDA}); +// jit_inputs_ivalues.push_back(in_bs4.clone()); +// trt_inputs_ivalues.push_back(in_bs4.clone()); +// +// auto jit_results_bs4 = mod.forward(jit_inputs_ivalues).toTensor(); +// auto trt_results_bs4 = trt_mod.forward(trt_inputs_ivalues).toTensor(); +// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs4, trt_results_bs4)); +// jit_inputs_ivalues.clear(); +// trt_inputs_ivalues.clear(); +// +// // Run with batch size of 8 +// auto in_bs8 = at::randint(5, input_shapes[2], {at::kCUDA}); +// jit_inputs_ivalues.push_back(in_bs8.clone()); +// trt_inputs_ivalues.push_back(in_bs8.clone()); +// +// auto jit_results_bs8 = mod.forward(jit_inputs_ivalues).toTensor(); +// auto trt_results_bs8 = trt_mod.forward(trt_inputs_ivalues).toTensor(); +// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs8, trt_results_bs8)); +// } + +TEST(CppAPITest, VITDynamicBatchFallbackCorrectly) { torch::jit::script::Module mod; try { - mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); + mod = torch::jit::load("tests/modules/vit_scripted.jit.pt"); } catch (const c10::Error& e) { std::cerr << "error loading the model\n"; ASSERT_TRUE(false); @@ -23,7 +73,7 @@ TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) { std::vector inputs; inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2])); torch_tensorrt::ts::CompileSpec cfg(inputs); - cfg.torch_executed_ops.push_back("aten::add"); + cfg.torch_executed_ops.push_back("aten::layer_norm"); auto jit_results_bs1 = mod.forward(jit_inputs_ivalues).toTensor(); // Compile and build the hybrid graph with dynamic shapes From 27176930c59d93492c13d8a11aac2ba1dd9e5221 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Fri, 28 Oct 2022 15:45:16 -0700 Subject: [PATCH 07/11] chore: Remove VIT test case Signed-off-by: Dheeraj Peri --- tests/cpp/test_dynamic_fallback.cpp | 56 ++--------------------------- 1 file changed, 3 insertions(+), 53 deletions(-) diff --git a/tests/cpp/test_dynamic_fallback.cpp b/tests/cpp/test_dynamic_fallback.cpp index 54800e2dca..bad852e738 100644 --- a/tests/cpp/test_dynamic_fallback.cpp +++ b/tests/cpp/test_dynamic_fallback.cpp @@ -4,60 +4,10 @@ #include "torch/script.h" #include "torch_tensorrt/torch_tensorrt.h" -// TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) { -// torch::jit::script::Module mod; -// try { -// mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); -// } catch (const c10::Error& e) { -// std::cerr << "error loading the model\n"; -// ASSERT_TRUE(false); -// } -// -// const std::vector> input_shapes = {{1, 3, 224, 
224}, {4, 3, 224, 224}, {8, 3, 224, 224}}; -// std::vector jit_inputs_ivalues; -// std::vector trt_inputs_ivalues; -// auto in_bs1 = at::randint(5, input_shapes[0], {at::kCUDA}); -// jit_inputs_ivalues.push_back(in_bs1.clone()); -// trt_inputs_ivalues.push_back(in_bs1.clone()); -// -// std::vector inputs; -// inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2])); -// torch_tensorrt::ts::CompileSpec cfg(inputs); -// cfg.torch_executed_ops.push_back("aten::add"); -// -// auto jit_results_bs1 = mod.forward(jit_inputs_ivalues).toTensor(); -// // Compile and build the hybrid graph with dynamic shapes -// auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); -// auto trt_results_bs1 = trt_mod.forward(trt_inputs_ivalues).toTensor(); -// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs1, trt_results_bs1)); -// jit_inputs_ivalues.clear(); -// trt_inputs_ivalues.clear(); -// -// // Run with batch size of 4 -// auto in_bs4 = at::randint(5, input_shapes[1], {at::kCUDA}); -// jit_inputs_ivalues.push_back(in_bs4.clone()); -// trt_inputs_ivalues.push_back(in_bs4.clone()); -// -// auto jit_results_bs4 = mod.forward(jit_inputs_ivalues).toTensor(); -// auto trt_results_bs4 = trt_mod.forward(trt_inputs_ivalues).toTensor(); -// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs4, trt_results_bs4)); -// jit_inputs_ivalues.clear(); -// trt_inputs_ivalues.clear(); -// -// // Run with batch size of 8 -// auto in_bs8 = at::randint(5, input_shapes[2], {at::kCUDA}); -// jit_inputs_ivalues.push_back(in_bs8.clone()); -// trt_inputs_ivalues.push_back(in_bs8.clone()); -// -// auto jit_results_bs8 = mod.forward(jit_inputs_ivalues).toTensor(); -// auto trt_results_bs8 = trt_mod.forward(trt_inputs_ivalues).toTensor(); -// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs8, trt_results_bs8)); -// } - -TEST(CppAPITest, VITDynamicBatchFallbackCorrectly) { +TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) { torch::jit::script::Module mod; try { - mod = torch::jit::load("tests/modules/vit_scripted.jit.pt"); + mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); } catch (const c10::Error& e) { std::cerr << "error loading the model\n"; ASSERT_TRUE(false); @@ -73,7 +23,7 @@ TEST(CppAPITest, VITDynamicBatchFallbackCorrectly) { std::vector inputs; inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2])); torch_tensorrt::ts::CompileSpec cfg(inputs); - cfg.torch_executed_ops.push_back("aten::layer_norm"); + cfg.torch_executed_ops.push_back("aten::add"); auto jit_results_bs1 = mod.forward(jit_inputs_ivalues).toTensor(); // Compile and build the hybrid graph with dynamic shapes From ccda27739f58b961af6ed3e804bcbd4bffdf8d0a Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Thu, 3 Nov 2022 12:18:10 -0700 Subject: [PATCH 08/11] chore: Fix tests Signed-off-by: Dheeraj Peri --- tests/core/partitioning/test_conditionals.cpp | 60 +++++++++---------- .../test_resolve_nontensor_inputs.cpp | 16 ++--- .../core/partitioning/test_shape_analysis.cpp | 13 ++-- tests/cpp/test_dynamic_fallback.cpp | 50 ++++++++++++++++ 4 files changed, 97 insertions(+), 42 deletions(-) diff --git a/tests/core/partitioning/test_conditionals.cpp b/tests/core/partitioning/test_conditionals.cpp index e0f93dde59..410ebcad39 100644 --- a/tests/core/partitioning/test_conditionals.cpp +++ b/tests/core/partitioning/test_conditionals.cpp @@ -43,33 +43,33 @@ TEST(Partitioning, FallbackOnConditionalsCorrectly) { 
ASSERT_TRUE(conditional_engines_count == 2); } -TEST(Partitioning, FallbackInplaceOPInConditionalsCorrectly) { - torch::jit::script::Module mod; - try { - mod = torch::jit::load("tests/modules/inplace_op_if_scripted.jit.pt"); - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - return; - } - - const std::vector> input_shapes = {{4, 4}, {4, 4}}; - std::vector jit_inputs_ivalues; - std::vector trt_inputs_ivalues; - for (auto in_shape : input_shapes) { - auto in = at::randint(5, in_shape, {at::kCUDA}); - jit_inputs_ivalues.push_back(in.clone()); - trt_inputs_ivalues.push_back(in.clone()); - } - - std::vector inputs{ - torch_tensorrt::core::ir::Input({4, 4}), torch_tensorrt::core::ir::Input({4, 4})}; - auto g = mod.get_method("forward").graph(); - torch_tensorrt::core::CompileSpec cfg(inputs); - cfg.partitioning_info.enabled = true; - cfg.partitioning_info.forced_fallback_operators.push_back("prim::ListConstruct"); - - auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); - auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); - auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); - ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results)); -} +// TEST(Partitioning, FallbackInplaceOPInConditionalsCorrectly) { +// torch::jit::script::Module mod; +// try { +// mod = torch::jit::load("tests/modules/inplace_op_if_scripted.jit.pt"); +// } catch (const c10::Error& e) { +// std::cerr << "error loading the model\n"; +// return; +// } +// +// const std::vector> input_shapes = {{4, 4}, {4, 4}}; +// std::vector jit_inputs_ivalues; +// std::vector trt_inputs_ivalues; +// for (auto in_shape : input_shapes) { +// auto in = at::randint(5, in_shape, {at::kCUDA}); +// jit_inputs_ivalues.push_back(in.clone()); +// trt_inputs_ivalues.push_back(in.clone()); +// } +// +// std::vector inputs{ +// torch_tensorrt::core::ir::Input({4, 4}), torch_tensorrt::core::ir::Input({4, 4})}; +// auto g = mod.get_method("forward").graph(); +// torch_tensorrt::core::CompileSpec cfg(inputs); +// cfg.partitioning_info.enabled = true; +// cfg.partitioning_info.forced_fallback_operators.push_back("prim::ListConstruct"); +// +// auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); +// auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); +// auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); +// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results)); +// } diff --git a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp index 1c1a1631ea..078347a43b 100644 --- a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp +++ b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp @@ -122,9 +122,10 @@ TEST(Partitioning, ResolveNonTensorInputsCorrectly) { inputs_map.insert({g->inputs()[i], {inputs[i]}}); input_types.insert({g->inputs()[i], {{at::kFloat}}}); } - auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types); + torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info); - torch_tensorrt::core::partitioning::partition(&ctx, input_ivalues_map); + ctx.input_types_map = input_types; + torch_tensorrt::core::partitioning::partition(&ctx); std::vector segmented_blocks = ctx.partitioned_blocks.begin()->second; @@ -182,10 +183,10 @@ TEST(Partitioning, ResolveTensorListInputsInTrtCorrectly) { inputs_map.insert({g->inputs()[i], {inputs[i]}}); 
input_types.insert({g->inputs()[i], {{at::kFloat}}}); } - auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types); - torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info); - torch_tensorrt::core::partitioning::partition(&ctx, input_ivalues_map); + torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info); + ctx.input_types_map = input_types; + torch_tensorrt::core::partitioning::partition(&ctx); std::vector segmented_blocks = ctx.partitioned_blocks.begin()->second; @@ -376,9 +377,10 @@ TEST(Partitioning, ResolveOnlyNeccessaryNonTensorInputs) { inputs_map.insert({g->inputs()[i], {inputs[i]}}); input_types.insert({g->inputs()[i], {{at::kFloat}}}); } - auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types); + // auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types); torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info); - torch_tensorrt::core::partitioning::partition(&ctx, input_ivalues_map); + ctx.input_types_map = input_types; + torch_tensorrt::core::partitioning::partition(&ctx); auto segmented_blocks = ctx.partitioned_blocks.begin()->second; int torch_block_cnt = 0, trt_block_cnt = 0; diff --git a/tests/core/partitioning/test_shape_analysis.cpp b/tests/core/partitioning/test_shape_analysis.cpp index 87c42c0e47..8347b3b31b 100644 --- a/tests/core/partitioning/test_shape_analysis.cpp +++ b/tests/core/partitioning/test_shape_analysis.cpp @@ -15,7 +15,7 @@ bool checkSegmentedBlockInputShape( if (cur_block_in_shapes.size() != in_shape[i].size()) return false; for (size_t j = 0; j < cur_block_in_shapes.size(); ++j) { - auto cur_input_shape = torch_tensorrt::core::util::toVec(cur_block_in_shapes[j].input_shape); + auto cur_input_shape = cur_block_in_shapes[j]; for (size_t k = 0; k < cur_input_shape.size(); ++k) { if (cur_input_shape[k] != in_shape[i][j][k]) return false; @@ -61,14 +61,16 @@ TEST(Partitioning, InferSequentialModelSegmentedBlockShapeCorrectly) { std::unordered_map> inputs_map; std::unordered_map>> input_types; + for (size_t i = 0; i < g->inputs().size(); ++i) { inputs_map.insert({g->inputs()[i], {inputs[i]}}); input_types.insert({g->inputs()[i], {{at::kFloat}}}); } - auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types); torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info); - torch_tensorrt::core::partitioning::partition(&ctx, input_ivalues_map); + ctx.input_types_map = input_types; + ctx.settings.collection_input_spec_map = inputs_map; + torch_tensorrt::core::partitioning::partition(&ctx); auto segmented_blocks = ctx.partitioned_blocks.begin()->second; ASSERT_TRUE(checkSegmentedBlockInputShape( @@ -117,10 +119,11 @@ TEST(Partitioning, InferBranchModelSegmentedBlockShapeCorrectly) { inputs_map.insert({g->inputs()[i], {inputs[i]}}); input_types.insert({g->inputs()[i], {{at::kFloat}}}); } - auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types); torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info); - torch_tensorrt::core::partitioning::partition(&ctx, input_ivalues_map); + ctx.input_types_map = input_types; + ctx.settings.collection_input_spec_map = inputs_map; + torch_tensorrt::core::partitioning::partition(&ctx); auto segmented_blocks = 
ctx.partitioned_blocks.begin()->second; ASSERT_TRUE(checkSegmentedBlockInputShape( diff --git a/tests/cpp/test_dynamic_fallback.cpp b/tests/cpp/test_dynamic_fallback.cpp index bad852e738..42ffbba897 100644 --- a/tests/cpp/test_dynamic_fallback.cpp +++ b/tests/cpp/test_dynamic_fallback.cpp @@ -53,3 +53,53 @@ TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) { auto trt_results_bs8 = trt_mod.forward(trt_inputs_ivalues).toTensor(); ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs8, trt_results_bs8)); } + +TEST(CppAPITest, ResNet18DynamicShapeFallbackCorrectly) { + torch::jit::script::Module mod; + try { + mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + ASSERT_TRUE(false); + } + + const std::vector<std::vector<int64_t>> input_shapes = {{1, 3, 64, 64}, {1, 3, 128, 128}, {1, 3, 224, 224}}; + std::vector<torch::jit::IValue> jit_inputs_ivalues; + std::vector<torch::jit::IValue> trt_inputs_ivalues; + auto in_64 = at::randint(5, input_shapes[0], {at::kCUDA}); + jit_inputs_ivalues.push_back(in_64.clone()); + trt_inputs_ivalues.push_back(in_64.clone()); + + std::vector<torch_tensorrt::Input> inputs; + inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2])); + torch_tensorrt::ts::CompileSpec cfg(inputs); + cfg.torch_executed_ops.push_back("aten::add"); + + auto jit_results_64 = mod.forward(jit_inputs_ivalues).toTensor(); + // Compile and build the hybrid graph with dynamic shapes + auto trt_mod = torch_tensorrt::ts::compile(mod, cfg); + auto trt_results_64 = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_64, trt_results_64)); + jit_inputs_ivalues.clear(); + trt_inputs_ivalues.clear(); + + // Run with input resolution of (1, 3, 128, 128) + auto in_128 = at::randint(5, input_shapes[1], {at::kCUDA}); + jit_inputs_ivalues.push_back(in_128.clone()); + trt_inputs_ivalues.push_back(in_128.clone()); + + auto jit_results_128 = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_results_128 = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_128, trt_results_128)); + jit_inputs_ivalues.clear(); + trt_inputs_ivalues.clear(); + + // Run with input resolution of (1, 3, 224, 224) + auto in_224 = at::randint(5, input_shapes[2], {at::kCUDA}); + jit_inputs_ivalues.push_back(in_224.clone()); + trt_inputs_ivalues.push_back(in_224.clone()); + + auto jit_results_224 = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_results_224 = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_224, trt_results_224)); +} From 27176930c59d93492c13d8a11aac2ba1dd9e5221 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Thu, 3 Nov 2022 16:22:14 -0700 Subject: [PATCH 09/11] chore: Move shape mode to enum, fix CI tests by storing input_ivalues map in a PartitioningCtx object Signed-off-by: Dheeraj Peri --- core/compiler.cpp | 7 +++ core/ir/ir.h | 6 ++ core/partitioning/partitioning.cpp | 31 ++++++---- core/partitioning/partitioning.h | 6 +- .../partitioningctx/PartitioningCtx.h | 3 + .../segmentedblock/SegmentedBlock.h | 6 +- core/partitioning/shape_analysis.cpp | 12 ++-- tests/core/partitioning/test_conditionals.cpp | 60 +++++++++---------- .../test_resolve_nontensor_inputs.cpp | 10 +++- .../core/partitioning/test_shape_analysis.cpp | 11 +++- 10 files changed, 93 insertions(+), 59 deletions(-) diff --git a/core/compiler.cpp b/core/compiler.cpp index 
558cb82af6..d4c536a801 100644 --- a/core/compiler.cpp +++ b/core/compiler.cpp @@ -127,6 +127,8 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::stri return conversion::VerifyConverterSupportForBlock(g->block()); } + + partitioning::GraphAndMapping BuildHybridGraph( torch::jit::script::Module& new_mod, torch::jit::Block* block, @@ -138,6 +140,11 @@ partitioning::GraphAndMapping BuildHybridGraph( auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info); partitioning_ctx.input_types_map = first_use_types; + + // Generate a dictionary of input torch::jit::Value's to their min, opt, max tensors and store in ctx + // TODO: Combine this within partition call + partitioning::populateInputIValues(&partitioning_ctx); + partitioning::partition(&partitioning_ctx); for (auto& partitioned_block : partitioning_ctx.partitioned_blocks) { diff --git a/core/ir/ir.h b/core/ir/ir.h index a5225daa25..91c2666be5 100644 --- a/core/ir/ir.h +++ b/core/ir/ir.h @@ -11,6 +11,12 @@ namespace torch_tensorrt { namespace core { namespace ir { +enum class ShapeMode { + kMIN, + kOPT, + kMAX, +}; + struct Input : torch::CustomClassHolder { Input(){}; Input( diff --git a/core/partitioning/partitioning.cpp b/core/partitioning/partitioning.cpp index 7cd273889d..372489bc57 100644 --- a/core/partitioning/partitioning.cpp +++ b/core/partitioning/partitioning.cpp @@ -450,6 +450,20 @@ bool isInputDynamic(PartitioningCtx* ctx) { return input_is_dynamic; } +void populateInputIValues(PartitioningCtx* ctx){ + if (isInputDynamic(ctx)) { + ctx->min_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMIN); + ctx->opt_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT); + ctx->max_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMAX); + } else { + ctx->opt_input_ivalues_map = + partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT); + } +} + void partition(PartitioningCtx* ctx) { LOG_DEBUG(ctx->settings); @@ -471,21 +485,12 @@ void partition(PartitioningCtx* ctx) { // output shapes for each block accordingly if (isInputDynamic(ctx)) { LOG_DEBUG("Performing shape analysis for segmented blocks using min/opt/max shapes for inputs"); - auto min_input_ivalues_map = - partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "min"); - auto opt_input_ivalues_map = - partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt"); - auto max_input_ivalues_map = - partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "max"); - - runShapeAnalysis(ctx, block, min_input_ivalues_map, "min"); - runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt"); - runShapeAnalysis(ctx, block, max_input_ivalues_map, "max"); + runShapeAnalysis(ctx, block, ctx->min_input_ivalues_map, ir::ShapeMode::kMIN); + runShapeAnalysis(ctx, block, ctx->opt_input_ivalues_map, ir::ShapeMode::kOPT); + runShapeAnalysis(ctx, block, ctx->max_input_ivalues_map, ir::ShapeMode::kMAX); } else { LOG_DEBUG("Performing shape analysis for segmented blocks using static shapes for inputs"); - auto opt_input_ivalues_map = - partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, 
ctx->input_types_map, "opt"); - runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt"); + runShapeAnalysis(ctx, block, ctx->opt_input_ivalues_map, ir::ShapeMode::kOPT); } } } diff --git a/core/partitioning/partitioning.h b/core/partitioning/partitioning.h index e45a4899c9..7c72d091b6 100644 --- a/core/partitioning/partitioning.h +++ b/core/partitioning/partitioning.h @@ -21,13 +21,15 @@ typedef std::pair, std::unordered_map min_input_ivalues_map; + std::unordered_map opt_input_ivalues_map; + std::unordered_map max_input_ivalues_map; // records all the original blocks topologically in the module std::vector original_blocks; // mapping: node=> execution status diff --git a/core/partitioning/segmentedblock/SegmentedBlock.h b/core/partitioning/segmentedblock/SegmentedBlock.h index d1ee77dc94..d649e9e2c3 100644 --- a/core/partitioning/segmentedblock/SegmentedBlock.h +++ b/core/partitioning/segmentedblock/SegmentedBlock.h @@ -73,10 +73,10 @@ struct SegmentedBlock { bool contain_raw_value(torch::jit::Value* input) const { return old_to_new_.count(input); } - void register_inshapes(std::vector>& in_shapes, const std::string& shape_mode) { - if (shape_mode.compare("min") == 0) { + void register_inshapes(std::vector>& in_shapes, const ir::ShapeMode& shape_mode) { + if (shape_mode == ir::ShapeMode::kMIN) { min_shapes_ = in_shapes; - } else if (shape_mode.compare("opt") == 0) { + } else if (shape_mode == ir::ShapeMode::kOPT) { opt_shapes_ = in_shapes; } else { max_shapes_ = in_shapes; diff --git a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index 798198c65d..89dfd99bd6 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -12,12 +12,12 @@ namespace partitioning { at::Tensor generateSingleInput( ir::Input& input, c10::optional& type_opt, - const std::string& shape_mode) { + const ir::ShapeMode& shape_mode) { nvinfer1::Dims input_shape = input.input_shape; if (input.input_is_dynamic) { - if (shape_mode.compare("min") == 0) { + if (shape_mode == ir::ShapeMode::kMIN) { input_shape = input.min; - } else if (shape_mode.compare("opt") == 0) { + } else if (shape_mode == ir::ShapeMode::kOPT) { input_shape = input.opt; } else { input_shape = input.max; @@ -38,7 +38,7 @@ at::Tensor generateSingleInput( std::unordered_map generateRandomInputs( std::unordered_map>& inputs, std::unordered_map>>& types, - const std::string& shape_mode) { + const ir::ShapeMode& shape_mode) { // generate random inputs for running pytorch segments std::unordered_map ivalue_map; @@ -72,7 +72,7 @@ void getSegmentsOutputByRunning( SegmentedBlock& seg_block, std::unordered_map& ivalues_maps, const PartitioningInfo& partitioning_info, - const std::string& shape_mode) { + const ir::ShapeMode& shape_mode) { // create a module to run the graph auto g = seg_block.g(); auto copy_g = g->copy(); @@ -195,7 +195,7 @@ void runShapeAnalysis( PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map, - const std::string& shape_mode) { + const ir::ShapeMode& shape_mode) { // register every segment's input shape, and it's running output IValues for (auto& seg_block : ctx->partitioned_blocks[block]) { torch::jit::ConstantPooling(seg_block.g()); diff --git a/tests/core/partitioning/test_conditionals.cpp b/tests/core/partitioning/test_conditionals.cpp index 410ebcad39..e0f93dde59 100644 --- a/tests/core/partitioning/test_conditionals.cpp +++ b/tests/core/partitioning/test_conditionals.cpp @@ -43,33 +43,33 @@ TEST(Partitioning, 
FallbackOnConditionalsCorrectly) { ASSERT_TRUE(conditional_engines_count == 2); } -// TEST(Partitioning, FallbackInplaceOPInConditionalsCorrectly) { -// torch::jit::script::Module mod; -// try { -// mod = torch::jit::load("tests/modules/inplace_op_if_scripted.jit.pt"); -// } catch (const c10::Error& e) { -// std::cerr << "error loading the model\n"; -// return; -// } -// -// const std::vector> input_shapes = {{4, 4}, {4, 4}}; -// std::vector jit_inputs_ivalues; -// std::vector trt_inputs_ivalues; -// for (auto in_shape : input_shapes) { -// auto in = at::randint(5, in_shape, {at::kCUDA}); -// jit_inputs_ivalues.push_back(in.clone()); -// trt_inputs_ivalues.push_back(in.clone()); -// } -// -// std::vector inputs{ -// torch_tensorrt::core::ir::Input({4, 4}), torch_tensorrt::core::ir::Input({4, 4})}; -// auto g = mod.get_method("forward").graph(); -// torch_tensorrt::core::CompileSpec cfg(inputs); -// cfg.partitioning_info.enabled = true; -// cfg.partitioning_info.forced_fallback_operators.push_back("prim::ListConstruct"); -// -// auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); -// auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); -// auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); -// ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results)); -// } +TEST(Partitioning, FallbackInplaceOPInConditionalsCorrectly) { + torch::jit::script::Module mod; + try { + mod = torch::jit::load("tests/modules/inplace_op_if_scripted.jit.pt"); + } catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + return; + } + + const std::vector> input_shapes = {{4, 4}, {4, 4}}; + std::vector jit_inputs_ivalues; + std::vector trt_inputs_ivalues; + for (auto in_shape : input_shapes) { + auto in = at::randint(5, in_shape, {at::kCUDA}); + jit_inputs_ivalues.push_back(in.clone()); + trt_inputs_ivalues.push_back(in.clone()); + } + + std::vector inputs{ + torch_tensorrt::core::ir::Input({4, 4}), torch_tensorrt::core::ir::Input({4, 4})}; + auto g = mod.get_method("forward").graph(); + torch_tensorrt::core::CompileSpec cfg(inputs); + cfg.partitioning_info.enabled = true; + cfg.partitioning_info.forced_fallback_operators.push_back("prim::ListConstruct"); + + auto jit_results = mod.forward(jit_inputs_ivalues).toTensor(); + auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg); + auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor(); + ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results)); +} diff --git a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp index 078347a43b..7df8d5bff4 100644 --- a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp +++ b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp @@ -123,8 +123,11 @@ TEST(Partitioning, ResolveNonTensorInputsCorrectly) { input_types.insert({g->inputs()[i], {{at::kFloat}}}); } + partitioning_info.collection_input_spec_map = inputs_map; torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info); ctx.input_types_map = input_types; + + torch_tensorrt::core::partitioning::populateInputIValues(&ctx); torch_tensorrt::core::partitioning::partition(&ctx); std::vector segmented_blocks = ctx.partitioned_blocks.begin()->second; @@ -184,8 +187,10 @@ TEST(Partitioning, ResolveTensorListInputsInTrtCorrectly) { input_types.insert({g->inputs()[i], {{at::kFloat}}}); } + partitioning_info.collection_input_spec_map = inputs_map; 
   torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info);
   ctx.input_types_map = input_types;
+  torch_tensorrt::core::partitioning::populateInputIValues(&ctx);
   torch_tensorrt::core::partitioning::partition(&ctx);
 
   std::vector<torch_tensorrt::core::partitioning::SegmentedBlock> segmented_blocks = ctx.partitioned_blocks.begin()->second;
@@ -263,7 +268,7 @@ TEST(Partitioning, ConvertForTensorListInputsInFallbackCorrectly) {
   int count = count_trt_engines(fallback_g);
   ASSERT_TRUE(count == 1);
 }
-
+//
 TEST(Partitioning, ResolveOnlyNeccessaryNonTensorInputs) {
   /* parseIR does not support "= aten::_set_item" so we will build this graph manually
     const auto graph = R"IR(
@@ -377,9 +382,10 @@ TEST(Partitioning, ResolveOnlyNeccessaryNonTensorInputs) {
     inputs_map.insert({g->inputs()[i], {inputs[i]}});
     input_types.insert({g->inputs()[i], {{at::kFloat}}});
   }
-  // auto input_ivalues_map = torch_tensorrt::core::partitioning::generateRandomInputs(inputs_map, input_types);
+  partitioning_info.collection_input_spec_map = inputs_map;
   torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info);
   ctx.input_types_map = input_types;
+  torch_tensorrt::core::partitioning::populateInputIValues(&ctx);
   torch_tensorrt::core::partitioning::partition(&ctx);
 
   auto segmented_blocks = ctx.partitioned_blocks.begin()->second;
diff --git a/tests/core/partitioning/test_shape_analysis.cpp b/tests/core/partitioning/test_shape_analysis.cpp
index 8347b3b31b..7558fbec69 100644
--- a/tests/core/partitioning/test_shape_analysis.cpp
+++ b/tests/core/partitioning/test_shape_analysis.cpp
@@ -66,10 +66,12 @@ TEST(Partitioning, InferSequentialModelSegmentedBlockShapeCorrectly) {
     inputs_map.insert({g->inputs()[i], {inputs[i]}});
     input_types.insert({g->inputs()[i], {{at::kFloat}}});
   }
-
+  // Store a map of torch::jit::Value to ir::Input
+  partitioning_info.collection_input_spec_map = inputs_map;
   torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info);
   ctx.input_types_map = input_types;
-  ctx.settings.collection_input_spec_map = inputs_map;
+
+  torch_tensorrt::core::partitioning::populateInputIValues(&ctx);
   torch_tensorrt::core::partitioning::partition(&ctx);
 
   auto segmented_blocks = ctx.partitioned_blocks.begin()->second;
@@ -120,9 +122,12 @@ TEST(Partitioning, InferBranchModelSegmentedBlockShapeCorrectly) {
     input_types.insert({g->inputs()[i], {{at::kFloat}}});
   }
 
+  // Store a map of torch::jit::Value to ir::Input
+  partitioning_info.collection_input_spec_map = inputs_map;
   torch_tensorrt::core::partitioning::PartitioningCtx ctx(g->block(), partitioning_info);
   ctx.input_types_map = input_types;
-  ctx.settings.collection_input_spec_map = inputs_map;
+
+  torch_tensorrt::core::partitioning::populateInputIValues(&ctx);
   torch_tensorrt::core::partitioning::partition(&ctx);
 
   auto segmented_blocks = ctx.partitioned_blocks.begin()->second;

From 86b2f2a8a8db99ee0304a85f2ea056879e9bcff3 Mon Sep 17 00:00:00 2001
From: Dheeraj Peri
Date: Thu, 3 Nov 2022 16:24:01 -0700
Subject: [PATCH 10/11] chore: Linter fixes

Signed-off-by: Dheeraj Peri
---
 core/compiler.cpp                  |  2 --
 core/ir/ir.h                       |  6 +++---
 core/partitioning/partitioning.cpp | 18 +++++++++---------
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/core/compiler.cpp b/core/compiler.cpp
index d4c536a801..58af1e6cd8 100644
--- a/core/compiler.cpp
+++ b/core/compiler.cpp
@@ -127,8 +127,6 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::stri
   return conversion::VerifyConverterSupportForBlock(g->block());
 }
 
-
-
 partitioning::GraphAndMapping BuildHybridGraph(
     torch::jit::script::Module& new_mod,
     torch::jit::Block* block,
diff --git a/core/ir/ir.h b/core/ir/ir.h
index 91c2666be5..8e5e4cd59a 100644
--- a/core/ir/ir.h
+++ b/core/ir/ir.h
@@ -12,9 +12,9 @@ namespace core {
 namespace ir {
 
 enum class ShapeMode {
-    kMIN,
-    kOPT,
-    kMAX,
+  kMIN,
+  kOPT,
+  kMAX,
 };
 
 struct Input : torch::CustomClassHolder {
diff --git a/core/partitioning/partitioning.cpp b/core/partitioning/partitioning.cpp
index 372489bc57..6e37afdbd6 100644
--- a/core/partitioning/partitioning.cpp
+++ b/core/partitioning/partitioning.cpp
@@ -450,17 +450,17 @@ bool isInputDynamic(PartitioningCtx* ctx) {
   return input_is_dynamic;
 }
 
-void populateInputIValues(PartitioningCtx* ctx){
+void populateInputIValues(PartitioningCtx* ctx) {
   if (isInputDynamic(ctx)) {
-    ctx->min_input_ivalues_map =
-        partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMIN);
-    ctx->opt_input_ivalues_map =
-        partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
-    ctx->max_input_ivalues_map =
-        partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMAX);
+    ctx->min_input_ivalues_map = partitioning::generateRandomInputs(
+        ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMIN);
+    ctx->opt_input_ivalues_map = partitioning::generateRandomInputs(
+        ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
+    ctx->max_input_ivalues_map = partitioning::generateRandomInputs(
+        ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMAX);
   } else {
-    ctx->opt_input_ivalues_map =
-        partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
+    ctx->opt_input_ivalues_map = partitioning::generateRandomInputs(
+        ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
   }
 }

From 6d0b0f64503c0ce872dbf47bf89c400580ffe7b1 Mon Sep 17 00:00:00 2001
From: Dheeraj Peri
Date: Mon, 14 Nov 2022 18:59:16 -0800
Subject: [PATCH 11/11] chore: Address review comments

Signed-off-by: Dheeraj Peri
---
 core/partitioning/segmentedblock/SegmentedBlock.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/core/partitioning/segmentedblock/SegmentedBlock.h b/core/partitioning/segmentedblock/SegmentedBlock.h
index c6ceaaed41..db5e8fedd9 100644
--- a/core/partitioning/segmentedblock/SegmentedBlock.h
+++ b/core/partitioning/segmentedblock/SegmentedBlock.h
@@ -82,9 +82,15 @@ struct SegmentedBlock {
       max_shapes_ = in_shapes;
     }
   }
-  const std::vector<std::vector<int64_t>> in_shapes() const {
+  const std::vector<std::vector<int64_t>> in_opt_shapes() const {
     return opt_shapes_;
   }
+  const std::vector<std::vector<int64_t>> in_min_shapes() const {
+    return min_shapes_;
+  }
+  const std::vector<std::vector<int64_t>> in_max_shapes() const {
+    return max_shapes_;
+  }
   void register_intypes(std::vector<at::ScalarType>& in_types) {
     in_types_ = in_types;
  }
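
For reference, here is a minimal usage sketch, not part of the patch series itself, of how the entry points introduced above compose: PartitioningCtx construction, populateInputIValues, partition, and SegmentedBlock::construct_inputs_spec. It assumes a PartitioningInfo whose collection_input_spec_map has already been filled in by the caller, as the updated tests do; the wrapper function PartitionWithDynamicShapes and its parameter names are illustrative.

#include "core/partitioning/partitioning.h"
#include "core/partitioning/partitioningctx/PartitioningCtx.h"

namespace p = torch_tensorrt::core::partitioning;

// Illustrative wrapper: `info.collection_input_spec_map` is assumed to be
// populated by the caller, mirroring the updated tests above.
p::GraphAndMapping PartitionWithDynamicShapes(
    torch::jit::Block* block,
    p::PartitioningInfo& info,
    torch_tensorrt::core::ir::CollectionTypeMap& first_use_types) {
  p::PartitioningCtx ctx(block, info);
  ctx.input_types_map = first_use_types;

  // Fills ctx.{min,opt,max}_input_ivalues_map with random example tensors;
  // only the opt set is generated when no input is dynamic.
  p::populateInputIValues(&ctx);

  // Segments the block, then runs shape analysis once per recorded ShapeMode,
  // registering min/opt/max input shapes on each segmented block.
  p::partition(&ctx);

  // TensorRT-targeted segments can now emit their own Input specs: dynamic
  // (min/opt/max ranges) when all three shape sets were registered, static
  // (opt only) otherwise.
  for (auto& seg_block : ctx.partitioned_blocks[block]) {
    if (seg_block.target() == p::SegmentedBlock::kTensorRT) {
      auto trt_inputs = seg_block.construct_inputs_spec();
      (void)trt_inputs; // consumed by engine conversion in BuildHybridGraph
    }
  }

  // Stitch the Torch and TensorRT segments back into a single graph.
  return p::stitch(&ctx, block);
}

BuildHybridGraph in core/compiler.cpp follows roughly this sequence, with the TensorRT engine conversion happening between partition() and stitch().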