Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions third_party/nvfuser/csrc/compute_at_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,12 +331,13 @@ void IterDomainGraph::build(Fusion* fusion) {
auto all_ids = ir_utils::allIDsOf(tv);

// Check if this domain is a consumer of a view-like operation
bool view_like_domain = tv->domain()->hasViewLikeRFactor();
// bool view_like_domain = tv->domain()->hasViewLikeRFactor();

for (auto id : all_ids) {
// Check if this id is a view like rfactor id
bool is_view_rfactor_id = false;
if (view_like_domain && id->isRFactorProduct()) {
// if (view_like_domain && id->isRFactorProduct()) {
if (id->isRFactorProduct()) {
// If the tensor domain is a view like domain, and the iteration domain
// is marked as an rfactor product and is in the rfactor domain, it's a
// view like rfactor iteration domain
Expand Down
47 changes: 47 additions & 0 deletions third_party/nvfuser/test/test_gpu_indexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <gtest/gtest.h>

#include <executor.h>
#include <inlining.h>
#include <ir_all_nodes.h>
#include <ir_builder.h>
#include <ops/arith.h>
Expand Down Expand Up @@ -783,4 +784,50 @@ TEST_F(NVFuserTest, FusionIndexing17_CUDA) {
&fusion, cg_outputs, aten_inputs, aten_outputs, __LINE__, __FILE__);
}

// Repro of issue #2560.
//
// Defines a fusion that broadcasts a 1D input, adds it to a 2D input and
// sums the result over axes {0, 1}. The reduction output is then scheduled
// with merge / split / rFactor, the transformation is propagated starting
// from the rfactor tensor, everything is inlined at position 1, and the
// compiled kernel's result is validated against an ATen reference.
TEST_F(NVFuserTest, FusionIndexing18_CUDA) {
  Fusion fusion;
  FusionGuard fg(&fusion);

  // Fusion inputs: one 1D and one 2D symbolic tensor.
  auto in_1d = makeSymbolicTensor(1);
  fusion.addInput(in_1d);
  auto in_2d = makeSymbolicTensor(2);
  fusion.addInput(in_2d);

  // reduced = sum(broadcast(in_1d) + in_2d, {0, 1})
  auto in_1d_bcast = broadcast(in_1d, {false, true});
  auto added = add(in_1d_bcast, in_2d);
  auto reduced = sum(added, {0, 1});
  fusion.addOutput(reduced);

  // Dump the fusion math before any scheduling is applied.
  fusion.printMath();

  // Schedule the reduction output: merge at 0, split 0 by 4, rFactor on {1}.
  reduced->merge(0);
  reduced->split(0, 4);
  auto rfactored = reduced->rFactor({1});

  // Propagate the transformation using the rfactor tensor as the reference.
  MaxRootDomainInfoSpanningTree spanning_tree(rfactored);
  TransformPropagator propagator(rfactored);
  spanning_tree.traverse(&propagator);

  inlineAllAt(reduced, 1, true);

  // Dump the scheduled fusion math and the generated kernel.
  fusion.printMath();
  fusion.printKernel();

  // Concrete inputs: shapes {5} and {5, 3}, float, on CUDA device 0.
  at::manual_seed(1);
  const auto options =
      at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor at_in_1d = at::randn({5}, options);
  at::Tensor at_in_2d = at::randn({5, 3}, options);
  std::vector<c10::IValue> aten_inputs = {at_in_1d, at_in_2d};

  FusionExecutor executor;
  executor.compileFusion(&fusion, aten_inputs);
  auto cg_outputs = executor.runFusion(aten_inputs);

  // ATen reference for the same computation.
  auto expected = (at_in_1d.unsqueeze(-1) + at_in_2d).sum();

  testValidate(
      executor.kernel(),
      cg_outputs,
      aten_inputs,
      {expected},
      __LINE__,
      __FILE__);
}

} // namespace nvfuser