Format and add comments to GEMM test.

csarofeen · csarofeen · commit a656b6dd9400 · 2020-06-09T09:29:00.000-04:00
diff --git a/test/cpp/jit/test_gpu.cpp b/test/cpp/jit/test_gpu.cpp
@@ -2674,23 +2674,44 @@ void testGPU_FusionSimpleGemm() {
     fusion.addInput(tv1);
 
     TensorView* tv2 = broadcast(tv0, {false, false, true});
+    // tv2[I0, I1, B] = tv0[I0, I1]
+
     TensorView* tv3 = broadcast(tv1, {true, false, false});
+    // tv3[B, I1, I2] = tv1[I1, I2]
 
+    // tv4[I0, I1, I2] = tv2[I0, I1, B] * tv3[B, I1, I2]
     TensorView* tv4 = mul(tv2, tv3);
+    // tv5[I0, R1, I2] = tv4[I0, I1, I2]
     TensorView* tv5 = sum(tv4, {1});
     fusion.addOutput(tv5);
 
     tv5->split(1, 32);
+    // tv5[I0, R1o, R1i{32}, I2]
+
     auto tv6 = tv5->rFactor({1});
+    // tv6[I0, R1o, I1i{32}, I2] = tv4[I0, I1, I2]
+    // tv5[I0,    , R1i{32}, I2] = tv6[I0, R1o, I1i{32}, I2]
 
     tv5->split(0, 4);
     tv5->split(-1, 4);
+    // After splitting axis 0 and the last axis of tv5 by 4:
+    // tv5[I0o, I0i{4}, R1i{32}, I2o, I2i{4}]
 
     tv0->computeAt(tv5, -1);
     tv1->computeAt(tv5, -1);
 
+    // tv6[I0o, I0i{4}, R1o, I1i{32}, I2o, I2i{4}]
+    // tv5[I0o, I0i{4},    , R1i{32}, I2o, I2i{4}]
+    // --> (the '|' character marks the computeAt position)
+    // tv4[I0o, I0i{4}, I1i{32}, I2o, I2i{4}|, I1o]
+    // tv6[I0o, I0i{4}, I1i{32}, I2o, I2i{4}|, R1o]
+    // tv5[I0o, I0i{4}, R1i{32}, I2o, I2i{4}|]
+
     tv0->computeAt(tv6, -1);
     tv1->computeAt(tv6, -1);
+    // tv4[I0o, I0i{4}, I1i{32}, I2o, I2i{4}, I1o |]
+    // tv6[I0o, I0i{4}, I1i{32}, I2o, I2i{4}, R1o |]
+    // tv5[I0o, I0i{4}, R1i{32}, I2o, I2i{4}|]
 
     tv5->axis(0)->parallelize(ParallelType::BIDz);
     tv5->axis(1)->parallelize(ParallelType::TIDz);
diff --git a/torch/csrc/jit/codegen/cuda/iter_visitor.cpp b/torch/csrc/jit/codegen/cuda/iter_visitor.cpp
@@ -56,7 +56,7 @@ void IterVisitor::traverseFrom(
   FusionGuard fg(fusion);
   std::unordered_set<Statement*> visited;
   stmt_stack.clear();
-  if(!from.empty())
+  if (!from.empty())
     stmt_stack.emplace_back(from.rbegin(), from.rend());
 
   while (!stmt_stack.empty()) {
@@ -191,7 +191,7 @@ std::unordered_set<Val*> IterVisitor::getTerminatingOutputs(
   auto exprs = Exprs::getExprs(
       fusion,
       std::vector<Val*>(fusion->outputs().begin(), fusion->outputs().end()));
-      
+
   for (auto expr : exprs) {
     for (auto inp : expr->inputs())
       used_vals.emplace(inp);