Skip to content

Commit bee6c69

Browse files
authored
bug fix (csarofeen#1819)
1 parent 4413c8f commit bee6c69

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

torch/csrc/jit/codegen/cuda/test/test_gpu.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9636,7 +9636,6 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
9636 9636

9637 9637
torch::jit::fuser::cuda::FusionExecutor fe;
9638 9638
fe.compileFusion(&fusion, aten_inputs);
9639-
fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});
9640 9639

9641 9640
auto properties = at::cuda::getDeviceProperties(0);
9642 9641
// Require 70KB of smem to run test
@@ -9645,6 +9644,8 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
9645 9644
GTEST_SKIP() << "not enough shared memory space on device to run test";
9646 9645
}
9647 9646

9647+
fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});
9648+
9648 9649
auto at_mu = at::mean(aten_input.to(at::kDouble), -1).unsqueeze(1);
9649 9650
auto at_var = at::var(aten_input.to(at::kDouble), -1, false).unsqueeze(1);
9650 9651
auto at_rvar = at::rsqrt(at::add(at_var, kEps));

0 commit comments

Comments
 (0)