Skip to content

Commit 45045cd

Browse files
authored
Enable tests previously disabled due to an aliasing bug (#2005)
* Enable tests previously disabled due to an aliasing bug. The bug was fixed by #1792.
1 parent 967aa77 commit 45045cd

File tree

1 file changed

+13
-11
lines changed

1 file changed

+13
-11
lines changed

torch/csrc/jit/codegen/cuda/test/test_gpu.cpp

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9413,7 +9413,7 @@ TEST_F(NVFuserTest, FusionMagicSchedulerInstanceNormalizationBackward_CUDA) {
94139413
"");
94149414
}
94159415

9416-
TEST_F(NVFuserTest, FusionPersistentSoftmaxLocalSmem_CUDA) {
9416+
TEST_F(NVFuserTest, FusionPersistentSoftmaxLocalShared_CUDA) {
94179417
Fusion fusion;
94189418
FusionGuard fg(&fusion);
94199419

@@ -9519,10 +9519,11 @@ TEST_F(NVFuserTest, FusionPersistentSoftmaxLocalSmem_CUDA) {
95199519
const int64_t dimy = 16384;
95209520

95219521
auto properties = at::cuda::getDeviceProperties(0);
9522-
// Require 70KB of smem to run test
9523-
const size_t required_smem_size = 70 << 10;
9522+
const size_t required_smem_size =
9523+
(dimy - static_size) * sizeof(float) + TIDX * sizeof(float);
95249524
if (properties->sharedMemPerBlockOptin < required_smem_size) {
9525-
GTEST_SKIP() << "not enough shared memory space on device to run test";
9525+
GTEST_SKIP() << "not enough shared memory space on device to run test: "
9526+
<< properties->sharedMemPerBlock;
95269527
}
95279528

95289529
auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
@@ -9708,6 +9709,14 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
97089709
const float kEps = 1e-5;
97099710
auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
97109711

9712+
auto properties = at::cuda::getDeviceProperties(0);
9713+
const size_t required_smem_size =
9714+
(dimy - static_size) * sizeof(float) + TIDX * sizeof(float);
9715+
if (properties->sharedMemPerBlockOptin < required_smem_size) {
9716+
GTEST_SKIP() << "not enough shared memory space on device to run test: "
9717+
<< properties->sharedMemPerBlock;
9718+
}
9719+
97119720
at::Tensor aten_input = at::randn({dimx, dimy}, options);
97129721
at::Tensor aten_static_in = aten_input.narrow(1, 0, static_size);
97139722
at::Tensor aten_dynamic_in =
@@ -9723,13 +9732,6 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
97239732
torch::jit::fuser::cuda::FusionExecutor fe;
97249733
fe.compileFusion(&fusion, aten_inputs);
97259734

9726-
auto properties = at::cuda::getDeviceProperties(0);
9727-
// Require 70KB of smem to run test
9728-
const size_t required_smem_size = 70 << 10;
9729-
if (properties->sharedMemPerBlockOptin < required_smem_size) {
9730-
GTEST_SKIP() << "not enough shared memory space on device to run test";
9731-
}
9732-
97339735
fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});
97349736

97359737
auto at_mu = at::mean(aten_input.to(at::kDouble), -1).unsqueeze(1);

0 commit comments

Comments
 (0)