ROCm · jithunnair-amd · Dec 14, 2023 · Dec 8, 2023 · Dec 8, 2023
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py
@@ -4493,6 +4493,10 @@ def _test_ddp_hook_with_optimizer_parity(
             BACKEND == "nccl" or BACKEND == "ucc",
             "Issues with async error handling, see https://github.com/pytorch/pytorch/issues/73259"
         )
+        @sandcastle_skip_if(
+            BACKEND == "gloo" and HAS_TORCHVISION,
+            "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+        )
         @skip_if_lt_x_gpu(2)
         @parametrize("grad_as_bucket_view", [True, False])
         @parametrize("static_graph", [True, False])
@@ -4520,6 +4524,10 @@ def test_ddp_hook_with_optimizer_parity_adamw(
             BACKEND == "nccl" or BACKEND == "ucc",
             "Issues with async error handling, see https://github.com/pytorch/pytorch/issues/73259"
         )
+        @sandcastle_skip_if(
+            BACKEND == "gloo" and HAS_TORCHVISION,
+            "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+        )
         @skip_if_lt_x_gpu(2)
         @parametrize("optimize_subset", [True, False])
         def test_ddp_hook_with_optimizer_parity_adam(self, optimize_subset):
@@ -4540,6 +4548,10 @@ def test_ddp_hook_with_optimizer_parity_adam(self, optimize_subset):
             BACKEND == "nccl" or BACKEND == "ucc",
             "Issues with async error handling, see https://github.com/pytorch/pytorch/issues/73259"
         )
+        @sandcastle_skip_if(
+            BACKEND == "gloo" and HAS_TORCHVISION,
+            "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+        )
         @skip_if_lt_x_gpu(2)
         @parametrize("optimize_subset", [True, False])
         def test_ddp_hook_with_optimizer_parity_sgd(self, optimize_subset):
@@ -4629,8 +4641,11 @@ def _test_ddp_apply_optim_in_backward(
                         # case.
                         optim.zero_grad(set_to_none=True)
 
-        @skip_if_rocm
         @skip_if_lt_x_gpu(2)
+        @sandcastle_skip_if(
+            BACKEND == "gloo" and HAS_TORCHVISION,
+            "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+        )
         def test_ddp_apply_optim_in_backward(self):
             for optim_cls in [torch.optim.SGD, torch.optim.Adam]:
                 with self.subTest(optim_cls=optim_cls):
@@ -4639,8 +4654,11 @@ def test_ddp_apply_optim_in_backward(self):
                         optim_kwargs={"lr": 0.03}
                     )
 
-        @skip_if_rocm
         @skip_if_lt_x_gpu(2)
+        @sandcastle_skip_if(
+            BACKEND == "gloo" and HAS_TORCHVISION,
+            "Failing with gloo backend + torchvision due to ongoing issue https://github.com/pytorch/pytorch/issues/111834",
+        )
         def test_ddp_apply_optim_in_backward_grad_as_bucket_view_false(self):
             self._test_ddp_apply_optim_in_backward(
                 optim_cls=torch.optim.SGD,