
Commit 9c084f9

jaglinux authored and pruthvistony committed
[Distributed tests] Add skip for odd world_size condition
As per pytorch#74995, the tests need to be skipped for an odd WORLD_SIZE.

Signed-off-by: Jagadish Krishnamoorthy <[email protected]>

Fixes pytorch#74995
Pull Request resolved: pytorch#76136
Approved by: https://github.com/kumpera, https://github.com/wayi1
1 parent e2476c0 commit 9c084f9

File tree

2 files changed: +13 -0 lines changed

torch/testing/_internal/common_distributed.py

Lines changed: 10 additions & 0 deletions

@@ -47,6 +47,7 @@ class TestSkip(NamedTuple):
         72, "Skipped because distributed backend is not available."
     ),
     "small_worldsize": TestSkip(73, "Skipped due to small world size."),
+    "odd_worldsize": TestSkip(87, "Skipped due to odd world size."),
     "no_cuda": TestSkip(74, "CUDA is not available."),
     "multi-gpu-1": TestSkip(75, "Need at least 1 CUDA device"),
     "multi-gpu-2": TestSkip(77, "Need at least 2 CUDA devices"),

@@ -108,6 +109,15 @@ def wrapper(*args, **kwargs):

     return wrapper

+def skip_if_odd_worldsize(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if (os.environ["BACKEND"] != "mpi") and int(os.environ["WORLD_SIZE"]) % 2 == 1:
+            sys.exit(TEST_SKIPS["odd_worldsize"].exit_code)
+
+        return func(*args, **kwargs)
+
+    return wrapper

 def require_n_gpus_for_nccl_backend(n, backend):
     def decorator(func):
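To see the pattern outside the diff, here is a minimal, self-contained sketch of the same skip mechanism. The `TEST_SKIPS` table is reduced to the single entry added above, and `test_requires_even_world_size` is a hypothetical test function, not one from the PyTorch suite:

```python
import os
import sys
from functools import wraps
from typing import NamedTuple

class TestSkip(NamedTuple):
    exit_code: int
    message: str

# Reduced stand-in for the TEST_SKIPS table in common_distributed.py.
TEST_SKIPS = {"odd_worldsize": TestSkip(87, "Skipped due to odd world size.")}

def skip_if_odd_worldsize(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        # For non-MPI backends the world size comes from the WORLD_SIZE
        # environment variable; an odd value makes the process exit with
        # the dedicated skip code (87) instead of running the test body.
        if (os.environ["BACKEND"] != "mpi") and int(os.environ["WORLD_SIZE"]) % 2 == 1:
            sys.exit(TEST_SKIPS["odd_worldsize"].exit_code)
        return func(*args, **kwargs)
    return wrapper

@skip_if_odd_worldsize
def test_requires_even_world_size():  # hypothetical test
    print("running with an even world size")

if __name__ == "__main__":
    os.environ.setdefault("BACKEND", "gloo")
    os.environ.setdefault("WORLD_SIZE", "4")
    test_requires_even_world_size()
```

With `WORLD_SIZE=4` the test body runs; with `WORLD_SIZE=3` the process exits with code 87 before the body executes.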

torch/testing/_internal/distributed/distributed_test.py

Lines changed: 3 additions & 0 deletions

@@ -49,6 +49,7 @@
     simple_sparse_reduce_tests,
     skip_if_rocm,
     skip_if_small_worldsize,
+    skip_if_odd_worldsize,
     skip_if_lt_x_gpu,
     nccl_skip_if_lt_x_gpu,
     skip_if_no_gpu,

@@ -4871,6 +4872,7 @@ def _create_hierarchical_model_averager(self):
     )

     @skip_if_lt_x_gpu(4)
+    @skip_if_odd_worldsize
     @sandcastle_skip_if(
         BACKEND not in DistTestCases.backend_feature["ddp"],
         f"The {BACKEND} backend does not support DistributedDataParallel"

@@ -4883,6 +4885,7 @@ def test_post_localSGD_optimizer_parity_with_hierarchical_sgd(self):
     )

     @skip_if_lt_x_gpu(4)
+    @skip_if_odd_worldsize
     @sandcastle_skip_if(
         BACKEND not in DistTestCases.backend_feature["ddp"],
         f"The {BACKEND} backend does not support DistributedDataParallel"
