2 files changed, 13 insertions(+), 0 deletions(-)

@@ -47,6 +47,7 @@ class TestSkip(NamedTuple):
         72, "Skipped because distributed backend is not available."
     ),
     "small_worldsize": TestSkip(73, "Skipped due to small world size."),
+    "odd_worldsize": TestSkip(87, "Skipped due to odd world size."),
     "no_cuda": TestSkip(74, "CUDA is not available."),
     "multi-gpu-1": TestSkip(75, "Need at least 1 CUDA device"),
     "multi-gpu-2": TestSkip(77, "Need at least 2 CUDA devices"),
@@ -108,6 +109,15 @@ def wrapper(*args, **kwargs):
 
     return wrapper
 
+def skip_if_odd_worldsize(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if (os.environ["BACKEND"] != "mpi") and int(os.environ["WORLD_SIZE"]) % 2 == 1:
+            sys.exit(TEST_SKIPS["odd_worldsize"].exit_code)
+
+        return func(*args, **kwargs)
+
+    return wrapper
 
 def require_n_gpus_for_nccl_backend(n, backend):
     def decorator(func):
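The decorator added above signals a skip by exiting the child process with the new exit code 87 rather than raising, so the multiprocess test harness has to translate that exit code back into a skip. Below is a minimal sketch of that translation, offered as an assumption about how such a harness behaves rather than the harness's actual implementation; the helper names run_test and check_return_code are made up for illustration.

import subprocess
import unittest
from collections import namedtuple

# Local stand-in mirroring the TestSkip entry added in the diff above.
TestSkip = namedtuple("TestSkip", ["exit_code", "message"])
TEST_SKIPS = {"odd_worldsize": TestSkip(87, "Skipped due to odd world size.")}

def check_return_code(returncode):
    # Translate a child process's exit status into a unittest skip or a failure.
    for skip in TEST_SKIPS.values():
        if returncode == skip.exit_code:
            raise unittest.SkipTest(skip.message)
    if returncode != 0:
        raise RuntimeError(f"child test process failed with exit code {returncode}")

def run_test(cmd):
    # Run one test subprocess and interpret its exit status.
    proc = subprocess.run(cmd)
    check_return_code(proc.returncode)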
@@ -49,6 +49,7 @@
     simple_sparse_reduce_tests,
     skip_if_rocm,
     skip_if_small_worldsize,
+    skip_if_odd_worldsize,
     skip_if_lt_x_gpu,
     nccl_skip_if_lt_x_gpu,
     skip_if_no_gpu,
@@ -4871,6 +4872,7 @@ def _create_hierarchical_model_averager(self):
         )
 
     @skip_if_lt_x_gpu(4)
+    @skip_if_odd_worldsize
     @sandcastle_skip_if(
         BACKEND not in DistTestCases.backend_feature["ddp"],
         f"The {BACKEND} backend does not support DistributedDataParallel"
@@ -4883,6 +4885,7 @@ def test_post_localSGD_optimizer_parity_with_hierarchical_sgd(self):
         )
 
     @skip_if_lt_x_gpu(4)
+    @skip_if_odd_worldsize
     @sandcastle_skip_if(
         BACKEND not in DistTestCases.backend_feature["ddp"],
         f"The {BACKEND} backend does not support DistributedDataParallel"