@@ -69,7 +69,7 @@ def build_test_list():
69
69
"--experimental.pipeline_parallel_split_points layers.4" ,
70
70
"--experimental.pipeline_parallel_schedule 1f1b" ,
71
71
"--training.data_parallel_degree 1" ,
72
- "--model.norm_type rmsnorm" , # fused_rmsnorm crashes with PP
72
+ "--model.norm_type rmsnorm" , # compiled_rmsnorm / fused_rmsnorm crashes with PP
73
73
],
74
74
],
75
75
"PP 1D test 1f1b" ,
@@ -85,7 +85,7 @@ def build_test_list():
85
85
"--experimental.pipeline_parallel_split_points layers.4" ,
86
86
"--experimental.pipeline_parallel_schedule gpipe" ,
87
87
"--training.data_parallel_degree 1" ,
88
- "--model.norm_type rmsnorm" , # fused_rmsnorm crashes with PP
88
+ "--model.norm_type rmsnorm" , # compiled_rmsnorm / fused_rmsnorm crashes with PP
89
89
],
90
90
],
91
91
"PP 1D test gpipe" ,
@@ -101,7 +101,7 @@ def build_test_list():
101
101
"--experimental.pipeline_parallel_split_points layers.4" ,
102
102
"--experimental.pipeline_parallel_schedule 1f1b" ,
103
103
"--training.data_parallel_degree 2" ,
104
- "--model.norm_type rmsnorm" , # fused_rmsnorm crashes with PP
104
+ "--model.norm_type rmsnorm" , # compiled_rmsnorm / fused_rmsnorm crashes with PP
105
105
],
106
106
],
107
107
"PP+DP 1f1b 2D test" ,
@@ -116,7 +116,7 @@ def build_test_list():
116
116
"--experimental.pipeline_parallel_split_points layers.4" ,
117
117
"--experimental.pipeline_parallel_schedule gpipe" ,
118
118
"--training.data_parallel_degree 2" ,
119
- "--model.norm_type rmsnorm" , # fused_rmsnorm crashes with PP
119
+ "--model.norm_type rmsnorm" , # compiled_rmsnorm / fused_rmsnorm crashes with PP
120
120
],
121
121
],
122
122
"PP+DP gpipe 2D test" ,
@@ -130,7 +130,6 @@ def build_test_list():
130
130
"--experimental.pipeline_parallel_degree 2" ,
131
131
"--experimental.pipeline_parallel_split_points layers.4" ,
132
132
"--training.tensor_parallel_degree 2" ,
133
- "--model.norm_type rmsnorm" , # fused_rmsnorm not yet compatible with TP
134
133
],
135
134
],
136
135
"PP+TP 2D test" ,
@@ -144,7 +143,6 @@ def build_test_list():
144
143
"--experimental.pipeline_parallel_degree 2" ,
145
144
"--experimental.pipeline_parallel_split_points layers.4" ,
146
145
"--experimental.pipeline_parallel_split_mode tracer" ,
147
- "--model.norm_type rmsnorm" , # fused_rmsnorm not yet compatible with tracer
148
146
],
149
147
],
150
148
"PP tracer frontend test" ,
@@ -162,7 +160,16 @@ def build_test_list():
162
160
OverrideDefinitions (
163
161
[
164
162
[
165
- "--training.compile --model.norm_type=rmsnorm" ,
163
+ "--training.tensor_parallel_degree 2" ,
164
+ ],
165
+ ],
166
+ "2D eager" ,
167
+ "2d_eager" ,
168
+ ),
169
+ OverrideDefinitions (
170
+ [
171
+ [
172
+ "--training.compile" ,
166
173
],
167
174
],
168
175
"1D compile" ,
@@ -182,29 +189,20 @@ def build_test_list():
182
189
OverrideDefinitions (
183
190
[
184
191
[
185
- "--training.compile --training.tensor_parallel_degree 2 --model.norm_type=rmsnorm " ,
192
+ "--training.compile --training.tensor_parallel_degree 2" ,
186
193
],
187
194
],
188
195
"2D compile" ,
189
196
"2d_compile" ,
190
197
),
191
- OverrideDefinitions (
192
- [
193
- [
194
- "--training.tensor_parallel_degree 2 --model.norm_type=rmsnorm" ,
195
- ],
196
- ],
197
- "Eager mode 2DParallel with rmsnorm" ,
198
- "eager_2d_rmsnorm" ,
199
- ),
200
198
OverrideDefinitions (
201
199
[
202
200
[
203
201
"--training.tensor_parallel_degree 2 --model.norm_type=fused_rmsnorm" ,
204
202
],
205
203
],
206
- "Eager mode 2DParallel with fused_rmsnorm" ,
207
- "eager_2d_fused_rmsnorm " ,
204
+ "2D eager with fused_rmsnorm" ,
205
+ "2d_eager_fused_rmsnorm " ,
208
206
),
209
207
OverrideDefinitions (
210
208
[
@@ -248,7 +246,6 @@ def build_test_list():
248
246
"--experimental.pipeline_parallel_split_points layers.4" ,
249
247
"--training.data_parallel_degree 2" ,
250
248
"--training.tensor_parallel_degree 2" ,
251
- "--model.norm_type rmsnorm" , # fused_rmsnorm not yet compatible with TP
252
249
],
253
250
[
254
251
"--training.steps 20" ,
@@ -257,7 +254,6 @@ def build_test_list():
257
254
"--experimental.pipeline_parallel_split_points layers.4" ,
258
255
"--training.data_parallel_degree 2" ,
259
256
"--training.tensor_parallel_degree 2" ,
260
- "--model.norm_type rmsnorm" , # fused_rmsnorm not yet compatible with TP
261
257
],
262
258
],
263
259
"PP+DP+TP 3D test with save/load resume ckpt" ,
@@ -272,7 +268,7 @@ def build_test_list():
272
268
"--experimental.pipeline_parallel_degree 4" ,
273
269
"--experimental.pipeline_parallel_split_points layers.1,layers.2,layers.3,layers.4,layers.5,layers.6,layers.7" ,
274
270
"--experimental.pipeline_parallel_schedule interleaved_1f1b" ,
275
- "--model.norm_type rmsnorm" , # fused_rmsnorm throws cuda context error with pp
271
+ "--model.norm_type rmsnorm" , # compiled_rmsnorm / fused_rmsnorm crashes with PP
276
272
],
277
273
],
278
274
"PP looped 1f1b test" ,
@@ -292,7 +288,8 @@ def build_test_list():
292
288
OverrideDefinitions (
293
289
[
294
290
[
295
- "--memory_estimation.enabled --model.norm_type rmsnorm" ,
291
+ "--memory_estimation.enabled" ,
292
+ "--model.norm_type rmsnorm" , # estimation mode does not support compiled_rmsnorm yet
296
293
]
297
294
],
298
295
"FSDP2 Memory Tracking and Estimation" ,
0 commit comments