@@ -178,38 +178,27 @@ echo "ctx_nodes: ${ctx_nodes[@]}, total_ctx_nodes_num: ${total_ctx_nodes_num}"
178
178
179
179
rm -rf ${full_logdir}/hostnames
180
180
181
- node_list_start=0
182
181
# start the gen workers
183
182
for i in $( seq 0 $(( num_gen_servers - 1 )) ) ; do
184
- node_list_start=$(( i * gen_nodes_num))
185
- node_list=(${gen_nodes[@]: ${node_list_start} : ${gen_nodes_num} } )
186
- echo " gen node_list index ${i} : ${node_list[@]} "
187
183
srun -l -N ${gen_nodes_num} \
188
184
--ntasks=${gen_tp_size} \
189
185
--ntasks-per-node=${ntasks_per_node} \
190
- --segment=${gen_nodes_num} \
191
186
--container-image=${container_image} \
192
187
--container-name=${container_name} \
193
188
--container-mounts=${mounts} \
194
- --nodelist=$(IFS=,; echo "${node_list[*]}") \
195
189
--mpi=pmix \
196
190
bash ${workdir}/start_worker.sh "GEN" ${i} ${model_dir} "8336" ${benchmark_mode} ${concurrency} ${enable_pdl} ${full_logdir} ${nsys_on} \
197
191
&> ${full_logdir}/output_gen_${i}.log &
198
192
done
199
193
200
194
# start the ctx workers
201
195
for i in $( seq 0 $(( num_ctx_servers - 1 )) ) ; do
202
- node_list_start=$(( i * ctx_nodes_num))
203
- node_list=(${ctx_nodes[@]: ${node_list_start} : ${ctx_nodes_num} } )
204
- echo " ctx node_list index ${i} : ${node_list[@]} "
205
196
srun -l -N ${ctx_nodes_num} \
206
197
--ntasks=${ctx_tp_size} \
207
198
--ntasks-per-node=${ntasks_per_node} \
208
- --segment=${ctx_nodes_num} \
209
199
--container-image=${container_image} \
210
200
--container-name=${container_name} \
211
201
--container-mounts=${mounts} \
212
- --nodelist=$(IFS=,; echo "${node_list[*]}") \
213
202
--mpi=pmix \
214
203
bash ${workdir}/start_worker.sh "CTX" ${i} ${model_dir} "8336" ${benchmark_mode} ${concurrency} ${enable_pdl} ${full_logdir} ${nsys_on} \
215
204
&> ${full_logdir}/output_ctx_${i}.log &
0 commit comments