We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 668f6cd commit f13fe3f (Copy full SHA for f13fe3f)
train.py
@@ -355,7 +355,12 @@ def loss_fn(pred, labels):
355
gpu_memory_monitor.reset_peak_stats()
356
357
# train loop
358
- logger.info(f"Training starts at step {train_state.step + 1}")
+ logger.info(
359
+ f"Training starts at step {train_state.step + 1}, "
360
+ f"with local batch size: {job_config.training.batch_size}, "
361
+ f"sequence length: {job_config.training.seq_len}, "
362
+ f"total steps: {job_config.training.steps}({job_config.training.warmup_steps}), "
363
+ )
364
with maybe_enable_profiling(
365
job_config, global_step=train_state.step
366
) as torch_profiler, maybe_enable_memory_snapshot(
0 commit comments