Commit e6fe10e

enable batch_size auto for model eval
Summary: Enables passing `--batch_size auto` to the model eval script. On LLaMa 3.1 8B with the wikitext and hellaswag tasks, this reduces the runtime from 13 minutes to 4.5 minutes on my machine (a 2.9x speedup).

Test Plan:

```
with-proxy time python benchmarks/_models/eval_hf_models.py --model_id meta-llama/Llama-3.1-8B --tasks wikitext hellaswag --batch_size auto
```

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: 91c5dd0
ghstack-comment-id: 3151916665
Pull Request resolved: #2675
1 parent f40bd5a · commit e6fe10e
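
For context on what `--batch_size auto` does downstream: lm_eval accepts `"auto"` (and `"auto:N"`, which re-estimates the batch size N times) in addition to integers, sizing batches from available GPU memory. Below is a minimal sketch of such a call, assuming the script wraps the checkpoint with lm_eval's `HFLM`; the actual internals of `eval_hf_models.py` are not shown in this diff.

```python
# Minimal sketch, not the script's actual code. lm_eval's HFLM accepts
# batch_size as an int, a numeric string, "auto", or "auto:N"; with "auto"
# it probes GPU memory to pick the largest batch that fits.
import lm_eval
from lm_eval.models.huggingface import HFLM


def evaluate(model_id: str, tasks: list[str], batch_size: str = "auto"):
    lm = HFLM(pretrained=model_id, batch_size=batch_size)
    return lm_eval.simple_evaluate(model=lm, tasks=tasks)


# results = evaluate("meta-llama/Llama-3.1-8B", ["wikitext", "hellaswag"])
```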

File tree

1 file changed: +1 −1 lines changed

benchmarks/_models/eval_hf_models.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -147,7 +147,7 @@ def run(
         "--device", type=str, default="cuda:0", help="Device to run the model on."
     )
     parser.add_argument(
-        "--batch_size", type=int, default=1, help="Batch size for lm_eval."
+        "--batch_size", type=str, default="auto", help="Batch size for lm_eval."
     )
     parser.add_argument(
         "--prompt",
```
