From 7f913d9eea2dd6333d849d594c82253a2a967360 Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 19 Jun 2024 21:19:38 +0000 Subject: [PATCH 1/3] p Signed-off-by: kevin --- .buildkite/test-template-aws.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-template-aws.j2 b/.buildkite/test-template-aws.j2 index 08146bf4454c..7eefac226d2b 100644 --- a/.buildkite/test-template-aws.j2 +++ b/.buildkite/test-template-aws.j2 @@ -75,7 +75,7 @@ steps: - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'" resources: limits: - nvidia.com/gpu: 8 + nvidia.com/gpu: {{ step.num_gpus or 8 }} volumeMounts: - name: devshm mountPath: /dev/shm From 1f950ee2f501e286ad2363a5bb6c56bb6f148438 Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 19 Jun 2024 21:34:29 +0000 Subject: [PATCH 2/3] p Signed-off-by: kevin --- .buildkite/test-template-aws.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-template-aws.j2 b/.buildkite/test-template-aws.j2 index 7eefac226d2b..fb34b787e0cb 100644 --- a/.buildkite/test-template-aws.j2 +++ b/.buildkite/test-template-aws.j2 @@ -75,7 +75,7 @@ steps: - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'" resources: limits: - nvidia.com/gpu: {{ step.num_gpus or 8 }} + nvidia.com/gpu: {{ step.num_gpus or 1 }} volumeMounts: - name: devshm mountPath: /dev/shm From 363640f76124a54952c973b4bccde06909632303 Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 19 Jun 2024 21:35:31 +0000 Subject: [PATCH 3/3] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index b1602dd9496b..95cd5b1989ee 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -184,6 +184,7 @@ steps: - label: Distributed Tests (A100) gpu: a100 + num_gpus: 4 commands: # NOTE: don't test llama model here, it seems hf implementation is buggy # see https://github.com/vllm-project/vllm/pull/5689 for details