Skip to content

Commit c357254

Browse files
committed
2 parents e488a5b + 2ee4528 commit c357254

File tree

236 files changed

+11713
-2486
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

236 files changed

+11713
-2486
lines changed

.buildkite/check-wheel-size.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,43 @@
11
import os
2+
import sys
23
import zipfile
34

4-
MAX_SIZE_MB = 250
5+
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 250 MB
6+
VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 250))
57

68

79
def print_top_10_largest_files(zip_file):
    """Report the ten largest members of *zip_file* by uncompressed size.

    Sizes are printed in megabytes with two decimal places, largest first.
    Returns None; output goes to stdout.
    """
    with zipfile.ZipFile(zip_file, 'r') as archive:
        # Pair every member name with its uncompressed byte count.
        member_sizes = [(name, archive.getinfo(name).file_size)
                        for name in archive.namelist()]
        # Largest first; show at most the top ten entries.
        top_ten = sorted(member_sizes, key=lambda pair: pair[1],
                         reverse=True)[:10]
        for name, size in top_ten:
            print(f"{name}: {size / (1024 * 1024):.2f} MBs uncompressed.")
1316

1417

1518
def check_wheel_size(directory):
    """Recursively scan *directory* for .whl files and verify their size.

    Prints a status line for every wheel found. As soon as one wheel
    exceeds VLLM_MAX_SIZE_MB, prints the ten largest archive members and
    returns 1; returns 0 when every wheel is within the limit.
    """
    megabyte = 1024 * 1024
    for root, _, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".whl"):
                continue
            wheel_path = os.path.join(root, file_name)
            wheel_size_mb = os.path.getsize(wheel_path) / megabyte
            if wheel_size_mb <= VLLM_MAX_SIZE_MB:
                print(f"Wheel {wheel_path} is within the allowed size "
                      f"({wheel_size_mb:.2f} MB).")
                continue
            # Oversized: explain why, show the biggest members, and fail.
            print(f"Not allowed: Wheel {wheel_path} is larger "
                  f"({wheel_size_mb:.2f} MB) than the limit "
                  f"({VLLM_MAX_SIZE_MB} MB).")
            print_top_10_largest_files(wheel_path)
            return 1
    return 0
3235

3336

3437
if __name__ == "__main__":
    # Exactly one positional argument is required: the directory to scan.
    if len(sys.argv) < 2:
        print("Usage: python check-wheel-size.py <directory>")
        sys.exit(1)

    # Exit code mirrors the check result: 0 = all wheels fit, 1 = too big.
    sys.exit(check_wheel_size(sys.argv[1]))

.buildkite/run-amd-test.sh

100644100755
Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This script runs test inside the corresponding ROCm docker container.
2-
set -ex
2+
set -o pipefail
33

44
# Print ROCm version
55
echo "--- Confirming Clean Initial State"
@@ -70,16 +70,51 @@ HF_CACHE="$(realpath ~)/huggingface"
7070
mkdir -p ${HF_CACHE}
7171
HF_MOUNT="/root/.cache/huggingface"
7272

73-
docker run \
73+
commands=$@
74+
PARALLEL_JOB_COUNT=8
75+
# check if the command contains a shard flag; if so, we run all shards in parallel because the host has 8 GPUs.
76+
if [[ $commands == *"--shard-id="* ]]; then
77+
for GPU in $(seq 0 $(($PARALLEL_JOB_COUNT-1))); do
78+
#replace shard arguments
79+
commands=${@//"--shard-id= "/"--shard-id=${GPU} "}
80+
commands=${commands//"--num-shards= "/"--num-shards=${PARALLEL_JOB_COUNT} "}
81+
docker run \
7482
--device /dev/kfd --device /dev/dri \
7583
--network host \
7684
--shm-size=16gb \
7785
--rm \
78-
-e HIP_VISIBLE_DEVICES=0 \
86+
-e HIP_VISIBLE_DEVICES=${GPU} \
7987
-e HF_TOKEN \
8088
-v ${HF_CACHE}:${HF_MOUNT} \
8189
-e HF_HOME=${HF_MOUNT} \
82-
--name ${container_name} \
90+
--name ${container_name}_${GPU} \
8391
${image_name} \
84-
/bin/bash -c "${@}"
85-
92+
/bin/bash -c "${commands}" \
93+
|& while read -r line; do echo ">>Shard $GPU: $line"; done &
94+
PIDS+=($!)
95+
done
96+
#wait for all processes to finish and collect exit codes
97+
for pid in ${PIDS[@]}; do
98+
wait ${pid}
99+
STATUS+=($?)
100+
done
101+
for st in ${STATUS[@]}; do
102+
if [[ ${st} -ne 0 ]]; then
103+
echo "One of the processes failed with $st"
104+
exit ${st}
105+
fi
106+
done
107+
else
108+
docker run \
109+
--device /dev/kfd --device /dev/dri \
110+
--network host \
111+
--shm-size=16gb \
112+
--rm \
113+
-e HIP_VISIBLE_DEVICES=0 \
114+
-e HF_TOKEN \
115+
-v ${HF_CACHE}:${HF_MOUNT} \
116+
-e HF_HOME=${HF_MOUNT} \
117+
--name ${container_name} \
118+
${image_name} \
119+
/bin/bash -c "${commands}"
120+
fi

.buildkite/run-cpu-test.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,12 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
2323
# Run basic model test
2424
docker exec cpu-test bash -c "
2525
pip install pytest matplotlib einops transformers_stream_generator
26-
pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU is not supported
26+
pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py \
27+
--ignore=tests/models/test_oot_registration.py \
28+
--ignore=tests/models/test_registry.py \
29+
--ignore=tests/models/test_fp8.py \
30+
--ignore=tests/models/test_jamba.py \
31+
--ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B on CPU are not supported
2732

2833
# online inference
2934
docker exec cpu-test bash -c "

.buildkite/test-pipeline.yaml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ steps:
9090
- pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py
9191
- pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
9292
- pytest -v -s entrypoints/openai
93+
- pytest -v -s entrypoints/test_chat_utils.py
94+
9395

9496
- label: Distributed Tests (4 GPUs) # 10min
9597
working_dir: "/vllm-workspace/tests"
@@ -156,6 +158,7 @@ steps:
156158
- python3 offline_inference_with_prefix.py
157159
- python3 llm_engine_example.py
158160
- python3 offline_inference_vision_language.py
161+
- python3 offline_inference_vision_language_multi_image.py
159162
- python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
160163
- python3 offline_inference_encoder_decoder.py
161164

@@ -217,9 +220,9 @@ steps:
217220
- pytest -v -s spec_decode
218221

219222
- label: LoRA Test %N # 30min each
223+
mirror_hardwares: [amd]
220224
source_file_dependencies:
221225
- vllm/lora
222-
- csrc/punica
223226
- tests/lora
224227
command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
225228
parallelism: 4
@@ -270,6 +273,15 @@ steps:
270273
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
271274
- bash ./run-tests.sh -c configs/models-small.txt -t 1
272275

276+
- label: OpenAI-Compatible Tool Use # 20 min
277+
fast_check: false
278+
mirror_hardwares: [ amd ]
279+
source_file_dependencies:
280+
- vllm/
281+
- tests/tool_use
282+
commands:
283+
- pytest -v -s tool_use
284+
273285
##### 1 GPU test #####
274286
##### multi gpus test #####
275287

@@ -357,9 +369,9 @@ steps:
357369
- label: LoRA Long Context (Distributed) # 11min
358370
# This test runs llama 13B, so it is required to run on 4 GPUs.
359371
num_gpus: 4
372+
soft_fail: true
360373
source_file_dependencies:
361374
- vllm/lora
362-
- csrc/punica
363375
- tests/lora/test_long_context
364376
commands:
365377
# FIXIT: find out which code initialize cuda before running the test

.github/workflows/add_label_ready_comment.yml

Lines changed: 0 additions & 23 deletions
This file was deleted.

.github/workflows/reminder_comment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
owner: context.repo.owner,
1616
repo: context.repo.repo,
1717
issue_number: context.issue.number,
18-
body: '👋 Hi! Thank you for contributing to the vLLM project.\n Just a reminder: PRs would not trigger full CI run by default. Instead, it would only run `fastcheck` CI which consists a small and essential subset of CI tests to quickly catch errors. You can run other CI tests on top of default ones by unblocking the steps in your `fast-check` build on Buildkite UI. \n\nOnce the PR is approved and ready to go, please make sure to run full CI as it is required to merge (or just use auto-merge).\n\n To run full CI, you can do one of these:\n- Comment `/ready` on the PR\n- Add `ready` label to the PR\n- Enable auto-merge.\n\n🚀'
18+
body: '👋 Hi! Thank you for contributing to the vLLM project.\n Just a reminder: PRs would not trigger full CI run by default. Instead, it would only run `fastcheck` CI which starts running only a small and essential subset of CI tests to quickly catch errors. You can run other CI tests on top of those by going to your `fastcheck` build on Buildkite UI (linked in the PR checks section) and unblock them. If you do not have permission to unblock, ping `simon-mo` or `khluu` to add you in our Buildkite org. \n\nOnce the PR is approved and ready to go, your PR reviewer(s) can run CI to test the changes comprehensively before merging.\n\n To run CI, PR reviewers can do one of these:\n- Add `ready` label to the PR\n- Enable auto-merge.\n\n🚀'
1919
})
2020
env:
2121
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/remove_label_not_ready_comment.yml

Lines changed: 0 additions & 23 deletions
This file was deleted.

CODE_OF_CONDUCT.md

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
2+
# vLLM Code of Conduct
3+
4+
## Our Pledge
5+
6+
We as members, contributors, and leaders pledge to make participation in our
7+
community a harassment-free experience for everyone, regardless of age, body
8+
size, visible or invisible disability, ethnicity, sex characteristics, gender
9+
identity and expression, level of experience, education, socioeconomic status,
10+
nationality, personal appearance, race, caste, color, religion, or sexual
11+
identity and orientation.
12+
13+
We pledge to act and interact in ways that contribute to an open, welcoming,
14+
diverse, inclusive, and healthy community.
15+
16+
## Our Standards
17+
18+
Examples of behavior that contributes to a positive environment for our
19+
community include:
20+
21+
* Demonstrating empathy and kindness toward other people
22+
* Being respectful of differing opinions, viewpoints, and experiences
23+
* Giving and gracefully accepting constructive feedback
24+
* Accepting responsibility and apologizing to those affected by our mistakes,
25+
and learning from the experience
26+
* Focusing on what is best not just for us as individuals, but for the overall
27+
community
28+
29+
Examples of unacceptable behavior include:
30+
31+
* The use of sexualized language or imagery, and sexual attention or advances of
32+
any kind
33+
* Trolling, insulting or derogatory comments, and personal or political attacks
34+
* Public or private harassment
35+
* Publishing others' private information, such as a physical or email address,
36+
without their explicit permission
37+
* Other conduct which could reasonably be considered inappropriate in a
38+
professional setting
39+
40+
## Enforcement Responsibilities
41+
42+
Community leaders are responsible for clarifying and enforcing our standards of
43+
acceptable behavior and will take appropriate and fair corrective action in
44+
response to any behavior that they deem inappropriate, threatening, offensive,
45+
or harmful.
46+
47+
Community leaders have the right and responsibility to remove, edit, or reject
48+
comments, commits, code, wiki edits, issues, and other contributions that are
49+
not aligned to this Code of Conduct, and will communicate reasons for moderation
50+
decisions when appropriate.
51+
52+
## Scope
53+
54+
This Code of Conduct applies within all community spaces, and also applies when
55+
an individual is officially representing the community in public spaces.
56+
Examples of representing our community include using an official email address,
57+
posting via an official social media account, or acting as an appointed
58+
representative at an online or offline/IRL event.
59+
60+
## Enforcement
61+
62+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
63+
reported to the community leaders responsible for enforcement in the #code-of-conduct
64+
channel in the [vLLM Discord](https://discord.com/invite/jz7wjKhh6g).
65+
All complaints will be reviewed and investigated promptly and fairly.
66+
67+
All community leaders are obligated to respect the privacy and security of the
68+
reporter of any incident.
69+
70+
## Enforcement Guidelines
71+
72+
Community leaders will follow these Community Impact Guidelines in determining
73+
the consequences for any action they deem in violation of this Code of Conduct:
74+
75+
### 1. Correction
76+
77+
**Community Impact**: Use of inappropriate language or other behavior deemed
78+
unprofessional or unwelcome in the community.
79+
80+
**Consequence**: A private, written warning from community leaders, providing
81+
clarity around the nature of the violation and an explanation of why the
82+
behavior was inappropriate. A public apology may be requested.
83+
84+
### 2. Warning
85+
86+
**Community Impact**: A violation through a single incident or series of
87+
actions.
88+
89+
**Consequence**: A warning with consequences for continued behavior. No
90+
interaction with the people involved, including unsolicited interaction with
91+
those enforcing the Code of Conduct, for a specified period of time. This
92+
includes avoiding interactions in community spaces as well as external channels
93+
like social media. Violating these terms may lead to a temporary or permanent
94+
ban.
95+
96+
### 3. Temporary Ban
97+
98+
**Community Impact**: A serious violation of community standards, including
99+
sustained inappropriate behavior.
100+
101+
**Consequence**: A temporary ban from any sort of interaction or public
102+
communication with the community for a specified period of time. No public or
103+
private interaction with the people involved, including unsolicited interaction
104+
with those enforcing the Code of Conduct, is allowed during this period.
105+
Violating these terms may lead to a permanent ban.
106+
107+
### 4. Permanent Ban
108+
109+
**Community Impact**: Demonstrating a pattern of violation of community
110+
standards, including sustained inappropriate behavior, harassment of an
111+
individual, or aggression toward or disparagement of classes of individuals.
112+
113+
**Consequence**: A permanent ban from any sort of public interaction within the
114+
community.
115+
116+
## Attribution
117+
118+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/),
119+
version 2.1, available at
120+
[v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
121+
122+
Community Impact Guidelines were inspired by
123+
[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion).
124+
125+
For answers to common questions about this code of conduct, see the
126+
[Contributor Covenant FAQ](https://www.contributor-covenant.org/faq). Translations are available at
127+
[Contributor Covenant translations](https://www.contributor-covenant.org/translations).
128+

0 commit comments

Comments
 (0)