From 7b181bdb682c2a5d863ee72b82b037fa5270c0eb Mon Sep 17 00:00:00 2001
From: "Kevin H. Luu"
Date: Tue, 21 Jan 2025 11:38:27 -0800
Subject: [PATCH 1/5] Add back default arg for pre-commit

---
 .github/workflows/pre-commit.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index bf9460151ec1..97b831adc084 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -16,4 +16,4 @@ jobs:
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
       with:
-        extra_args: --hook-stage manual
+        extra_args: --hook-stage --all-files manual

From 6217c1f0298505c5891f5cc82f093f561a40bd98 Mon Sep 17 00:00:00 2001
From: "Kevin H. Luu"
Date: Tue, 21 Jan 2025 11:40:23 -0800
Subject: [PATCH 2/5] Update pre-commit.yml

---
 .github/workflows/pre-commit.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 97b831adc084..06564969dc77 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -16,4 +16,4 @@ jobs:
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
       with:
-        extra_args: --hook-stage --all-files manual
+        extra_args: --all-files --hook-stage manual

From 9eec52842ff6c2fb09cf369c96058a16ef1fcd8b Mon Sep 17 00:00:00 2001
From: "Kevin H. Luu"
Date: Tue, 21 Jan 2025 11:46:50 -0800
Subject: [PATCH 3/5] Update __init__.py

---
 vllm/platforms/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index d20d35199bf5..ddbdc43ca571 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
     try:
         from importlib.metadata import version
         is_cpu = "cpu" in version("vllm")
-        if is_cpu == False:
+        if not is_cpu:
             import platform
             is_cpu = platform.machine().lower().startswith("arm")
 

From 75bca170b125ec57624f83820053bb8ad523f302 Mon Sep 17 00:00:00 2001
From: kevin
Date: Tue, 21 Jan 2025 19:53:57 +0000
Subject: [PATCH 4/5] reformat

Signed-off-by: kevin
---
 tests/models/decoder_only/language/test_gguf.py | 17 +++++++----------
 vllm/model_executor/models/paligemma.py         |  2 +-
 vllm/model_executor/models/siglip.py            |  8 +++-----
 3 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/tests/models/decoder_only/language/test_gguf.py b/tests/models/decoder_only/language/test_gguf.py
index 38cea2462b44..ad8f8a0c320e 100644
--- a/tests/models/decoder_only/language/test_gguf.py
+++ b/tests/models/decoder_only/language/test_gguf.py
@@ -74,11 +74,7 @@ def gguf_model(self):
 )
 
 MODELS = [
-    LLAMA_CONFIG,
-    QWEN2_CONFIG,
-    PHI3_CONFIG,
-    GPT2_CONFIG,
-    STABLELM_CONFIG,
+    LLAMA_CONFIG, QWEN2_CONFIG, PHI3_CONFIG, GPT2_CONFIG, STABLELM_CONFIG,
     DOLPHIN_CONFIG
     # STARCODER_CONFIG, # broken
 ]
@@ -114,11 +110,12 @@ def test_models(
         messages, tokenize=False, add_generation_prompt=True)
 
     # Run unquantized model.
-    with vllm_runner(model_name=model.original_model,
-                     enforce_eager=True,  # faster tests
-                     dtype=dtype,
-                     max_model_len=MAX_MODEL_LEN,
-                     tensor_parallel_size=tp_size) as original_model:
+    with vllm_runner(
+            model_name=model.original_model,
+            enforce_eager=True,  # faster tests
+            dtype=dtype,
+            max_model_len=MAX_MODEL_LEN,
+            tensor_parallel_size=tp_size) as original_model:
         original_outputs = original_model.generate_greedy_logprobs(
             example_prompts[:-1], max_tokens, num_logprobs)
 
diff --git a/vllm/model_executor/models/paligemma.py b/vllm/model_executor/models/paligemma.py
index ed9ae1887259..5a28b1ffbb7b 100644
--- a/vllm/model_executor/models/paligemma.py
+++ b/vllm/model_executor/models/paligemma.py
@@ -147,7 +147,7 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
             "up_proj",
         ],
     }
-    
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         config = vllm_config.model_config.hf_config
diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py
index 211e5dc80066..1e51018973e8 100644
--- a/vllm/model_executor/models/siglip.py
+++ b/vllm/model_executor/models/siglip.py
@@ -348,12 +348,10 @@ def __init__(
         if quant_config and quant_config.get_name() == "bitsandbytes":
             quantizable = True
         else:
-            # For other quantization, we require the hidden size to be a 
+            # For other quantization, we require the hidden size to be a
             # multiple of 64
-            quantizable = (
-                config.hidden_size % 64 == 0
-                and config.intermediate_size % 64 == 0
-            )
+            quantizable = (config.hidden_size % 64 == 0
+                           and config.intermediate_size % 64 == 0)
         self.fc1 = ColumnParallelLinear(
             config.hidden_size,
             config.intermediate_size,

From 1e9b2b9a01638569a5e5b1c54a3c613f9829824a Mon Sep 17 00:00:00 2001
From: kevin
Date: Tue, 21 Jan 2025 20:24:52 +0000
Subject: [PATCH 5/5] mypy fix

Signed-off-by: kevin
---
 vllm/v1/stats/common.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/vllm/v1/stats/common.py b/vllm/v1/stats/common.py
index 099d82c5904c..500bc356fc17 100644
--- a/vllm/v1/stats/common.py
+++ b/vllm/v1/stats/common.py
@@ -10,10 +10,11 @@
 from vllm.sampling_params import SamplingParams
 
 
-class RequestStatsUpdate(msgspec.Struct,
-                         array_like=True,
-                         omit_defaults=True,
-                         gc=False):
+class RequestStatsUpdate(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     An update to the request stats.
 
@@ -341,8 +342,8 @@ def update_from(self, update: "RequestStatsUpdate"):
             self.queued_ts_s = ts
         elif update.type == RequestStatsUpdate.Type.PREFILLING:
             self.prefill_start_ts_s_lst.append(ts)
-            self.num_cached_tokens = update.num_cached_tokens
-            self.num_computed_tokens = update.num_computed_tokens
+            self.num_cached_tokens = update.num_cached_tokens or 0
+            self.num_computed_tokens = update.num_computed_tokens or 0
         elif update.type == RequestStatsUpdate.Type.PREEMPTED:
             self._reset_for_preemption(ts)
         elif update.type == RequestStatsUpdate.Type.DECODING:
@@ -350,7 +351,7 @@ def update_from(self, update: "RequestStatsUpdate"):
         elif update.type == RequestStatsUpdate.Type.DETOKENIZED:
             self._record_detokenized_output(
                 ts,
-                update.num_new_tokens,
+                update.num_new_tokens or 0,
             )
         elif update.type == RequestStatsUpdate.Type.FINISHED:
             self.finished_ts_s = ts
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
     output_queue_size: Optional[int] = None
 
 
-class EngineCoreStatsSnapshot(msgspec.Struct,
-                              array_like=True,
-                              omit_defaults=True,
-                              gc=False):
+class EngineCoreStatsSnapshot(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     A snapshot of the EngineCore's current stats over a period of time.
     """