Skip to content

Commit f7345bf

Browse files
committed
fix accuracy tests
Signed-off-by: Bo Deng <[email protected]>
1 parent 5b3f55a commit f7345bf

File tree

5 files changed

+54
-34
lines changed

5 files changed

+54
-34
lines changed

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 50 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -258,30 +258,6 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
258258
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
259259
MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"
260260

261-
def test_nixl_backend(self):
262-
ctx_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
263-
gen_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
264-
disaggregated_server_config = {
265-
"hostname": "localhost",
266-
"port": 8000,
267-
"backend": "pytorch",
268-
"context_servers": {
269-
"num_instances": 1,
270-
"urls": ["localhost:8001"]
271-
},
272-
"generation_servers": {
273-
"num_instances": 1,
274-
"urls": ["localhost:8002"]
275-
}
276-
}
277-
with launch_disaggregated_llm(disaggregated_server_config,
278-
ctx_server_config, gen_server_config,
279-
self.MODEL_PATH) as llm:
280-
task = MMLU(self.MODEL_NAME)
281-
task.evaluate(llm)
282-
task = GSM8K(self.MODEL_NAME)
283-
task.evaluate(llm)
284-
285261
@pytest.mark.skip_less_device_memory(32000)
286262
@pytest.mark.parametrize("disable_overlap_scheduler", [False, True])
287263
def test_auto_dtype(self, disable_overlap_scheduler):
@@ -478,8 +454,18 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
478454
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"
479455

480456
def test_nixl_backend(self):
481-
ctx_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
482-
gen_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
457+
ctx_server_config = {
458+
"disable_overlap_scheduler": True,
459+
"cache_transceiver_config": {
460+
"backend": "nixl"
461+
}
462+
}
463+
gen_server_config = {
464+
"disable_overlap_scheduler": True,
465+
"cache_transceiver_config": {
466+
"backend": "nixl"
467+
}
468+
}
483469
disaggregated_server_config = {
484470
"hostname": "localhost",
485471
"port": 8000,
@@ -494,10 +480,8 @@ def test_nixl_backend(self):
494480
}
495481
}
496482
with launch_disaggregated_llm(disaggregated_server_config,
497-
ctx_server_config,
498-
gen_server_config,
499-
self.MODEL_PATH,
500-
tensor_parallel_size=4) as llm:
483+
ctx_server_config, gen_server_config,
484+
self.MODEL_PATH) as llm:
501485
task = MMLU(self.MODEL_NAME)
502486
task.evaluate(llm)
503487
task = GSM8K(self.MODEL_NAME)
@@ -600,6 +584,42 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
600584
MODEL_NAME = "Qwen3/Qwen3-8B"
601585
MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8"
602586

587+
def test_nixl_backend(self):
588+
ctx_server_config = {
589+
"disable_overlap_scheduler": True,
590+
"cache_transceiver_config": {
591+
"backend": "nixl"
592+
}
593+
}
594+
gen_server_config = {
595+
"disable_overlap_scheduler": True,
596+
"cache_transceiver_config": {
597+
"backend": "nixl"
598+
}
599+
}
600+
ctx_server_config["cache_transceiver_config"]
601+
ctx_server_config["cache_transceiver_config"]
602+
disaggregated_server_config = {
603+
"hostname": "localhost",
604+
"port": 8000,
605+
"backend": "pytorch",
606+
"context_servers": {
607+
"num_instances": 1,
608+
"urls": ["localhost:8001"]
609+
},
610+
"generation_servers": {
611+
"num_instances": 1,
612+
"urls": ["localhost:8002"]
613+
}
614+
}
615+
with launch_disaggregated_llm(disaggregated_server_config,
616+
ctx_server_config, gen_server_config,
617+
self.MODEL_PATH) as llm:
618+
task = MMLU(self.MODEL_NAME)
619+
task.evaluate(llm)
620+
task = GSM8K(self.MODEL_NAME)
621+
task.evaluate(llm)
622+
603623
@pytest.mark.parametrize("overlap_scheduler", [False, True])
604624
def test_auto_dtype(self, overlap_scheduler):
605625
ctx_server_config = {

tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -544,7 +544,7 @@ accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
544544
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
545545
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
546546
accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype
547-
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
547+
accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
548548
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
549549

550550
test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]

tests/integration/test_lists/qa/llm_function_sanity.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_trtl
102102
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_eagle3]
103103
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
104104
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
105-
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
105+
accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
106106
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
107107
disaggregated/test_disaggregated.py::test_disaggregated_cache_aware_balance[TinyLlama-1.1B-Chat-v1.0]
108108
disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]

tests/integration/test_lists/test-db/l0_dgx_b200.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,5 +74,5 @@ l0_dgx_b200:
7474
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf]
7575
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8]
7676
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8]
77-
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
77+
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
7878
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ l0_dgx_h100:
5050
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=2-ctx_pp=2]
5151
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2]
5252
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2]
53-
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
53+
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
5454
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
5555
- test_e2e.py::test_ptp_quickstart_advanced_bs1
5656
- test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_lite_4gpus_adp_balance[DeepSeek-V3-Lite-FP8-DeepSeek-V3-Lite/fp8]

0 commit comments

Comments
 (0)