Skip to content

Commit 00f167f

Browse files
committed
Add disagg bs==1 test
Signed-off-by: Yifei Zhang <[email protected]>
1 parent c4dd3c1 commit 00f167f

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed
tests/integration/defs/disaggregated/test_configs/disagg_config_genbs1.yaml — Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
# Disaggregated-serving config for the gen-side batch-size==1 test ("genbs1").
# Context servers prefill on port 8001; generation servers decode on port 8002.
# NOTE(review): nesting reconstructed from a diff scrape that lost indentation —
# confirm against other disagg_config_*.yaml files in test_configs/.
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
hostname: localhost
port: 8000
backend: pytorch
cuda_graph_config: null
context_servers:
  num_instances: 1
  max_batch_size: 1
  max_num_tokens: 3000
  max_seq_len: 4096
  free_gpu_memory_fraction: 0.85
  tensor_parallel_size: 2
  moe_expert_parallel_size: 2
  enable_attention_dp: true
  pipeline_parallel_size: 1
  # Overlap scheduling is disabled on the context side for disaggregation.
  disable_overlap_scheduler: true
  kv_cache_config:
    enable_block_reuse: false
    free_gpu_memory_fraction: 0.85
    dtype: fp8
  cache_transceiver_config:
    backend: default
  urls:
    - "localhost:8001"
generation_servers:
  num_instances: 1
  tensor_parallel_size: 2
  moe_expert_parallel_size: 2
  enable_attention_dp: true
  pipeline_parallel_size: 1
  # The point of this config: generation runs with batch size 1.
  max_batch_size: 1
  max_num_tokens: 4
  max_seq_len: 2251
  free_gpu_memory_fraction: 0.7
  kv_cache_config:
    enable_block_reuse: false
    free_gpu_memory_fraction: 0.7
    dtype: fp8
  moe_config:
    backend: CUTLASS
  cache_transceiver_config:
    backend: default
  stream_interval: 20
  urls:
    - "localhost:8002"

tests/integration/defs/disaggregated/test_disaggregated.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def get_test_config(test_desc, example_dir, test_root):
4444
"gen_only": (2, f"{test_configs_root}/disagg_config_gen_only.yaml"),
4545
"gen_only_trt_backend":
4646
(2, f"{test_configs_root}/disagg_config_gen_only_trt_backend.yaml"),
47+
"genbs1": (4, f"{test_configs_root}/disagg_config_genbs1.yaml"),
4748
"4_ranks": (4, f"{test_configs_root}/disagg_config_ctxtp2_gentp1.yaml"),
4849
"4_ranks_trt_backend":
4950
(4,
@@ -384,6 +385,28 @@ def test_disaggregated_benchmark_gen_only_trt_backend(
384385
cwd=llm_venv.get_working_directory())
385386

386387

@pytest.mark.skip_less_device(4)
@pytest.mark.parametrize("llama_model_root", ['TinyLlama-1.1B-Chat-v1.0'],
                         indirect=True)
def test_disaggregated_benchmark_genbs1(disaggregated_test_root,
                                        disaggregated_example_root, llm_venv,
                                        llama_model_root):
    """Run the disaggregated benchmark with generation batch size 1.

    Symlinks the TinyLlama checkpoint into the venv working directory under
    the HF-style path the "genbs1" config expects, then drives the shared
    ``run_disaggregated_test`` harness with ``long_prompts.json`` as input.
    Requires 4 GPUs (ctx TP2 + gen TP2 per the genbs1 config).
    """
    # The config references the model by its HF repo path relative to the
    # working directory, so expose the local checkpoint there via a symlink.
    src_dst_dict = {
        llama_model_root:
        f"{llm_venv.get_working_directory()}/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    }
    for src, dst in src_dst_dict.items():
        # Idempotent across reruns: only create the link if it is not
        # already in place.
        if not os.path.islink(dst):
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            os.symlink(src, dst, target_is_directory=True)

    run_disaggregated_test(disaggregated_example_root,
                           "genbs1",
                           env=llm_venv._new_env,
                           cwd=llm_venv.get_working_directory(),
                           prompt_file="long_prompts.json")
387410
@pytest.mark.skip_less_device(2)
388411
@pytest.mark.parametrize("llama_model_root", ['TinyLlama-1.1B-Chat-v1.0'],
389412
indirect=True)

0 commit comments

Comments
 (0)