@@ -4,11 +4,12 @@
 from tensorrt_llm.sampling_params import SamplingParams
 
 # isort: off
-from .test_llm import (
-    get_model_path, global_kvcache_config, llama_model_path,
-    llm_get_stats_async_test_harness, llm_get_stats_test_harness, prompts,
-    run_llm_abort_request, run_llm_with_postprocess_parallel_and_result_handler,
-    tinyllama_logits_processor_test_harness, _test_llm_capture_request_error)
+from .test_llm import (get_model_path, global_kvcache_config, llama_model_path,
+                       llm_get_stats_async_test_harness,
+                       llm_get_stats_test_harness, prompts,
+                       run_llm_abort_request,
+                       run_llm_with_postprocess_parallel_and_result_handler,
+                       tinyllama_logits_processor_test_harness)
 from utils.util import force_ampere, similar, skip_gpu_memory_less_than_40gb, skip_gpu_memory_less_than_80gb, skip_gpu_memory_less_than_138gb
 from utils.llm_data import llm_models_root
 from tensorrt_llm.lora_manager import LoraConfig
@@ -63,10 +64,6 @@ def test_llm_get_stats_async(return_context_logits, use_overlap,
         enable_iter_req_stats=enable_iter_req_stats)
 
 
-def test_llm_capture_request_error():
-    _test_llm_capture_request_error(pytorch_backend=True, tp_size=1)
-
-
 @force_ampere
 @pytest.mark.parametrize(
     "sampling_params",