From 9d6881867e749d3944057ab028cd8ed29fb44d0a Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Wed, 16 Jul 2025 14:41:16 -0400
Subject: [PATCH 1/7] Implement initial multiturn support

---
 src/guidellm/request/loader.py  |  9 ++++++-
 src/guidellm/request/session.py | 42 ++++++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/guidellm/request/loader.py b/src/guidellm/request/loader.py
index 452e4733..04471cc9 100644
--- a/src/guidellm/request/loader.py
+++ b/src/guidellm/request/loader.py
@@ -107,13 +107,20 @@ def __init__(
         self._preserved_iter = None

     def __iter__(self) -> Iterator[GenerativeRequestSession]:
+        turns = 1
+
+        data_iter = self._create_requests()
+        while requests := [i for i, _ in zip(data_iter, range(turns))]:
+            yield GenerativeRequestSession(requests)
+
+    def _create_requests(self) -> Iterator[GenerationRequest]:
         scope_create_count = 0

         while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
             scope_create_count += 1

             for item in dataset_iter:
-                yield GenerativeRequestSession(self._create_request(item))
+                yield self._create_request(item)

         self._preserved_iter = None

diff --git a/src/guidellm/request/session.py b/src/guidellm/request/session.py
index 9e00b37d..2fd9bfbd 100644
--- a/src/guidellm/request/session.py
+++ b/src/guidellm/request/session.py
@@ -1,3 +1,4 @@
+import itertools
 from abc import ABC, abstractmethod
 from typing import Generic, TypeVar

@@ -34,22 +35,47 @@ def complete(self) -> bool: ...


 class GenerativeRequestSession(RequestSession[GenerationRequest, ResponseSummary]):
-    def __init__(self, request: GenerationRequest) -> None:
-        self.request = request
-        self._complete = False
+    def __init__(self, prompts: list[GenerationRequest]) -> None:
+        if not prompts:
+            raise ValueError("Prompts cannot be empty")
+
+        self.prompts = prompts
+        self.responses: list[str] = []

     def __len__(self) -> int:
-        return 1
+        return len(self.prompts)

     def get_next_request(self) -> GenerationRequest:
-        return self.request
+        completed_responses = len(self.responses)
+        base_request = self.prompts[completed_responses].model_copy(deep=True)
+        base_request.content = "".join(
+            itertools.chain.from_iterable(
+                zip((x.content for x in self.prompts), self.responses + [""])
+            )
+        )
+        base_request.stats["prompt_tokens"] = sum(
+            x.stats["prompt_tokens"] for x in self.prompts[: completed_responses + 1]
+        )
+        base_request.constraints["output_tokens"] = sum(
+            x.constraints["output_tokens"]
+            for x in self.prompts[: completed_responses + 1]
+        )
+
+        return base_request

     def get_next_delay(self) -> float:
         return 0.0

-    def push_response(self, response: ResponseSummary) -> None:  # noqa: ARG002
-        self._complete = True
+    def push_response(self, response: ResponseSummary) -> None:
+        if len(self.responses) < len(self.prompts):
+            if response.response_output_tokens is not None:
+                self.prompts[len(self.responses)].constraints["output_tokens"] = (
+                    response.response_output_tokens
+                )
+            self.responses.append(response.value)
+        else:
+            raise ValueError("Response list full")

     @property
     def complete(self) -> bool:
-        return self._complete
+        return len(self.responses) >= len(self.prompts)
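
Aside (not part of the patch): a minimal, self-contained sketch of the turn-stitching pattern that `get_next_request` introduces above. The helper name and the plain-string prompts are hypothetical; only the `zip`/`itertools.chain` interleaving mirrors the diff.

```python
import itertools


def build_turn_content(prompts: list[str], responses: list[str]) -> str:
    """Interleave prompts with the responses received so far.

    The final prompt is paired with an empty string because it has no
    response yet, matching the `self.responses + [""]` padding in the patch.
    """
    # zip stops at the shorter sequence, so only prompts up to the current
    # turn are included: (p0, r0), (p1, r1), ..., (p_k, "")
    return "".join(itertools.chain.from_iterable(zip(prompts, responses + [""])))


# Two turns completed, third request being assembled.
assert build_turn_content(["A", "B", "C"], ["x", "y"]) == "AxByC"
```
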
From a4761b79d16a9f1a208c7dcc22ee9f3330a8634a Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Thu, 17 Jul 2025 17:12:21 -0400
Subject: [PATCH 2/7] Implement item type

---
 src/guidellm/preprocess/item.py | 60 +++++++++++++++++++++++++++++++
 src/guidellm/request/loader.py  | 38 ++++++--------------
 src/guidellm/request/session.py | 63 +++++++++++++++++++++------------
 3 files changed, 111 insertions(+), 50 deletions(-)
 create mode 100644 src/guidellm/preprocess/item.py

diff --git a/src/guidellm/preprocess/item.py b/src/guidellm/preprocess/item.py
new file mode 100644
index 00000000..7a4fb3e3
--- /dev/null
+++ b/src/guidellm/preprocess/item.py
@@ -0,0 +1,60 @@
+from collections.abc import Sequence
+from typing import Generic, Optional, TypeVar, Union
+
+from pydantic import Field
+
+from guidellm.objects.pydantic import StandardBaseModel
+
+PromptT = TypeVar("PromptT")
+
+
+class Item(StandardBaseModel, Generic[PromptT]):
+    """
+    Represents a single item in a dataset, containing a prompt and its associated metadata.
+    """
+
+    value: PromptT = Field(
+        description="The prompt text or data for the item.",
+        examples=[
+            "What is the capital of France?",
+            "Explain quantum computing in simple terms.",
+        ],
+    )
+    prompt_tokens: Optional[int] = Field(
+        default=None, gt=0, description="Number of tokens in the prompt"
+    )
+    output_tokens: Optional[int] = Field(
+        default=None, gt=0, description="Number of tokens in the output"
+    )
+
+
+class ItemList(Sequence[Item[PromptT]]):
+    """
+    Represents a list of items, each containing a prompt and its metadata.
+    """
+
+    def __init__(self, *items: Item[PromptT], shared_prefix: Optional[PromptT] = None):
+        self.shared_prefix: Optional[PromptT] = shared_prefix
+        self._items: list[Item[PromptT]] = list(items)
+
+    def __getitem__(self, key) -> Union[Item[PromptT], Sequence[Item[PromptT]]]:
+        return self._items[key]
+
+    def __len__(self) -> int:
+        return len(self._items)
+
+    @classmethod
+    def from_lists(
+        cls,
+        prompts: list[PromptT],
+        prompts_tokens: list[Optional[int]],
+        outputs_tokens: list[Optional[int]],
+    ) -> "ItemList":
+        return cls(
+            *[
+                Item(value=prompt, output_tokens=in_t, prompt_tokens=out_t)
+                for prompt, in_t, out_t in zip(
+                    prompts, prompts_tokens, outputs_tokens, strict=True
+                )
+            ]
+        )

diff --git a/src/guidellm/request/loader.py b/src/guidellm/request/loader.py
index 04471cc9..4ed781f5 100644
--- a/src/guidellm/request/loader.py
+++ b/src/guidellm/request/loader.py
@@ -11,10 +11,9 @@
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from transformers import PreTrainedTokenizerBase  # type: ignore[import]

-from guidellm.config import settings
 from guidellm.dataset import ColumnInputTypes, load_dataset
 from guidellm.objects import StandardBaseModel
-from guidellm.request.request import GenerationRequest
+from guidellm.preprocess.item import ItemList
 from guidellm.request.session import GenerativeRequestSession

 __all__ = [
@@ -107,20 +106,13 @@ def __init__(
         self._preserved_iter = None

     def __iter__(self) -> Iterator[GenerativeRequestSession]:
-        turns = 1
-
-        data_iter = self._create_requests()
-        while requests := [i for i, _ in zip(data_iter, range(turns))]:
-            yield GenerativeRequestSession(requests)
-
-    def _create_requests(self) -> Iterator[GenerationRequest]:
         scope_create_count = 0

         while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
             scope_create_count += 1

             for item in dataset_iter:
-                yield self._create_request(item)
+                yield GenerativeRequestSession(self._create_items(item))

         self._preserved_iter = None

@@ -268,25 +260,17 @@ def _get_dataset_iter(

         return dataset_iter

-    def _create_request(self, item: dict[str, Any]) -> GenerationRequest:
-        prompt_tokens = (
-            item[self.column_mappings["prompt_tokens_count_column"]]
+    def _create_items(self, item: dict[str, Any]) -> ItemList:
+        prompts = list(item[self.column_mappings["prompt_column"]])
+        prompt_tokens: list[Optional[int]] = (
+            list(item[self.column_mappings["prompt_tokens_count_column"]])
             if "prompt_tokens_count_column" in self.column_mappings
-            else None
+            else [None]
         )
-        output_tokens = (
-            item[self.column_mappings["output_tokens_count_column"]]
+        output_tokens: list[Optional[int]] = (
+            list(item[self.column_mappings["output_tokens_count_column"]])
             if "output_tokens_count_column" in self.column_mappings
-            else None
+            else [None]
         )
-        return GenerationRequest(
-            request_type=settings.preferred_route,
-            content=item[self.column_mappings["prompt_column"]],
-            stats=(
-                {"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {}
-            ),
-            constraints=(
-                {"output_tokens": output_tokens} if output_tokens is not None else {}
-            ),
-        )
+        return ItemList.from_lists(prompts, prompt_tokens, output_tokens)

diff --git a/src/guidellm/request/session.py b/src/guidellm/request/session.py
index 2fd9bfbd..c65a6cf9 100644
--- a/src/guidellm/request/session.py
+++ b/src/guidellm/request/session.py
@@ -1,15 +1,16 @@
 import itertools
 from abc import ABC, abstractmethod
-from typing import Generic, TypeVar
+from collections.abc import Sequence
+from typing import Generic

 from guidellm.backend.response import ResponseSummary
+from guidellm.config import settings
+from guidellm.preprocess.item import Item, ItemList
 from guidellm.request.request import GenerationRequest
+from guidellm.request.types import RequestT, ResponseT

 __all__ = ["GenerativeRequestSession", "RequestSession"]

-RequestT = TypeVar("RequestT")
-ResponseT = TypeVar("ResponseT")
-

 class RequestSession(ABC, Generic[RequestT, ResponseT]):
     """
@@ -35,44 +36,60 @@ def complete(self) -> bool: ...


 class GenerativeRequestSession(RequestSession[GenerationRequest, ResponseSummary]):
-    def __init__(self, prompts: list[GenerationRequest]) -> None:
-        if not prompts:
+    def __init__(self, items: ItemList) -> None:
+        if len(items) < 1:
             raise ValueError("Prompts cannot be empty")

-        self.prompts = prompts
-        self.responses: list[str] = []
+        self.prompts: Sequence[Item] = items
+        self.responses: list[Item] = []

     def __len__(self) -> int:
         return len(self.prompts)

     def get_next_request(self) -> GenerationRequest:
         completed_responses = len(self.responses)
-        base_request = self.prompts[completed_responses].model_copy(deep=True)
-        base_request.content = "".join(
+
+        # FIXME: Can only handle string requests
+        content = "".join(
             itertools.chain.from_iterable(
-                zip((x.content for x in self.prompts), self.responses + [""])
+                (x.value, y.value)
+                for x, y in zip(self.prompts, self.responses + [Item(value="")])
             )
         )
-        base_request.stats["prompt_tokens"] = sum(
-            x.stats["prompt_tokens"] for x in self.prompts[: completed_responses + 1]
+
+        prev_prompt_tokens = sum(
+            (x.prompt_tokens or 0) + (x.output_tokens or 0) for x in self.responses
         )
-        base_request.constraints["output_tokens"] = sum(
-            x.constraints["output_tokens"]
-            for x in self.prompts[: completed_responses + 1]
+        prompt_tokens = (
+            self.prompts[completed_responses].prompt_tokens or 0
+        ) + prev_prompt_tokens
+
+        output_tokens = self.prompts[completed_responses].output_tokens
+
+        return GenerationRequest(
+            request_type=settings.preferred_route,
+            content=content,
+            stats=(
+                {"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {}
+            ),
+            constraints=(
+                {"output_tokens": output_tokens} if output_tokens is not None else {}
+            ),
         )

-        return base_request
-
     def get_next_delay(self) -> float:
         return 0.0

     def push_response(self, response: ResponseSummary) -> None:
         if len(self.responses) < len(self.prompts):
-            if response.response_output_tokens is not None:
-                self.prompts[len(self.responses)].constraints["output_tokens"] = (
-                    response.response_output_tokens
-                )
-            self.responses.append(response.value)
+            resp = Item(
+                value=response.value,
+                prompt_tokens=response.response_prompt_tokens
+                or response.request_prompt_tokens,
+                output_tokens=response.response_output_tokens
+                or response.request_output_tokens,
+            )
+            self.responses.append(resp)
         else:
             raise ValueError("Response list full")
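
Aside (not part of the patch): a hedged usage sketch of the `Item`/`ItemList` types introduced above, assuming the patched `guidellm.preprocess.item` module is importable. The prompts and token counts are made up for illustration.

```python
from guidellm.preprocess.item import Item, ItemList

# One conversation: each Item is a single turn's prompt plus optional token counts.
turns = ItemList(
    Item(value="What is the capital of France?", prompt_tokens=8, output_tokens=16),
    Item(value="And what is its population?", prompt_tokens=7, output_tokens=12),
)

# ItemList behaves like a read-only Sequence, which is what
# GenerativeRequestSession walks through turn by turn.
assert len(turns) == 2
assert turns[0].value.startswith("What")
assert turns[1].output_tokens == 12
```
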
From 42ad49a6872b4bb0e1886ff77f2ddc0c8eeffb5b Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Fri, 18 Jul 2025 12:03:33 -0400
Subject: [PATCH 3/7] If prompt/output token count is 0, don't set stats/constraints

---
 src/guidellm/request/session.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/guidellm/request/session.py b/src/guidellm/request/session.py
index c65a6cf9..5e28e35c 100644
--- a/src/guidellm/request/session.py
+++ b/src/guidellm/request/session.py
@@ -1,7 +1,9 @@
 import itertools
 from abc import ABC, abstractmethod
-from collections.abc import Sequence
-from typing import Generic
+from typing import TYPE_CHECKING, Generic
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence

 from guidellm.backend.response import ResponseSummary
 from guidellm.config import settings
 from guidellm.preprocess.item import Item, ItemList
 from guidellm.request.request import GenerationRequest
@@ -69,12 +71,8 @@ def get_next_request(self) -> GenerationRequest:
         return GenerationRequest(
             request_type=settings.preferred_route,
             content=content,
-            stats=(
-                {"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {}
-            ),
-            constraints=(
-                {"output_tokens": output_tokens} if output_tokens is not None else {}
-            ),
+            stats=({"prompt_tokens": prompt_tokens} if prompt_tokens else {}),
+            constraints=({"output_tokens": output_tokens} if output_tokens else {}),
         )

     def get_next_delay(self) -> float:
         return 0.0
From 1a3df22cfed00b97b38b92853065ae03d304b799 Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Fri, 18 Jul 2025 12:08:33 -0400
Subject: [PATCH 4/7] Fix hand in item creation code

---
 src/guidellm/preprocess/item.py | 16 ----------------
 src/guidellm/request/loader.py  | 26 +++++++++++++++++---------
 2 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/src/guidellm/preprocess/item.py b/src/guidellm/preprocess/item.py
index 7a4fb3e3..9539e63c 100644
--- a/src/guidellm/preprocess/item.py
+++ b/src/guidellm/preprocess/item.py
@@ -42,19 +42,3 @@ def __getitem__(self, key) -> Union[Item[PromptT], Sequence[Item[PromptT]]]:
         return self._items[key]

     def __len__(self) -> int:
         return len(self._items)
-
-    @classmethod
-    def from_lists(
-        cls,
-        prompts: list[PromptT],
-        prompts_tokens: list[Optional[int]],
-        outputs_tokens: list[Optional[int]],
-    ) -> "ItemList":
-        return cls(
-            *[
-                Item(value=prompt, output_tokens=in_t, prompt_tokens=out_t)
-                for prompt, in_t, out_t in zip(
-                    prompts, prompts_tokens, outputs_tokens, strict=True
-                )
-            ]
-        )

diff --git a/src/guidellm/request/loader.py b/src/guidellm/request/loader.py
index 4ed781f5..082b8697 100644
--- a/src/guidellm/request/loader.py
+++ b/src/guidellm/request/loader.py
@@ -13,7 +13,7 @@

 from guidellm.dataset import ColumnInputTypes, load_dataset
 from guidellm.objects import StandardBaseModel
-from guidellm.preprocess.item import ItemList
+from guidellm.preprocess.item import Item, ItemList
 from guidellm.request.session import GenerativeRequestSession

 __all__ = [
@@ -261,16 +261,24 @@ def _get_dataset_iter(

         return dataset_iter

     def _create_items(self, item: dict[str, Any]) -> ItemList:
-        prompts = list(item[self.column_mappings["prompt_column"]])
-        prompt_tokens: list[Optional[int]] = (
-            list(item[self.column_mappings["prompt_tokens_count_column"]])
+        prompts = item[self.column_mappings["prompt_column"]]
+        prompt_tokens = (
+            item[self.column_mappings["prompt_tokens_count_column"]]
             if "prompt_tokens_count_column" in self.column_mappings
-            else [None]
+            else None
         )
-        output_tokens: list[Optional[int]] = (
-            list(item[self.column_mappings["output_tokens_count_column"]])
+        output_tokens = (
+            item[self.column_mappings["output_tokens_count_column"]]
             if "output_tokens_count_column" in self.column_mappings
-            else [None]
+            else None
         )

-        return ItemList.from_lists(prompts, prompt_tokens, output_tokens)
+        items = (
+            Item(value=prompt, output_tokens=out_t, prompt_tokens=in_t)
+            for prompt, in_t, out_t in zip(
+                prompts if isinstance(prompts, list) else [prompts],
+                prompt_tokens if isinstance(prompt_tokens, list) else [prompt_tokens],
+                output_tokens if isinstance(output_tokens, list) else [output_tokens],
+            )
+        )
+        return ItemList(*items)
From ec31fa181d6f7bde76738a7eded4a521163941fe Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Fri, 18 Jul 2025 12:59:22 -0400
Subject: [PATCH 5/7] Fix ItemList typing

---
 src/guidellm/preprocess/item.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/guidellm/preprocess/item.py b/src/guidellm/preprocess/item.py
index 9539e63c..91801de8 100644
--- a/src/guidellm/preprocess/item.py
+++ b/src/guidellm/preprocess/item.py
@@ -1,5 +1,5 @@
 from collections.abc import Sequence
-from typing import Generic, Optional, TypeVar, Union
+from typing import Generic, Optional, TypeVar

 from pydantic import Field

@@ -10,7 +10,8 @@

 class Item(StandardBaseModel, Generic[PromptT]):
     """
-    Represents a single item in a dataset, containing a prompt and its associated metadata.
+    Represents a single item in a dataset,
+    containing a prompt and its associated metadata.
     """

     value: PromptT = Field(
@@ -33,11 +34,13 @@ class ItemList(Sequence[Item[PromptT]]):
     Represents a list of items, each containing a prompt and its metadata.
     """

+    shared_prefix: Optional[PromptT]
+
     def __init__(self, *items: Item[PromptT], shared_prefix: Optional[PromptT] = None):
-        self.shared_prefix: Optional[PromptT] = shared_prefix
-        self._items: list[Item[PromptT]] = list(items)
+        self.shared_prefix = shared_prefix
+        self._items = list(items)

-    def __getitem__(self, key) -> Union[Item[PromptT], Sequence[Item[PromptT]]]:
+    def __getitem__(self, key):
         return self._items[key]

     def __len__(self) -> int:
         return len(self._items)
""" + shared_prefix: Optional[PromptT] + def __init__(self, *items: Item[PromptT], shared_prefix: Optional[PromptT] = None): - self.shared_prefix: Optional[PromptT] = shared_prefix - self._items: list[Item[PromptT]] = list(items) + self.shared_prefix = shared_prefix + self._items = list(items) - def __getitem__(self, key) -> Union[Item[PromptT], Sequence[Item[PromptT]]]: + def __getitem__(self, key): return self._items[key] def __len__(self) -> int: From f142b047763e0cbfb6dd8cc1a91a60290b2762b3 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Fri, 18 Jul 2025 13:17:05 -0400 Subject: [PATCH 6/7] Add turns support to synthetic dataset --- src/guidellm/dataset/synthetic.py | 77 +++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/src/guidellm/dataset/synthetic.py b/src/guidellm/dataset/synthetic.py index 9868ab52..d7839718 100644 --- a/src/guidellm/dataset/synthetic.py +++ b/src/guidellm/dataset/synthetic.py @@ -2,7 +2,7 @@ import random from collections.abc import Iterable, Iterator from pathlib import Path -from typing import Any, Literal, Optional, Union +from typing import Any, Optional, TypedDict, Union import yaml from datasets import ( @@ -63,6 +63,26 @@ class SyntheticDatasetConfig(BaseModel): gt=0, default=None, ) + turns: int = Field( + description="The number of turns in the conversation.", + gt=0, + default=1, + ) + turns_stdev: Optional[int] = Field( + description="The standard deviation of the number of turns.", + gt=0, + default=None, + ) + turns_min: Optional[int] = Field( + description="The minimum number of turns in the conversation.", + gt=0, + default=None, + ) + turns_max: Optional[int] = Field( + description="The maximum number of turns in the conversation.", + gt=0, + default=None, + ) samples: int = Field( description="The number of samples to generate for the dataset.", gt=0, @@ -118,14 +138,13 @@ def parse_config_file(data: Union[str, Path]) -> "SyntheticDatasetConfig": return SyntheticDatasetConfig(**config_dict) -class SyntheticTextItemsGenerator( - Iterable[ - dict[ - Literal["prompt", "prompt_tokens_count", "output_tokens_count"], - Union[str, int], - ] - ] -): +class SyntheticDatasetRow(TypedDict): + prompt: list[str] + prompt_tokens_count: list[int] + output_tokens_count: list[int] + + +class SyntheticTextItemsGenerator(Iterable[SyntheticDatasetRow]): def __init__( self, config: SyntheticDatasetConfig, @@ -141,12 +160,7 @@ def __init__( def __iter__( self, - ) -> Iterator[ - dict[ - Literal["prompt", "prompt_tokens_count", "output_tokens_count"], - Union[str, int], - ] - ]: + ) -> Iterator[SyntheticDatasetRow]: prompt_tokens_sampler = IntegerRangeSampler( average=self.config.prompt_tokens, variance=self.config.prompt_tokens_stdev, @@ -161,20 +175,33 @@ def __iter__( max_value=self.config.output_tokens_max, random_seed=self.random_seed + 1, # ensure diff dist from prompts ) + turns_sampler = IntegerRangeSampler( + average=self.config.turns, + variance=self.config.turns_stdev, + min_value=self.config.turns_min, + max_value=self.config.turns_max, + random_seed=self.random_seed + 7, # ensure diff dist + ) # ensure diff distribution from output tokens rand = random.Random(self.random_seed + 2) # noqa: S311 - for _, prompt_tokens, output_tokens in zip( - range(self.config.samples), - prompt_tokens_sampler, - output_tokens_sampler, - ): - start_index = rand.randint(0, len(self.text_creator.words)) - yield { - "prompt": self._create_prompt(prompt_tokens, start_index), - "prompt_tokens_count": prompt_tokens, - 
"output_tokens_count": output_tokens, + for _, turns in zip(range(self.config.samples), turns_sampler): + row: SyntheticDatasetRow = { + "prompt": [], + "prompt_tokens_count": [], + "output_tokens_count": [], } + for _, prompt_tokens, output_tokens in zip( + range(turns), + prompt_tokens_sampler, + output_tokens_sampler, + ): + start_index = rand.randint(0, len(self.text_creator.words)) + row["prompt"].append(self._create_prompt(prompt_tokens, start_index)) + row["prompt_tokens_count"].append(prompt_tokens) + row["output_tokens_count"].append(output_tokens) + + yield row def _create_prompt(self, prompt_tokens: int, start_index: int) -> str: if prompt_tokens <= 0: From 465884df6ece90287db64675a31d29641fb1cf39 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 23 Jul 2025 13:40:05 -0400 Subject: [PATCH 7/7] Add multi-turn documentation to readme --- README.md | 7 ++++++- src/guidellm/benchmark/entrypoints.py | 8 ++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b1abc75f..0f42e0e5 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,8 @@ The `guidellm benchmark` command is used to run benchmarks against a generative - `prompt_tokens`: Average number of tokens for prompts. - `output_tokens`: Average number of tokens for outputs. - - `TYPE_stdev`, `TYPE_min`, `TYPE_max`: Standard deviation, minimum, and maximum values for the specified type (e.g., `prompt_tokens`, `output_tokens`). If not provided, will use the provided tokens value only. + - `turns`: Average number of request-response pairs per sample. Values above `1` result in a multi-turn[^1] benchmark. + - `TYPE_stdev`, `TYPE_min`, `TYPE_max`: Standard deviation, minimum, and maximum values for the specified type (e.g., `prompt_tokens`, `output_tokens`, `turns`). If not provided, will use the provided tokens value only. - `samples`: Number of samples to generate, defaults to 1000. - `source`: Source text data for generation, defaults to a local copy of Pride and Prejudice. @@ -261,3 +262,7 @@ If you find GuideLLM helpful in your research or projects, please consider citin howpublished={\url{https://github.com/vllm-project/guidellm}}, } ``` + +- - - + +[^1]: Multi-turn refers to a benchmark where each dataset row represents a series of sequential requests, with each subsequent request building upon the context of the previous ones. diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 2ef85c3e..31f936af 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -90,11 +90,11 @@ async def benchmark_generative_text( ), random_seed=random_seed, ) - unique_requests = request_loader.num_unique_items(raise_err=False) + unique_samples = request_loader.num_unique_items(raise_err=False) console.print_line( - f"Created loader with {unique_requests} unique requests from {data}.\n\n" - if unique_requests > 0 - else f"Created loader with unknown number unique requests from {data}.\n\n" + f"Created loader with {unique_samples} unique samples from {data}.\n\n" + if unique_samples > 0 + else f"Created loader with unknown number unique samples from {data}.\n\n" ) profile = create_profile(rate_type=rate_type, rate=rate)