@@ -84,8 +84,13 @@ def results_to_json(latency, throughput, serving):
             # this result is generated via `benchmark_serving.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
@@ -99,8 +104,13 @@ def results_to_json(latency, throughput, serving):
             # this result is generated via `benchmark_latency.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
@@ -121,8 +131,13 @@ def results_to_json(latency, throughput, serving):
             # this result is generated via `benchmark_throughput.py`
 
             # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
-                command = json.loads(f.read())
+            try:
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
+
             raw_result.update(command)
 
             # update the test name of this result
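A note on the change above: each result's ".commands" sidecar read is now wrapped in try/except OSError, so a results JSON whose sidecar is missing or unreadable is logged and skipped instead of aborting the whole conversion. A minimal, self-contained sketch of that guard (the "results" folder here is hypothetical, not the actual script's path):

    import json
    from pathlib import Path

    results_folder = Path("results")  # hypothetical location

    for test_file in results_folder.glob("*.json"):
        with open(test_file) as f:
            raw_result = json.loads(f.read())

        # Skip results whose ".commands" sidecar cannot be read.
        try:
            with open(test_file.with_suffix(".commands")) as f:
                command = json.loads(f.read())
        except OSError as e:
            print(e)
            continue

        raw_result.update(command)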
@@ -309,11 +309,14 @@ run_serving_tests() {
 
       new_test_name=$test_name"_qps_"$qps
 
+      # pass the tensor parallel size to the client so that it can be displayed
+      # on the benchmark dashboard
       client_command="python3 benchmark_serving.py \
         --save-result \
         --result-dir $RESULTS_FOLDER \
         --result-filename ${new_test_name}.json \
         --request-rate $qps \
+        --metadata "tensor_parallel_size=$tp" \
         $client_args"
 
       echo "Running test case $test_name with qps $qps"
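The new --metadata flag forwards the server's tensor parallel size to the client as a "key=value" pair, which is intended to land in the saved result JSON and, via the extra_info handling added to benchmarks/benchmark_utils.py below, show up on the benchmark dashboard. As an illustration only (benchmark_serving.py's own --metadata parsing may differ in detail), "key=value" metadata entries can be folded into a dict like this:

    # Illustration of the expected "key=value" metadata format, e.g.
    # --metadata tensor_parallel_size=8
    def parse_metadata(pairs: list) -> dict:
        metadata = {}
        for pair in pairs:
            key, _, value = pair.partition("=")
            metadata[key] = value
        return metadata

    print(parse_metadata(["tensor_parallel_size=8"]))
    # -> {'tensor_parallel_size': '8'}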
2 changes: 1 addition & 1 deletion .buildkite/nightly-benchmarks/tests/throughput-tests.json
@@ -32,4 +32,4 @@
             "backend": "vllm"
         }
     }
-]
\ No newline at end of file
+]
5 changes: 2 additions & 3 deletions benchmarks/benchmark_latency.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 import torch
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from tqdm import tqdm
 
 from vllm import LLM, SamplingParams
@@ -30,8 +30,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
                     for k in ["avg_latency", "percentiles"]})
     if pt_records:
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
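The switch from a bare json.dump to write_to_json (added in benchmarks/benchmark_utils.py below) matters presumably because benchmark metrics can be infinite, and Python's default encoder emits them as Infinity, which is not valid JSON:

    import json
    import math

    # The default encoder produces a token that strict JSON parsers reject.
    print(json.dumps({"p99": math.inf}))  # {"p99": Infinity}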
5 changes: 2 additions & 3 deletions benchmarks/benchmark_serving.py
@@ -56,7 +56,7 @@
 except ImportError:
     from argparse import ArgumentParser as FlexibleArgumentParser
 
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 
 MILLISECONDS_TO_SECONDS_CONVERSION = 1000
 
@@ -841,8 +841,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
5 changes: 2 additions & 3 deletions benchmarks/benchmark_throughput.py
@@ -11,7 +11,7 @@
 
 import torch
 import uvloop
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from PIL import Image
 from tqdm import tqdm
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
@@ -366,8 +366,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     if pt_records:
         # Don't use json suffix here as we don't want CI to pick it up
         pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
-            json.dump(pt_records, f)
+        write_to_json(pt_file, pt_records)
 
 
 def main(args: argparse.Namespace):
30 changes: 30 additions & 0 deletions benchmarks/benchmark_utils.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
+import json
+import math
 import os
 from typing import Any, Dict, List
 
@@ -34,6 +36,34 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
                 "extra_info": extra_info,
             },
         }
+
+        tp = record["benchmark"]["extra_info"]["args"].get(
+            "tensor_parallel_size")
+        # Save tensor_parallel_size parameter if it's part of the metadata
+        if not tp and "tensor_parallel_size" in extra_info:
+            record["benchmark"]["extra_info"]["args"][
+                "tensor_parallel_size"] = extra_info["tensor_parallel_size"]
+
         records.append(record)
 
     return records
+
+
+class InfEncoder(json.JSONEncoder):
+
+    def clear_inf(self, o: Any):
+        if isinstance(o, dict):
+            return {k: self.clear_inf(v) for k, v in o.items()}
+        elif isinstance(o, list):
+            return [self.clear_inf(v) for v in o]
+        elif isinstance(o, float) and math.isinf(o):
+            return "inf"
+        return o
+
+    def iterencode(self, o: Any, *args, **kwargs) -> Any:
+        return super().iterencode(self.clear_inf(o), *args, **kwargs)
+
+
+def write_to_json(filename: str, records: List) -> None:
+    with open(filename, "w") as f:
+        json.dump(records, f, cls=InfEncoder)
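Usage sketch for the new helper: any infinite float in the records is written as the string "inf", keeping the output valid JSON.

    # Hypothetical record; write_to_json and InfEncoder are the helpers added above.
    records = [{"name": "demo", "p99_latency": float("inf"), "throughput": 42.0}]
    write_to_json("demo.pytorch.json", records)
    # demo.pytorch.json now contains:
    # [{"name": "demo", "p99_latency": "inf", "throughput": 42.0}]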