diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
index cb04ac8ac7..34e68a971b 100644
--- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
+++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
@@ -23,15 +23,32 @@ WITH benchmarks AS (
tupleElement(o.benchmark, 'extra_info')['arch'],
tupleElement(o.runners[1], 'type')
) AS arch,
- IF(
- tupleElement(o.benchmark, 'extra_info')['compile'] = '',
- 'true', -- Default to true
- tupleElement(o.benchmark, 'extra_info')['compile']
- ) AS use_torch_compile,
DATE_TRUNC(
{granularity: String },
fromUnixTimestamp(o.timestamp)
- ) AS granularity_bucket
+ ) AS granularity_bucket,
+ -- Repo-specific fields
+ map(
+ -- Used by torchao
+ 'use_torch_compile',
+ IF(
+ tupleElement(o.benchmark, 'extra_info')['compile'] = '',
+ 'true',
+ -- Default to true
+ tupleElement(o.benchmark, 'extra_info')['compile']
+ ),
+ -- Used by vLLM
+ 'request_rate',
+ JSONExtractString(
+ tupleElement(o.benchmark, 'extra_info')['args'],
+ 'request_rate'
+ ),
+ 'tensor_parallel_size',
+ JSONExtractString(
+ tupleElement(o.benchmark, 'extra_info')['args'],
+ 'tensor_parallel_size'
+ )
+ ) AS extra
FROM
benchmark.oss_ci_benchmark_v3 o
WHERE
@@ -77,8 +94,8 @@ SELECT DISTINCT
dtype,
device,
arch,
- toBool(use_torch_compile) AS use_torch_compile,
- granularity_bucket
+ granularity_bucket,
+ extra
FROM
benchmarks
WHERE
@@ -101,4 +118,5 @@ ORDER BY
backend,
model,
dtype,
- device
+ device,
+ metric
diff --git a/torchci/components/NavBar.tsx b/torchci/components/NavBar.tsx
index 59674c8547..53849f0ffc 100644
--- a/torchci/components/NavBar.tsx
+++ b/torchci/components/NavBar.tsx
@@ -60,6 +60,10 @@ function NavBar() {
name: "TorchAO LLMs",
href: "/benchmark/llms?repoName=pytorch%2Fao",
},
+ {
+ name: "vLLM v1",
+ href: "/benchmark/llms?repoName=vllm-project%2Fvllm",
+ },
];
const devInfraDropdown = [
diff --git a/torchci/components/benchmark/CommitPanel.tsx b/torchci/components/benchmark/CommitPanel.tsx
index e48da70470..eb431812f7 100644
--- a/torchci/components/benchmark/CommitPanel.tsx
+++ b/torchci/components/benchmark/CommitPanel.tsx
@@ -18,22 +18,37 @@ export function CommitPanel({
}) {
return (
-
- *This report was generated by CI running on {repoName}{" "}
- {lBranchAndCommit.branch} branch at commit{" "}
-
- {lBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
- {" "}
- comparing with {rBranchAndCommit.branch} branch at commit{" "}
-
- {rBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
-
- . {children}
-
+ {repoName !== "vllm-project/vllm" && (
+
+ *This report was generated by CI running on {repoName}{" "}
+ {lBranchAndCommit.branch} branch at commit{" "}
+
+ {lBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
+ {" "}
+ comparing with {rBranchAndCommit.branch} branch at commit{" "}
+
+ {rBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
+
+ . {children}
+
+ )}
+ {repoName === "vllm-project/vllm" && (
+
+ This is the vLLM v1 dashboard; please refer to the{" "}
+
+ v0 dashboard
+ {" "}
+ for information about how the benchmark is set up
+
+ )}
);
}
diff --git a/torchci/components/benchmark/llms/ModelGraphPanel.tsx b/torchci/components/benchmark/llms/ModelGraphPanel.tsx
index 1ad3a09f4d..5d226d303c 100644
--- a/torchci/components/benchmark/llms/ModelGraphPanel.tsx
+++ b/torchci/components/benchmark/llms/ModelGraphPanel.tsx
@@ -141,14 +141,44 @@ export function GraphPanel({
const model = record.model;
const dtype = record.dtype;
const device = record.device;
+ const metric = record.metric;
- record.display = model.includes(dtype)
- ? model.includes(device)
- ? model
- : `${model} (${device})`
- : model.includes(device)
- ? `${model} (${dtype})`
- : `${model} (${dtype} / ${device})`;
+ if (repoName === "vllm-project/vllm") {
+ let requestRate = record.extra!["request_rate"];
+ // TODO (huydhn): Fix the invalid JSON on vLLM side
+ if (
+ metric.includes("itl") ||
+ metric.includes("tpot") ||
+ metric.includes("ttft")
+ ) {
+ requestRate = requestRate !== "" ? requestRate : "Inf";
+ }
+
+ let tensorParallel = record.extra!["tensor_parallel_size"];
+ // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark
+ // script on vLLM side
+ if (model.includes("8B")) {
+ tensorParallel = tensorParallel !== "" ? tensorParallel : "1";
+ } else if (model.includes("70B")) {
+ tensorParallel = tensorParallel !== "" ? tensorParallel : "4";
+ } else if (model.includes("8x7B")) {
+ tensorParallel = tensorParallel !== "" ? tensorParallel : "2";
+ }
+
+ if (requestRate !== "") {
+ record.display = `${model} / tp${tensorParallel} / qps_${requestRate}`;
+ } else {
+ record.display = `${model} / tp${tensorParallel}`;
+ }
+ } else {
+ record.display = model.includes(dtype)
+ ? model.includes(device)
+ ? model
+ : `${model} (${device})`
+ : model.includes(device)
+ ? `${model} (${dtype})`
+ : `${model} (${dtype} / ${device})`;
+ }
return record;
});
@@ -177,7 +207,7 @@ export function GraphPanel({
.filter((metric) => chartData[metric].length !== 0)
.map((metric: string) => (
@@ -203,7 +233,7 @@ export function GraphPanel({
},
},
}}
- legendPadding={modelName === DEFAULT_MODEL_NAME ? 320 : 200}
+ legendPadding={320}
/>
))}
diff --git a/torchci/components/benchmark/llms/SummaryPanel.tsx b/torchci/components/benchmark/llms/SummaryPanel.tsx
index 2d3de26ad0..e8025cb2dc 100644
--- a/torchci/components/benchmark/llms/SummaryPanel.tsx
+++ b/torchci/components/benchmark/llms/SummaryPanel.tsx
@@ -47,7 +47,7 @@ export function SummaryPanel({
const rCommit = rPerfData.commit;
const rData = rPerfData.data;
- const data = combineLeftAndRight(lPerfData, rPerfData);
+ const data = combineLeftAndRight(repoName, lPerfData, rPerfData);
const columns: any[] = [
{
field: "metadata",
@@ -120,6 +120,26 @@ export function SummaryPanel({
});
}
+ if (repoName === "vllm-project/vllm") {
+ columns.push({
+ field: "tensor_parallel_size",
+ headerName: "Tensor parallel",
+ flex: 1,
+ renderCell: (params: GridRenderCellParams) => {
+ return `${params.value}`;
+ },
+ });
+
+ columns.push({
+ field: "request_rate",
+ headerName: "Request rate",
+ flex: 1,
+ renderCell: (params: GridRenderCellParams) => {
+ return `${params.value}`;
+ },
+ });
+ }
+
columns.push(
...[
{
diff --git a/torchci/components/benchmark/llms/common.tsx b/torchci/components/benchmark/llms/common.tsx
index 6454cbe04a..d8c0f02e8d 100644
--- a/torchci/components/benchmark/llms/common.tsx
+++ b/torchci/components/benchmark/llms/common.tsx
@@ -5,8 +5,17 @@ export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = {
"pytorch/pytorch": ["PyTorch gpt-fast benchmark"],
"pytorch/executorch": ["ExecuTorch"],
"pytorch/ao": ["TorchAO benchmark"],
+ "vllm-project/vllm": ["vLLM benchmark"],
};
-export const EXCLUDED_METRICS: string[] = ["load_status"];
+export const EXCLUDED_METRICS: string[] = [
+ "load_status",
+ "mean_itl_ms",
+ "mean_tpot_ms",
+ "mean_ttft_ms",
+ "std_itl_ms",
+ "std_tpot_ms",
+ "std_ttft_ms",
+];
export const DEFAULT_MODEL_NAME = "All Models";
export const SCALE = 2;
export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
@@ -17,6 +26,15 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
compile_vs_eager_speedup: "Compile vs eager speedup",
autoquant_vs_compile_speedup: "Autoquant vs compile speedup",
eager_speedup: "Eager speedup",
+ latency: "Latency (s)",
+ median_itl_ms: "Median ITL (ms)",
+ median_tpot_ms: "Median TPOT (ms)",
+ median_ttft_ms: "Median TTFT (ms)",
+ p99_itl_ms: "p99 ITL (ms)",
+ p99_tpot_ms: "p99 TPOT (ms)",
+ p99_ttft_ms: "p99 TTFT (ms)",
+ requests_per_second: "Requests/s",
+ tokens_per_second: "Tokens/s",
};
// The variable name is a bit dumb, but it tells if a higher metric value
// is good or bad so that we can highlight it on the dashboard accordingly.
@@ -32,6 +50,15 @@ export const IS_INCREASING_METRIC_VALUE_GOOD: { [k: string]: boolean } = {
"peak_inference_mem_usage(mb)": false,
"peak_load_mem_usuage(mb)": false,
"generate_time(ms)": false,
+ latency: false,
+ median_itl_ms: false,
+ median_tpot_ms: false,
+ median_ttft_ms: false,
+ p99_itl_ms: false,
+ p99_tpot_ms: false,
+ p99_ttft_ms: false,
+ requests_per_second: true,
+ tokens_per_second: true,
};
export const METRIC_DISPLAY_SHORT_HEADERS: { [k: string]: string } = {
"memory_bandwidth(GB/s)": "Bandwidth",
@@ -71,7 +98,7 @@ export interface LLMsBenchmarkData {
device: string;
arch: string;
display?: string;
- use_torch_compile?: boolean;
+ extra?: { [key: string]: string };
}
export interface BranchAndCommitPerfData extends BranchAndCommit {
diff --git a/torchci/lib/benchmark/aoUtils.ts b/torchci/lib/benchmark/aoUtils.ts
index 62e9eb5657..449627478c 100644
--- a/torchci/lib/benchmark/aoUtils.ts
+++ b/torchci/lib/benchmark/aoUtils.ts
@@ -84,10 +84,8 @@ export function computeSpeedup(
const currentCommitBaseline: { [key: string]: LLMsBenchmarkData } = {};
data.forEach((r: LLMsBenchmarkData) => {
- if (
- r.dtype !== TORCHAO_BASELINE ||
- r.use_torch_compile !== useTorchCompile
- ) {
+ const compile = r.extra?.use_torch_compile === "true";
+ if (r.dtype !== TORCHAO_BASELINE || compile !== useTorchCompile) {
return;
}
@@ -112,8 +110,9 @@ export function computeSpeedup(
data.forEach((r: LLMsBenchmarkData) => {
withSpeedup.push(r);
+ const compile = r.extra?.use_torch_compile === "true";
// Compute eager speedup vs the base commit baseline
- if (r.dtype === TORCHAO_BASELINE && r.use_torch_compile === false) {
+ if (r.dtype === TORCHAO_BASELINE && compile === false) {
if (SPEEDUP_METRICS.includes(r.metric)) {
const k = `${r.model} ${r.metric} ${r.device} ${r.arch}`;
if (
diff --git a/torchci/lib/benchmark/llmUtils.ts b/torchci/lib/benchmark/llmUtils.ts
index 2bce8d7eb3..92621890d7 100644
--- a/torchci/lib/benchmark/llmUtils.ts
+++ b/torchci/lib/benchmark/llmUtils.ts
@@ -11,8 +11,7 @@ export function useBenchmark(
queryParams: { [key: string]: any },
branchAndCommit: BranchAndCommit
) {
- const queryCollection = "benchmarks";
- const queryName = "oss_ci_benchmark_llms";
+ const queryName: string = "oss_ci_benchmark_llms";
const queryParamsWithBranchAndCommit: { [key: string]: any } = queryParams;
(queryParamsWithBranchAndCommit as { [key: string]: any })["branches"] =
@@ -30,6 +29,7 @@ export function useBenchmark(
}
export function combineLeftAndRight(
+ repoName: string,
lPerfData: BranchAndCommitPerfData,
rPerfData: BranchAndCommitPerfData
): { [k: string]: any }[] {
@@ -49,9 +49,10 @@ export function combineLeftAndRight(
const dtype = record.dtype;
const device = record.device;
const arch = record.arch;
+ const extra = JSON.stringify(record.extra);
const metric = record.metric;
- const key = `${model};${backend};${dtype};${device};${arch}`;
+ const key = `${model};${backend};${dtype};${device};${arch};${extra}`;
if (!(key in dataGroupedByModel)) {
dataGroupedByModel[key] = {};
}
@@ -73,9 +74,10 @@ export function combineLeftAndRight(
const dtype = record.dtype;
const device = record.device;
const arch = record.arch;
+ const extra = JSON.stringify(record.extra);
const metric = record.metric;
- const key = `${model};${backend};${dtype};${device};${arch}`;
+ const key = `${model};${backend};${dtype};${device};${arch};${extra}`;
if (!(key in dataGroupedByModel)) {
dataGroupedByModel[key] = {};
}
@@ -97,7 +99,7 @@ export function combineLeftAndRight(
const validBackends = new Set();
// First round to get all the valid devices
Object.keys(dataGroupedByModel).forEach((key: string) => {
- const [model, backend, dtype, device, arch] = key.split(";");
+ const [model, backend, dtype, device, arch, extra] = key.split(";");
const row: { [k: string]: any } = {
// Keep the name as as the row ID as DataGrid requires it
name: `${model} ${backend} (${dtype} / ${device} / ${arch})`,
@@ -118,10 +120,10 @@ export function combineLeftAndRight(
// Transform the data into a displayable format
const data: { [k: string]: any }[] = [];
Object.keys(dataGroupedByModel).forEach((key: string) => {
- const [model, backend, dtype, device, arch] = key.split(";");
+ const [model, backend, dtype, device, arch, extra] = key.split(";");
const row: { [k: string]: any } = {
// Keep the name as as the row ID as DataGrid requires it
- name: `${model} ${backend} (${dtype} / ${device} / ${arch})`,
+ name: `${model} ${backend} (${dtype} / ${device} / ${arch} / ${extra})`,
};
for (const metric in dataGroupedByModel[key]) {
@@ -173,6 +175,44 @@ export function combineLeftAndRight(
arch: arch,
};
+ if (repoName === "vllm-project/vllm") {
+ // These fields are only available on vLLM benchmark
+ const extraInfo = JSON.parse(extra);
+ // TODO (huydhn): Fix the invalid JSON on vLLM side
+ if (
+ metric.includes("itl") ||
+ metric.includes("tpot") ||
+ metric.includes("ttft")
+ ) {
+ extraInfo["request_rate"] =
+ extraInfo["request_rate"] !== ""
+ ? extraInfo["request_rate"]
+ : "Inf";
+ }
+ // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark
+ // script on vLLM side
+ if (model.includes("8B")) {
+ extraInfo["tensor_parallel_size"] =
+ extraInfo["tensor_parallel_size"] !== ""
+ ? extraInfo["tensor_parallel_size"]
+ : 1;
+ } else if (model.includes("70B")) {
+ extraInfo["tensor_parallel_size"] =
+ extraInfo["tensor_parallel_size"] !== ""
+ ? extraInfo["tensor_parallel_size"]
+ : 4;
+ } else if (model.includes("8x7B")) {
+ extraInfo["tensor_parallel_size"] =
+ extraInfo["tensor_parallel_size"] !== ""
+ ? extraInfo["tensor_parallel_size"]
+ : 2;
+ }
+
+ row["extra"] = extraInfo;
+ row["tensor_parallel_size"] = extraInfo["tensor_parallel_size"];
+ row["request_rate"] = extraInfo["request_rate"];
+ }
+
row[metric] = {
l: hasL
? {
diff --git a/torchci/pages/benchmark/llms.tsx b/torchci/pages/benchmark/llms.tsx
index c25f0d7f44..62f12cb116 100644
--- a/torchci/pages/benchmark/llms.tsx
+++ b/torchci/pages/benchmark/llms.tsx
@@ -126,7 +126,7 @@ function Report({
? lDataWithSpeedup[0].granularity_bucket
: undefined,
}}
- workflowName={"inductor-micro-benchmark"}
+ workflowName={""}
>
<>>