Merged
Changes from all commits
36 changes: 27 additions & 9 deletions torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
@@ -23,15 +23,32 @@ WITH benchmarks AS (
tupleElement(o.benchmark, 'extra_info')['arch'],
tupleElement(o.runners[1], 'type')
) AS arch,
IF(
tupleElement(o.benchmark, 'extra_info')['compile'] = '',
'true', -- Default to true
tupleElement(o.benchmark, 'extra_info')['compile']
) AS use_torch_compile,
DATE_TRUNC(
{granularity: String },
fromUnixTimestamp(o.timestamp)
) AS granularity_bucket
) AS granularity_bucket,
-- Repo-specific fields
map(
-- Used by torchao
'use_torch_compile',
IF(
tupleElement(o.benchmark, 'extra_info')['compile'] = '',
'true',
-- Default to true
tupleElement(o.benchmark, 'extra_info')['compile']
),
-- Used by vLLM
'request_rate',
JSONExtractString(
tupleElement(o.benchmark, 'extra_info')['args'],
'request_rate'
),
'tensor_parallel_size',
JSONExtractString(
tupleElement(o.benchmark, 'extra_info')['args'],
'tensor_parallel_size'
)
) AS extra
FROM
benchmark.oss_ci_benchmark_v3 o
WHERE
@@ -77,8 +94,8 @@ SELECT DISTINCT
dtype,
device,
arch,
toBool(use_torch_compile) AS use_torch_compile,
granularity_bucket
granularity_bucket,
extra
FROM
benchmarks
WHERE
@@ -101,4 +118,5 @@ ORDER BY
backend,
model,
dtype,
device
device,
metric
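The `extra` column above replaces the dedicated `use_torch_compile` field with a string-to-string map, so each repo can attach its own fields without further schema changes. A minimal sketch of how a consumer reads it, assuming the `extra?: { [key: string]: string }` typing added to LLMsBenchmarkData in common.tsx below (the record literal is illustrative):

// Sketch: reading repo-specific fields from the query's `extra` map.
const record: { extra?: { [key: string]: string } } = {
  extra: { use_torch_compile: "true", request_rate: "10", tensor_parallel_size: "1" },
};
// ClickHouse maps return "" for absent keys, so downstream code checks for ""
// rather than undefined once a row has gone through this query.
const requestRate = record.extra?.["request_rate"] ?? "";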
4 changes: 4 additions & 0 deletions torchci/components/NavBar.tsx
@@ -60,6 +60,10 @@ function NavBar() {
name: "TorchAO LLMs",
href: "/benchmark/llms?repoName=pytorch%2Fao",
},
{
name: "vLLM v1",
href: "/benchmark/llms?repoName=vllm-project%2Fvllm",
},
];

const devInfraDropdown = [
47 changes: 31 additions & 16 deletions torchci/components/benchmark/CommitPanel.tsx
@@ -18,22 +18,37 @@ export function CommitPanel({
}) {
return (
<Stack direction="row" spacing={2} sx={{ mb: 2 }}>
<Typography fontSize={"1rem"} fontStyle={"italic"}>
*This report was generated by CI running on {repoName}{" "}
{lBranchAndCommit.branch} branch at commit{" "}
<a
href={`/${repoName}/commit/${lBranchAndCommit.commit}#${workflowName}`}
>
{lBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
</a>{" "}
comparing with {rBranchAndCommit.branch} branch at commit{" "}
<a
href={`/${repoName}/commit/${rBranchAndCommit.commit}#${workflowName}`}
>
{rBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
</a>
. {children}
</Typography>
{repoName !== "vllm-project/vllm" && (
Contributor:
Option: maybe we can keep a list of the repoNames that have this exception.

Contributor Author:
Yeah, the benchmark UX seems to be growing pretty fast recently, so I think I will take a step back to see if we could refactor the code here for better modularization.
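A minimal sketch of that suggestion, assuming a module-level constant (the name and Set-based check are hypothetical, not part of this PR):

// Hypothetical: central list of repos that skip the default commit report.
const REPOS_WITHOUT_COMMIT_REPORT = new Set<string>(["vllm-project/vllm"]);

// The two conditionals below would then read:
//   {!REPOS_WITHOUT_COMMIT_REPORT.has(repoName) && ( ...default report... )}
//   {REPOS_WITHOUT_COMMIT_REPORT.has(repoName) && ( ...vLLM note... )}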

<Typography fontSize={"1rem"} fontStyle={"italic"}>
*This report was generated by CI running on {repoName}{" "}
{lBranchAndCommit.branch} branch at commit{" "}
<a
href={`/${repoName}/commit/${lBranchAndCommit.commit}#${workflowName}`}
>
{lBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
</a>{" "}
comparing with {rBranchAndCommit.branch} branch at commit{" "}
<a
href={`/${repoName}/commit/${rBranchAndCommit.commit}#${workflowName}`}
>
{rBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)}
</a>
. {children}
</Typography>
)}
{repoName === "vllm-project/vllm" && (
<Typography fontSize={"1rem"} fontStyle={"italic"}>
This is the vLLM v1 dashboard; please refer to the{" "}
<a
href={
"https://simon-mo-workspace.observablehq.cloud/vllm-dashboard-v0/perf"
}
>
v0 dashboard
</a>{" "}
for information about how the benchmark is set up
</Typography>
)}
</Stack>
);
}
48 changes: 39 additions & 9 deletions torchci/components/benchmark/llms/ModelGraphPanel.tsx
@@ -141,14 +141,44 @@ export function GraphPanel({
const model = record.model;
const dtype = record.dtype;
const device = record.device;
const metric = record.metric;

record.display = model.includes(dtype)
? model.includes(device)
? model
: `${model} (${device})`
: model.includes(device)
? `${model} (${dtype})`
: `${model} (${dtype} / ${device})`;
if (repoName === "vllm-project/vllm") {
let requestRate = record.extra!["request_rate"];
// TODO (huydhn): Fix the invalid JSON on vLLM side
Contributor Author:
Here is the fix on the vLLM side, vllm-project/vllm#13641, currently under review. Once it lands, we can remove these hacks.

if (
metric.includes("itl") ||
metric.includes("tpot") ||
metric.includes("ttft")
) {
requestRate = requestRate !== "" ? requestRate : "Inf";
}

let tensorParallel = record.extra!["tensor_parallel_size"];
// TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark
// script on vLLM side
if (model.includes("8B")) {
tensorParallel = tensorParallel !== "" ? tensorParallel : "1";
} else if (model.includes("70B")) {
tensorParallel = tensorParallel !== "" ? tensorParallel : "4";
} else if (model.includes("8x7B")) {
tensorParallel = tensorParallel !== "" ? tensorParallel : "2";
}

if (requestRate !== "") {
record.display = `${model} / tp${tensorParallel} / qps_${requestRate}`;
} else {
record.display = `${model} / tp${tensorParallel}`;
}
} else {
record.display = model.includes(dtype)
? model.includes(device)
? model
: `${model} (${device})`
: model.includes(device)
? `${model} (${dtype})`
: `${model} (${dtype} / ${device})`;
}

return record;
});
@@ -177,7 +207,7 @@
.filter((metric) => chartData[metric].length !== 0)
.map((metric: string) => (
<Grid2
size={{ xs: 12, lg: modelName === DEFAULT_MODEL_NAME ? 12 : 4 }}
size={{ xs: 12, lg: modelName === DEFAULT_MODEL_NAME ? 12 : 6 }}
height={GRAPH_ROW_HEIGHT}
key={metric}
>
@@ -203,7 +233,7 @@
},
},
}}
legendPadding={modelName === DEFAULT_MODEL_NAME ? 320 : 200}
legendPadding={320}
/>
</Grid2>
))}
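To make the new vLLM labeling concrete, these are the display strings the branch above produces (model names are placeholders):

// "model-70B" with request_rate "10" and an empty tensor_parallel_size:
//   display = "model-70B / tp4 / qps_10"
// "model-8B" with both fields empty and metric "median_itl_ms":
//   display = "model-8B / tp1 / qps_Inf"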
22 changes: 21 additions & 1 deletion torchci/components/benchmark/llms/SummaryPanel.tsx
@@ -47,7 +47,7 @@ export function SummaryPanel({
const rCommit = rPerfData.commit;
const rData = rPerfData.data;

const data = combineLeftAndRight(lPerfData, rPerfData);
const data = combineLeftAndRight(repoName, lPerfData, rPerfData);
const columns: any[] = [
{
field: "metadata",
@@ -120,6 +120,26 @@
});
}

if (repoName === "vllm-project/vllm") {
columns.push({
field: "tensor_parallel_size",
headerName: "Tensor parallel",
flex: 1,
renderCell: (params: GridRenderCellParams<any>) => {
return `${params.value}`;
},
});

columns.push({
field: "request_rate",
headerName: "Request rate",
flex: 1,
renderCell: (params: GridRenderCellParams<any>) => {
return `${params.value}`;
},
});
}

columns.push(
...[
{
31 changes: 29 additions & 2 deletions torchci/components/benchmark/llms/common.tsx
@@ -5,8 +5,17 @@ export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = {
"pytorch/pytorch": ["PyTorch gpt-fast benchmark"],
"pytorch/executorch": ["ExecuTorch"],
"pytorch/ao": ["TorchAO benchmark"],
"vllm-project/vllm": ["vLLM benchmark"],
};
export const EXCLUDED_METRICS: string[] = ["load_status"];
export const EXCLUDED_METRICS: string[] = [
"load_status",
"mean_itl_ms",
"mean_tpot_ms",
"mean_ttft_ms",
"std_itl_ms",
"std_tpot_ms",
"std_ttft_ms",
];
export const DEFAULT_MODEL_NAME = "All Models";
export const SCALE = 2;
export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
@@ -17,6 +26,15 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
compile_vs_eager_speedup: "Compile vs eager speedup",
autoquant_vs_compile_speedup: "Autoquant vs compile speedup",
eager_speedup: "Eager speedup",
latency: "Latency (s)",
median_itl_ms: "Median ITL (ms)",
median_tpot_ms: "Median TPOT (ms)",
median_ttft_ms: "Median TTFT (ms)",
p99_itl_ms: "p99 ITL (ms)",
p99_tpot_ms: "p99 TPOT (ms)",
p99_ttft_ms: "p99 TTFT (ms)",
requests_per_second: "Requests/s",
tokens_per_second: "Tokens/s",
};
// The variable name is a bit dumb, but it tells if a higher metric value
// is good or bad so that we can highlight it on the dashboard accordingly.
@@ -32,6 +50,15 @@ export const IS_INCREASING_METRIC_VALUE_GOOD: { [k: string]: boolean } = {
"peak_inference_mem_usage(mb)": false,
"peak_load_mem_usuage(mb)": false,
"generate_time(ms)": false,
latency: false,
median_itl_ms: false,
median_tpot_ms: false,
median_ttft_ms: false,
p99_itl_ms: false,
p99_tpot_ms: false,
p99_ttft_ms: false,
requests_per_second: true,
tokens_per_second: true,
};
export const METRIC_DISPLAY_SHORT_HEADERS: { [k: string]: string } = {
"memory_bandwidth(GB/s)": "Bandwidth",
@@ -71,7 +98,7 @@ export interface LLMsBenchmarkData {
device: string;
arch: string;
display?: string;
use_torch_compile?: boolean;
extra?: { [key: string]: string };
}

export interface BranchAndCommitPerfData extends BranchAndCommit {
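As the comment in common.tsx says, IS_INCREASING_METRIC_VALUE_GOOD tells the dashboard which direction counts as an improvement. A minimal sketch of that check (the helper name is hypothetical, not the dashboard's actual code):

function isImprovement(metric: string, ratio: number): boolean {
  // ratio = candidate value / baseline value; > 1 means the metric went up.
  const higherIsBetter = IS_INCREASING_METRIC_VALUE_GOOD[metric] ?? false;
  return higherIsBetter ? ratio > 1 : ratio < 1;
}
// isImprovement("tokens_per_second", 1.2) === true   (more throughput is good)
// isImprovement("p99_ttft_ms", 1.2) === false        (higher latency is bad)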
9 changes: 4 additions & 5 deletions torchci/lib/benchmark/aoUtils.ts
@@ -84,10 +84,8 @@ export function computeSpeedup(
const currentCommitBaseline: { [key: string]: LLMsBenchmarkData } = {};

data.forEach((r: LLMsBenchmarkData) => {
if (
r.dtype !== TORCHAO_BASELINE ||
r.use_torch_compile !== useTorchCompile
) {
const compile = r.extra?.use_torch_compile === "true";
if (r.dtype !== TORCHAO_BASELINE || compile !== useTorchCompile) {
return;
}

@@ -112,8 +110,9 @@
data.forEach((r: LLMsBenchmarkData) => {
withSpeedup.push(r);

const compile = r.extra?.use_torch_compile === "true";
// Compute eager speedup vs the base commit baseline
if (r.dtype === TORCHAO_BASELINE && r.use_torch_compile === false) {
if (r.dtype === TORCHAO_BASELINE && compile === false) {
if (SPEEDUP_METRICS.includes(r.metric)) {
const k = `${r.model} ${r.metric} ${r.device} ${r.arch}`;
if (
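One detail about the change above: `extra` carries strings, so the compile flag round-trips as text. The SQL query defaults an empty `compile` field to 'true', and the TypeScript side recovers the boolean with a string comparison, so an absent key reads as false here. A tiny illustration (values made up):

const r: { extra?: { [key: string]: string } } = { extra: { use_torch_compile: "true" } };
const compile = r.extra?.use_torch_compile === "true"; // true; absent key → false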