diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql index cb04ac8ac7..34e68a971b 100644 --- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql +++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql @@ -23,15 +23,32 @@ WITH benchmarks AS ( tupleElement(o.benchmark, 'extra_info')['arch'], tupleElement(o.runners[1], 'type') ) AS arch, - IF( - tupleElement(o.benchmark, 'extra_info')['compile'] = '', - 'true', -- Default to true - tupleElement(o.benchmark, 'extra_info')['compile'] - ) AS use_torch_compile, DATE_TRUNC( {granularity: String }, fromUnixTimestamp(o.timestamp) - ) AS granularity_bucket + ) AS granularity_bucket, + -- Repo-specific fields + map( + -- Used by torchao + 'use_torch_compile', + IF( + tupleElement(o.benchmark, 'extra_info')['compile'] = '', + 'true', + -- Default to true + tupleElement(o.benchmark, 'extra_info')['compile'] + ), + -- Used by vLLM + 'request_rate', + JSONExtractString( + tupleElement(o.benchmark, 'extra_info')['args'], + 'request_rate' + ), + 'tensor_parallel_size', + JSONExtractString( + tupleElement(o.benchmark, 'extra_info')['args'], + 'tensor_parallel_size' + ) + ) AS extra FROM benchmark.oss_ci_benchmark_v3 o WHERE @@ -77,8 +94,8 @@ SELECT DISTINCT dtype, device, arch, - toBool(use_torch_compile) AS use_torch_compile, - granularity_bucket + granularity_bucket, + extra FROM benchmarks WHERE @@ -101,4 +118,5 @@ ORDER BY backend, model, dtype, - device + device, + metric diff --git a/torchci/components/NavBar.tsx b/torchci/components/NavBar.tsx index 59674c8547..53849f0ffc 100644 --- a/torchci/components/NavBar.tsx +++ b/torchci/components/NavBar.tsx @@ -60,6 +60,10 @@ function NavBar() { name: "TorchAO LLMs", href: "/benchmark/llms?repoName=pytorch%2Fao", }, + { + name: "vLLM v1", + href: "/benchmark/llms?repoName=vllm-project%2Fvllm", + }, ]; const devInfraDropdown = [ diff --git a/torchci/components/benchmark/CommitPanel.tsx b/torchci/components/benchmark/CommitPanel.tsx index e48da70470..eb431812f7 100644 --- a/torchci/components/benchmark/CommitPanel.tsx +++ b/torchci/components/benchmark/CommitPanel.tsx @@ -18,22 +18,37 @@ export function CommitPanel({ }) { return ( - - *This report was generated by CI running on {repoName}{" "} - {lBranchAndCommit.branch} branch at commit{" "} - - {lBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)} - {" "} - comparing with {rBranchAndCommit.branch} branch at commit{" "} - - {rBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)} - - . {children} - + {repoName !== "vllm-project/vllm" && ( + + *This report was generated by CI running on {repoName}{" "} + {lBranchAndCommit.branch} branch at commit{" "} + + {lBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)} + {" "} + comparing with {rBranchAndCommit.branch} branch at commit{" "} + + {rBranchAndCommit.commit.substring(0, SHA_DISPLAY_LENGTH)} + + . {children} + + )} + {repoName === "vllm-project/vllm" && ( + + This is vLLM v1 dashboard, please refer to{" "} + + v0 dashboard + {" "} + for the information about how the benchmark is setup + + )} ); } diff --git a/torchci/components/benchmark/llms/ModelGraphPanel.tsx b/torchci/components/benchmark/llms/ModelGraphPanel.tsx index 1ad3a09f4d..5d226d303c 100644 --- a/torchci/components/benchmark/llms/ModelGraphPanel.tsx +++ b/torchci/components/benchmark/llms/ModelGraphPanel.tsx @@ -141,14 +141,44 @@ export function GraphPanel({ const model = record.model; const dtype = record.dtype; const device = record.device; + const metric = record.metric; - record.display = model.includes(dtype) - ? model.includes(device) - ? model - : `${model} (${device})` - : model.includes(device) - ? `${model} (${dtype})` - : `${model} (${dtype} / ${device})`; + if (repoName === "vllm-project/vllm") { + let requestRate = record.extra!["request_rate"]; + // TODO (huydhn): Fix the invalid JSON on vLLM side + if ( + metric.includes("itl") || + metric.includes("tpot") || + metric.includes("ttft") + ) { + requestRate = requestRate !== "" ? requestRate : "Inf"; + } + + let tensorParallel = record.extra!["tensor_parallel_size"]; + // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark + // script on vLLM side + if (model.includes("8B")) { + tensorParallel = tensorParallel !== "" ? tensorParallel : "1"; + } else if (model.includes("70B")) { + tensorParallel = tensorParallel !== "" ? tensorParallel : "4"; + } else if (model.includes("8x7B")) { + tensorParallel = tensorParallel !== "" ? tensorParallel : "2"; + } + + if (requestRate !== "") { + record.display = `${model} / tp${tensorParallel} / qps_${requestRate}`; + } else { + record.display = `${model} / tp${tensorParallel}`; + } + } else { + record.display = model.includes(dtype) + ? model.includes(device) + ? model + : `${model} (${device})` + : model.includes(device) + ? `${model} (${dtype})` + : `${model} (${dtype} / ${device})`; + } return record; }); @@ -177,7 +207,7 @@ export function GraphPanel({ .filter((metric) => chartData[metric].length !== 0) .map((metric: string) => ( @@ -203,7 +233,7 @@ export function GraphPanel({ }, }, }} - legendPadding={modelName === DEFAULT_MODEL_NAME ? 320 : 200} + legendPadding={320} /> ))} diff --git a/torchci/components/benchmark/llms/SummaryPanel.tsx b/torchci/components/benchmark/llms/SummaryPanel.tsx index 2d3de26ad0..e8025cb2dc 100644 --- a/torchci/components/benchmark/llms/SummaryPanel.tsx +++ b/torchci/components/benchmark/llms/SummaryPanel.tsx @@ -47,7 +47,7 @@ export function SummaryPanel({ const rCommit = rPerfData.commit; const rData = rPerfData.data; - const data = combineLeftAndRight(lPerfData, rPerfData); + const data = combineLeftAndRight(repoName, lPerfData, rPerfData); const columns: any[] = [ { field: "metadata", @@ -120,6 +120,26 @@ export function SummaryPanel({ }); } + if (repoName === "vllm-project/vllm") { + columns.push({ + field: "tensor_parallel_size", + headerName: "Tensor parallel", + flex: 1, + renderCell: (params: GridRenderCellParams) => { + return `${params.value}`; + }, + }); + + columns.push({ + field: "request_rate", + headerName: "Request rate", + flex: 1, + renderCell: (params: GridRenderCellParams) => { + return `${params.value}`; + }, + }); + } + columns.push( ...[ { diff --git a/torchci/components/benchmark/llms/common.tsx b/torchci/components/benchmark/llms/common.tsx index 6454cbe04a..d8c0f02e8d 100644 --- a/torchci/components/benchmark/llms/common.tsx +++ b/torchci/components/benchmark/llms/common.tsx @@ -5,8 +5,17 @@ export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = { "pytorch/pytorch": ["PyTorch gpt-fast benchmark"], "pytorch/executorch": ["ExecuTorch"], "pytorch/ao": ["TorchAO benchmark"], + "vllm-project/vllm": ["vLLM benchmark"], }; -export const EXCLUDED_METRICS: string[] = ["load_status"]; +export const EXCLUDED_METRICS: string[] = [ + "load_status", + "mean_itl_ms", + "mean_tpot_ms", + "mean_ttft_ms", + "std_itl_ms", + "std_tpot_ms", + "std_ttft_ms", +]; export const DEFAULT_MODEL_NAME = "All Models"; export const SCALE = 2; export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = { @@ -17,6 +26,15 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = { compile_vs_eager_speedup: "Compile vs eager speedup", autoquant_vs_compile_speedup: "Autoquant vs compile speedup", eager_speedup: "Eager speedup", + latency: "Latency (s)", + median_itl_ms: "Median ITL (ms)", + median_tpot_ms: "Median TPOT (ms)", + median_ttft_ms: "Median TTFT (ms)", + p99_itl_ms: "p99 ITL (ms)", + p99_tpot_ms: "p99 TPOT (ms)", + p99_ttft_ms: "p99 TTFT (ms)", + requests_per_second: "Requests/s", + tokens_per_second: "Tokens/s", }; // The variable name is a bit dumb, but it tells if a higher metric value // is good or bad so that we can highlight it on the dashboard accordingly. @@ -32,6 +50,15 @@ export const IS_INCREASING_METRIC_VALUE_GOOD: { [k: string]: boolean } = { "peak_inference_mem_usage(mb)": false, "peak_load_mem_usuage(mb)": false, "generate_time(ms)": false, + latency: false, + median_itl_ms: false, + median_tpot_ms: false, + median_ttft_ms: false, + p99_itl_ms: false, + p99_tpot_ms: false, + p99_ttft_ms: false, + requests_per_second: true, + tokens_per_second: true, }; export const METRIC_DISPLAY_SHORT_HEADERS: { [k: string]: string } = { "memory_bandwidth(GB/s)": "Bandwidth", @@ -71,7 +98,7 @@ export interface LLMsBenchmarkData { device: string; arch: string; display?: string; - use_torch_compile?: boolean; + extra?: { [key: string]: string }; } export interface BranchAndCommitPerfData extends BranchAndCommit { diff --git a/torchci/lib/benchmark/aoUtils.ts b/torchci/lib/benchmark/aoUtils.ts index 62e9eb5657..449627478c 100644 --- a/torchci/lib/benchmark/aoUtils.ts +++ b/torchci/lib/benchmark/aoUtils.ts @@ -84,10 +84,8 @@ export function computeSpeedup( const currentCommitBaseline: { [key: string]: LLMsBenchmarkData } = {}; data.forEach((r: LLMsBenchmarkData) => { - if ( - r.dtype !== TORCHAO_BASELINE || - r.use_torch_compile !== useTorchCompile - ) { + const compile = r.extra?.use_torch_compile === "true"; + if (r.dtype !== TORCHAO_BASELINE || compile !== useTorchCompile) { return; } @@ -112,8 +110,9 @@ export function computeSpeedup( data.forEach((r: LLMsBenchmarkData) => { withSpeedup.push(r); + const compile = r.extra?.use_torch_compile === "true"; // Compute eager speedup vs the base commit baseline - if (r.dtype === TORCHAO_BASELINE && r.use_torch_compile === false) { + if (r.dtype === TORCHAO_BASELINE && compile === false) { if (SPEEDUP_METRICS.includes(r.metric)) { const k = `${r.model} ${r.metric} ${r.device} ${r.arch}`; if ( diff --git a/torchci/lib/benchmark/llmUtils.ts b/torchci/lib/benchmark/llmUtils.ts index 2bce8d7eb3..92621890d7 100644 --- a/torchci/lib/benchmark/llmUtils.ts +++ b/torchci/lib/benchmark/llmUtils.ts @@ -11,8 +11,7 @@ export function useBenchmark( queryParams: { [key: string]: any }, branchAndCommit: BranchAndCommit ) { - const queryCollection = "benchmarks"; - const queryName = "oss_ci_benchmark_llms"; + const queryName: string = "oss_ci_benchmark_llms"; const queryParamsWithBranchAndCommit: { [key: string]: any } = queryParams; (queryParamsWithBranchAndCommit as { [key: string]: any })["branches"] = @@ -30,6 +29,7 @@ export function useBenchmark( } export function combineLeftAndRight( + repoName: string, lPerfData: BranchAndCommitPerfData, rPerfData: BranchAndCommitPerfData ): { [k: string]: any }[] { @@ -49,9 +49,10 @@ export function combineLeftAndRight( const dtype = record.dtype; const device = record.device; const arch = record.arch; + const extra = JSON.stringify(record.extra); const metric = record.metric; - const key = `${model};${backend};${dtype};${device};${arch}`; + const key = `${model};${backend};${dtype};${device};${arch};${extra}`; if (!(key in dataGroupedByModel)) { dataGroupedByModel[key] = {}; } @@ -73,9 +74,10 @@ export function combineLeftAndRight( const dtype = record.dtype; const device = record.device; const arch = record.arch; + const extra = JSON.stringify(record.extra); const metric = record.metric; - const key = `${model};${backend};${dtype};${device};${arch}`; + const key = `${model};${backend};${dtype};${device};${arch};${extra}`; if (!(key in dataGroupedByModel)) { dataGroupedByModel[key] = {}; } @@ -97,7 +99,7 @@ export function combineLeftAndRight( const validBackends = new Set(); // First round to get all the valid devices Object.keys(dataGroupedByModel).forEach((key: string) => { - const [model, backend, dtype, device, arch] = key.split(";"); + const [model, backend, dtype, device, arch, extra] = key.split(";"); const row: { [k: string]: any } = { // Keep the name as as the row ID as DataGrid requires it name: `${model} ${backend} (${dtype} / ${device} / ${arch})`, @@ -118,10 +120,10 @@ export function combineLeftAndRight( // Transform the data into a displayable format const data: { [k: string]: any }[] = []; Object.keys(dataGroupedByModel).forEach((key: string) => { - const [model, backend, dtype, device, arch] = key.split(";"); + const [model, backend, dtype, device, arch, extra] = key.split(";"); const row: { [k: string]: any } = { // Keep the name as as the row ID as DataGrid requires it - name: `${model} ${backend} (${dtype} / ${device} / ${arch})`, + name: `${model} ${backend} (${dtype} / ${device} / ${arch} / ${extra})`, }; for (const metric in dataGroupedByModel[key]) { @@ -173,6 +175,44 @@ export function combineLeftAndRight( arch: arch, }; + if (repoName === "vllm-project/vllm") { + // These fields are only available on vLLM benchmark + const extraInfo = JSON.parse(extra); + // TODO (huydhn): Fix the invalid JSON on vLLM side + if ( + metric.includes("itl") || + metric.includes("tpot") || + metric.includes("ttft") + ) { + extraInfo["request_rate"] = + extraInfo["request_rate"] !== "" + ? extraInfo["request_rate"] + : "Inf"; + } + // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark + // script on vLLM side + if (model.includes("8B")) { + extraInfo["tensor_parallel_size"] = + extraInfo["tensor_parallel_size"] !== "" + ? extraInfo["tensor_parallel_size"] + : 1; + } else if (model.includes("70B")) { + extraInfo["tensor_parallel_size"] = + extraInfo["tensor_parallel_size"] !== "" + ? extraInfo["tensor_parallel_size"] + : 4; + } else if (model.includes("8x7B")) { + extraInfo["tensor_parallel_size"] = + extraInfo["tensor_parallel_size"] !== "" + ? extraInfo["tensor_parallel_size"] + : 2; + } + + row["extra"] = extraInfo; + row["tensor_parallel_size"] = extraInfo["tensor_parallel_size"]; + row["request_rate"] = extraInfo["request_rate"]; + } + row[metric] = { l: hasL ? { diff --git a/torchci/pages/benchmark/llms.tsx b/torchci/pages/benchmark/llms.tsx index c25f0d7f44..62f12cb116 100644 --- a/torchci/pages/benchmark/llms.tsx +++ b/torchci/pages/benchmark/llms.tsx @@ -126,7 +126,7 @@ function Report({ ? lDataWithSpeedup[0].granularity_bucket : undefined, }} - workflowName={"inductor-micro-benchmark"} + workflowName={""} > <>