@@ -5,8 +5,17 @@ export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = {
5
5
"pytorch/pytorch" : [ "PyTorch gpt-fast benchmark" ] ,
6
6
"pytorch/executorch" : [ "ExecuTorch" ] ,
7
7
"pytorch/ao" : [ "TorchAO benchmark" ] ,
8
+ "vllm-project/vllm" : [ "vLLM benchmark" ] ,
8
9
} ;
9
/**
 * Metric names that are excluded from processing/display.
 *
 * Besides `load_status`, the list holds the `mean_*` and `std_*` variants of
 * the `itl`, `tpot` and `ttft` millisecond metrics. Only the `median_*` and
 * `p99_*` variants of those metrics have entries in `METRIC_DISPLAY_HEADERS`.
 *
 * NOTE(review): where this list is consumed is not visible from here;
 * presumably matching metrics are filtered out before rendering — confirm
 * against the callers.
 */
export const EXCLUDED_METRICS: string[] = [
  "load_status",
  "mean_itl_ms",
  "mean_tpot_ms",
  "mean_ttft_ms",
  "std_itl_ms",
  "std_tpot_ms",
  "std_ttft_ms",
];
10
19
/**
 * Default model name.
 *
 * NOTE(review): the literal "All Models" suggests this stands for
 * "no specific model selected" — confirm against the consumers.
 */
export const DEFAULT_MODEL_NAME = "All Models";
11
20
/**
 * Numeric scale constant shared by this module's consumers.
 *
 * NOTE(review): its usage is not visible from here; presumably the number of
 * decimal places used when rounding displayed metric values — confirm.
 */
export const SCALE = 2;
12
21
export const METRIC_DISPLAY_HEADERS : { [ k : string ] : string } = {
@@ -17,6 +26,15 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
17
26
compile_vs_eager_speedup : "Compile vs eager speedup" ,
18
27
autoquant_vs_compile_speedup : "Autoquant vs compile speedup" ,
19
28
eager_speedup : "Eager speedup" ,
29
+ latency : "Latency (s)" ,
30
+ median_itl_ms : "Median ITL (ms)" ,
31
+ median_tpot_ms : "Median TPOT (ms)" ,
32
+ median_ttft_ms : "Median TTFT (ms)" ,
33
+ p99_itl_ms : "p99 ITL (ms)" ,
34
+ p99_tpot_ms : "p99 TPOT (ms)" ,
35
+ p99_ttft_ms : "p99 TTFT (ms)" ,
36
+ requests_per_second : "Requests/s" ,
37
+ tokens_per_second : "Tokens/s" ,
20
38
} ;
21
39
// The variable name is a bit dumb, but it tells if a higher metric value
22
40
// is good or bad so that we can highlight it on the dashboard accordingly.
@@ -32,6 +50,15 @@ export const IS_INCREASING_METRIC_VALUE_GOOD: { [k: string]: boolean } = {
32
50
"peak_inference_mem_usage(mb)" : false ,
33
51
"peak_load_mem_usuage(mb)" : false ,
34
52
"generate_time(ms)" : false ,
53
+ latency : false ,
54
+ median_itl_ms : false ,
55
+ median_tpot_ms : false ,
56
+ median_ttft_ms : false ,
57
+ p99_itl_ms : false ,
58
+ p99_tpot_ms : false ,
59
+ p99_ttft_ms : false ,
60
+ requests_per_second : true ,
61
+ tokens_per_second : true ,
35
62
} ;
36
63
export const METRIC_DISPLAY_SHORT_HEADERS : { [ k : string ] : string } = {
37
64
"memory_bandwidth(GB/s)" : "Bandwidth" ,
@@ -71,7 +98,7 @@ export interface LLMsBenchmarkData {
71
98
device : string ;
72
99
arch : string ;
73
100
display ?: string ;
74
- use_torch_compile ?: boolean ;
101
+ extra ?: { [ key : string ] : string } ;
75
102
}
76
103
77
104
export interface BranchAndCommitPerfData extends BranchAndCommit {
0 commit comments