Skip to content

Commit 5397347

Browse files
authored
Populate the benchmark metadata (#5918)
To ease the process of gathering the benchmark metadata before uploading to the database, I'm adding a script `.github/scripts/benchmarks/gather_metadata.py` to gather this information and pass it to the upload script. From #5839, the benchmark metadata includes the following required fields: ``` -- Metadata `timestamp` UInt64, `schema_version` String DEFAULT 'v3', `name` String, -- About the change `repo` String DEFAULT 'pytorch/pytorch', `head_branch` String, `head_sha` String, `workflow_id` UInt64, `run_attempt` UInt32, `job_id` UInt64, -- The raw records on S3 `s3_path` String, ``` I'm going to test this out with the PT2 compiler instruction count benchmark at pytorch/pytorch#140493 ### Testing https://github.com/pytorch/test-infra/actions/runs/11831746632/job/32967412160?pr=5918#step:5:105 gathers the metadata and uploads the benchmark results correctly Also, an actual upload at https://github.com/pytorch/pytorch/actions/runs/11831781500/job/33006545698#step:24:138
1 parent 6cacc52 commit 5397347

File tree

7 files changed

+342
-46
lines changed

7 files changed

+342
-46
lines changed

.github/actions/upload-benchmark-results/action.yml

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ inputs:
99
# TODO (huydhn): Use this to gate the migration to oss_ci_benchmark_v3 on S3
1010
schema-version:
1111
default: 'v2'
12+
github-token:
13+
default: ''
1214

1315
runs:
1416
using: composite
@@ -19,22 +21,92 @@ runs:
1921
set -eux
2022
python3 -mpip install boto3==1.35.33
2123
24+
- name: Check that GITHUB_TOKEN is defined
25+
if: ${{ inputs.schema-version != 'v2' }}
26+
env:
27+
GITHUB_TOKEN: ${{ inputs.github-token }}
28+
shell: bash
29+
run: |
30+
set -eux
31+
32+
if [[ -z "${GITHUB_TOKEN}" ]]; then
33+
echo "Missing github-token input"
34+
exit 1
35+
fi
36+
37+
- name: Get workflow job id
38+
if: ${{ inputs.github-token != '' }}
39+
id: get-job-id
40+
uses: pytorch/test-infra/.github/actions/get-workflow-job-id@main
41+
with:
42+
github-token: ${{ inputs.github-token }}
43+
44+
- name: Gather the metadata
45+
id: gather-metadata
46+
shell: bash
47+
env:
48+
SCHEMA_VERSION: ${{ inputs.schema-version }}
49+
REPO: ${{ github.repository }}
50+
HEAD_BRANCH: ${{ github.head_ref }}
51+
HEAD_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
52+
WORKFLOW_RUN_ID: ${{ github.run_id }}
53+
RUN_ATTEMPT: ${{ github.run_attempt }}
54+
JOB_ID: ${{ inputs.github-token != '' && steps.get-job-id.outputs.job-id || '0' }}
55+
JOB_NAME: ${{ inputs.github-token != '' && steps.get-job-id.outputs.job-name || '' }}
56+
run: |
57+
set -eux
58+
59+
python3 "${GITHUB_ACTION_PATH}/../../scripts/benchmarks/gather_metadata.py" \
60+
--schema-version "${SCHEMA_VERSION}" \
61+
--repo "${REPO}" \
62+
--head-branch "${HEAD_BRANCH}" \
63+
--head-sha "${HEAD_SHA}" \
64+
--workflow-id "${WORKFLOW_RUN_ID}" \
65+
--run-attempt "${RUN_ATTEMPT}" \
66+
--job-id "${JOB_ID}" \
67+
--job-name "${JOB_NAME}"
68+
69+
- name: Gather the runner information
70+
id: gather-runner-info
71+
shell: bash
72+
run: |
73+
set -eux
74+
75+
# TODO (huydhn): Implement this part
76+
echo "runners=[]" >> "${GITHUB_OUTPUT}"
77+
78+
- name: Gather the dependencies information
79+
id: gather-dependencies
80+
shell: bash
81+
run: |
82+
set -eux
83+
84+
# TODO (huydhn): Implement this part
85+
echo "dependencies={}" >> "${GITHUB_OUTPUT}"
86+
2287
- name: Upload benchmark results
2388
shell: bash
2489
env:
2590
BENCHMARK_RESULTS_DIR: ${{ inputs.benchmark-results-dir }}
2691
DRY_RUN: ${{ inputs.dry-run }}
27-
SCHEMA_VERSION: ${{ inputs.schema-version }}
92+
# Additional information about the benchmarks
93+
BENCHMARK_METADATA: ${{ steps.gather-metadata.outputs.metadata }}
94+
RUNNER_INFO: ${{ steps.gather-runner-info.outputs.runners }}
95+
DEPENDENCIES: ${{ steps.gather-dependencies.outputs.dependencies }}
2896
run: |
2997
set -eux
3098
3199
if [[ "${DRY_RUN}" == "true" ]]; then
32100
python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \
33101
--benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" \
34-
--schema-version "${SCHEMA_VERSION}" \
102+
--metadata "${BENCHMARK_METADATA}" \
103+
--runners "${RUNNER_INFO}" \
104+
--dependencies "${DEPENDENCIES}" \
35105
--dry-run
36106
else
37107
python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \
38108
--benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" \
39-
--schema-version "${SCHEMA_VERSION}"
109+
--metadata "${BENCHMARK_METADATA}" \
110+
--runners "${RUNNER_INFO}" \
111+
--dependencies "${DEPENDENCIES}"
40112
fi
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": false, "device": "cpu", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_eager", "type": "add_loop", "backend": "eager"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [3086359081]}}]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": true, "device": "cpu", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_eager_dynamic", "type": "add_loop", "backend": "eager"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [5712213247]}}]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": true, "device": "cuda", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_inductor_dynamic_gpu", "type": "add_loop", "backend": "inductor"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [40859830085]}}]
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
import os
9+
import json
10+
import time
11+
from typing import Any
12+
13+
14+
def parse_args() -> Any:
    """Parse the command-line flags describing the benchmark run.

    Each flag maps one-to-one onto a required metadata field of the
    oss_ci_benchmark_v3 database schema (see #5839).

    Returns:
        The parsed argparse namespace with ``schema_version``, ``repo``,
        ``head_branch``, ``head_sha``, ``workflow_id``, ``run_attempt``,
        ``job_id``, and ``job_name`` attributes.
    """
    from argparse import ArgumentParser

    # NOTE: ArgumentParser's first positional parameter is `prog` (the
    # program name shown in usage messages), not `description`. Passing
    # the help text positionally made `--help` render it as the program
    # name, so pass it explicitly as `description`.
    parser = ArgumentParser(description="gather some metadata about the benchmark")
    # v3 is defined at torchci/clickhouse_queries/oss_ci_benchmark_v3/query.sql
    parser.add_argument(
        "--schema-version",
        choices=["v2", "v3"],
        required=True,
        help="the database schema to use",
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="the name of repository where the benchmark is run",
    )
    parser.add_argument(
        "--head-branch",
        type=str,
        required=True,
        help="the name of branch where the benchmark is run",
    )
    parser.add_argument(
        "--head-sha",
        type=str,
        required=True,
        help="the commit that the benchmark uses",
    )
    parser.add_argument(
        "--workflow-id",
        type=int,
        required=True,
        help="the benchmark workflow id",
    )
    parser.add_argument(
        "--run-attempt",
        type=int,
        default=1,
        help="the workflow run attempt",
    )
    parser.add_argument(
        "--job-id",
        type=int,
        required=True,
        help="the benchmark job id",
    )
    parser.add_argument(
        "--job-name",
        type=str,
        required=True,
        help="the benchmark job name",
    )

    return parser.parse_args()
69+
70+
71+
def set_output(name: str, val: Any) -> None:
    """Publish *val* as a GitHub Actions step output called *name*.

    Appends a ``name=val`` line to the file referenced by the
    ``GITHUB_OUTPUT`` environment variable when it is set; otherwise
    falls back to printing the legacy ``::set-output`` workflow command.
    """
    output_file = os.getenv("GITHUB_OUTPUT")
    if not output_file:
        print(f"::set-output name={name}::{val}")
        return

    with open(output_file, "a") as fp:
        fp.write(f"{name}={val}\n")
77+
78+
79+
def main() -> None:
    """Gather the benchmark run metadata and emit it as a step output.

    The resulting JSON object is published under the ``metadata`` output
    name so that downstream upload steps can consume it.
    """
    args = parse_args()

    # Field names follow the oss_ci_benchmark_v3 schema, see
    # https://github.com/pytorch/test-infra/pull/5839
    metadata = dict(
        timestamp=int(time.time()),
        schema_version=args.schema_version,
        name=args.job_name,
        repo=args.repo,
        head_branch=args.head_branch,
        head_sha=args.head_sha,
        workflow_id=args.workflow_id,
        run_attempt=args.run_attempt,
        job_id=args.job_id,
    )
    set_output("metadata", json.dumps(metadata))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)