Skip to content

Commit a46f36a

Browse files
rsorasilvanocerza
rsora
authored andcommitted
[skip changelog] Add stats workflow to gather downloads data
1 parent bfb90a8 commit a46f36a

File tree

3 files changed

+270
-0
lines changed

3 files changed

+270
-0
lines changed

.github/tools/fetch_athena_stats.sh

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#!/usr/bin/env bash
2+
3+
# This script performs the following:
4+
# 1. Run the query and use jq to capture its QueryExecutionId in a bash variable.
5+
# 2. Wait for the query to finish running (240 seconds).
6+
# 3. Get the results.
7+
# 4. Build the JSON data points structure.
8+
9+
# Expected env variables are:
10+
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for accessing AWS resources
11+
# AWS_ATHENA_SOURCE_TABLE
12+
# AWS_ATHENA_OUTPUT_LOCATION
13+
# GITHUB_REPOSITORY
14+
15+
set -euo pipefail
16+
17+
# Submit an MSCK REPAIR TABLE statement for the source table and remember the
# execution id Athena hands back, so the query can be polled afterwards.
loadExecutionId=$(
  aws athena start-query-execution \
    --region us-east-1 \
    --result-configuration "OutputLocation=${AWS_ATHENA_OUTPUT_LOCATION}" \
    --query-string "MSCK REPAIR TABLE ${AWS_ATHENA_SOURCE_TABLE};" \
    | jq --raw-output '.QueryExecutionId'
)
23+
24+
echo "QueryExecutionId is ${loadExecutionId}"

# Poll the repair query every 2 seconds, for at most 120 attempts (~240 seconds).
loadState=""
for ((attempt = 1; attempt <= 120; attempt++)); do
  loadState=$(
    aws athena get-query-execution \
      --query-execution-id "${loadExecutionId}" \
      --region us-east-1 | jq -r ".QueryExecution.Status.State"
  )

  if [[ "${loadState}" == "SUCCEEDED" ]]; then
    break
  fi

  echo "QueryExecutionId ${loadExecutionId} - state is ${loadState}"

  # FAILED and CANCELLED are both terminal states: waiting longer cannot help.
  if [[ "${loadState}" == "FAILED" || "${loadState}" == "CANCELLED" ]]; then
    exit 1
  fi

  sleep 2
done

# The loop also ends when the attempts run out; never carry on with a query
# that has not actually finished.
if [[ "${loadState}" != "SUCCEEDED" ]]; then
  echo "QueryExecutionId ${loadExecutionId} did not succeed in time (last state: ${loadState})" >&2
  exit 1
fi
44+
45+
# SQL that counts downloads per "flavor" (the file name part following
# "arduino-ide_"), excluding the "latest" redirect URLs.
# The heredoc delimiter is quoted so the shell never tries to expand the "$"
# of the JSONPath expressions ('$.data.url', '$').
# `read -d ''` returns non-zero when it hits end of input, hence the `|| true`
# to keep `set -e` from aborting the script.
read -r -d '' query <<'EOM' || true
SELECT split_part(replace(json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url'), 'https://downloads.arduino.cc/arduino-ide/arduino-ide_', ''),'?',1) AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
FROM stats_ingest_prod.complete_cf_logs_partitioned
WHERE json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url') LIKE 'https://downloads.arduino.cc/arduino-ide/arduino-ide_%'
AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url') NOT LIKE '%latest%' -- exclude latest redirect
group by 1 ;
EOM
52+
53+
# Submit the downloads query and remember the execution id Athena hands back,
# so the query can be polled afterwards.
queryExecutionId=$(
  aws athena start-query-execution \
    --region us-east-1 \
    --result-configuration "OutputLocation=${AWS_ATHENA_OUTPUT_LOCATION}" \
    --query-string "${query}" \
    | jq --raw-output '.QueryExecutionId'
)
59+
60+
echo "QueryExecutionId is ${queryExecutionId}"

# Poll the downloads query every 2 seconds, for at most 120 attempts (~240 seconds).
queryState=""
for ((attempt = 1; attempt <= 120; attempt++)); do
  queryState=$(
    aws athena get-query-execution \
      --query-execution-id "${queryExecutionId}" \
      --region us-east-1 | jq -r ".QueryExecution.Status.State"
  )

  if [[ "${queryState}" == "SUCCEEDED" ]]; then
    break
  fi

  echo "QueryExecutionId ${queryExecutionId} - state is ${queryState}"

  # FAILED and CANCELLED are both terminal states: waiting longer cannot help.
  if [[ "${queryState}" == "FAILED" || "${queryState}" == "CANCELLED" ]]; then
    exit 1
  fi

  sleep 2
done

# The loop also ends when the attempts run out; never fetch results of a query
# that has not actually finished.
if [[ "${queryState}" != "SUCCEEDED" ]]; then
  echo "QueryExecutionId ${queryExecutionId} did not succeed in time (last state: ${queryState})" >&2
  exit 1
fi
80+
81+
echo "Query succeeded. Processing data"

# Download the result set as one compact JSON document.
# The jq filter is spelled out ('.') instead of relying on the implicit default
# program, which is only available in newer jq releases.
queryResult=$(
  aws athena get-query-results \
    --query-execution-id "${queryExecutionId}" \
    --region us-east-1 | jq --compact-output '.'
)
87+
88+
#######################################
# Convert an Athena result set into a compact JSON array of gauge datapoints.
#
# The first row of the result set (the column headers) is skipped. For every
# other row, column 0 is the flavor (e.g. 0.6.0_Windows_32bit.zip) and column 1
# is the download count. Rows whose flavor does not split into exactly three
# non-empty "_"-separated parts are ignored. The architecture is the third part
# up to its first "." (32bit.zip -> 32bit).
#
# The JSON is built entirely by jq, so values are always escaped correctly, rows
# are never word-split or glob-expanded by the shell, and an empty result set
# yields a valid "[]".
#
# Globals:   GITHUB_REPOSITORY (read) - reported as the "host" of each datapoint
# Arguments: $1 - JSON document as returned by `aws athena get-query-results`
# Outputs:   the JSON array of datapoints on stdout
# Returns:   jq's exit status
#######################################
build_datapoints() {
  jq --compact-output --arg host "${GITHUB_REPOSITORY}" '
    [
      (.ResultSet.Rows // [])[1:][]
      | .Data
      | (.[1].VarCharValue | tostring) as $gauge
      | ((.[0].VarCharValue // "") | split("_") | map(select(length > 0))) as $parts
      | select(($parts | length) == 3)
      | {
          type: "gauge",
          name: "arduino.downloads.total",
          value: $gauge,
          host: $host,
          tags: [
            "version:\($parts[0])",
            "os:\($parts[1])",
            "arch:\($parts[2] | split(".") | map(select(length > 0)) | .[0] // "")",
            "cdn:downloads.arduino.cc",
            "project:arduino-ide"
          ]
        }
    ]
  ' <<<"$1"
}

datapoints=$(build_datapoints "${queryResult}")
117+
118+
#######################################
# Publish a JSON document as the `result` output of the current workflow step.
#
# The document is validated and compacted to a single line with jq first; doing
# this in its own assignment means a jq failure is reported instead of being
# hidden inside an `echo "$(...)"`. When GITHUB_OUTPUT is set, the output is
# appended to that file; otherwise the legacy `::set-output` workflow command is
# printed.
#
# Globals:   GITHUB_OUTPUT (read, optional) - path of the step output file
# Arguments: $1 - JSON document to publish
# Outputs:   the `::set-output` command on stdout when GITHUB_OUTPUT is not set
# Returns:   0 on success, jq's exit status when $1 is not valid JSON
#######################################
publish_result() {
  local result
  result=$(jq --compact-output '.' <<<"$1") || return

  if [[ -n "${GITHUB_OUTPUT:-}" ]]; then
    printf 'result=%s\n' "${result}" >>"${GITHUB_OUTPUT}"
  else
    echo "::set-output name=result::${result}"
  fi
}

publish_result "${datapoints}"

.github/workflows/arduino-stats.yaml

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Collects Arduino IDE download counts from the Arduino CDN logs (through AWS
# Athena) and sends them to Datadog as metrics.
name: arduino-stats

on:
  schedule:
    # run every day at 07:00 AM, 03:00 PM and 11:00 PM (scheduled workflows use UTC)
    - cron: "0 7,15,23 * * *"
  workflow_dispatch:
  repository_dispatch:

jobs:
  push-stats:
    # This workflow is only of value to the arduino/arduino-ide repository and
    # would always fail in forks
    if: github.repository == 'arduino/arduino-ide'
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Fetch downloads count from Arduino CDN using AWS Athena
        id: fetch
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.STATS_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.STATS_AWS_SECRET_ACCESS_KEY }}
          AWS_ATHENA_SOURCE_TABLE: ${{ secrets.STATS_AWS_ATHENA_SOURCE_TABLE }}
          AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.STATS_AWS_ATHENA_OUTPUT_LOCATION }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          # Fetch jq 1.6 as VM has only 1.5 ATM
          wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 -O jq
          chmod +x jq
          # Put the downloaded jq ahead of the preinstalled one for the script below
          PATH="${{ github.workspace }}:$PATH"
          .github/tools/fetch_athena_stats.sh

      - name: Send metrics
        uses: masci/datadog@v1
        with:
          api-key: ${{ secrets.DD_API_KEY }}
          # Metrics input expects YAML but JSON will work just right.
          metrics: ${{steps.fetch.outputs.result}}

      # Runs only when a previous step failed, to raise an error event in Datadog
      - name: Report failure
        if: failure()
        uses: masci/datadog@v1
        with:
          api-key: ${{ secrets.DD_API_KEY }}
          events: |
            - title: "Arduino IDE stats failing"
              text: "Stats collection failed"
              alert_type: "error"
              host: ${{ github.repository }}
              tags:
                - "project:arduino-ide"
                - "cdn:downloads.arduino.cc"
                - "workflow:${{ github.workflow }}"

.github/workflows/github-stats.yaml

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Collects the download counts of the GitHub release assets of this repository
# and sends them to Datadog as metrics.
name: github-stats

on:
  schedule:
    # run every 30 minutes
    - cron: "*/30 * * * *"
  workflow_dispatch:
  repository_dispatch:

jobs:
  push-stats:
    # This workflow is only of value to the arduino/arduino-ide repository and
    # would always fail in forks
    if: github.repository == 'arduino/arduino-ide'
    runs-on: ubuntu-latest

    steps:
      - name: Fetch downloads count
        id: fetch
        uses: actions/github-script@v4
        with:
          github-token: ${{github.token}}
          script: |
            let metrics = []

            // Get a list of releases
            const opts = github.repos.listReleases.endpoint.merge({
              ...context.repo
            })
            const releases = await github.paginate(opts)

            // Get download stats for every release
            for (const rel of releases) {
              // Names for assets are like `arduino-ide_2.0.0-beta.11_Linux_64bit.zip`,
              // we'll use this later to split the asset file name more easily
              const baseName = `arduino-ide_${rel.name}_`

              // Get a list of assets for this release
              const opts = github.repos.listReleaseAssets.endpoint.merge({
                ...context.repo,
                release_id: rel.id
              })
              const assets = await github.paginate(opts)

              for (const asset of assets) {
                // Ignore files that are not arduino-ide packages
                if (!asset.name.startsWith(baseName)) {
                  continue
                }

                // Strip the base and remove file extension to get `Linux_32bit`,
                // then split it into [os, arch]. Declared with `const` so it
                // does not leak as an implicit global.
                const systemArch = asset.name.replace(baseName, "").split(".")[0].split("_")

                // Add a metric object to the list of gathered metrics
                metrics.push({
                  "type": "gauge",
                  "name": "arduino.downloads.total",
                  "value": asset.download_count,
                  "host": "${{ github.repository }}",
                  "tags": [
                    `version:${rel.name}`,
                    `os:${systemArch[0]}`,
                    `arch:${systemArch[1]}`,
                    "cdn:github.com",
                    "project:arduino-ide"
                  ]
                })
              }
            }

            // The action will put whatever we return from this function in
            // `outputs.result`, JSON encoded. So we just return the array
            // of objects and GitHub will do the rest.
            return metrics

      - name: Send metrics
        uses: masci/datadog@v1
        with:
          api-key: ${{ secrets.DD_API_KEY }}
          # Metrics input expects YAML but JSON will work just right.
          metrics: ${{steps.fetch.outputs.result}}

      # Runs only when a previous step failed, to raise an error event in Datadog
      - name: Report failure
        if: failure()
        uses: masci/datadog@v1
        with:
          api-key: ${{ secrets.DD_API_KEY }}
          events: |
            - title: "Arduino IDE stats failing"
              text: "Stats collection failed"
              alert_type: "error"
              host: ${{ github.repository }}
              tags:
                - "project:arduino-ide"
                - "cdn:github.com"
                - "workflow:${{ github.workflow }}"

0 commit comments

Comments
 (0)