|
#!/usr/bin/env bash

# Collects Arduino IDE download counts from Athena and publishes them as
# gauge data points.
#
# This script performs the following:
#   1. Runs `MSCK REPAIR TABLE` on the source table and waits for it to finish.
#   2. Runs the download-count query and waits for it to finish.
#   3. Gets the query results.
#   4. Builds a JSON array of data points (one per result row) and exposes it
#      as the `result` step output.
#
# Each wait polls the query state every 2 seconds, at most 120 times
# (240 seconds). A query that fails, is cancelled, or does not succeed within
# that window aborts the script with a non-zero exit status.
#
# Expected env variables are:
#   AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for accessing AWS resources
#   AWS_ATHENA_SOURCE_TABLE
#   AWS_ATHENA_OUTPUT_LOCATION
#   GITHUB_REPOSITORY
# Optional:
#   GITHUB_OUTPUT - when set, the result is appended to this file instead of
#                   being emitted through the legacy `::set-output` command.

set -euo pipefail

readonly ATHENA_REGION="us-east-1"
readonly POLL_ATTEMPTS=120
readonly POLL_INTERVAL_SECONDS=2

#######################################
# Prints the SQL that counts downloads per artifact "flavor".
# Outputs: the query text on stdout.
#######################################
downloads_query() {
  # Quoted delimiter: the SQL contains `$` characters that must stay literal.
  cat <<'EOM'
SELECT split_part(replace(json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url'), 'https://downloads.arduino.cc/arduino-ide/arduino-ide_', ''),'?',1) AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
FROM stats_ingest_prod.complete_cf_logs_partitioned
WHERE json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url') LIKE 'https://downloads.arduino.cc/arduino-ide/arduino-ide_%'
  AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url') NOT LIKE '%latest%' -- exclude latest redirect
group by 1 ;
EOM
}

#######################################
# Starts an Athena query.
# Globals:   AWS_ATHENA_OUTPUT_LOCATION (read), ATHENA_REGION (read)
# Arguments: $1 - query string
# Outputs:   the QueryExecutionId on stdout
#######################################
start_query() {
  local query_string=$1

  aws athena start-query-execution \
    --query-string "${query_string}" \
    --result-configuration "OutputLocation=${AWS_ATHENA_OUTPUT_LOCATION}" \
    --region "${ATHENA_REGION}" |
    jq -r ".QueryExecutionId"
}

#######################################
# Polls an Athena query until it succeeds.
# Must be called outside of any conditional (`if`, `||`, `&&`) so that
# `set -e` remains in effect for the commands inside the function.
# Globals:   ATHENA_REGION, POLL_ATTEMPTS, POLL_INTERVAL_SECONDS (read)
# Arguments: $1 - QueryExecutionId
# Outputs:   progress lines on stdout, timeout diagnostics on stderr
# Returns:   0 once the state is SUCCEEDED; 1 if the query FAILED, was
#            CANCELLED, or did not succeed within the polling window
#######################################
wait_for_query() {
  local execution_id=$1
  local state=""
  local attempt

  for ((attempt = 1; attempt <= POLL_ATTEMPTS; attempt++)); do
    state=$(
      aws athena get-query-execution \
        --query-execution-id "${execution_id}" \
        --region "${ATHENA_REGION}" |
        jq -r ".QueryExecution.Status.State"
    )

    if [[ "${state}" == "SUCCEEDED" ]]; then
      return 0
    fi

    echo "QueryExecutionId ${execution_id} - state is ${state}"

    # Both states are terminal: polling further can never reach SUCCEEDED.
    if [[ "${state}" == "FAILED" || "${state}" == "CANCELLED" ]]; then
      return 1
    fi

    sleep "${POLL_INTERVAL_SECONDS}"
  done

  # Falling out of the loop means the query never reached SUCCEEDED; treat it
  # as an error instead of continuing with incomplete data.
  printf 'QueryExecutionId %s - timed out after %d attempts (last state: %s)\n' \
    "${execution_id}" "${POLL_ATTEMPTS}" "${state}" >&2
  return 1
}

#######################################
# Converts an Athena get-query-results document into the data point array.
# The first row (column headers) is skipped. The first column is expected to
# look like `0.6.0_Windows_32bit.zip`; rows whose first column does not split
# into exactly three non-empty `_`-separated parts are skipped. The arch tag
# is the part of the third element before the first `.`.
# Globals:   GITHUB_REPOSITORY (read)
# Inputs:    get-query-results JSON on stdin
# Outputs:   compact JSON array on stdout (`[]` when no row qualifies)
#######################################
build_datapoints() {
  # Building the array inside jq avoids shell word-splitting of row data and
  # guarantees every value is correctly JSON-escaped.
  jq --compact-output --arg host "${GITHUB_REPOSITORY}" '
    [
      (.ResultSet.Rows // [])[1:][]
      | .Data
      | (.[1].VarCharValue | tostring) as $value
      | ((.[0].VarCharValue // "") | split("_") | map(select(length > 0))) as $parts
      | select(($parts | length) == 3)
      | ($parts[2] | split(".") | map(select(length > 0)) | .[0] // "") as $arch
      | {
          type: "gauge",
          name: "arduino.downloads.total",
          value: $value,
          host: $host,
          tags: [
            "version:\($parts[0])",
            "os:\($parts[1])",
            "arch:\($arch)",
            "cdn:downloads.arduino.cc",
            "project:arduino-ide"
          ]
        }
    ]
  '
}

#######################################
# Exposes the data points as the `result` step output.
# Globals:   GITHUB_OUTPUT (read, optional)
# Arguments: $1 - single-line JSON document
#######################################
publish_result() {
  local datapoints=$1

  if [[ -n "${GITHUB_OUTPUT:-}" ]]; then
    printf 'result=%s\n' "${datapoints}" >>"${GITHUB_OUTPUT}"
  else
    # Legacy workflow command, kept for runners that do not set GITHUB_OUTPUT.
    echo "::set-output name=result::${datapoints}"
  fi
}

main() {
  # Fail before starting any query if the configuration is incomplete.
  : "${AWS_ATHENA_SOURCE_TABLE:?must be set}"
  : "${AWS_ATHENA_OUTPUT_LOCATION:?must be set}"
  : "${GITHUB_REPOSITORY?must be set}"

  local load_execution_id
  load_execution_id=$(start_query "MSCK REPAIR TABLE ${AWS_ATHENA_SOURCE_TABLE};")
  echo "QueryExecutionId is ${load_execution_id}"
  wait_for_query "${load_execution_id}"

  local query
  query=$(downloads_query)

  local query_execution_id
  query_execution_id=$(start_query "${query}")
  echo "QueryExecutionId is ${query_execution_id}"
  wait_for_query "${query_execution_id}"

  echo "Query succeeded. Processing data"
  local datapoints
  datapoints=$(
    aws athena get-query-results \
      --query-execution-id "${query_execution_id}" \
      --region "${ATHENA_REGION}" |
      build_datapoints
  )

  publish_result "${datapoints}"
}

main "$@"
0 commit comments