diff --git a/.github/tools/fetch_athena_stats.py b/.github/tools/fetch_athena_stats.py
deleted file mode 100644
index 1f3140359..000000000
--- a/.github/tools/fetch_athena_stats.py
+++ /dev/null
@@ -1,131 +0,0 @@
-import boto3
-import semver
-import os
-import logging
-import uuid
-import time
-
-
-# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
-log = logging.getLogger()
-logging.getLogger("boto3").setLevel(logging.CRITICAL)
-logging.getLogger("botocore").setLevel(logging.CRITICAL)
-logging.getLogger("urllib3").setLevel(logging.CRITICAL)
-
-
-def execute(client, statement, dest_s3_output_location):
-    log.info("execute query: {} dumping in {}".format(statement, dest_s3_output_location))
-    result = client.start_query_execution(
-        QueryString=statement,
-        ClientRequestToken=str(uuid.uuid4()),
-        ResultConfiguration={
-            "OutputLocation": dest_s3_output_location,
-        },
-    )
-    execution_id = result["QueryExecutionId"]
-    log.info("wait for query {} completion".format(execution_id))
-    wait_for_query_execution_completion(client, execution_id)
-    log.info("operation successful")
-    return execution_id
-
-
-def wait_for_query_execution_completion(client, query_execution_id):
-    query_ended = False
-    while not query_ended:
-        query_execution = client.get_query_execution(QueryExecutionId=query_execution_id)
-        state = query_execution["QueryExecution"]["Status"]["State"]
-        if state == "SUCCEEDED":
-            query_ended = True
-        elif state in ["FAILED", "CANCELLED"]:
-            raise BaseException(
-                "query failed or canceled: {}".format(query_execution["QueryExecution"]["Status"]["StateChangeReason"])
-            )
-        else:
-            time.sleep(1)
-
-
-def valid(key):
-    split = key.split("_")
-    if len(split) < 1:
-        return False
-    try:
-        semver.parse(split[0])
-    except ValueError:
-        return False
-    return True
-
-
-def get_results(client, execution_id):
-    results_paginator = client.get_paginator("get_query_results")
-    results_iter = results_paginator.paginate(QueryExecutionId=execution_id, PaginationConfig={"PageSize": 1000})
-    res = {}
-    for results_page in results_iter:
-        for row in results_page["ResultSet"]["Rows"][1:]:
-            # Loop through the JSON objects
-            key = row["Data"][0]["VarCharValue"]
-            if valid(key):
-                res[key] = row["Data"][1]["VarCharValue"]
-
-    return res
-
-
-def convert_data(data):
-    result = []
-    for key, value in data.items():
-        # 0.18.0_macOS_64bit.tar.gz
-        split_key = key.split("_")
-        if len(split_key) != 3:
-            continue
-        (version, os_version, arch) = split_key
-        arch_split = arch.split(".")
-        if len(arch_split) < 1:
-            continue
-        arch = arch_split[0]
-        if len(arch) > 10:
-            # This can't be an architecture really.
-            # It's an ugly solution but works for now so deal with it.
-            continue
-        repo = os.environ["GITHUB_REPOSITORY"].split("/")[1]
-        result.append(
-            {
-                "type": "gauge",
-                "name": "arduino.downloads.total",
-                "value": value,
-                "host": os.environ["GITHUB_REPOSITORY"],
-                "tags": [
-                    f"version:{version}",
-                    f"os:{os_version}",
-                    f"arch:{arch}",
-                    "cdn:downloads.arduino.cc",
-                    f"project:{repo}",
-                ],
-            }
-        )
-
-    return result
-
-
-if __name__ == "__main__":
-    DEST_S3_OUTPUT = os.environ["AWS_ATHENA_OUTPUT_LOCATION"]
-    AWS_ATHENA_SOURCE_TABLE = os.environ["AWS_ATHENA_SOURCE_TABLE"]
-
-    session = boto3.session.Session(region_name="us-east-1")
-    athena_client = session.client("athena")
-
-    # Load all partitions before querying downloads
-    execute(athena_client, f"MSCK REPAIR TABLE {AWS_ATHENA_SOURCE_TABLE};", DEST_S3_OUTPUT)
-
-    query = f"""SELECT replace(json_extract_scalar(url_decode(url_decode(querystring)),
-'$.data.url'), 'https://downloads.arduino.cc/arduino-ide/arduino-ide_', '')
-AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
-FROM {AWS_ATHENA_SOURCE_TABLE}
-WHERE json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
-LIKE 'https://downloads.arduino.cc/arduino-ide/arduino-ide_%'
-AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
-NOT LIKE '%latest%' -- exclude latest redirect
-group by 1 ;"""
-    exec_id = execute(athena_client, query, DEST_S3_OUTPUT)
-    results = get_results(athena_client, exec_id)
-    result_json = convert_data(results)
-
-    print(f"::set-output name=result::{result_json}")
diff --git a/.github/workflows/arduino-stats.yml b/.github/workflows/arduino-stats.yml
deleted file mode 100644
index 167ab57a2..000000000
--- a/.github/workflows/arduino-stats.yml
+++ /dev/null
@@ -1,57 +0,0 @@
-name: arduino-stats
-
-on:
-  schedule:
-    # run every day at 07:00 AM, 03:00 PM and 11:00 PM
-    - cron: "0 7,15,23 * * *"
-  workflow_dispatch:
-  repository_dispatch:
-
-jobs:
-  push-stats:
-    # This workflow is only of value to the arduino/arduino-ide repository and
-    # would always fail in forks
-    if: github.repository == 'arduino/arduino-ide'
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - uses: actions/setup-python@v2
-        with:
-          python-version: '3.x'
-
-      - name: Fetch downloads count form Arduino CDN using AWS Athena
-        id: fetch
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.STATS_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.STATS_AWS_SECRET_ACCESS_KEY }}
-          AWS_ATHENA_SOURCE_TABLE: ${{ secrets.STATS_AWS_ATHENA_SOURCE_TABLE }}
-          AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.STATS_AWS_ATHENA_OUTPUT_LOCATION }}
-          GITHUB_REPOSITORY: ${{ github.repository }}
-        run: |
-          pip install boto3 semver
-          python .github/tools/fetch_athena_stats.py
-
-      - name: Send metrics
-        uses: masci/datadog@v1
-        with:
-          api-key: ${{ secrets.DD_API_KEY }}
-          # Metrics input expects YAML but JSON will work just right.
-          metrics: ${{steps.fetch.outputs.result}}
-
-      - name: Report failure
-        if: failure()
-        uses: masci/datadog@v1
-        with:
-          api-key: ${{ secrets.DD_API_KEY }}
-          events: |
-            - title: "Arduino IDE stats failing"
-              text: "Stats collection failed"
-              alert_type: "error"
-              host: ${{ github.repository }}
-              tags:
-                - "project:arduino-ide"
-                - "cdn:downloads.arduino.cc"
-                - "workflow:${{ github.workflow }}"
diff --git a/.github/workflows/github-stats.yml b/.github/workflows/github-stats.yml
deleted file mode 100644
index c7c8a98af..000000000
--- a/.github/workflows/github-stats.yml
+++ /dev/null
@@ -1,96 +0,0 @@
-name: github-stats
-
-on:
-  schedule:
-    # run every 30 minutes
-    - cron: "*/30 * * * *"
-  workflow_dispatch:
-  repository_dispatch:
-
-jobs:
-  push-stats:
-    # This workflow is only of value to the arduino/arduino-ide repository and
-    # would always fail in forks
-    if: github.repository == 'arduino/arduino-ide'
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Fetch downloads count
-        id: fetch
-        uses: actions/github-script@v4
-        with:
-          github-token: ${{github.token}}
-          script: |
-            let metrics = []
-
-            // Get a list of releases
-            const opts = github.repos.listReleases.endpoint.merge({
-              ...context.repo
-            })
-            const releases = await github.paginate(opts)
-
-            // Get download stats for every release
-            for (const rel of releases) {
-              // Names for assets are like `arduino-ide_2.0.0-beta.12_Linux_64bit.zip`,
-              // we'll use this later to split the asset file name more easily
-              const baseName = `arduino-ide_${rel.name}_`
-
-              // Get a list of assets for this release
-              const opts = github.repos.listReleaseAssets.endpoint.merge({
-                ...context.repo,
-                release_id: rel.id
-              })
-              const assets = await github.paginate(opts)
-
-              for (const asset of assets) {
-                // Ignore files that are not arduino-ide packages
-                if (!asset.name.startsWith(baseName)) {
-                  continue
-                }
-
-                // Strip the base and remove file extension to get `Linux_32bit`
-                systemArch = asset.name.replace(baseName, "").split(".")[0].split("_")
-
-                // Add a metric object to the list of gathered metrics
-                metrics.push({
-                  "type": "gauge",
-                  "name": "arduino.downloads.total",
-                  "value": asset.download_count,
-                  "host": "${{ github.repository }}",
-                  "tags": [
-                    `version:${rel.name}`,
-                    `os:${systemArch[0]}`,
-                    `arch:${systemArch[1]}`,
-                    "cdn:github.com",
-                    "project:arduino-ide"
-                  ]
-                })
-              }
-            }
-
-            // The action will put whatever we return from this function in
-            // `outputs.result`, JSON encoded. So we just return the array
-            // of objects and GitHub will do the rest.
-            return metrics
-
-      - name: Send metrics
-        uses: masci/datadog@v1
-        with:
-          api-key: ${{ secrets.DD_API_KEY }}
-          # Metrics input expects YAML but JSON will work just right.
-          metrics: ${{steps.fetch.outputs.result}}
-
-      - name: Report failure
-        if: failure()
-        uses: masci/datadog@v1
-        with:
-          api-key: ${{ secrets.DD_API_KEY }}
-          events: |
-            - title: "Arduino IDE stats failing"
-              text: "Stats collection failed"
-              alert_type: "error"
-              host: ${{ github.repository }}
-              tags:
-                - "project:arduino-ide"
-                - "cdn:github.com"
-                - "workflow:${{ github.workflow }}"
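Note (not part of the patch): both deleted collectors emitted the same Datadog gauge payload, built by splitting an asset or CDN file name of the form <version>_<os>_<arch>.<ext> into tags. The standalone Python sketch below illustrates that mapping only; the asset_to_metric helper, its defaults, and the sample name are hypothetical and do not belong to the removed files.

    # Minimal sketch of the metric shape produced by the deleted stats collectors.
    # Assumes the `<version>_<os>_<arch>.<ext>` naming documented in the removed
    # code, e.g. "0.18.0_macOS_64bit.tar.gz"; helper name and defaults are illustrative.

    def asset_to_metric(key, value, repository="arduino/arduino-ide"):
        split_key = key.split("_")
        if len(split_key) != 3:
            return None  # not a <version>_<os>_<arch> name
        version, os_version, arch = split_key
        arch = arch.split(".")[0]  # drop the ".tar.gz" / ".zip" extension
        return {
            "type": "gauge",
            "name": "arduino.downloads.total",
            "value": value,
            "host": repository,
            "tags": [
                f"version:{version}",
                f"os:{os_version}",
                f"arch:{arch}",
                f"project:{repository.split('/')[1]}",
            ],
        }

    if __name__ == "__main__":
        # Example: one download-count sample for a macOS tarball
        print(asset_to_metric("0.18.0_macOS_64bit.tar.gz", "1234"))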