NOTE(review): line structure restored from a copy whose whitespace had been
collapsed onto one line. The row counts match the hunk header (-10,24 +10,14),
but the indentation inside the SQL string and the two blank context rows
after the hunk heading are assumed -- confirm against the target file.

Change summary: the query stops unioning mdn_fred.events_stream with
mdn_yari.events_stream and reads page_load events from mdn_fred only,
extracting the page path directly from event_extra.url.

diff --git a/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py b/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py
index 50c9c0510b9..e28c1610618 100644
--- a/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py
+++ b/sql/moz-fx-data-shared-prod/mdn_yari_derived/mdn_popularities_v1/query.py
@@ -10,24 +10,14 @@ from google.cloud import bigquery, storage
 
 
 QUERY_TEMPLATE = """\
-WITH events_stream AS
-  (SELECT JSON_VALUE(event_extra.url) AS url
-   FROM `moz-fx-data-shared-prod.mdn_fred.events_stream`
-   WHERE DATE(submission_timestamp) BETWEEN DATE_TRUNC(@submission_date, MONTH) AND LAST_DAY(@submission_date)
-     AND client_info.app_channel = 'prod'
-     AND event_name = 'page_load'
-     AND JSON_VALUE(event_extra.url) LIKE "https://developer.mozilla.org/%/docs/%"
-     AND JSON_VALUE(event_extra.title) != 'Page not found | MDN'
-   UNION ALL SELECT JSON_VALUE(event_extra.url) AS url
-   FROM `moz-fx-data-shared-prod.mdn_yari.events_stream`
-   WHERE DATE(submission_timestamp) BETWEEN DATE_TRUNC(@submission_date, MONTH) AND LAST_DAY(@submission_date)
-     AND client_info.app_channel = 'prod'
-     AND event_name = 'page_load'
-     AND JSON_VALUE(event_extra.url) LIKE "https://developer.mozilla.org/%/docs/%"
-     AND JSON_VALUE(event_extra.title) NOT LIKE '%Page not found | MDN' )
-SELECT REGEXP_EXTRACT(url, r'^https://developer.mozilla.org(/.+?/docs/[^?#]+)') AS Page,
+SELECT REGEXP_EXTRACT(JSON_VALUE(event_extra.url), r'^https://developer.mozilla.org(/.+?/docs/[^?#]+)') AS Page,
        COUNT(*) AS Pageviews
-FROM events_stream
+FROM `moz-fx-data-shared-prod.mdn_fred.events_stream`
+WHERE DATE(submission_timestamp) BETWEEN DATE_TRUNC(@submission_date, MONTH) AND LAST_DAY(@submission_date)
+  AND client_info.app_channel = 'prod'
+  AND event_name = 'page_load'
+  AND JSON_VALUE(event_extra.url) LIKE "https://developer.mozilla.org/%/docs/%"
+  AND JSON_VALUE(event_extra.title) != 'Page not found | MDN'
 GROUP BY Page
 ORDER BY Pageviews DESC
 """