Skip to content

Conversation

ilanasegall
Copy link
Contributor

Description

This PR adds a backfill for reported content.

Related Tickets & Documents

* DS-4333

Reviewer, please follow this checklist

@ilanasegall ilanasegall reopened this Aug 25, 2025
@dataops-ci-bot
Copy link

Integration report for "add backfill for reported content"

sql.diff

Click to expand!
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_newtab.py /tmp/workspace/generated-sql/dags/bqetl_newtab.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_newtab.py	2025-08-25 20:55:18.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_newtab.py	2025-08-25 20:58:30.000000000 +0000
@@ -171,6 +171,21 @@
         depends_on_past=False,
     )
 
+    firefox_desktop_derived__report_content__v1 = bigquery_etl_query(
+        task_id="firefox_desktop_derived__report_content__v1",
+        destination_table="report_content_v1",
+        dataset_id="firefox_desktop_derived",
+        project_id="moz-fx-data-shared-prod",
+        owner="[email protected]",
+        email=[
+            "[email protected]",
+            "[email protected]",
+            "[email protected]",
+        ],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+    )
+
     telemetry_derived__newtab_clients_daily__v1 = bigquery_etl_query(
         task_id="telemetry_derived__newtab_clients_daily__v1",
         destination_table="newtab_clients_daily_v1",
@@ -308,6 +323,10 @@
         wait_for_copy_deduplicate_all
     )
 
+    firefox_desktop_derived__report_content__v1.set_upstream(
+        wait_for_copy_deduplicate_all
+    )
+
     telemetry_derived__newtab_clients_daily__v1.set_upstream(
         telemetry_derived__newtab_visits__v1
     )
Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived: report_content_v1
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/backfill.yaml	2025-08-25 20:46:48.000000000 +0000
@@ -0,0 +1,11 @@
+2025-08-25:
+  start_date: 2025-03-31
+  end_date: 2025-07-31
+  reason: backfill for request in DS-4333
+  watchers:
+  - [email protected]
+  status: Initiate
+  shredder_mitigation: false
+  override_retention_limit: false
+  override_depends_on_past_end_date: false
+  ignore_date_partition_offset: false
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/metadata.yaml	2025-08-25 20:51:43.000000000 +0000
@@ -0,0 +1,39 @@
+friendly_name: Reported Content
+description: |-
+  Reported organic content
+
+  For each report, we collect
+    submission date
+    card type
+    corpus item id
+    report reason
+    section
+    section position
+    title
+    topic
+    url
+owners:
+- [email protected]
+labels:
+  application: newtab
+  incremental: true
+  schedule: daily
+  dag: bqetl_newtab
+  owner1: isegall
+  table_type: event_level
+scheduling:
+  dag_name: bqetl_newtab
+bigquery:
+  time_partitioning:
+    type: day
+    field: submission_date
+    require_partition_filter: true
+    expiration_days: null
+  range_partitioning: null
+  clustering: null
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:mozilla-confidential
+references: {}
+require_column_descriptions: false
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/query.sql	2025-08-25 20:46:48.000000000 +0000
@@ -0,0 +1,24 @@
+-- One row per newtab `report_content_submit` event for @submission_date.
+-- The `events` array of each row of firefox_desktop.newtab is unnested, and
+-- the fields of the report are looked up by key in the event's `extra` map.
+-- Columns are listed explicitly (no pass-through CTE / `SELECT *`) so the
+-- output shape is fixed by this query alone.
+SELECT
+  DATE(e.submission_timestamp) AS submission_date,
+  mozfun.map.get_key(event.extra, 'card_type') AS card_type,
+  mozfun.map.get_key(event.extra, 'corpus_item_id') AS corpus_item_id,
+  mozfun.map.get_key(event.extra, 'report_reason') AS report_reason,
+  mozfun.map.get_key(event.extra, 'section') AS section,
+  mozfun.map.get_key(event.extra, 'section_position') AS section_position,
+  mozfun.map.get_key(event.extra, 'title') AS title,
+  mozfun.map.get_key(event.extra, 'topic') AS topic,
+  mozfun.map.get_key(event.extra, 'url') AS url
+FROM
+  `moz-fx-data-shared-prod.firefox_desktop.newtab` AS e
+CROSS JOIN
+  UNNEST(e.events) AS event
+WHERE
+  -- Process only the day passed in as @submission_date.
+  DATE(e.submission_timestamp) = @submission_date
+  AND event.category = 'newtab'
+  AND event.name = 'report_content_submit'
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/report_content_v1/schema.yaml	2025-08-25 20:46:48.000000000 +0000
@@ -0,0 +1,37 @@
+fields:
+- name: submission_date
+  type: DATE
+  mode: NULLABLE
+  description: Day the event was received in the newtab ping
+- name: card_type
+  type: STRING
+  mode: NULLABLE
+  description: The type of the content card (e.g., "spoc", "organic")
+- name: corpus_item_id
+  type: STRING
+  mode: NULLABLE
+  description: content identifier
+- name: section
+  type: STRING
+  mode: NULLABLE
+  description: If the reported content belongs in a section, the name of the section
+- name: section_position
+  type: STRING
+  mode: NULLABLE
+  description: If the reported content belongs in a section, the numeric position of the section
+- name: report_reason
+  type: STRING
+  mode: NULLABLE
+  description: The reason selected by the user when reporting the content
+- name: title
+  type: STRING
+  mode: NULLABLE
+  description: Title of the recommendation.
+- name: topic
+  type: STRING
+  mode: NULLABLE
+  description: The topic of the recommendation. Like "entertainment".
+- name: url
+  type: STRING
+  description: URL of the recommendation.
+  mode: NULLABLE

Link to full diff

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants