From ef26dfc1d59d949af8ceea1b08d70bb34ee69972 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Tue, 6 Aug 2024 18:11:41 +0000 Subject: [PATCH] docs: update streaming notebook --- notebooks/streaming/streaming_dataframe.ipynb | 203 +++++++++++------- 1 file changed, 127 insertions(+), 76 deletions(-) diff --git a/notebooks/streaming/streaming_dataframe.ipynb b/notebooks/streaming/streaming_dataframe.ipynb index d4cc255fa5..9b52c2d71e 100644 --- a/notebooks/streaming/streaming_dataframe.ipynb +++ b/notebooks/streaming/streaming_dataframe.ipynb @@ -17,10 +17,22 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'1.13.0'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import bigframes\n", - "import bigframes.streaming as bst" + "# make sure bigframes version >= 1.12.0\n", + "bigframes.__version__" ] }, { @@ -29,10 +41,46 @@ "metadata": {}, "outputs": [], "source": [ - "bigframes.options._bigquery_options.project = \"bigframes-load-testing\"\n", + "import bigframes.pandas as bpd\n", + "import bigframes.streaming as bst\n", + "bigframes.options._bigquery_options.project = \"bigframes-load-testing\" # Change to your own project ID\n", "job_id_prefix = \"test_streaming_\"" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 65df3a2f-cda8-405d-8b38-20a755f9b9a0 is DONE. 28.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'birds.penguins_bigtable_streaming'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Copy a table from the public dataset for streaming jobs. Any changes to the table can be reflected in the streaming destination.\n", + "df = bpd.read_gbq(\"bigquery-public-data.ml_datasets.penguins\")\n", + "df.to_gbq(\"birds.penguins_bigtable_streaming\", if_exists=\"replace\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -43,13 +91,15 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/session/__init__.py:773: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", + " warnings.warn(\n", "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/blocks.py:126: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n", " warnings.warn(\n" ] @@ -61,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -82,7 +132,7 @@ { "data": { "text/html": [ - "Query job d57200dd-e6f1-42c7-876b-7f4a54994ae6 is DONE. 0 Bytes processed. Open Job" + "Query job dd20bd9d-4844-43e4-86ab-95759d7e673a is DONE. 2.7 kB processed. Open Job" ], "text/plain": [ "" @@ -104,7 +154,7 @@ { "data": { "text/html": [ - "Query job 1decce4a-eb32-49f4-8e47-7bda0220037a is DONE. 28.9 kB processed. Open Job" + "Query job 873e44ee-76e9-4254-83d3-04cf36fbd140 is DONE. 28.9 kB processed. Open Job" ], "text/plain": [ "" @@ -144,151 +194,151 @@ " 0\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3875\n", + " 3875.0\n", " \n", " \n", " 1\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 2900\n", + " 2900.0\n", " \n", " \n", " 2\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3725\n", + " 3725.0\n", " \n", " \n", " 3\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 2975\n", + " 2975.0\n", " \n", " \n", " 4\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3050\n", + " 3050.0\n", " \n", " \n", " 5\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 2700\n", + " 2700.0\n", " \n", " \n", " 6\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3900\n", + " 3900.0\n", " \n", " \n", " 7\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3825\n", + " 3825.0\n", " \n", " \n", " 8\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3775\n", + " 3775.0\n", " \n", " \n", " 9\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3350\n", + " 3350.0\n", " \n", " \n", " 10\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3900\n", + " 3900.0\n", " \n", " \n", " 11\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3650\n", + " 3650.0\n", " \n", " \n", " 12\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3200\n", + " 3200.0\n", " \n", " \n", " 13\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3650\n", + " 3650.0\n", " \n", " \n", " 14\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3700\n", + " 3700.0\n", " \n", " \n", " 15\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3800\n", + " 3800.0\n", " \n", " \n", " 16\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3950\n", + " 3950.0\n", " \n", " \n", " 17\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3350\n", + " 3350.0\n", " \n", " \n", " 18\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 3100\n", + " 3100.0\n", " \n", " \n", " 19\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3750\n", + " 3750.0\n", " \n", " \n", " 20\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Biscoe\n", - " 3550\n", + " 3550.0\n", " \n", " \n", " 21\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3400\n", + " 3400.0\n", " \n", " \n", " 22\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3450\n", + " 3450.0\n", " \n", " \n", " 23\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Torgersen\n", - " 3600\n", + " 3600.0\n", " \n", " \n", " 24\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 3650\n", + " 3650.0\n", " \n", " \n", "\n", @@ -297,37 +347,37 @@ ], "text/plain": [ " species rowkey body_mass_g\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3875\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 2900\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3725\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 2975\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3050\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 2700\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3900\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3825\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3775\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3350\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3900\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3650\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3200\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3650\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3700\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3800\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3950\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3350\n", - " Adelie Penguin (Pygoscelis adeliae) Dream 3100\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3750\n", - " Adelie Penguin (Pygoscelis adeliae) Biscoe 3550\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3400\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3450\n", - " Adelie Penguin (Pygoscelis adeliae) Torgersen 3600\n", - "Chinstrap penguin (Pygoscelis antarctica) Dream 3650\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3875.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 2900.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3725.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 2975.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3050.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 2700.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3900.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3825.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3775.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3350.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3900.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3650.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3200.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3650.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3700.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3800.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3950.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3350.0\n", + " Adelie Penguin (Pygoscelis adeliae) Dream 3100.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3750.0\n", + " Adelie Penguin (Pygoscelis adeliae) Biscoe 3550.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3400.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3450.0\n", + " Adelie Penguin (Pygoscelis adeliae) Torgersen 3600.0\n", + "Chinstrap penguin (Pygoscelis antarctica) Dream 3650.0\n", "...\n", "\n", "[165 rows x 3 columns]" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -351,22 +401,22 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:338: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:341: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", " warnings.warn(\n" ] } ], "source": [ - "job = sdf.to_bigtable(instance=\"streaming-testing-instance\",\n", - " table=\"garrettwu-no-col-family\",\n", - " service_account_email=\"streaming-testing-admin@bigframes-load-testing.iam.gserviceaccount.com\",\n", + "job = sdf.to_bigtable(instance=\"streaming-testing-instance\", # Change to your own Bigtable instance name\n", + " table=\"garrettwu-no-col-family\", # Change to your own Bigtable table name\n", + " service_account_email=\"streaming-testing-admin@bigframes-load-testing.iam.gserviceaccount.com\", # Change to your own service account\n", " app_profile=None,\n", " truncate=True,\n", " overwrite=True,\n", @@ -378,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -397,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -406,7 +456,7 @@ "True" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -419,13 +469,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### PubSub\n", - "Create Pubsub streaming job" + "### Pub/Sub\n", + "Create Pub/Sub streaming job" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -438,27 +488,28 @@ } ], "source": [ + "# Pub/Sub requires a single column\n", "sdf = sdf[[\"rowkey\"]]" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:453: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:456: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n", " warnings.warn(\n" ] } ], "source": [ "job = sdf.to_pubsub(\n", - " topic=\"penguins\",\n", - " service_account_email=\"streaming-testing@bigframes-load-testing.iam.gserviceaccount.com\",\n", + " topic=\"penguins\", # Change to your own Pub/Sub topic ID\n", + " service_account_email=\"streaming-testing@bigframes-load-testing.iam.gserviceaccount.com\", # Change to your own service account\n", " job_id=None,\n", " job_id_prefix=job_id_prefix,\n", " )" @@ -466,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -485,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -494,7 +545,7 @@ "True" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" }