Skip to content

Commit 4cc34e4

Browse files
committed
Back to old commit
1 parent f91d286 commit 4cc34e4

File tree

1 file changed

+20
-40
lines changed

1 file changed

+20
-40
lines changed

alloydb/notebooks/batch_embeddings_update.ipynb

+20-40
Original file line numberDiff line numberDiff line change
@@ -55,22 +55,6 @@
5555
"* A Google Cloud Account and Google Cloud Project"
5656
]
5757
},
58-
{
59-
"cell_type": "markdown",
60-
"metadata": {
61-
"id": "vHdR4fF3vLWA"
62-
},
63-
"source": [
64-
"## Objectives\n",
65-
"\n",
66-
"In the following instructions you will learn to:\n",
67-
"\n",
68-
"1. Install required dependencies for our application\n",
69-
"2. Set up authentication for our project\n",
70-
"3. Set up a AlloyDB for PostgreSQL Instance\n",
71-
"4. Import the data used by our application"
72-
]
73-
},
7458
{
7559
"cell_type": "markdown",
7660
"metadata": {
@@ -382,6 +366,17 @@
382366
" return pool"
383367
]
384368
},
369+
{
370+
"cell_type": "code",
371+
"execution_count": null,
372+
"metadata": {},
373+
"outputs": [],
374+
"source": [
375+
"from google.cloud.alloydb.connector import AsyncConnector\n",
376+
"\n",
377+
"connector = AsyncConnector()"
378+
]
379+
},
385380
{
386381
"cell_type": "markdown",
387382
"metadata": {
@@ -405,15 +400,9 @@
405400
},
406401
"outputs": [],
407402
"source": [
408-
"from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine\n",
409403
"from sqlalchemy import text, exc\n",
410404
"\n",
411-
"from google.cloud.alloydb.connector import AsyncConnector, IPTypes\n",
412-
"\n",
413-
"async def create_db(database_name):\n",
414-
" # Get a raw connection directly from the connector\n",
415-
" connector = AsyncConnector()\n",
416-
" connection_string = f\"projects/{project_id}/locations/{region}/clusters/{cluster_name}/instances/{instance_name}\"\n",
405+
"async def create_db(database_name, connector): \n",
417406
" pool = await init_connection_pool(connector, \"postgres\")\n",
418407
" async with pool.connect() as conn:\n",
419408
" try:\n",
@@ -423,7 +412,7 @@
423412
" except exc.ProgrammingError:\n",
424413
" print(f\"Database '{database_name}' already exists\")\n",
425414
"\n",
426-
"await create_db(database_name=database_name)"
415+
"await create_db(database_name=database_name, connector=connector)"
427416
]
428417
},
429418
{
@@ -600,7 +589,7 @@
600589
" \"overview\": row[\"overview\"],\n",
601590
" \"analysis\": row[\"analysis\"],\n",
602591
" }\n",
603-
" for index, row in df.iterrows()\n",
592+
" for _, row in df.iterrows()\n",
604593
"]"
605594
]
606595
},
@@ -614,8 +603,6 @@
614603
"source": [
615604
"from google.cloud.alloydb.connector import AsyncConnector\n",
616605
"\n",
617-
"connector = AsyncConnector()\n",
618-
"\n",
619606
"# Create table and insert data\n",
620607
"async def insert_data(pool):\n",
621608
" async with pool.connect() as db_conn:\n",
@@ -1042,7 +1029,6 @@
10421029
"source": [
10431030
"import vertexai\n",
10441031
"import time\n",
1045-
"import asyncio\n",
10461032
"from vertexai.language_models import TextEmbeddingModel\n",
10471033
"\n",
10481034
"pool_size = 10\n",
@@ -1051,7 +1037,6 @@
10511037
"total_char_count = 0\n",
10521038
"\n",
10531039
"# Set up connections to the database\n",
1054-
"connector = AsyncConnector()\n",
10551040
"pool = await init_connection_pool(connector, database_name, pool_size=pool_size)\n",
10561041
"\n",
10571042
"# Initialise VertexAI and the model to be used to generate embeddings\n",
@@ -1067,10 +1052,14 @@
10671052
"batch_data = batch_source_data(source_data, cols_to_embed)\n",
10681053
"\n",
10691054
"# Generate embeddings for the batched data concurrently\n",
1070-
"embeddings_data = embed_objects_concurrently(cols_to_embed, batch_data, model, task, max_concurrency=embed_data_concurrency)\n",
1055+
"embeddings_data = embed_objects_concurrently(\n",
1056+
" cols_to_embed, batch_data, model, task, max_concurrency=embed_data_concurrency\n",
1057+
")\n",
10711058
"\n",
10721059
"# Update the database with the generated embeddings concurrently\n",
1073-
"await batch_update_rows_concurrently(pool, embeddings_data, cols_to_embed, max_concurrency=batch_update_concurrency)\n",
1060+
"await batch_update_rows_concurrently(\n",
1061+
" pool, embeddings_data, cols_to_embed, max_concurrency=batch_update_concurrency\n",
1062+
")\n",
10741063
"\n",
10751064
"end_time = time.monotonic()\n",
10761065
"elapsed_time = end_time - start_time\n",
@@ -1084,15 +1073,6 @@
10841073
"print(f\"Total run time: {elapsed_time:.2f} seconds\")\n",
10851074
"print(f\"Total characters embedded: {total_char_count}\")"
10861075
]
1087-
},
1088-
{
1089-
"cell_type": "code",
1090-
"execution_count": 41,
1091-
"metadata": {
1092-
"id": "fzZJsWRZAMxs"
1093-
},
1094-
"outputs": [],
1095-
"source": []
10961076
}
10971077
],
10981078
"metadata": {

0 commit comments

Comments (0)