55 | 55 | "* A Google Cloud Account and Google Cloud Project"
56 | 56 | ]
57 | 57 | },
58 | | - {
59 | | - "cell_type": "markdown",
60 | | - "metadata": {
61 | | - "id": "vHdR4fF3vLWA"
62 | | - },
63 | | - "source": [
64 | | - "## Objectives\n",
65 | | - "\n",
66 | | - "In the following instructions you will learn to:\n",
67 | | - "\n",
68 | | - "1. Install required dependencies for our application\n",
69 | | - "2. Set up authentication for our project\n",
70 | | - "3. Set up a AlloyDB for PostgreSQL Instance\n",
71 | | - "4. Import the data used by our application"
72 | | - ]
73 | | - },
74 | 58 | {
75 | 59 | "cell_type": "markdown",
76 | 60 | "metadata": {

382 | 366 | " return pool"
383 | 367 | ]
384 | 368 | },
| 369 | + {
| 370 | + "cell_type": "code",
| 371 | + "execution_count": null,
| 372 | + "metadata": {},
| 373 | + "outputs": [],
| 374 | + "source": [
| 375 | + "from google.cloud.alloydb.connector import AsyncConnector\n",
| 376 | + "\n",
| 377 | + "connector = AsyncConnector()"
| 378 | + ]
| 379 | + },
385 | 380 | {
386 | 381 | "cell_type": "markdown",
387 | 382 | "metadata": {
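This new cell is the heart of the change: one `AsyncConnector` created up front and shared by every later cell, instead of each cell constructing its own. A rough sketch of the intended lifecycle (the cleanup step is an assumption for illustration, not something shown in this diff):

```python
from google.cloud.alloydb.connector import AsyncConnector

# One shared connector: it caches client certificates and refreshes
# them in the background, so constructing it repeatedly wastes work.
connector = AsyncConnector()

# ... every pool in the notebook borrows this single instance ...

# Assumed cleanup step: release background tasks and sockets at shutdown.
await connector.close()
```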
405 | 400 | },
406 | 401 | "outputs": [],
407 | 402 | "source": [
408 | | - "from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine\n",
409 | 403 | "from sqlalchemy import text, exc\n",
410 | 404 | "\n",
411 | | - "from google.cloud.alloydb.connector import AsyncConnector, IPTypes\n",
412 | | - "\n",
413 | | - "async def create_db(database_name):\n",
414 | | - " # Get a raw connection directly from the connector\n",
415 | | - " connector = AsyncConnector()\n",
416 | | - " connection_string = f\"projects/{project_id}/locations/{region}/clusters/{cluster_name}/instances/{instance_name}\"\n",
| 405 | + "async def create_db(database_name, connector): \n",
417 | 406 | " pool = await init_connection_pool(connector, \"postgres\")\n",
418 | 407 | " async with pool.connect() as conn:\n",
419 | 408 | " try:\n",

423 | 412 | " except exc.ProgrammingError:\n",
424 | 413 | " print(f\"Database '{database_name}' already exists\")\n",
425 | 414 | "\n",
426 | | - "await create_db(database_name=database_name)"
| 415 | + "await create_db(database_name=database_name, connector=connector)"
427 | 416 | ]
428 | 417 | },
429 | 418 | {
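`create_db` now accepts the shared connector and defers pooling to the notebook's `init_connection_pool` helper, whose body ends just above this hunk (`return pool`). For orientation, a minimal sketch of what a compatible helper could look like, following the AlloyDB connector's documented SQLAlchemy pattern; the `instance_uri`, `db_user`, and `db_password` values are placeholder assumptions standing in for the notebook's real settings:

```python
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from google.cloud.alloydb.connector import AsyncConnector

# Placeholders; in the notebook these come from earlier setup cells
# (project_id, region, cluster_name, instance_name, credentials).
instance_uri = "projects/my-project/locations/us-central1/clusters/my-cluster/instances/my-instance"
db_user = "postgres"
db_password = "my-password"

async def init_connection_pool(
    connector: AsyncConnector, db: str, pool_size: int = 5
) -> AsyncEngine:
    async def getconn():
        # The connector dials the instance and hands back a raw asyncpg connection.
        return await connector.connect(
            instance_uri, "asyncpg", user=db_user, password=db_password, db=db
        )

    # SQLAlchemy manages the pool; the connector supplies each connection.
    return create_async_engine(
        "postgresql+asyncpg://", async_creator=getconn, pool_size=pool_size
    )
```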
600 | 589 | " \"overview\": row[\"overview\"],\n",
601 | 590 | " \"analysis\": row[\"analysis\"],\n",
602 | 591 | " }\n",
603 | | - " for index, row in df.iterrows()\n",
| 592 | + " for _, row in df.iterrows()\n",
604 | 593 | "]"
605 | 594 | ]
606 | 595 | },
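Renaming the unused loop variable to `_` is the conventional way to show that only the row is consumed. A self-contained illustration of the same comprehension, with a throwaway DataFrame standing in for the notebook's `df`:

```python
import pandas as pd

df = pd.DataFrame({"overview": ["plot A", "plot B"], "analysis": ["x", "y"]})

# The underscore signals that the row index is intentionally ignored.
source_data = [
    {"overview": row["overview"], "analysis": row["analysis"]}
    for _, row in df.iterrows()
]
```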
614 | 603 | "source": [
615 | 604 | "from google.cloud.alloydb.connector import AsyncConnector\n",
616 | 605 | "\n",
617 | | - "connector = AsyncConnector()\n",
618 | | - "\n",
619 | 606 | "# Create table and insert data\n",
620 | 607 | "async def insert_data(pool):\n",
621 | 608 | " async with pool.connect() as db_conn:\n",
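With its private `connector = AsyncConnector()` removed, `insert_data` depends only on the pool it receives. A minimal sketch of the kind of statement such a function issues, assuming a pool built by `init_connection_pool` and a hypothetical `movies` table (the real DDL sits outside this hunk):

```python
from sqlalchemy import text

async def insert_data(pool):
    async with pool.connect() as db_conn:
        # Bound parameters keep the statement safe from SQL injection.
        await db_conn.execute(
            text("INSERT INTO movies (overview, analysis) VALUES (:o, :a)"),
            {"o": "example overview", "a": "example analysis"},
        )
        await db_conn.commit()
```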
1042 | 1029 | "source": [
1043 | 1030 | "import vertexai\n",
1044 | 1031 | "import time\n",
1045 | | - "import asyncio\n",
1046 | 1032 | "from vertexai.language_models import TextEmbeddingModel\n",
1047 | 1033 | "\n",
1048 | 1034 | "pool_size = 10\n",

1051 | 1037 | "total_char_count = 0\n",
1052 | 1038 | "\n",
1053 | 1039 | "# Set up connections to the database\n",
1054 | | - "connector = AsyncConnector()\n",
1055 | 1040 | "pool = await init_connection_pool(connector, database_name, pool_size=pool_size)\n",
1056 | 1041 | "\n",
1057 | 1042 | "# Initialise VertexAI and the model to be used to generate embeddings\n",

1067 | 1052 | "batch_data = batch_source_data(source_data, cols_to_embed)\n",
1068 | 1053 | "\n",
1069 | 1054 | "# Generate embeddings for the batched data concurrently\n",
1070 | | - "embeddings_data = embed_objects_concurrently(cols_to_embed, batch_data, model, task, max_concurrency=embed_data_concurrency)\n",
| 1055 | + "embeddings_data = embed_objects_concurrently(\n",
| 1056 | + " cols_to_embed, batch_data, model, task, max_concurrency=embed_data_concurrency\n",
| 1057 | + ")\n",
1071 | 1058 | "\n",
1072 | 1059 | "# Update the database with the generated embeddings concurrently\n",
1073 | | - "await batch_update_rows_concurrently(pool, embeddings_data, cols_to_embed, max_concurrency=batch_update_concurrency)\n",
| 1060 | + "await batch_update_rows_concurrently(\n",
| 1061 | + " pool, embeddings_data, cols_to_embed, max_concurrency=batch_update_concurrency\n",
| 1062 | + ")\n",
1074 | 1063 | "\n",
1075 | 1064 | "end_time = time.monotonic()\n",
1076 | 1065 | "elapsed_time = end_time - start_time\n",
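Reflowing these two calls keeps their `max_concurrency` arguments visible at a glance. The helpers' bodies are elided from the diff; the usual way to enforce such a cap is an `asyncio.Semaphore`, sketched below with a hypothetical `gather_with_concurrency` helper rather than the notebook's actual implementation. Keeping that logic inside the helpers is presumably also why the driver cell no longer needs its own `import asyncio` (removed at old line 1045).

```python
import asyncio

async def gather_with_concurrency(max_concurrency, coroutines):
    # At most max_concurrency coroutines run at once; the rest queue up,
    # which throttles embedding API calls and database transactions alike.
    semaphore = asyncio.Semaphore(max_concurrency)

    async def run_limited(coro):
        async with semaphore:
            return await coro

    return await asyncio.gather(*(run_limited(c) for c in coroutines))
```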
1084 | 1073 | "print(f\"Total run time: {elapsed_time:.2f} seconds\")\n",
1085 | 1074 | "print(f\"Total characters embedded: {total_char_count}\")"
1086 | 1075 | ]
1087 | | - },
1088 | | - {
1089 | | - "cell_type": "code",
1090 | | - "execution_count": 41,
1091 | | - "metadata": {
1092 | | - "id": "fzZJsWRZAMxs"
1093 | | - },
1094 | | - "outputs": [],
1095 | | - "source": []
1096 | 1076 | }
1097 | 1077 | ],
1098 | 1078 | "metadata": {