
Commit 146989d

samples(discoveryengine): Updates to VAIS Data Store Samples (GoogleCloudPlatform#12121)
* samples(discoveryengine): Add Datastore get/list samples for VAIS
* Fix typing
* Update default setting for gcs import to "content"
* Change message for multiple uris
* Fix import order
* Update library version
* Add Purge Documents Sample
* Adjust sample return
* Remove unneeded import
* Adjust comments based on TW Feedback
1 parent fc434d7 commit 146989d

12 files changed: +241 -18 lines

discoveryengine/create_data_store_sample.py

+1 -1

@@ -73,7 +73,7 @@ def create_data_store_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.CreateDataStoreMetadata(operation.metadata)

discoveryengine/create_engine_sample.py

+1 -1

@@ -79,7 +79,7 @@ def create_engine_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.CreateEngineMetadata(operation.metadata)

discoveryengine/data_store_sample_test.py

+18 -1

@@ -16,7 +16,12 @@
 import os
 from uuid import uuid4

-from discoveryengine import create_data_store_sample, delete_data_store_sample
+from discoveryengine import (
+    create_data_store_sample,
+    delete_data_store_sample,
+    get_data_store_sample,
+    list_data_stores_sample,
+)

 project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
 location = "global"

@@ -30,6 +35,18 @@ def test_create_data_store():
     assert operation_name


+def test_get_data_store():
+    data_store = get_data_store_sample.get_data_store_sample(
+        project_id, location, data_store_id
+    )
+    assert data_store
+
+
+def test_list_data_stores():
+    response = list_data_stores_sample.list_data_stores_sample(project_id, location)
+    assert response
+
+
 def test_delete_data_store():
     operation_name = delete_data_store_sample.delete_data_store_sample(
         project_id, location, data_store_id

discoveryengine/documents_sample_test.py

+12 -1

@@ -17,6 +17,7 @@

 from discoveryengine import import_documents_sample
 from discoveryengine import list_documents_sample
+from discoveryengine import purge_documents_sample

 import pytest

@@ -41,7 +42,7 @@ def test_import_documents_bigquery():


 def test_import_documents_gcs():
-    gcs_uri = "gs://cloud-samples-data/gen-app-builder/search/empty.json"
+    gcs_uri = "gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/goog023-alphabet-2023-annual-report-web-1.pdf"
     operation_name = import_documents_sample.import_documents_gcs_sample(
         project_id=project_id,
         location=location,

@@ -171,3 +172,13 @@ def test_list_documents():
     )

     assert response
+
+
+def test_purge_documents():
+    response = purge_documents_sample.purge_documents_sample(
+        project_id=project_id,
+        location=location,
+        data_store_id=data_store_id,
+    )
+
+    assert response

discoveryengine/get_data_store_sample.py

+56

@@ -0,0 +1,56 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# [START genappbuilder_get_data_store]
+
+from google.api_core.client_options import ClientOptions
+from google.cloud import discoveryengine
+
+# TODO(developer): Uncomment these variables before running the sample.
+# project_id = "YOUR_PROJECT_ID"
+# location = "YOUR_LOCATION" # Values: "global"
+# data_store_id = "YOUR_DATA_STORE_ID"
+
+
+def get_data_store_sample(
+    project_id: str,
+    location: str,
+    data_store_id: str,
+) -> discoveryengine.DataStore:
+    # For more information, refer to:
+    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
+    client_options = (
+        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
+        if location != "global"
+        else None
+    )
+
+    # Create a client
+    client = discoveryengine.DataStoreServiceClient(client_options=client_options)
+
+    request = discoveryengine.GetDataStoreRequest(
+        # The full resource name of the data store
+        name=client.data_store_path(project_id, location, data_store_id)
+    )
+
+    # Make the request
+    data_store = client.get_data_store(request=request)
+
+    print(data_store)
+
+    return data_store
+
+
+# [END genappbuilder_get_data_store]
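For reference, a minimal sketch of calling the new sample from application code; the project and data store IDs below are placeholders, not values taken from this commit:

from discoveryengine import get_data_store_sample

# Placeholder IDs for illustration only.
data_store = get_data_store_sample.get_data_store_sample(
    project_id="my-project",
    location="global",
    data_store_id="my-data-store",
)
print(data_store.name)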

discoveryengine/import_documents_sample.py

+23 -10

@@ -73,7 +73,7 @@ def import_documents_bigquery_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

@@ -99,7 +99,15 @@ def import_documents_gcs_sample(
     # project_id = "YOUR_PROJECT_ID"
     # location = "YOUR_LOCATION" # Values: "global"
     # data_store_id = "YOUR_DATA_STORE_ID"
-    # Format: `gs://bucket/directory/object.json` or `gs://bucket/directory/*.json`
+
+    # Examples:
+    # - Unstructured documents
+    # - `gs://bucket/directory/file.pdf`
+    # - `gs://bucket/directory/*.pdf`
+    # - Unstructured documents with JSONL Metadata
+    # - `gs://bucket/directory/file.json`
+    # - Unstructured documents with CSV Metadata
+    # - `gs://bucket/directory/file.csv`
     # gcs_uri = "YOUR_GCS_PATH"

     # For more information, refer to:

@@ -125,8 +133,13 @@ def import_documents_gcs_sample(
     request = discoveryengine.ImportDocumentsRequest(
         parent=parent,
         gcs_source=discoveryengine.GcsSource(
+            # Multiple URIs are supported
             input_uris=[gcs_uri],
-            data_schema="custom",
+            # Options:
+            # - `content` - Unstructured documents (PDF, HTML, DOC, TXT, PPTX)
+            # - `custom` - Unstructured documents with JSONL metadata
+            # - `csv` - Unstructured documents with CSV metadata
+            data_schema="content",
         ),
         # Options: `FULL`, `INCREMENTAL`
         reconciliation_mode=discoveryengine.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL,

@@ -138,7 +151,7 @@ def import_documents_gcs_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

@@ -213,7 +226,7 @@ def import_documents_cloud_sql_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

@@ -285,7 +298,7 @@ def import_documents_spanner_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

@@ -354,7 +367,7 @@ def import_documents_firestore_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

@@ -443,7 +456,7 @@ def import_documents_bigtable_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

@@ -518,7 +531,7 @@ def import_documents_alloy_db_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)

@@ -592,7 +605,7 @@ def import_documents_healthcare_fhir_sample(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.ImportDocumentsMetadata(operation.metadata)
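With the default schema now set to `content`, plain unstructured files such as PDFs can be imported without metadata sidecar files. A minimal sketch of calling the updated GCS sample under that assumption; the IDs and bucket path below are placeholders:

from discoveryengine import import_documents_sample

# Placeholder project, data store, and bucket values for illustration only.
operation_name = import_documents_sample.import_documents_gcs_sample(
    project_id="my-project",
    location="global",
    data_store_id="my-data-store",
    gcs_uri="gs://my-bucket/reports/*.pdf",  # unstructured PDFs match data_schema="content"
)
print(operation_name)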


discoveryengine/list_data_stores_sample.py

+57

@@ -0,0 +1,57 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# [START genappbuilder_list_data_stores]
+
+from google.api_core.client_options import ClientOptions
+from google.cloud import discoveryengine
+
+# TODO(developer): Uncomment these variables before running the sample.
+# project_id = "YOUR_PROJECT_ID"
+# location = "YOUR_LOCATION" # Values: "global"
+
+
+def list_data_stores_sample(
+    project_id: str,
+    location: str,
+) -> discoveryengine.ListDataStoresResponse:
+    # For more information, refer to:
+    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
+    client_options = (
+        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
+        if location != "global"
+        else None
+    )
+
+    # Create a client
+    client = discoveryengine.DataStoreServiceClient(client_options=client_options)
+
+    request = discoveryengine.ListDataStoresRequest(
+        # The full resource name of the data store
+        parent=client.collection_path(
+            project_id, location, collection="default_collection"
+        )
+    )
+
+    # Make the request
+    response = client.list_data_stores(request=request)
+
+    for data_store in response:
+        print(data_store)
+
+    return response
+
+
+# [END genappbuilder_list_data_stores]
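For reference, a minimal sketch of calling the new list sample and iterating the returned result; the project ID is a placeholder, not a value from this commit:

from discoveryengine import list_data_stores_sample

# Placeholder project ID for illustration only.
response = list_data_stores_sample.list_data_stores_sample(
    project_id="my-project",
    location="global",
)
for data_store in response:
    print(data_store.name)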

discoveryengine/purge_documents_sample.py

+69

@@ -0,0 +1,69 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# [START genappbuilder_purge_documents]
+from google.api_core.client_options import ClientOptions
+from google.cloud import discoveryengine
+
+# TODO(developer): Uncomment these variables before running the sample.
+# project_id = "YOUR_PROJECT_ID"
+# location = "YOUR_LOCATION" # Values: "global", "us", "eu"
+# data_store_id = "YOUR_DATA_STORE_ID"
+
+
+def purge_documents_sample(
+    project_id: str, location: str, data_store_id: str
+) -> discoveryengine.PurgeDocumentsMetadata:
+    # For more information, refer to:
+    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
+    client_options = (
+        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
+        if location != "global"
+        else None
+    )
+
+    # Create a client
+    client = discoveryengine.DocumentServiceClient(client_options=client_options)
+
+    operation = client.purge_documents(
+        request=discoveryengine.PurgeDocumentsRequest(
+            # The full resource name of the search engine branch.
+            # e.g. projects/{project}/locations/{location}/dataStores/{data_store_id}/branches/{branch}
+            parent=client.branch_path(
+                project=project_id,
+                location=location,
+                data_store=data_store_id,
+                branch="default_branch",
+            ),
+            filter="*",
+            # If force is set to `False`, return the expected purge count without deleting any documents.
+            force=True,
+        )
+    )
+
+    print(f"Waiting for operation to complete: {operation.operation.name}")
+    response = operation.result()
+
+    # After the operation is complete,
+    # get information from operation metadata
+    metadata = discoveryengine.PurgeDocumentsMetadata(operation.metadata)
+
+    # Handle the response
+    print(response)
+    print(metadata)
+
+    return metadata
+
+
+# [END genappbuilder_purge_documents]
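The sample runs with force=True, which permanently deletes every document matching the "*" filter. A minimal dry-run sketch mirroring the same request with force=False, using placeholder IDs; per the comment in the sample, this only reports the expected purge count without deleting anything:

from google.cloud import discoveryengine

# Placeholder IDs for illustration only; same request shape as the sample above.
client = discoveryengine.DocumentServiceClient()
operation = client.purge_documents(
    request=discoveryengine.PurgeDocumentsRequest(
        parent=client.branch_path(
            project="my-project",
            location="global",
            data_store="my-data-store",
            branch="default_branch",
        ),
        filter="*",
        force=False,  # report the expected purge count without deleting any documents
    )
)
# The operation result reports how many documents would have been purged.
print(operation.result())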

discoveryengine/requirements.txt

+1 -1

@@ -1,2 +1,2 @@
-google-cloud-discoveryengine==0.11.14
+google-cloud-discoveryengine==0.12.1
 google-api-core==2.19.0

discoveryengine/site_search_engine_sample.py

+1 -1

@@ -67,7 +67,7 @@ def create_target_site(
     print(f"Waiting for operation to complete: {operation.operation.name}")
     response = operation.result()

-    # Once the operation is complete,
+    # After the operation is complete,
     # get information from operation metadata
     metadata = discoveryengine.CreateTargetSiteMetadata(operation.metadata)

documentai/snippets/batch_process_documents_sample.py

+1 -1

@@ -112,7 +112,7 @@ def batch_process_documents(
     #
     # operation.add_done_callback(my_callback)

-    # Once the operation is complete,
+    # After the operation is complete,
     # get output document information from operation metadata
     metadata = documentai.BatchProcessMetadata(operation.metadata)

0 commit comments
