Skip to content

Commit 0d49272

Browse files
Add index creation to aggregations example
1 parent 663d6f4 commit 0d49272

File tree

4 files changed

+103
-88
lines changed

4 files changed

+103
-88
lines changed

examples/async/composite_agg.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
import os
2020
from typing import Any, AsyncIterator, Dict, List, Optional, Union
2121

22+
from elasticsearch.helpers import async_bulk
23+
2224
from elasticsearch_dsl import A, Agg, AsyncSearch, Response, async_connections
25+
from tests.test_integration.test_data import DATA, GIT_INDEX
2326

2427

2528
async def scan_aggs(
@@ -56,8 +59,17 @@ async def run_search(**kwargs: Any) -> Response:
5659

5760
async def main() -> None:
5861
# initiate the default connection to elasticsearch
59-
async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
62+
client = async_connections.create_connection(
63+
hosts=[os.environ["ELASTICSEARCH_URL"]]
64+
)
65+
66+
# create the index and populate it with some data
67+
# note that the dataset is imported from the library's test suite
68+
await client.indices.delete(index="git", ignore_unavailable=True)
69+
await client.indices.create(index="git", **GIT_INDEX)
70+
await async_bulk(client, DATA, raise_on_error=True, refresh=True)
6071

72+
# run some aggregations on the data
6173
async for b in scan_aggs(
6274
AsyncSearch(index="git"),
6375
{"files": A("terms", field="files")},

examples/composite_agg.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
import os
1919
from typing import Any, Dict, Iterator, List, Optional, Union
2020

21+
from elasticsearch.helpers import bulk
22+
2123
from elasticsearch_dsl import A, Agg, Response, Search, connections
24+
from tests.test_integration.test_data import DATA, GIT_INDEX
2225

2326

2427
def scan_aggs(
@@ -55,8 +58,15 @@ def run_search(**kwargs: Any) -> Response:
5558

5659
def main() -> None:
5760
# initiate the default connection to elasticsearch
58-
connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
61+
client = connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
62+
63+
# create the index and populate it with some data
64+
# note that the dataset is imported from the library's test suite
65+
client.indices.delete(index="git", ignore_unavailable=True)
66+
client.indices.create(index="git", **GIT_INDEX)
67+
bulk(client, DATA, raise_on_error=True, refresh=True)
5968

69+
# run some aggregations on the data
6070
for b in scan_aggs(
6171
Search(index="git"),
6272
{"files": A("terms", field="files")},

tests/test_integration/test_data.py

Lines changed: 78 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -19,99 +19,91 @@
1919

2020
from elasticsearch import Elasticsearch
2121

22+
user_mapping = {
23+
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
24+
}
2225

23-
def create_flat_git_index(client: Elasticsearch, index: str) -> None:
24-
# we will use user on several places
25-
user_mapping = {
26-
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
27-
}
28-
29-
client.indices.create(
30-
index=index,
31-
body={
32-
"settings": {
33-
# just one shard, no replicas for testing
34-
"number_of_shards": 1,
35-
"number_of_replicas": 0,
36-
# custom analyzer for analyzing file paths
37-
"analysis": {
38-
"analyzer": {
39-
"file_path": {
40-
"type": "custom",
41-
"tokenizer": "path_hierarchy",
42-
"filter": ["lowercase"],
43-
}
44-
}
45-
},
46-
},
47-
"mappings": {
48-
"properties": {
49-
"description": {"type": "text", "analyzer": "snowball"},
50-
"author": user_mapping,
51-
"authored_date": {"type": "date"},
52-
"committer": user_mapping,
53-
"committed_date": {"type": "date"},
54-
"parent_shas": {"type": "keyword"},
55-
"files": {
56-
"type": "text",
57-
"analyzer": "file_path",
58-
"fielddata": True,
59-
},
26+
FLAT_GIT_INDEX: Dict[str, Any] = {
27+
"settings": {
28+
# just one shard, no replicas for testing
29+
"number_of_shards": 1,
30+
"number_of_replicas": 0,
31+
# custom analyzer for analyzing file paths
32+
"analysis": {
33+
"analyzer": {
34+
"file_path": {
35+
"type": "custom",
36+
"tokenizer": "path_hierarchy",
37+
"filter": ["lowercase"],
6038
}
39+
}
40+
},
41+
},
42+
"mappings": {
43+
"properties": {
44+
"description": {"type": "text", "analyzer": "snowball"},
45+
"author": user_mapping,
46+
"authored_date": {"type": "date"},
47+
"committer": user_mapping,
48+
"committed_date": {"type": "date"},
49+
"parent_shas": {"type": "keyword"},
50+
"files": {
51+
"type": "text",
52+
"analyzer": "file_path",
53+
"fielddata": True,
6154
},
55+
}
56+
},
57+
}
58+
59+
GIT_INDEX: Dict[str, Any] = {
60+
"settings": {
61+
# just one shard, no replicas for testing
62+
"number_of_shards": 1,
63+
"number_of_replicas": 0,
64+
# custom analyzer for analyzing file paths
65+
"analysis": {
66+
"analyzer": {
67+
"file_path": {
68+
"type": "custom",
69+
"tokenizer": "path_hierarchy",
70+
"filter": ["lowercase"],
71+
}
72+
}
6273
},
63-
)
74+
},
75+
"mappings": {
76+
"properties": {
77+
# common fields
78+
"description": {"type": "text", "analyzer": "snowball"},
79+
"commit_repo": {"type": "join", "relations": {"repo": "commit"}},
80+
# COMMIT mappings
81+
"author": user_mapping,
82+
"authored_date": {"type": "date"},
83+
"committer": user_mapping,
84+
"committed_date": {"type": "date"},
85+
"parent_shas": {"type": "keyword"},
86+
"files": {
87+
"type": "text",
88+
"analyzer": "file_path",
89+
"fielddata": True,
90+
},
91+
# REPO mappings
92+
"is_public": {"type": "boolean"},
93+
"owner": user_mapping,
94+
"created_at": {"type": "date"},
95+
"tags": {"type": "keyword"},
96+
}
97+
},
98+
}
6499

65100

66-
def create_git_index(client: Elasticsearch, index: str) -> None:
67-
# we will use user on several places
68-
user_mapping = {
69-
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
70-
}
101+
def create_flat_git_index(client: Elasticsearch, index: str) -> None:
102+
client.indices.create(index=index, body=FLAT_GIT_INDEX)
71103

72-
client.indices.create(
73-
index=index,
74-
body={
75-
"settings": {
76-
# just one shard, no replicas for testing
77-
"number_of_shards": 1,
78-
"number_of_replicas": 0,
79-
# custom analyzer for analyzing file paths
80-
"analysis": {
81-
"analyzer": {
82-
"file_path": {
83-
"type": "custom",
84-
"tokenizer": "path_hierarchy",
85-
"filter": ["lowercase"],
86-
}
87-
}
88-
},
89-
},
90-
"mappings": {
91-
"properties": {
92-
# common fields
93-
"description": {"type": "text", "analyzer": "snowball"},
94-
"commit_repo": {"type": "join", "relations": {"repo": "commit"}},
95-
# COMMIT mappings
96-
"author": user_mapping,
97-
"authored_date": {"type": "date"},
98-
"committer": user_mapping,
99-
"committed_date": {"type": "date"},
100-
"parent_shas": {"type": "keyword"},
101-
"files": {
102-
"type": "text",
103-
"analyzer": "file_path",
104-
"fielddata": True,
105-
},
106-
# REPO mappings
107-
"is_public": {"type": "boolean"},
108-
"owner": user_mapping,
109-
"created_at": {"type": "date"},
110-
"tags": {"type": "keyword"},
111-
}
112-
},
113-
},
114-
)
104+
105+
def create_git_index(client: Elasticsearch, index: str) -> None:
106+
client.indices.create(index=index, body=GIT_INDEX)
115107

116108

117109
DATA = [

utils/run-unasync.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def main(check=False):
6464
"async_connections": "connections",
6565
"async_scan": "scan",
6666
"async_simulate": "simulate",
67+
"async_bulk": "bulk",
6768
"async_mock_client": "mock_client",
6869
"async_client": "client",
6970
"async_data_client": "data_client",

0 commit comments

Comments
 (0)