Skip to content

Commit 0bdebdc

Browse files
committed
use elasticsearch_dsl, move search logic into warehouse/search app
1 parent 0304eba commit 0bdebdc

File tree

4 files changed

+149
-47
lines changed

4 files changed

+149
-47
lines changed

warehouse/config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,10 @@ def configure(settings=None):
119119
# Register our CSRF support
120120
config.include(".csrf")
121121

122-
# Register our authentication support.
122+
# Register our elasticsearch integration
123+
config.include(".search")
124+
125+
# Register our authentication support
123126
config.include(".accounts")
124127

125128
# Allow the packaging app to register any services it has.

warehouse/packaging/elasticsearch.py

Lines changed: 40 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,51 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
112
"""
2-
Indexes sqlalchemy changes to elasticsearch. It does not handle
3-
db cascades so they should be avoided.
13+
Index sqlalchemy Session changes to ElasticSearch.
14+
15+
It does not handle non-ORM cascades so they should be avoided.
416
"""
517

6-
import transaction
7-
from sqlalchemy import event
8-
from elasticsearch import Elasticsearch
9-
from pyramid.threadlocal import get_current_registry
18+
from elasticsearch_dsl import DocType, String, Date, Integer
1019

11-
from warehouse.db import _Session
1220
from warehouse.packaging.models import Release
1321

22+
# TODO: most basic documentation
23+
# TODO: add travis test for server-side cascades?
24+
25+
26+
class ReleaseDoc(DocType):
27+
name = String(analyzer='snowball')
28+
version = Integer()
29+
body = String(analyzer='snowball')
30+
published_from = Date()
31+
# TODO: all about those fields
1432

15-
release_defaults = dict(index="warehouse",
16-
doc_type="release")
33+
class Meta:
34+
index = 'release'
35+
model = Release
1736

18-
def handle_insert(elasticsearch_url, target):
19-
es = Elasticsearch([es_url])
20-
es.index(id=target.name,
21-
body={"name": target.name,
22-
"version": target.version,
23-
"description": target.description,
24-
"summary": target.summary,
25-
"license": target.license,
26-
"download_url": target.download_url},
27-
**release_defaults)
37+
@classmethod
38+
def from_model_instance(cls, obj):
39+
return cls(
40+
id=obj.name,
41+
name=obj.name,
42+
version=obj.version,
43+
description=obj.description,
44+
summary=obj.summary,
45+
license=obj.license,
46+
download_url=obj.download_url,
47+
)
2848

29-
def handle_delete(elasticsearch_url, target):
30-
es = Elasticsearch([es_url])
31-
es.delete(id=target.id, **release_defaults)
3249

3350
def includeme(config):
34-
"""Register elasticsearch callbacks"""
35-
elasticsearch_url = config.registry.settings["elasticsearch.url"]
36-
37-
@event.listens_for(Release, 'after_insert')
38-
@event.listens_for(Release, 'after_update')
39-
def release_insert_update(mapper, connection, target):
40-
"""Signal insert/update events for Release model"""
41-
tx = transaction.get()
42-
tx.addAfterCommitHook(handle_insert, args=(elasticsearch_url, target))
43-
44-
@event.listens_for(Release, 'before_delete')
45-
def release_delete(mapper, connection, target):
46-
"""Signal idelete event for Release model"""
47-
tx = transaction.get()
48-
tx.addAfterCommitHook(handle_delete, args=(elasticsearch_url, target))
49-
50-
@event.listens_for(_Session, 'after_bulk_update')
51-
def release_after_bulk_update(update_context):
52-
import pdb;pdb.set_trace()
53-
54-
@event.listens_for(_Session, 'before_bulk_delete')
55-
def release_after_bulk_delete(update_context):
56-
"""Get affected ids before they are deleted"""
57-
import pdb;pdb.set_trace()
51+
config.add_elasticsearch_doctype(ReleaseDoc)

warehouse/search/__init__.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
import transaction
14+
from sqlalchemy import event
15+
from elasticsearch import Elasticsearch
16+
17+
from warehouse.db import _Session
18+
19+
20+
def add_elasticsearch_doctype(config, doctype):
21+
doctype.init(using=config.registry.elasticsearch)
22+
23+
def handle_insert(target):
24+
obj = doctype.from_model_instance(target)
25+
obj.save(using=config.registry.elasticsearch)
26+
27+
def handle_delete(target):
28+
# TODO: what if id doesn't exist? (add a test)
29+
obj = doctype.get(id=target.id)
30+
obj.delete(using=config.registry.elasticsearch)
31+
32+
@event.listens_for(model, 'after_insert')
33+
@event.listens_for(model, 'after_update')
34+
def release_insert_update(mapper, connection, target):
35+
"""Signal insert/update events for the model"""
36+
tx = transaction.get()
37+
tx.addAfterCommitHook(handle_insert,
38+
args=(target,))
39+
40+
@event.listens_for(model, 'before_delete')
41+
def release_delete(mapper, connection, target):
42+
"""Signal idelete event for the model"""
43+
tx = transaction.get()
44+
tx.addAfterCommitHook(handle_delete,
45+
args=(target,))
46+
47+
# TODO: our hooks defeat the purpose of bulk queries - these should be ran
48+
# as part of extrnal process
49+
50+
# TODO: these two callback currently work for all queries, they should be
51+
# limited to a specific model
52+
53+
@event.listens_for(_Session, 'after_bulk_update')
54+
def release_after_bulk_update(update_context):
55+
tx = transaction.get()
56+
for obj in query:
57+
tx.addAfterCommitHook(handle_delete,
58+
args=(obj,))
59+
60+
@event.listens_for(_Session, 'before_bulk_delete')
61+
def release_after_bulk_delete(update_context):
62+
"""Get affected ids before they are deleted"""
63+
tx = transaction.get()
64+
for obj in query:
65+
tx.addAfterCommitHook(handle_delete,
66+
args=(obj,))
67+
68+
69+
def includeme(config):
70+
es_url = config.registry.settings["elasticsearch.url"]
71+
config.registry.elasticsearch = Elasticsearch(es_url)
72+
73+
config.add_directive("add_elasticsearch_doctype",
74+
add_elasticsearch_doctype, action_wrap=False)

warehouse/search/views.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
from warehouse.packaging.elasticsearch import ReleaseDoc
14+
15+
16+
@view_config(
17+
route_name="search.search",
18+
renderer="search/search.html",
19+
)
20+
def search(request):
21+
query = request.GET.get('q')
22+
if query:
23+
s = ReleaseDoc.search()
24+
s = s.query('match', title=query).highlight('name').highlight('description')
25+
response = s.execute()
26+
return {
27+
'num_results': response.hits.total,
28+
'results': response,
29+
}
30+
else:
31+
return {}

0 commit comments

Comments
 (0)