diff --git a/dev/config.yml b/dev/config.yml
index 06ecc869e0d7..e4463068cf2e 100644
--- a/dev/config.yml
+++ b/dev/config.yml
@@ -5,6 +5,9 @@ configurator:
 database:
   url: "postgresql://postgres@db/warehouse"
 
+elasticsearch:
+  url: "http://elasticsearch:9200"
+
 download_stats:
   url: "redis://redis:6379/1"
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 75849a75bf43..1be224c6511d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,6 +4,9 @@ db:
 redis:
   image: redis:latest
 
+elasticsearch:
+  image: elasticsearch
+
 camo:
   build: dev/camo
   command: node server.js
@@ -23,5 +26,6 @@ web:
   links:
     - db
     - redis
+    - elasticsearch
   environment:
     PYTHONUNBUFFERED: 1
diff --git a/setup.py b/setup.py
index c2062a21fe8d..16f35d25a40e 100644
--- a/setup.py
+++ b/setup.py
@@ -85,6 +85,8 @@
         "pyramid_tm>=0.11",
         "readme>=0.5.1",
         "redis",
+        "elasticsearch",
+        "elasticsearch_dsl",
         "setproctitle",
         "sqlalchemy>=0.9",
         "sqlalchemy-citext",
diff --git a/warehouse/config.py b/warehouse/config.py
index e8aa0260c954..7693e396e549 100644
--- a/warehouse/config.py
+++ b/warehouse/config.py
@@ -119,7 +119,10 @@ def configure(settings=None):
     # Register our CSRF support
     config.include(".csrf")
 
+    # Register our elasticsearch integration
+    config.include(".search")
+
     # Register our authentication support.
     config.include(".accounts")
 
     # Allow the packaging app to register any services it has.
diff --git a/warehouse/packaging/__init__.py b/warehouse/packaging/__init__.py
index 5e286a666ca8..bed328793495 100644
--- a/warehouse/packaging/__init__.py
+++ b/warehouse/packaging/__init__.py
@@ -36,3 +36,4 @@ def includeme(config):
         "project",
         "project/{obj.project.normalized_name}",
     )
+    config.include(".elasticsearch")
diff --git a/warehouse/packaging/elasticsearch.py b/warehouse/packaging/elasticsearch.py
new file mode 100644
index 000000000000..0af8815f9d5d
--- /dev/null
+++ b/warehouse/packaging/elasticsearch.py
@@ -0,0 +1,61 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Elasticsearch document types for the packaging models.
+
+Documents are kept in sync through ORM events only, so non-ORM (server-side)
+cascades are not picked up and should be avoided.
+"""
+
+from elasticsearch_dsl import DocType, String
+
+from warehouse.packaging.models import Release
+
+# TODO: add travis test for server-side cascades?
+
+
+class ReleaseDoc(DocType):
+    """Search document for a single release of a project."""
+
+    name = String(analyzer="snowball")
+    # Versions are free-form strings such as "1.0.2", not integers.
+    version = String(index="not_analyzed")
+    summary = String(analyzer="snowball")
+    description = String(analyzer="snowball")
+    license = String()
+    download_url = String(index="not_analyzed")
+
+    class Meta:
+        index = "release"
+
+    @classmethod
+    def document_id(cls, obj):
+        """Return the id of the document for the release ``obj``."""
+        return "{}:{}".format(obj.name, obj.version)
+
+    @classmethod
+    def from_model_instance(cls, obj):
+        """Build the document that mirrors the release ``obj``."""
+        return cls(
+            meta={"id": cls.document_id(obj)},
+            name=obj.name,
+            version=obj.version,
+            description=obj.description,
+            summary=obj.summary,
+            license=obj.license,
+            download_url=obj.download_url,
+        )
+
+
+def includeme(config):
+    """Register the packaging document types with the search integration."""
+    config.add_elasticsearch_doctype(ReleaseDoc, Release)
diff --git a/warehouse/search/__init__.py b/warehouse/search/__init__.py
new file mode 100644
index 000000000000..c8cb5229657d
--- /dev/null
+++ b/warehouse/search/__init__.py
@@ -0,0 +1,99 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import transaction
+from sqlalchemy import event
+from elasticsearch import Elasticsearch, NotFoundError
+
+from warehouse.db import _Session
+
+
+def add_elasticsearch_doctype(config, doctype, model):
+    """
+    Keep the Elasticsearch documents of ``doctype`` in sync with ``model``.
+
+    ``doctype`` must provide the ``from_model_instance(obj)`` and
+    ``document_id(obj)`` class methods. The index is only touched after the
+    surrounding transaction has been committed successfully.
+    """
+    client = config.registry.elasticsearch
+    doctype.init(using=client)
+
+    def save_document(success, document):
+        # After-commit hooks receive the commit status as first argument;
+        # never index data from a transaction that was aborted.
+        if success:
+            document.save(using=client)
+
+    def delete_document(success, document_id):
+        if not success:
+            return
+        try:
+            document = doctype.get(id=document_id, using=client)
+        except NotFoundError:
+            # The object was never indexed: there is nothing to remove.
+            return
+        if document is not None:
+            document.delete(using=client)
+
+    def schedule_save(obj):
+        # Build the document right away: once the transaction is committed
+        # the instance may be expired or detached from its session.
+        document = doctype.from_model_instance(obj)
+        transaction.get().addAfterCommitHook(save_document, args=(document,))
+
+    def schedule_delete(obj):
+        document_id = doctype.document_id(obj)
+        transaction.get().addAfterCommitHook(
+            delete_document, args=(document_id,),
+        )
+
+    @event.listens_for(model, "after_insert")
+    @event.listens_for(model, "after_update")
+    def model_insert_update(mapper, connection, target):
+        """Signal insert/update events for the model"""
+        schedule_save(target)
+
+    @event.listens_for(model, "before_delete")
+    def model_delete(mapper, connection, target):
+        """Signal delete event for the model"""
+        schedule_delete(target)
+
+    # TODO: our hooks defeat the purpose of bulk queries - these should be run
+    # as part of an external process
+
+    @event.listens_for(_Session, "after_bulk_update")
+    def model_after_bulk_update(update_context):
+        """Reindex the instances of the model touched by a bulk update"""
+        query = update_context.query
+        # This listener fires for every bulk update on the session, so skip
+        # the queries that do not select this model.
+        if not any(d["type"] is model for d in query.column_descriptions):
+            return
+        for obj in query:
+            schedule_save(obj)
+
+    # TODO: bulk deletes are not mirrored yet. SQLAlchemy has no
+    # "before_bulk_delete" session event, and when "after_bulk_delete" fires
+    # the affected rows can no longer be queried for their ids.
+
+
+def includeme(config):
+    """Create the Elasticsearch client and add the doctype directive."""
+    es_url = config.registry.settings["elasticsearch.url"]
+    config.registry.elasticsearch = Elasticsearch(es_url)
+
+    config.add_directive(
+        "add_elasticsearch_doctype",
+        add_elasticsearch_doctype,
+        action_wrap=False,
+    )
diff --git a/warehouse/search/views.py b/warehouse/search/views.py
new file mode 100644
index 000000000000..011abf527b23
--- /dev/null
+++ b/warehouse/search/views.py
@@ -0,0 +1,39 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyramid.view import view_config
+
+from warehouse.packaging.elasticsearch import ReleaseDoc
+
+
+@view_config(
+    route_name="search.search",
+    renderer="search/search.html",
+)
+def search(request):
+    """Render the releases matching the ``q`` query string parameter."""
+    query = request.GET.get("q")
+    if not query:
+        return {}
+
+    s = ReleaseDoc.search(using=request.registry.elasticsearch)
+    s = s.query(
+        "multi_match",
+        query=query,
+        fields=["name", "summary", "description"],
+    )
+    s = s.highlight("name").highlight("description")
+    response = s.execute()
+    return {
+        "num_results": response.hits.total,
+        "results": response,
+    }