Skip to content

Commit 80d606d

Browse files
committed
Merge pull request #1 from HonzaKral/search
Simplified search
2 parents 03ce80e + be2a8db commit 80d606d

File tree

5 files changed, with 17 additions and 26 deletions.

tests/unit/cli/search/test_reindex.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def test_project_docs(db_session):
             "_type": "project",
             "_source": {
                 "name": p.name,
-                "name.normalized": p.normalized_name,
                 "version": [r.version for r in prs],
             },
         }

tests/unit/packaging/test_search.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ def test_build_search():
 
     assert obj.meta.id == "foobar"
     assert obj["name"] == "Foobar"
-    assert obj["name.normalized"] == "foobar"
     assert obj["version"] == ["1.0", "2.0", "3.0", "4.0"]
     assert obj["summary"] == "This is my summary"
     assert obj["description"] == "This is my description"

warehouse/cli/search/reindex.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
 
 import click
 
-from elasticsearch.helpers import streaming_bulk
+from elasticsearch.helpers import bulk
 from sqlalchemy.orm import lazyload, joinedload
 
 from warehouse.cli.search import search
@@ -76,8 +76,7 @@ def reindex(config, **kwargs):
         )
         db.execute("SET statement_timeout = '600s'")
 
-        for _ in streaming_bulk(client, _project_docs(db)):
-            pass
+        bulk(client, _project_docs(db))
     except:
         new_index.delete()
         raise

warehouse/legacy/api/xmlrpc.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,28 +58,19 @@ def search(request, spec, operator="and"):
         }
     }
 
-    query_type = {"version": "term"}
-
     primary_queries = []
     for field, value in spec.items():
         q = None
         for item in value:
             if q is None:
-                q = Q(query_type.get(field, "match"), **{field: item})
+                q = Q("match", **{field: item})
             else:
-                q |= Q(query_type.get(field, "match"), **{field: item})
+                q |= Q("match", **{field: item})
         primary_queries.append(q)
 
-    should_queries = []
-    if "name" in spec:
-        for name in spec["name"]:
-            normalized = re.sub(r"[^a-z0-9]+", "-", name.lower())
-            should_queries.append(
-                Q("term", **{"name.normalized": normalized})
-            )
-
     if operator == "and":
         must_queries = primary_queries
+        should_queries = []
     else:
         must_queries = []
         should_queries.extend(primary_queries)
@@ -88,9 +79,9 @@ def search(request, spec, operator="and"):
     results = query.execute()
 
     return [
-        {"name": r["name"], "summary": r.get("summary"), "version": v}
-        for r in (h["_source"] for h in results.hits.hits)
-        for v in r["version"]
+        {"name": r.name, "summary": r.summary, "version": v}
+        for r in results
+        for v in r.version
         if v in spec.get("version", [v])
     ]
 
warehouse/packaging/search.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from elasticsearch_dsl import DocType, String, analyzer
+from elasticsearch_dsl import DocType, String, analyzer, MetaField
 
 from warehouse.search import doc_type
 
@@ -25,25 +25,28 @@
 @doc_type
 class Project(DocType):
 
-    name = String(fields={"normalized": String(analyzer="keyword")})
-    version = String(analyzer="keyword", multi=True)
+    name = String()
+    version = String(index="not_analyzed", multi=True)
     summary = String(analyzer="snowball")
     description = String(analyzer="snowball")
     author = String()
     author_email = String(analyzer=EmailAnalyzer)
     maintainer = String()
     maintainer_email = String(analyzer=EmailAnalyzer)
     license = String()
-    home_page = String(analyzer="keyword")
-    download_url = String(analyzer="keyword")
+    home_page = String(index="not_analyzed")
+    download_url = String(index="not_analyzed")
     keywords = String(analyzer="snowball")
     platform = String(index="not_analyzed")
 
+    class Meta:
+        # disable the _all field to save some space
+        all = MetaField(enabled=False)
+
     @classmethod
     def from_db(cls, release):
         obj = cls(meta={"id": release.project.normalized_name})
         obj["name"] = release.project.name
-        obj["name.normalized"] = release.project.normalized_name
         obj["version"] = [r.version for r in release.project.releases]
         obj["summary"] = release.summary
         obj["description"] = release.description

0 commit comments

Comments
 (0)