Skip to content

Experimental API search exposed through query. #1293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 44 additions & 24 deletions app/lib/search/index_simple.dart
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// BSD-style license that can be found in the LICENSE file.

import 'dart:async';
import 'dart:io';
import 'dart:math' as math;

import 'package:gcloud/service_scope.dart' as ss;
Expand All @@ -24,20 +23,19 @@ void registerPackageIndex(PackageIndex index) =>
ss.register(#packageIndexService, index);

class SimplePackageIndex implements PackageIndex {
final bool enableApiIndex;
final Map<String, PackageDocument> _packages = <String, PackageDocument>{};
final Map<String, String> _normalizedPackageText = <String, String>{};
final TokenIndex _nameIndex = new TokenIndex(minLength: 2);
final TokenIndex _descrIndex = new TokenIndex(minLength: 3);
final TokenIndex _readmeIndex = new TokenIndex(minLength: 3);
final TokenIndex _apiDocIndex = new TokenIndex(minLength: 3);
final StringInternPool _internPool = new StringInternPool();
final bool _apiSearchEnabled;
DateTime _lastUpdated;
bool _isReady = false;

SimplePackageIndex({bool enableApiIndex})
: this.enableApiIndex =
enableApiIndex ?? Platform.environment['SEARCH_API_INDEX'] == '1';
SimplePackageIndex({bool apiSearchEnabled: false})
: _apiSearchEnabled = apiSearchEnabled;

@override
bool get isReady => _isReady;
Expand Down Expand Up @@ -74,17 +72,20 @@ class SimplePackageIndex implements PackageIndex {
@override
Future addPackage(PackageDocument document) async {
final PackageDocument doc = document.intern(_internPool.intern);
await removePackage(doc.package);
if (document.isDiscontinued == true) return; // isDiscontinued may be null

// isDiscontinued may be null
if (document.isDiscontinued == true) {
await removePackage(doc.package);
return;
}

_packages[doc.package] = doc;
_nameIndex.add(doc.package, doc.package);
_descrIndex.add(doc.package, doc.description);
_readmeIndex.add(doc.package, doc.readme);
if (enableApiIndex) {
for (ApiDocPage page in doc.apiDocPages ?? const []) {
_apiDocIndex.add(
_apiDocPageId(doc.package, page), page.symbols?.join(' '));
}
for (ApiDocPage page in doc.apiDocPages ?? const []) {
_apiDocIndex.add(
_apiDocPageId(doc.package, page), page.symbols?.join(' '));
}
final String allText = [doc.package, doc.description, doc.readme]
.where((s) => s != null)
Expand Down Expand Up @@ -179,7 +180,8 @@ class SimplePackageIndex implements PackageIndex {
}

// do text matching
final Score textScore = _searchText(packages, query.parsedQuery.text);
final Score textScore = _searchText(packages, query.parsedQuery.text,
_apiSearchEnabled || query.parsedQuery.isApiEnabled);

// filter out packages that don't match the text query
if (textScore != null) {
Expand Down Expand Up @@ -299,15 +301,16 @@ class SimplePackageIndex implements PackageIndex {
return new Score(values);
}

Score _searchText(Set<String> packages, String text) {
Score _searchText(Set<String> packages, String text, bool isExperimental) {
if (text != null && text.isNotEmpty) {
final List<String> words = splitForIndexing(text).toList();
final int wordCount = words.length;
final List<Score> wordScores = words.map((String word) {
final nameTokens = _nameIndex.lookupTokens(word);
final descrTokens = _descrIndex.lookupTokens(word);
final readmeTokens = _readmeIndex.lookupTokens(word);
final apiDocTokens = _apiDocIndex.lookupTokens(word);
final apiDocTokens =
isExperimental ? _apiDocIndex.lookupTokens(word) : new TokenMatch();
final maxTokenLength = [
nameTokens.maxLength,
descrTokens.maxLength,
Expand All @@ -326,15 +329,20 @@ class SimplePackageIndex implements PackageIndex {
final readme = new Score(_readmeIndex.scoreDocs(readmeTokens,
weight: 0.90, wordCount: wordCount));

final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens,
weight: 0.80, wordCount: wordCount));
final apiPackages = <String, double>{};
for (String key in apiPages.getKeys()) {
final pkg = _apiDocPkg(key);
final value = apiPages[key];
apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0);
Score apiScore;
if (isExperimental) {
final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens,
weight: 0.80, wordCount: wordCount));
final apiPackages = <String, double>{};
for (String key in apiPages.getKeys()) {
final pkg = _apiDocPkg(key);
final value = apiPages[key];
apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0);
}
apiScore = new Score(apiPackages);
} else {
apiScore = new Score({});
}
final apiScore = new Score(apiPackages);

return Score.max([name, descr, readme, apiScore]).removeLowValues(
fraction: 0.01, minValue: 0.001);
Expand Down Expand Up @@ -528,6 +536,7 @@ class TokenMatch {
}

class TokenIndex {
final Map<String, String> _textHashes = <String, String>{};
final Map<String, Set<String>> _inverseIds = <String, Set<String>>{};
final Map<String, double> _docSizes = <String, double>{};
final int _minLength;
Expand All @@ -541,17 +550,28 @@ class TokenIndex {

/// Indexes [text] under [id], replacing whatever was indexed for [id] before.
///
/// When [text] yields no tokens, any existing entry for [id] is removed and
/// nothing new is added. When the text differs from the previously indexed
/// one (detected through a hash built from the text's `hashCode` and its
/// token count), the old entry is removed first so that stale tokens do not
/// keep matching.
void add(String id, String text) {
  final Set<String> tokens = _tokenize(text, _minLength);
  if (tokens == null || tokens.isEmpty) {
    // Nothing to index: make sure a previously indexed text does not survive.
    if (_textHashes.containsKey(id)) {
      remove(id);
    }
    return;
  }
  final String textHash = '${text.hashCode}/${tokens.length}';
  if (_textHashes.containsKey(id) && _textHashes[id] != textHash) {
    // The text changed since the last call; drop the old tokens first.
    remove(id);
  }
  for (String token in tokens) {
    final Set<String> set = _inverseIds.putIfAbsent(token, () => new Set());
    set.add(id);
  }
  // Document size is a highly scaled-down proxy of the length.
  final docSize = 1 + math.log(1 + tokens.length) / 100;
  _docSizes[id] = docSize;
  _textHashes[id] = textHash;
}

void remove(String id) {
_textHashes.remove(id);
_docSizes.remove(id);
final List<String> removeKeys = [];
_inverseIds.forEach((String key, Set<String> set) {
Expand Down
2 changes: 1 addition & 1 deletion app/lib/search/updater.dart
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class BatchIndexUpdater implements TaskRunner {
await _ongoingBatchUpdate;
}
_batch.add(task);
if (_batch.length < 20) {
if (_batch.length < 5) {
_batchUpdateTimer ??= new Timer(const Duration(seconds: 10), () {
_updateBatch();
});
Expand Down
11 changes: 11 additions & 0 deletions app/lib/shared/search_service.dart
Original file line number Diff line number Diff line change
Expand Up @@ -351,16 +351,21 @@ class ParsedQuery {
/// Match authors and uploaders.
final List<String> emails;

/// Whether the experimental API search is enabled for this query.
final bool isApiEnabled;

/// Creates a parsed query from its already-extracted components.
///
/// All arguments are positional and are stored directly in the fields of the
/// same name; [isApiEnabled] is the trailing argument.
ParsedQuery._(
this.text,
this.packagePrefix,
this.refDependencies,
this.allDependencies,
this.emails,
this.isApiEnabled,
);

factory ParsedQuery._parse(String q) {
String queryText = q ?? '';
queryText = ' $queryText ';
String packagePrefix;
final Match pkgMatch = _packageRegexp.firstMatch(queryText);
if (pkgMatch != null) {
Expand All @@ -381,6 +386,11 @@ class ParsedQuery {
final List<String> allDependencies = extractRegExp(_allDependencyRegExp);
final List<String> emails = extractRegExp(_emailRegexp);

final bool isApiEnabled = queryText.contains(' !!api ');
if (isApiEnabled) {
queryText = queryText.replaceFirst(' !!api ', ' ');
}

queryText = queryText.replaceAll(_whitespacesRegExp, ' ').trim();
if (queryText.isEmpty) {
queryText = null;
Expand All @@ -392,6 +402,7 @@ class ParsedQuery {
dependencies,
allDependencies,
emails,
isApiEnabled,
);
}

Expand Down
2 changes: 1 addition & 1 deletion app/test/search/api_doc_page_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ void main() {
SimplePackageIndex index;

setUpAll(() async {
index = new SimplePackageIndex(enableApiIndex: true);
index = new SimplePackageIndex(apiSearchEnabled: true);
await index.addPackage(new PackageDocument(
package: 'foo',
version: '1.0.0',
Expand Down
13 changes: 13 additions & 0 deletions app/test/shared/search_service_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ void main() {
expect(new SearchQuery.parse(query: 'text').parsedQuery.text, 'text');
expect(new SearchQuery.parse(query: ' text ').query, 'text');
expect(new SearchQuery.parse(query: ' text ').parsedQuery.text, 'text');
expect(new SearchQuery.parse(query: ' text ').parsedQuery.isApiEnabled,
isFalse);
});

// The `!!api` marker must set `isApiEnabled` whether it is the whole query,
// follows the text, or precedes it.
test('experimental API search', () {
expect(new SearchQuery.parse(query: '!!api').parsedQuery.isApiEnabled,
isTrue);
expect(
new SearchQuery.parse(query: 'text !!api').parsedQuery.isApiEnabled,
isTrue);
expect(
new SearchQuery.parse(query: '!!api text').parsedQuery.isApiEnabled,
isTrue);
});

test('no dependency', () {
Expand Down