diff --git a/app/lib/search/index_simple.dart b/app/lib/search/index_simple.dart index 06e4ea69e4..50e5278dab 100644 --- a/app/lib/search/index_simple.dart +++ b/app/lib/search/index_simple.dart @@ -3,7 +3,6 @@ // BSD-style license that can be found in the LICENSE file. import 'dart:async'; -import 'dart:io'; import 'dart:math' as math; import 'package:gcloud/service_scope.dart' as ss; @@ -24,7 +23,6 @@ void registerPackageIndex(PackageIndex index) => ss.register(#packageIndexService, index); class SimplePackageIndex implements PackageIndex { - final bool enableApiIndex; final Map _packages = {}; final Map _normalizedPackageText = {}; final TokenIndex _nameIndex = new TokenIndex(minLength: 2); @@ -32,12 +30,12 @@ class SimplePackageIndex implements PackageIndex { final TokenIndex _readmeIndex = new TokenIndex(minLength: 3); final TokenIndex _apiDocIndex = new TokenIndex(minLength: 3); final StringInternPool _internPool = new StringInternPool(); + final bool _apiSearchEnabled; DateTime _lastUpdated; bool _isReady = false; - SimplePackageIndex({bool enableApiIndex}) - : this.enableApiIndex = - enableApiIndex ?? Platform.environment['SEARCH_API_INDEX'] == '1'; + SimplePackageIndex({bool apiSearchEnabled: false}) + : _apiSearchEnabled = apiSearchEnabled; @override bool get isReady => _isReady; @@ -74,17 +72,20 @@ class SimplePackageIndex implements PackageIndex { @override Future addPackage(PackageDocument document) async { final PackageDocument doc = document.intern(_internPool.intern); - await removePackage(doc.package); - if (document.isDiscontinued == true) return; // isDiscontinued may be null + + // isDiscontinued may be null + if (document.isDiscontinued == true) { + await removePackage(doc.package); + return; + } + _packages[doc.package] = doc; _nameIndex.add(doc.package, doc.package); _descrIndex.add(doc.package, doc.description); _readmeIndex.add(doc.package, doc.readme); - if (enableApiIndex) { - for (ApiDocPage page in doc.apiDocPages ?? const []) { - _apiDocIndex.add( - _apiDocPageId(doc.package, page), page.symbols?.join(' ')); - } + for (ApiDocPage page in doc.apiDocPages ?? const []) { + _apiDocIndex.add( + _apiDocPageId(doc.package, page), page.symbols?.join(' ')); } final String allText = [doc.package, doc.description, doc.readme] .where((s) => s != null) @@ -179,7 +180,8 @@ class SimplePackageIndex implements PackageIndex { } // do text matching - final Score textScore = _searchText(packages, query.parsedQuery.text); + final Score textScore = _searchText(packages, query.parsedQuery.text, + _apiSearchEnabled || query.parsedQuery.isApiEnabled); // filter packages that doesn't match text query if (textScore != null) { @@ -299,7 +301,7 @@ class SimplePackageIndex implements PackageIndex { return new Score(values); } - Score _searchText(Set packages, String text) { + Score _searchText(Set packages, String text, bool isExperimental) { if (text != null && text.isNotEmpty) { final List words = splitForIndexing(text).toList(); final int wordCount = words.length; @@ -307,7 +309,8 @@ class SimplePackageIndex implements PackageIndex { final nameTokens = _nameIndex.lookupTokens(word); final descrTokens = _descrIndex.lookupTokens(word); final readmeTokens = _readmeIndex.lookupTokens(word); - final apiDocTokens = _apiDocIndex.lookupTokens(word); + final apiDocTokens = + isExperimental ? _apiDocIndex.lookupTokens(word) : new TokenMatch(); final maxTokenLength = [ nameTokens.maxLength, descrTokens.maxLength, @@ -326,15 +329,20 @@ class SimplePackageIndex implements PackageIndex { final readme = new Score(_readmeIndex.scoreDocs(readmeTokens, weight: 0.90, wordCount: wordCount)); - final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens, - weight: 0.80, wordCount: wordCount)); - final apiPackages = {}; - for (String key in apiPages.getKeys()) { - final pkg = _apiDocPkg(key); - final value = apiPages[key]; - apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0); + Score apiScore; + if (isExperimental) { + final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens, + weight: 0.80, wordCount: wordCount)); + final apiPackages = {}; + for (String key in apiPages.getKeys()) { + final pkg = _apiDocPkg(key); + final value = apiPages[key]; + apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0); + } + apiScore = new Score(apiPackages); + } else { + apiScore = new Score({}); } - final apiScore = new Score(apiPackages); return Score.max([name, descr, readme, apiScore]).removeLowValues( fraction: 0.01, minValue: 0.001); @@ -528,6 +536,7 @@ class TokenMatch { } class TokenIndex { + final Map _textHashes = {}; final Map> _inverseIds = >{}; final Map _docSizes = {}; final int _minLength; @@ -541,7 +550,16 @@ class TokenIndex { void add(String id, String text) { final Set tokens = _tokenize(text, _minLength); - if (tokens == null || tokens.isEmpty) return; + if (tokens == null || tokens.isEmpty) { + if (_textHashes.containsKey(id)) { + remove(id); + } + return; + } + final String textHash = '${text.hashCode}/${tokens.length}'; + if (_textHashes.containsKey(id) && _textHashes[id] != textHash) { + remove(id); + } for (String token in tokens) { final Set set = _inverseIds.putIfAbsent(token, () => new Set()); set.add(id); @@ -549,9 +567,11 @@ class TokenIndex { // Document size is a highly scaled-down proxy of the length. final docSize = 1 + math.log(1 + tokens.length) / 100; _docSizes[id] = docSize; + _textHashes[id] = textHash; } void remove(String id) { + _textHashes.remove(id); _docSizes.remove(id); final List removeKeys = []; _inverseIds.forEach((String key, Set set) { diff --git a/app/lib/search/updater.dart b/app/lib/search/updater.dart index e52e8d4edf..359d83ccd6 100644 --- a/app/lib/search/updater.dart +++ b/app/lib/search/updater.dart @@ -86,7 +86,7 @@ class BatchIndexUpdater implements TaskRunner { await _ongoingBatchUpdate; } _batch.add(task); - if (_batch.length < 20) { + if (_batch.length < 5) { _batchUpdateTimer ??= new Timer(const Duration(seconds: 10), () { _updateBatch(); }); diff --git a/app/lib/shared/search_service.dart b/app/lib/shared/search_service.dart index 58f261e7fe..78c3a95d04 100644 --- a/app/lib/shared/search_service.dart +++ b/app/lib/shared/search_service.dart @@ -351,16 +351,21 @@ class ParsedQuery { /// Match authors and uploaders. final List emails; + /// Enable experimental API search. + final bool isApiEnabled; + ParsedQuery._( this.text, this.packagePrefix, this.refDependencies, this.allDependencies, this.emails, + this.isApiEnabled, ); factory ParsedQuery._parse(String q) { String queryText = q ?? ''; + queryText = ' $queryText '; String packagePrefix; final Match pkgMatch = _packageRegexp.firstMatch(queryText); if (pkgMatch != null) { @@ -381,6 +386,11 @@ class ParsedQuery { final List allDependencies = extractRegExp(_allDependencyRegExp); final List emails = extractRegExp(_emailRegexp); + final bool isApiEnabled = queryText.contains(' !!api '); + if (isApiEnabled) { + queryText = queryText.replaceFirst(' !!api ', ' '); + } + queryText = queryText.replaceAll(_whitespacesRegExp, ' ').trim(); if (queryText.isEmpty) { queryText = null; @@ -392,6 +402,7 @@ class ParsedQuery { dependencies, allDependencies, emails, + isApiEnabled, ); } diff --git a/app/test/search/api_doc_page_test.dart b/app/test/search/api_doc_page_test.dart index 0ea6220358..656a44dd10 100644 --- a/app/test/search/api_doc_page_test.dart +++ b/app/test/search/api_doc_page_test.dart @@ -15,7 +15,7 @@ void main() { SimplePackageIndex index; setUpAll(() async { - index = new SimplePackageIndex(enableApiIndex: true); + index = new SimplePackageIndex(apiSearchEnabled: true); await index.addPackage(new PackageDocument( package: 'foo', version: '1.0.0', diff --git a/app/test/shared/search_service_test.dart b/app/test/shared/search_service_test.dart index 5cd3cbe089..6b6992c3ac 100644 --- a/app/test/shared/search_service_test.dart +++ b/app/test/shared/search_service_test.dart @@ -29,6 +29,19 @@ void main() { expect(new SearchQuery.parse(query: 'text').parsedQuery.text, 'text'); expect(new SearchQuery.parse(query: ' text ').query, 'text'); expect(new SearchQuery.parse(query: ' text ').parsedQuery.text, 'text'); + expect(new SearchQuery.parse(query: ' text ').parsedQuery.isApiEnabled, + isFalse); + }); + + test('experimental API search', () { + expect(new SearchQuery.parse(query: '!!api').parsedQuery.isApiEnabled, + isTrue); + expect( + new SearchQuery.parse(query: 'text !!api').parsedQuery.isApiEnabled, + isTrue); + expect( + new SearchQuery.parse(query: '!!api text').parsedQuery.isApiEnabled, + isTrue); }); test('no dependency', () {