Skip to content

Index and search public API symbols. #1220

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions app/bin/service/search.dart
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import 'package:logging/logging.dart';
import 'package:pub_dartlang_org/shared/analyzer_client.dart';
import 'package:pub_dartlang_org/shared/analyzer_memcache.dart';
import 'package:pub_dartlang_org/shared/configuration.dart';
import 'package:pub_dartlang_org/shared/dartdoc_client.dart';
import 'package:pub_dartlang_org/shared/dartdoc_memcache.dart';
import 'package:pub_dartlang_org/shared/handler_helpers.dart';
import 'package:pub_dartlang_org/shared/popularity_storage.dart';
import 'package:pub_dartlang_org/shared/scheduler_stats.dart';
Expand Down Expand Up @@ -54,6 +56,11 @@ void _main(FrontendEntryMessage message) {
registerAnalyzerClient(analyzerClient);
registerScopeExitCallback(analyzerClient.close);

registerDartdocMemcache(new DartdocMemcache(memcacheService));
final DartdocClient dartdocClient = new DartdocClient();
registerDartdocClient(dartdocClient);
registerScopeExitCallback(dartdocClient.close);

registerSearchBackend(new SearchBackend(db.dbService));

final Bucket snapshotBucket = await getOrCreateBucket(
Expand Down
49 changes: 49 additions & 0 deletions app/lib/search/backend.dart
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import 'package:json_annotation/json_annotation.dart';
import '../frontend/model_properties.dart';
import '../frontend/models.dart';
import '../shared/analyzer_client.dart';
import '../shared/dartdoc_client.dart';
import '../shared/popularity_storage.dart';
import '../shared/search_service.dart';
import '../shared/utils.dart';
Expand Down Expand Up @@ -68,10 +69,15 @@ class SearchBackend {
versionList.where((pv) => pv != null),
key: (pv) => (pv as PackageVersion).package);

final indexJsonFutures = Future.wait(packages.map(
(p) => dartdocClient.getContentBytes(p.name, 'latest', 'index.json')));

final List<AnalysisView> analysisViews =
await analyzerClient.getAnalysisViews(packages.map((p) =>
p == null ? null : new AnalysisKey(p.name, p.latestVersion)));

final indexJsonContents = await indexJsonFutures;

final List<PackageDocument> results = new List(packages.length);
for (int i = 0; i < packages.length; i++) {
final Package p = packages[i];
Expand All @@ -82,6 +88,9 @@ class SearchBackend {
final analysisView = analysisViews[i];
final double popularity = popularityStorage.lookup(pv.package) ?? 0.0;

final List<int> indexJsonContent = indexJsonContents[i];
final apiDocPages = _apiDocPagesFromIndexJson(indexJsonContent);

results[i] = new PackageDocument(
package: pv.package,
version: p.latestVersion,
Expand All @@ -97,6 +106,7 @@ class SearchBackend {
maintenance: analysisView.maintenanceScore,
dependencies: _buildDependencies(analysisView),
emails: _buildEmails(p, pv),
apiDocPages: apiDocPages,
timestamp: new DateTime.now().toUtc(),
);
}
Expand All @@ -121,6 +131,45 @@ class SearchBackend {
}
return emails.toList()..sort();
}

/// Converts the raw bytes of a dartdoc `index.json` file into [ApiDocPage]s.
///
/// Every entry is grouped under a key made of the leading segments of its
/// `qualifiedName`: two segments when the entry is enclosed by a class, one
/// otherwise. An entry whose `qualifiedName` equals its own key supplies the
/// `href` of the page; every grouped entry contributes its `name` as a symbol
/// of that page. Keys that never receive an `href` produce no page.
///
/// Entries that are not maps, or that lack a string `name` or
/// `qualifiedName`, are skipped so that one malformed entry does not discard
/// the index of the whole package.
///
/// Returns the pages sorted by relative path, or `null` when [bytes] is
/// `null` or the content cannot be decoded.
List<ApiDocPage> _apiDocPagesFromIndexJson(List<int> bytes) {
  if (bytes == null) return null;
  try {
    final list = json.decode(utf8.decode(bytes));

    final pathMap = <String, String>{};
    final symbolMap = <String, Set<String>>{};
    for (var entry in list) {
      if (entry is! Map) continue;
      final rawName = entry['name'];
      final rawQualifiedName = entry['qualifiedName'];
      if (rawName is! String || rawQualifiedName is! String) continue;
      final String name = rawName;
      final String qualifiedName = rawQualifiedName;

      // Common symbols are dropped, except on entries of type `library`.
      if (isCommonApiSymbol(name) && entry['type'] != 'library') {
        continue;
      }

      final enclosedBy = entry['enclosedBy'];
      final enclosedByType = enclosedBy is Map ? enclosedBy['type'] : null;
      final parentLevel = enclosedByType == 'class' ? 2 : 1;
      final String key = qualifiedName.split('.').take(parentLevel).join('.');

      // NOTE(review): only an entry whose qualified name equals its grouping
      // key provides the page path; confirm this is the intended source of
      // the path for class-level pages.
      if (key == qualifiedName) {
        final href = entry['href'];
        // A missing href would otherwise yield a page with a null path and
        // break the sort below.
        if (href is String) pathMap[key] = href;
      }
      symbolMap.putIfAbsent(key, () => new Set<String>()).add(name);
    }

    final results = pathMap.keys.map((key) {
      final symbols = symbolMap[key].toList()..sort();
      return new ApiDocPage(relativePath: pathMap[key], symbols: symbols);
    }).toList();
    results.sort((a, b) => a.relativePath.compareTo(b.relativePath));
    return results;
  } catch (e, st) {
    _logger.warning('Parsing dartdoc index.json failed.', e, st);
  }
  return null;
}
}

class SnapshotStorage {
Expand Down
40 changes: 36 additions & 4 deletions app/lib/search/index_simple.dart
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class SimplePackageIndex implements PackageIndex {
final TokenIndex _nameIndex = new TokenIndex(minLength: 2);
final TokenIndex _descrIndex = new TokenIndex(minLength: 3);
final TokenIndex _readmeIndex = new TokenIndex(minLength: 3);
final TokenIndex _apiDocIndex = new TokenIndex(minLength: 3);
final StringInternPool _internPool = new StringInternPool();
DateTime _lastUpdated;
bool _isReady = false;
Expand Down Expand Up @@ -73,6 +74,10 @@ class SimplePackageIndex implements PackageIndex {
_nameIndex.add(doc.package, doc.package);
_descrIndex.add(doc.package, doc.description);
_readmeIndex.add(doc.package, doc.readme);
for (ApiDocPage page in doc.apiDocPages ?? const []) {
_apiDocIndex.add(
_apiDocPageId(doc.package, page), page.symbols?.join(' '));
}
final String allText = [doc.package, doc.description, doc.readme]
.where((s) => s != null)
.join(' ');
Expand All @@ -94,6 +99,9 @@ class SimplePackageIndex implements PackageIndex {
_descrIndex.remove(package);
_readmeIndex.remove(package);
_normalizedPackageText.remove(package);
for (ApiDocPage page in doc.apiDocPages ?? const []) {
_apiDocIndex.remove(_apiDocPageId(package, page));
}
}

@override
Expand Down Expand Up @@ -280,20 +288,36 @@ class SimplePackageIndex implements PackageIndex {
final nameTokens = _nameIndex.lookupTokens(word);
final descrTokens = _descrIndex.lookupTokens(word);
final readmeTokens = _readmeIndex.lookupTokens(word);

final maxTokenLength = math.max(nameTokens.maxLength,
math.max(descrTokens.maxLength, readmeTokens.maxLength));
final apiDocTokens = _apiDocIndex.lookupTokens(word);
final maxTokenLength = [
nameTokens.maxLength,
descrTokens.maxLength,
readmeTokens.maxLength,
apiDocTokens.maxLength
].fold(0, math.max);
nameTokens.removeShortTokens(maxTokenLength);
descrTokens.removeShortTokens(maxTokenLength);
readmeTokens.removeShortTokens(maxTokenLength);
apiDocTokens.removeShortTokens(maxTokenLength);

final name = new Score(_nameIndex.scoreDocs(nameTokens,
weight: 1.00, wordCount: wordCount));
final descr = new Score(_descrIndex.scoreDocs(descrTokens,
weight: 0.95, wordCount: wordCount));
final readme = new Score(_readmeIndex.scoreDocs(readmeTokens,
weight: 0.90, wordCount: wordCount));
return Score.max([name, descr, readme]).removeLowValues(

final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens,
weight: 0.80, wordCount: wordCount));
final apiPackages = <String, double>{};
for (String key in apiPages.getKeys()) {
final pkg = _apiDocPkg(key);
final value = apiPages[key];
apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0);
}
final apiScore = new Score(apiPackages);

return Score.max([name, descr, readme, apiScore]).removeLowValues(
fraction: 0.01, minValue: 0.001);
}).toList();
Score score = Score.multiply(wordScores);
Expand Down Expand Up @@ -359,6 +383,14 @@ class SimplePackageIndex implements PackageIndex {
if (b.updated == null) return 1;
return -a.updated.compareTo(b.updated);
}

/// Builds the id of [page] within [package]: the package name and the page's
/// relative path, separated by `:`.
String _apiDocPageId(String package, ApiDocPage page) =>
    '$package:${page.relativePath}';

/// Extracts the package part of an id built by [_apiDocPageId]: the text
/// before the first `:`, or the whole id when it contains no `:`.
String _apiDocPkg(String id) {
  final separatorIndex = id.indexOf(':');
  return separatorIndex < 0 ? id : id.substring(0, separatorIndex);
}
}

class Score {
Expand Down
15 changes: 15 additions & 0 deletions app/lib/search/text_utils.dart
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,21 @@ final RegExp _nonCharacterRegExp = new RegExp('[^a-z0-9]');
final RegExp _multiWhitespaceRegExp = new RegExp('\\s+');
final RegExp _exactTermRegExp = new RegExp(r'"([^"]+)"');

/// Member names that [isCommonApiSymbol] treats as common API symbols.
final Set<String> _commonApiSymbols = new Set<String>.from([
  'toString',
  'noSuchMethod',
  'hashCode',
  'runtimeType',
]);

/// Whether [symbol] is a common API symbol: either one of the names in
/// [_commonApiSymbols] or an operator (its name starts with `operator `).
///
/// Returns `false` for a `null` [symbol] instead of throwing.
bool isCommonApiSymbol(String symbol) {
  if (symbol == null) return false;
  return _commonApiSymbols.contains(symbol) || symbol.startsWith('operator ');
}

String compactText(String text, {int maxLength: -1}) {
if (text == null) return '';
String t = text.replaceAll(_multiWhitespaceRegExp, ' ').trim();
Expand Down
34 changes: 22 additions & 12 deletions app/lib/shared/dartdoc_client.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import 'dart:async';
import 'package:gcloud/service_scope.dart' as ss;
import 'package:http/http.dart' as http;
import 'package:logging/logging.dart';
import 'package:path/path.dart' as p;
import 'package:pool/pool.dart';

import '../dartdoc/dartdoc_runner.dart' show statusFilePath;
Expand All @@ -15,6 +16,7 @@ import '../dartdoc/models.dart' show DartdocEntry;
import 'configuration.dart';
import 'dartdoc_memcache.dart';
import 'notification.dart' show notifyService;
import 'utils.dart' show getUrlWithRetry;

export '../dartdoc/models.dart' show DartdocEntry;

Expand Down Expand Up @@ -57,25 +59,33 @@ class DartdocClient {
_client.close();
}

Future<DartdocEntry> _getEntry(String package, String version) async {
final cachedContent =
await dartdocMemcache?.getEntryBytes(package, version, true);
if (cachedContent != null) {
return new DartdocEntry.fromBytes(cachedContent);
}
final url =
'$_dartdocServiceHttpHostPort/documentation/$package/$version/$statusFilePath';
/// Fetches the raw bytes of [relativePath] for [package] at [version].
///
/// The request URL is `_dartdocServiceHttpHostPort`, followed by
/// `documentation`, [package], [version] and [relativePath].
///
/// Returns `null` when the response status is not 200 or when the request
/// throws; the failure is logged together with the error.
Future<List<int>> getContentBytes(
    String package, String version, String relativePath) async {
  // `p.url` always joins with `/`. The default `package:path` context uses
  // the host platform's separator, which is not valid for URLs everywhere.
  final url = p.url.join(_dartdocServiceHttpHostPort, 'documentation',
      package, version, relativePath);
  try {
    final rs = await getUrlWithRetry(_client, url);
    if (rs.statusCode != 200) {
      return null;
    }
    return rs.bodyBytes;
  } catch (e, st) {
    // Best-effort fetch: report the cause instead of silently dropping it.
    _logger.info('Error requesting entry for: $package $version', e, st);
  }
  return null;
}

/// Loads the [DartdocEntry] of [package] at [version].
///
/// The memcache (when one is registered) is consulted first. On a miss the
/// status file is fetched through [getContentBytes], written back to the
/// memcache and then parsed. Returns `null` when no content is available.
Future<DartdocEntry> _getEntry(String package, String version) async {
  final cached = await dartdocMemcache?.getEntryBytes(package, version, true);
  if (cached != null) {
    return new DartdocEntry.fromBytes(cached);
  }

  final fetched = await getContentBytes(package, version, statusFilePath);
  if (fetched == null) {
    return null;
  }

  // Store before parsing, in the same order as the cache-miss path requires.
  await dartdocMemcache?.setEntryBytes(package, version, true, fetched);
  return new DartdocEntry.fromBytes(fetched);
}
}
23 changes: 23 additions & 0 deletions app/lib/shared/search_service.dart
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class PackageDocument extends Object with _$PackageDocumentSerializerMixin {
final Map<String, String> dependencies;
final List<String> emails;

final List<ApiDocPage> apiDocPages;

/// The creation timestamp of this document.
final DateTime timestamp;

Expand All @@ -76,6 +78,7 @@ class PackageDocument extends Object with _$PackageDocumentSerializerMixin {
this.maintenance,
this.dependencies,
this.emails,
this.apiDocPages,
this.timestamp,
});

Expand Down Expand Up @@ -104,11 +107,31 @@ class PackageDocument extends Object with _$PackageDocumentSerializerMixin {
value: (key) => internFn(dependencies[key]),
),
emails: emails?.map(internFn)?.toList(),
apiDocPages: apiDocPages?.map((p) => p.intern(internFn))?.toList(),
timestamp: timestamp,
);
}
}

/// A reference to a single API documentation page, together with the symbols
/// that are associated with it.
@JsonSerializable()
class ApiDocPage extends Object with _$ApiDocPageSerializerMixin {
  /// The location of the page, as a relative path.
  final String relativePath;

  /// The symbol names associated with the page; may be `null`.
  final List<String> symbols;

  ApiDocPage({this.relativePath, this.symbols});

  /// Creates an instance from its JSON representation.
  factory ApiDocPage.fromJson(Map<String, dynamic> json) =>
      _$ApiDocPageFromJson(json);

  /// Returns a copy whose strings have all been passed through [internFn].
  ///
  /// A `null` [symbols] list stays `null` in the copy.
  ApiDocPage intern(String internFn(String value)) => new ApiDocPage(
        relativePath: internFn(relativePath),
        symbols: symbols == null ? null : symbols.map(internFn).toList(),
      );
}

/// How search results should be ordered.
enum SearchOrder {
/// Search score should be a weighted value of [text], [popularity], [health]
Expand Down
18 changes: 18 additions & 0 deletions app/lib/shared/search_service.g.dart
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ PackageDocument _$PackageDocumentFromJson(Map<String, dynamic> json) =>
? null
: new Map<String, String>.from(json['dependencies'] as Map),
emails: (json['emails'] as List)?.map((e) => e as String)?.toList(),
apiDocPages: (json['apiDocPages'] as List)
?.map((e) => e == null
? null
: new ApiDocPage.fromJson(e as Map<String, dynamic>))
?.toList(),
timestamp: json['timestamp'] == null
? null
: DateTime.parse(json['timestamp'] as String));
Expand All @@ -54,6 +59,7 @@ abstract class _$PackageDocumentSerializerMixin {
double get maintenance;
Map<String, String> get dependencies;
List<String> get emails;
List<ApiDocPage> get apiDocPages;
DateTime get timestamp;
Map<String, dynamic> toJson() => <String, dynamic>{
'package': package,
Expand All @@ -70,10 +76,22 @@ abstract class _$PackageDocumentSerializerMixin {
'maintenance': maintenance,
'dependencies': dependencies,
'emails': emails,
'apiDocPages': apiDocPages,
'timestamp': timestamp?.toIso8601String()
};
}

// Builds an [ApiDocPage] from its JSON map. A missing or null `symbols`
// value results in a null list rather than an empty one.
ApiDocPage _$ApiDocPageFromJson(Map<String, dynamic> json) => new ApiDocPage(
relativePath: json['relativePath'] as String,
symbols: (json['symbols'] as List)?.map((e) => e as String)?.toList());

// Serializer mixin for [ApiDocPage]: exposes `toJson()`, which writes the
// `relativePath` and `symbols` getters under keys of the same name.
abstract class _$ApiDocPageSerializerMixin {
String get relativePath;
List<String> get symbols;
Map<String, dynamic> toJson() =>
<String, dynamic>{'relativePath': relativePath, 'symbols': symbols};
}

PackageSearchResult _$PackageSearchResultFromJson(Map<String, dynamic> json) =>
new PackageSearchResult(
indexUpdated: json['indexUpdated'] as String,
Expand Down
6 changes: 6 additions & 0 deletions app/test/frontend/handlers_test_utils.dart
Original file line number Diff line number Diff line change
Expand Up @@ -318,4 +318,10 @@ class DartdocClientMock implements DartdocClient {

@override
Future close() async {}

/// Mock implementation: completes with `null` for every request.
@override
Future<List<int>> getContentBytes(
        String package, String version, String relativePath) async =>
    null;
}
Loading