Skip to content

Commit 1e6f6b7

Browse files
committed
Index and search public API symbols.
1 parent 736ef88 commit 1e6f6b7

File tree

9 files changed

+300
-16
lines changed

9 files changed

+300
-16
lines changed

app/bin/service/search.dart

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import 'package:logging/logging.dart';
1414
import 'package:pub_dartlang_org/shared/analyzer_client.dart';
1515
import 'package:pub_dartlang_org/shared/analyzer_memcache.dart';
1616
import 'package:pub_dartlang_org/shared/configuration.dart';
17+
import 'package:pub_dartlang_org/shared/dartdoc_client.dart';
18+
import 'package:pub_dartlang_org/shared/dartdoc_memcache.dart';
1719
import 'package:pub_dartlang_org/shared/handler_helpers.dart';
1820
import 'package:pub_dartlang_org/shared/popularity_storage.dart';
1921
import 'package:pub_dartlang_org/shared/scheduler_stats.dart';
@@ -54,6 +56,11 @@ void _main(FrontendEntryMessage message) {
5456
registerAnalyzerClient(analyzerClient);
5557
registerScopeExitCallback(analyzerClient.close);
5658

59+
registerDartdocMemcache(new DartdocMemcache(memcacheService));
60+
final DartdocClient dartdocClient = new DartdocClient();
61+
registerDartdocClient(dartdocClient);
62+
registerScopeExitCallback(dartdocClient.close);
63+
5764
registerSearchBackend(new SearchBackend(db.dbService));
5865

5966
final Bucket snapshotBucket = await getOrCreateBucket(

app/lib/search/backend.dart

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import 'package:json_annotation/json_annotation.dart';
1717
import '../frontend/model_properties.dart';
1818
import '../frontend/models.dart';
1919
import '../shared/analyzer_client.dart';
20+
import '../shared/dartdoc_client.dart';
2021
import '../shared/popularity_storage.dart';
2122
import '../shared/search_service.dart';
2223
import '../shared/utils.dart';
@@ -68,10 +69,15 @@ class SearchBackend {
6869
versionList.where((pv) => pv != null),
6970
key: (pv) => (pv as PackageVersion).package);
7071

72+
final indexJsonFutures = Future.wait(packages.map(
73+
(p) => dartdocClient.getContentBytes(p.name, 'latest', 'index.json')));
74+
7175
final List<AnalysisView> analysisViews =
7276
await analyzerClient.getAnalysisViews(packages.map((p) =>
7377
p == null ? null : new AnalysisKey(p.name, p.latestVersion)));
7478

79+
final indexJsonContents = await indexJsonFutures;
80+
7581
final List<PackageDocument> results = new List(packages.length);
7682
for (int i = 0; i < packages.length; i++) {
7783
final Package p = packages[i];
@@ -82,6 +88,9 @@ class SearchBackend {
8288
final analysisView = analysisViews[i];
8389
final double popularity = popularityStorage.lookup(pv.package) ?? 0.0;
8490

91+
final List<int> indexJsonContent = indexJsonContents[i];
92+
final apiDocPages = _apiDocPagesFromIndexJson(indexJsonContent);
93+
8594
results[i] = new PackageDocument(
8695
package: pv.package,
8796
version: p.latestVersion,
@@ -97,6 +106,7 @@ class SearchBackend {
97106
maintenance: analysisView.maintenanceScore,
98107
dependencies: _buildDependencies(analysisView),
99108
emails: _buildEmails(p, pv),
109+
apiDocPages: apiDocPages,
100110
timestamp: new DateTime.now().toUtc(),
101111
);
102112
}
@@ -121,6 +131,45 @@ class SearchBackend {
121131
}
122132
return emails.toList()..sort();
123133
}
134+
135+
/// Converts the raw bytes of a dartdoc `index.json` into [ApiDocPage]s.
///
/// Each entry is attributed to a page key: the first segment of its
/// `qualifiedName`, or the first two segments when the entry is enclosed by a
/// class. The entry whose `qualifiedName` equals its own key supplies the
/// page's `href`; every kept entry contributes its `name` to the symbols of
/// its page. Common symbols (see [isCommonApiSymbol]) are skipped unless the
/// entry is a library.
///
/// Returns `null` when [bytes] is `null`, or when decoding or processing
/// throws (the failure is logged as a warning). The returned pages are sorted
/// by relative path, and the symbols of each page are sorted as well.
List<ApiDocPage> _apiDocPagesFromIndexJson(List<int> bytes) {
  if (bytes == null) return null;
  try {
    final entries = json.decode(utf8.decode(bytes));

    final hrefByPage = <String, String>{};
    final symbolsByPage = <String, Set<String>>{};
    for (Map entry in entries) {
      final name = entry['name'];
      final type = entry['type'];
      // Ubiquitous members only add noise; library entries are always kept.
      final skipEntry = isCommonApiSymbol(name) && type != 'library';
      if (skipEntry) continue;

      final String qualifiedName = entry['qualifiedName'];
      final enclosedBy = entry['enclosedBy'];
      // Class members are grouped under `library.Class`, anything else under
      // the first segment of the qualified name.
      var segmentCount = 1;
      if (enclosedBy is Map && enclosedBy['type'] == 'class') {
        segmentCount = 2;
      }
      final String pageKey =
          qualifiedName.split('.').take(segmentCount).join('.');

      // Only the entry that *is* the page provides the page's path.
      if (pageKey == qualifiedName) {
        hrefByPage[pageKey] = entry['href'];
      }
      symbolsByPage.putIfAbsent(pageKey, () => new Set()).add(entry['name']);
    }

    final pages = <ApiDocPage>[];
    for (String pageKey in hrefByPage.keys) {
      final sortedSymbols = symbolsByPage[pageKey].toList()..sort();
      pages.add(new ApiDocPage(
          relativePath: hrefByPage[pageKey], symbols: sortedSymbols));
    }
    pages.sort((a, b) => a.relativePath.compareTo(b.relativePath));
    return pages;
  } catch (e, st) {
    _logger.warning('Parsing dartdoc index.json failed.', e, st);
    return null;
  }
}
124173
}
125174

126175
class SnapshotStorage {

app/lib/search/index_simple.dart

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class SimplePackageIndex implements PackageIndex {
2828
final TokenIndex _nameIndex = new TokenIndex(minLength: 2);
2929
final TokenIndex _descrIndex = new TokenIndex(minLength: 3);
3030
final TokenIndex _readmeIndex = new TokenIndex(minLength: 3);
31+
final TokenIndex _apiDocIndex = new TokenIndex(minLength: 3);
3132
final StringInternPool _internPool = new StringInternPool();
3233
DateTime _lastUpdated;
3334
bool _isReady = false;
@@ -73,6 +74,10 @@ class SimplePackageIndex implements PackageIndex {
7374
_nameIndex.add(doc.package, doc.package);
7475
_descrIndex.add(doc.package, doc.description);
7576
_readmeIndex.add(doc.package, doc.readme);
77+
for (ApiDocPage page in doc.apiDocPages ?? const []) {
78+
_apiDocIndex.add(
79+
_apiDocPageId(doc.package, page), page.symbols?.join(' '));
80+
}
7681
final String allText = [doc.package, doc.description, doc.readme]
7782
.where((s) => s != null)
7883
.join(' ');
@@ -94,6 +99,9 @@ class SimplePackageIndex implements PackageIndex {
9499
_descrIndex.remove(package);
95100
_readmeIndex.remove(package);
96101
_normalizedPackageText.remove(package);
102+
for (ApiDocPage page in doc.apiDocPages ?? const []) {
103+
_apiDocIndex.remove(_apiDocPageId(package, page));
104+
}
97105
}
98106

99107
@override
@@ -280,20 +288,36 @@ class SimplePackageIndex implements PackageIndex {
280288
final nameTokens = _nameIndex.lookupTokens(word);
281289
final descrTokens = _descrIndex.lookupTokens(word);
282290
final readmeTokens = _readmeIndex.lookupTokens(word);
283-
284-
final maxTokenLength = math.max(nameTokens.maxLength,
285-
math.max(descrTokens.maxLength, readmeTokens.maxLength));
291+
final apiDocTokens = _apiDocIndex.lookupTokens(word);
292+
final maxTokenLength = [
293+
nameTokens.maxLength,
294+
descrTokens.maxLength,
295+
readmeTokens.maxLength,
296+
apiDocTokens.maxLength
297+
].fold(0, math.max);
286298
nameTokens.removeShortTokens(maxTokenLength);
287299
descrTokens.removeShortTokens(maxTokenLength);
288300
readmeTokens.removeShortTokens(maxTokenLength);
301+
apiDocTokens.removeShortTokens(maxTokenLength);
289302

290303
final name = new Score(_nameIndex.scoreDocs(nameTokens,
291304
weight: 1.00, wordCount: wordCount));
292305
final descr = new Score(_descrIndex.scoreDocs(descrTokens,
293306
weight: 0.95, wordCount: wordCount));
294307
final readme = new Score(_readmeIndex.scoreDocs(readmeTokens,
295308
weight: 0.90, wordCount: wordCount));
296-
return Score.max([name, descr, readme]).removeLowValues(
309+
310+
final apiPages = new Score(_apiDocIndex.scoreDocs(apiDocTokens,
311+
weight: 0.80, wordCount: wordCount));
312+
final apiPackages = <String, double>{};
313+
for (String key in apiPages.getKeys()) {
314+
final pkg = _apiDocPkg(key);
315+
final value = apiPages[key];
316+
apiPackages[pkg] = math.max(value, apiPackages[pkg] ?? 0.0);
317+
}
318+
final apiScore = new Score(apiPackages);
319+
320+
return Score.max([name, descr, readme, apiScore]).removeLowValues(
297321
fraction: 0.01, minValue: 0.001);
298322
}).toList();
299323
Score score = Score.multiply(wordScores);
@@ -359,6 +383,14 @@ class SimplePackageIndex implements PackageIndex {
359383
if (b.updated == null) return 1;
360384
return -a.updated.compareTo(b.updated);
361385
}
386+
387+
/// Builds the id under which [page] of [package] is stored in the API doc
/// index, in the form `<package>:<relativePath>`.
String _apiDocPageId(String package, ApiDocPage page) =>
    '$package:${page.relativePath}';
390+
391+
/// Extracts the package name from an id built by [_apiDocPageId]: everything
/// before the first `:` (or the whole [id] when it contains no `:`).
String _apiDocPkg(String id) => id.split(':').first;
362394
}
363395

364396
class Score {

app/lib/search/text_utils.dart

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@ final RegExp _nonCharacterRegExp = new RegExp('[^a-z0-9]');
1212
final RegExp _multiWhitespaceRegExp = new RegExp('\\s+');
1313
final RegExp _exactTermRegExp = new RegExp(r'"([^"]+)"');
1414

15+
/// Names of `Object` members that show up on practically every class.
final _commonApiSymbols = new Set.from([
  'toString',
  'noSuchMethod',
  'hashCode',
  'runtimeType',
]);

/// Whether [symbol] is too generic to be indexed as an API symbol: either it
/// is listed in [_commonApiSymbols] or it is an operator (its name starts
/// with `operator `).
bool isCommonApiSymbol(String symbol) =>
    _commonApiSymbols.contains(symbol) || symbol.startsWith('operator ');
29+
1530
String compactText(String text, {int maxLength: -1}) {
1631
if (text == null) return '';
1732
String t = text.replaceAll(_multiWhitespaceRegExp, ' ').trim();

app/lib/shared/dartdoc_client.dart

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import 'dart:async';
77
import 'package:gcloud/service_scope.dart' as ss;
88
import 'package:http/http.dart' as http;
99
import 'package:logging/logging.dart';
10+
import 'package:path/path.dart' as p;
1011
import 'package:pool/pool.dart';
1112

1213
import '../dartdoc/dartdoc_runner.dart' show statusFilePath;
@@ -15,6 +16,7 @@ import '../dartdoc/models.dart' show DartdocEntry;
1516
import 'configuration.dart';
1617
import 'dartdoc_memcache.dart';
1718
import 'notification.dart' show notifyService;
19+
import 'utils.dart' show getUrlWithRetry;
1820

1921
export '../dartdoc/models.dart' show DartdocEntry;
2022

@@ -57,25 +59,33 @@ class DartdocClient {
5759
_client.close();
5860
}
5961

60-
Future<DartdocEntry> _getEntry(String package, String version) async {
61-
final cachedContent =
62-
await dartdocMemcache?.getEntryBytes(package, version, true);
63-
if (cachedContent != null) {
64-
return new DartdocEntry.fromBytes(cachedContent);
65-
}
66-
final url =
67-
'$_dartdocServiceHttpHostPort/documentation/$package/$version/$statusFilePath';
62+
/// Fetches the raw bytes of a generated documentation file from the dartdoc
/// service.
///
/// The file is requested from
/// `<service host>/documentation/<package>/<version>/<relativePath>`.
///
/// Returns `null` when the service answers with a non-200 status code or when
/// the request throws; failures are logged and never propagated to the caller.
Future<List<int>> getContentBytes(
    String package, String version, String relativePath) async {
  // Build the address with the URL context: the top-level `p.join` follows
  // the host platform's path style (e.g. `\` on Windows), which does not
  // yield a valid URL everywhere.
  final url = p.url.join(_dartdocServiceHttpHostPort, 'documentation', package,
      version, relativePath);
  try {
    final rs = await getUrlWithRetry(_client, url);
    if (rs.statusCode != 200) {
      return null;
    }
    return rs.bodyBytes;
  } catch (e, st) {
    // Stay best-effort, but keep the cause of the failure in the log.
    _logger.info('Error requesting entry for: $package $version', e, st);
  }
  return null;
}
77+
78+
/// Loads the [DartdocEntry] of [package] / [version].
///
/// The memcache (when available) is consulted first. On a miss the status
/// file is fetched through [getContentBytes], stored in the memcache and then
/// decoded. Returns `null` when no content could be fetched.
Future<DartdocEntry> _getEntry(String package, String version) async {
  final cachedBytes =
      await dartdocMemcache?.getEntryBytes(package, version, true);
  if (cachedBytes != null) return new DartdocEntry.fromBytes(cachedBytes);

  final fetchedBytes = await getContentBytes(package, version, statusFilePath);
  if (fetchedBytes == null) return null;

  await dartdocMemcache?.setEntryBytes(package, version, true, fetchedBytes);
  return new DartdocEntry.fromBytes(fetchedBytes);
}
8191
}

app/lib/shared/search_service.dart

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ class PackageDocument extends Object with _$PackageDocumentSerializerMixin {
5858
final Map<String, String> dependencies;
5959
final List<String> emails;
6060

61+
final List<ApiDocPage> apiDocPages;
62+
6163
/// The creation timestamp of this document.
6264
final DateTime timestamp;
6365

@@ -76,6 +78,7 @@ class PackageDocument extends Object with _$PackageDocumentSerializerMixin {
7678
this.maintenance,
7779
this.dependencies,
7880
this.emails,
81+
this.apiDocPages,
7982
this.timestamp,
8083
});
8184

@@ -104,11 +107,31 @@ class PackageDocument extends Object with _$PackageDocumentSerializerMixin {
104107
value: (key) => internFn(dependencies[key]),
105108
),
106109
emails: emails?.map(internFn)?.toList(),
110+
apiDocPages: apiDocPages?.map((p) => p.intern(internFn))?.toList(),
107111
timestamp: timestamp,
108112
);
109113
}
110114
}
111115

116+
/// A reference to an API doc page.
@JsonSerializable()
class ApiDocPage extends Object with _$ApiDocPageSerializerMixin {
  /// The path of the page (the `href` of the matching dartdoc index entry).
  final String relativePath;

  /// The names of the API symbols attributed to this page.
  final List<String> symbols;

  ApiDocPage({this.relativePath, this.symbols});

  factory ApiDocPage.fromJson(Map<String, dynamic> json) =>
      _$ApiDocPageFromJson(json);

  /// Returns a copy of this page with every string passed through
  /// [internFn]; a `null` [symbols] list stays `null`.
  ApiDocPage intern(String internFn(String value)) => new ApiDocPage(
        relativePath: internFn(relativePath),
        symbols: symbols?.map(internFn)?.toList(),
      );
}
134+
112135
/// How search results should be ordered.
113136
enum SearchOrder {
114137
/// Search score should be a weighted value of [text], [popularity], [health]

app/lib/shared/search_service.g.dart

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ PackageDocument _$PackageDocumentFromJson(Map<String, dynamic> json) =>
3535
? null
3636
: new Map<String, String>.from(json['dependencies'] as Map),
3737
emails: (json['emails'] as List)?.map((e) => e as String)?.toList(),
38+
apiDocPages: (json['apiDocPages'] as List)
39+
?.map((e) => e == null
40+
? null
41+
: new ApiDocPage.fromJson(e as Map<String, dynamic>))
42+
?.toList(),
3843
timestamp: json['timestamp'] == null
3944
? null
4045
: DateTime.parse(json['timestamp'] as String));
@@ -54,6 +59,7 @@ abstract class _$PackageDocumentSerializerMixin {
5459
double get maintenance;
5560
Map<String, String> get dependencies;
5661
List<String> get emails;
62+
List<ApiDocPage> get apiDocPages;
5763
DateTime get timestamp;
5864
Map<String, dynamic> toJson() => <String, dynamic>{
5965
'package': package,
@@ -70,10 +76,22 @@ abstract class _$PackageDocumentSerializerMixin {
7076
'maintenance': maintenance,
7177
'dependencies': dependencies,
7278
'emails': emails,
79+
'apiDocPages': apiDocPages,
7380
'timestamp': timestamp?.toIso8601String()
7481
};
7582
}
7683

84+
ApiDocPage _$ApiDocPageFromJson(Map<String, dynamic> json) => new ApiDocPage(
85+
relativePath: json['relativePath'] as String,
86+
symbols: (json['symbols'] as List)?.map((e) => e as String)?.toList());
87+
88+
abstract class _$ApiDocPageSerializerMixin {
89+
String get relativePath;
90+
List<String> get symbols;
91+
Map<String, dynamic> toJson() =>
92+
<String, dynamic>{'relativePath': relativePath, 'symbols': symbols};
93+
}
94+
7795
PackageSearchResult _$PackageSearchResultFromJson(Map<String, dynamic> json) =>
7896
new PackageSearchResult(
7997
indexUpdated: json['indexUpdated'] as String,

app/test/frontend/handlers_test_utils.dart

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,4 +318,10 @@ class DartdocClientMock implements DartdocClient {
318318

319319
@override
320320
Future close() async {}
321+
322+
/// Mock implementation: never returns any documentation content.
@override
Future<List<int>> getContentBytes(
        String package, String version, String relativePath) async =>
    null;
321327
}

0 commit comments

Comments
 (0)