@@ -13,15 +13,51 @@ const _desc = r'Use of angle brackets in a doc comment is treated as HTML by '
13
13
'Markdown.' ;
14
14
15
15
const _details = r'''
16
- **DO** reference only in-scope identifiers in doc comments.
16
+ **DON'T** use angle-bracketed text, `<…>`, in a doc comment unless you want to
17
+ write an HTML tag or link.
17
18
18
- When a developer writes a reference with angle brackets within a doc comment,
19
- the angle brackets are interpreted as HTML. The text within pairs of opening and
20
- closing angle brackets generally get swallowed by the browser, and will not be
21
- displayed.
19
+ Markdown allows HTML tags as part of the Markdown code, so you can write, for
20
+ example, `T<sub>1</sub>`. Markdown does not restrict the allowed tags; it just
21
+ includes the tags verbatim in the output.
22
22
23
- You can use a code block or code span to wrap the text containing angle
24
- brackets. You can also replace `<` with `&lt;` and `>` with `&gt;`.
23
+ Dartdoc only allows some known and valid HTML tags, and will omit any disallowed
24
+ HTML tag from the output. See the list of allowed tags and directives below.
25
+ Your doc comment should not contain any HTML tags that are not on this list.
26
+
27
+ Markdown also allows you to write an "auto-link" to a URL, for example
28
+ `<https://example.com/page.html>`, delimited only by `<...>`. Such a link is
29
+ allowed by Dartdoc as well.
30
+ A `<...>` delimited text is an auto-link if it is a valid absolute URL, starting
31
+ with a scheme of at least two characters followed by a colon, like `https:`.
32
+
33
+
34
+ Any other occurrence of `<word...>` or `</word...>` is likely a mistake
35
+ and this lint will warn about it.
36
+ If something looks like an HTML tag, meaning it starts with `<` or `</`
37
+ and then a letter, and it has a later matching `>`, then it's considered an
38
+ invalid HTML tag unless it is an auto-link, or it starts with an *allowed*
39
+ HTML tag.
40
+
41
+ Such a mistake can, for example, happen if writing Dart code with type arguments
42
+ outside of a code span, for example `The type List<int> is ...`, where `<int>`
43
+ looks like an HTML tag. Missing the end quote of a code span can have the same
44
+ effect: ``The type `List<int> is ...`` will also treat `<int>` as an HTML tag.
45
+
46
+ Allowed HTML directives are: HTML comments, `<!-- text -->`, processing
47
+ instructions, `<?...?>`, CDATA-sections, `<[CDATA...]>`, and the allowed HTML
48
+ tags are:
49
+ `a`, `abbr`, `address`, `area`, `article`, `aside`, `audio`, `b`,
50
+ `bdi`, `bdo`, `blockquote`, `br`, `button`, `canvas`, `caption`,
51
+ `cite`, `code`, `col`, `colgroup`, `data`, `datalist`, `dd`, `del`,
52
+ `dfn`, `div`, `dl`, `dt`, `em`, `fieldset`, `figcaption`, `figure`,
53
+ `footer`, `form`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `header`, `hr`,
54
+ `i`, `iframe`, `img`, `input`, `ins`, `kbd`, `keygen`, `label`,
55
+ `legend`, `li`, `link`, `main`, `map`, `mark`, `meta`, `meter`, `nav`,
56
+ `noscript`, `object`, `ol`, `optgroup`, `option`, `output`, `p`,
57
+ `param`, `pre`, `progress`, `q`, `s`, `samp`, `script`, `section`,
58
+ `select`, `small`, `source`, `span`, `strong`, `style`, `sub`, `sup`,
59
+ `table`, `tbody`, `td`, `template`, `textarea`, `tfoot`, `th`, `thead`,
60
+ `time`, `title`, `tr`, `track`, `u`, `ul`, `var`, `video` and `wbr`.
25
61
26
62
**BAD:**
27
63
```dart
@@ -172,21 +208,6 @@ class _UnintendedTag {
172
208
}
173
209
174
210
class _Visitor extends SimpleAstVisitor <void > {
175
- // Matches autolinks: starting angle bracket, starting alphabetic character,
176
- // any alphabetic character or `-`, `+`, `.`, a colon with optionally two
177
- // `/`s then anything but whitespace until a closing angle bracket.
178
- static final _autoLinkPattern =
179
- RegExp (r'<(([a-zA-Z][a-zA-Z\-\+\.]+):(?://)?[^\s>]*)>' );
180
-
181
- // Matches codespans: starting backtick with anything but a backtick until a
182
- // closing backtick.
183
- static final _codeSpanPattern = RegExp (r'`([^`]+)`' );
184
-
185
- // Matches unintentional tags: starting `<`, optionally an opening `/` then one
186
- // or more valid tag characters then anything but a `>` until a closing `>`.
187
- static final _nonHtmlPattern =
188
- RegExp ("<(?!/?(${_validHtmlTags .join ("|" )})[>])[^>]*[>]" );
189
-
190
211
final LintRule rule;
191
212
192
213
_Visitor (this .rule);
@@ -215,18 +236,50 @@ class _Visitor extends SimpleAstVisitor<void> {
215
236
/// Finds tags that are not valid HTML tags, not contained in a code span, and
216
237
/// are not autolinks.
217
238
List <_UnintendedTag > _findUnintendedHtmlTags (String text) {
218
- var codeSpanOrAutoLink = [
219
- ..._codeSpanPattern.allMatches (text),
220
- ..._autoLinkPattern.allMatches (text)
221
- ];
222
- var unintendedHtmlTags = _nonHtmlPattern.allMatches (text);
239
+ var markdownTokenPattern = RegExp (
240
+ // Escaped Markdown character.
241
+ r'\\.'
242
+
243
+ // Or code span, from "`"*N to "`"*N or just the start if it's
244
+ // unterminated, to avoid "```a``" matching the "``a``".
245
+ // The ```-sequence is atomic.
246
+ r'|(?<cq>`+)(?:[^]*?\k<cq>)?'
247
+
248
+ // Or autolink, start with scheme + `:`.
249
+ r'|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>'
250
+
251
+ // Or HTML comments.
252
+ r'|<!--(?:-?>|[^]*?-->)'
253
+
254
+ // Or HTML declarations.
255
+ r'|<![a-z][^]*?!>'
256
+
257
+ // Or HTML processing instructions.
258
+ r'|<\?[^]*?\?>'
259
+
260
+ // Or HTML CDATA sections.
261
+ r'|<\[CDATA[^]*\]>'
262
+
263
+ // Or valid HTML tag.
264
+ // Matches `<validTag>`, `<validTag ...>`, `<validTag/>`, `</validTag>`
265
+ // and `</validTag ...>`.
266
+ r'|<(?<et>/?)(?:'
267
+ '${_validHtmlTags .join ('|' )}'
268
+ r')'
269
+ r'(?:/(?=\k<et>)>|>|[\x20\r\n\t][^]*?>)'
270
+
271
+ // Or any of the following matches which are considered invalid tags.
272
+ // If the "nh" capture group is participating, one of these matched.
273
+ r'|(?<nh>)(?:'
274
+
275
+ // Any other `</?tag ...>` sequence.
276
+ r'</?[a-z][^]*?>'
277
+ r')' , caseSensitive: false );
223
278
224
279
var matches = < _UnintendedTag > [];
225
- for (var htmlTag in unintendedHtmlTags) {
226
- // If the tag is in a code span or is an autolink, we won't report it.
227
- if (! codeSpanOrAutoLink.any ((match) =>
228
- match.start <= htmlTag.start && htmlTag.end <= match.end)) {
229
- matches.add (_UnintendedTag (htmlTag.start, htmlTag.end - htmlTag.start));
280
+ for (var match in markdownTokenPattern.allMatches (text)) {
281
+ if (match.namedGroup ('nh' ) != null ) {
282
+ matches.add (_UnintendedTag (match.start, match.end - match.start));
230
283
}
231
284
}
232
285
return matches;
0 commit comments