From 1d668200a7eaa8cf0ecafe14c8be18b9b32c2c47 Mon Sep 17 00:00:00 2001 From: Sam Rawlins Date: Thu, 7 Aug 2014 21:10:07 -0700 Subject: [PATCH] Adding ids to headers --- CHANGELOG.md | 2 ++ README.md | 9 ++++- lib/src/ast.dart | 1 + lib/src/block_parser.dart | 38 +++++++++++++++++++- lib/src/html_renderer.dart | 25 +++++++++++++ test/extensions/headers_with_ids.unit | 27 ++++++++++++++ test/extensions/setext_headers_with_ids.unit | 18 ++++++++++ test/markdown_test.dart | 6 ++++ 8 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 test/extensions/headers_with_ids.unit create mode 100644 test/extensions/setext_headers_with_ids.unit diff --git a/CHANGELOG.md b/CHANGELOG.md index 48c4542d..8bd82d67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ * Inline HTML syntax support; This is also considered an extension (#18). * The text `[foo] (bar)` now parses as an inline link (#53). * The text `[foo]()` now renders as an inline link. +* Header identifier support in the HeaderWithIdSyntax and + SetextHeaderWithIdSyntax extensions. ## 0.8.0 diff --git a/README.md b/README.md index 940c5eff..fcd27eb4 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,17 @@ specifying an Array of extension syntaxes in the `blockSyntaxes` or The currently supported inline extension syntaxes are: * `new InlineHtmlSyntax()` - approximately CommonMark's - [definition](http://spec.commonmark.org/0.22/#raw-html) of "Raw HTML". + [definition][commonmark-raw-html] of "Raw HTML". The currently supported block extension syntaxes are: * `const FencedCodeBlockSyntax()` - Code blocks familiar to Pandoc and PHP Markdown Extra users. +* `const HeaderWithIdSyntax()` - ATX-style headers have generated IDs, for link + anchors (akin to Pandoc's [`auto_identifiers`][pandoc-auto_identifiers]). +* `const SetextHeaderWithIdSyntax()` - Setext-style headers have generated IDs + for link anchors (akin to Pandoc's + [`auto_identifiers`][pandoc-auto_identifiers]). 
For example: @@ -75,3 +80,5 @@ void main() { [Perl Markdown]: http://daringfireball.net/projects/markdown/ [CommonMark]: http://commonmark.org/ +[commonMark-raw-html]: http://spec.commonmark.org/0.22/#raw-html +[pandoc-auto_identifiers]: http://pandoc.org/README.html#extension-auto_identifiers diff --git a/lib/src/ast.dart b/lib/src/ast.dart index 6da7ab4f..00f4ff35 100644 --- a/lib/src/ast.dart +++ b/lib/src/ast.dart @@ -17,6 +17,7 @@ class Element implements Node { final String tag; final List children; final Map attributes; + String generatedId; Element(this.tag, this.children) : attributes = {}; diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart index 13c5b686..af313654 100644 --- a/lib/src/block_parser.dart +++ b/lib/src/block_parser.dart @@ -111,7 +111,6 @@ class BlockParser { } abstract class BlockSyntax { - const BlockSyntax(); /// Gets the regex used to identify the beginning of this block, if any. @@ -144,6 +143,20 @@ abstract class BlockSyntax { if (parser.isDone) return true; return parser.blockSyntaxes.any((s) => s.canParse(parser) && s.canEndBlock); } + + /// Generates a valid HTML anchor from the inner text of [element]. + static String generateAnchorHash(Element element) => + _concatenatedText(element) + .toLowerCase() + .trim() + .replaceFirst(new RegExp(r'^[^a-z]+'), '') + .replaceAll(new RegExp(r'[^a-z0-9 _-]'), '') + .replaceAll(new RegExp(r'\s'), '-'); + + /// Concatenates the text found in all the children of [element]. + static String _concatenatedText(Element element) => element.children + .map((child) => (child is Text) ? child.text : _concatenatedText(child)) + .join(''); } class EmptyBlockSyntax extends BlockSyntax { @@ -181,6 +194,18 @@ class SetextHeaderSyntax extends BlockSyntax { } } +/// Parses setext-style headers, and adds generated IDs to the generated +/// elements. 
+class SetextHeaderWithIdSyntax extends SetextHeaderSyntax { + const SetextHeaderWithIdSyntax(); + + Node parse(BlockParser parser) { + var element = super.parse(parser); + element.generatedId = BlockSyntax.generateAnchorHash(element); + return element; + } +} + /// Parses atx-style headers: `## Header ##`. class HeaderSyntax extends BlockSyntax { RegExp get pattern => _headerPattern; @@ -196,6 +221,17 @@ class HeaderSyntax extends BlockSyntax { } } +/// Parses atx-style headers, and adds generated IDs to the generated elements. +class HeaderWithIdSyntax extends HeaderSyntax { + const HeaderWithIdSyntax(); + + Node parse(BlockParser parser) { + var element = super.parse(parser); + element.generatedId = BlockSyntax.generateAnchorHash(element); + return element; + } +} + /// Parses email-style blockquotes: `> quote`. class BlockquoteSyntax extends BlockSyntax { RegExp get pattern => _blockquotePattern; diff --git a/lib/src/html_renderer.dart b/lib/src/html_renderer.dart index d1f7147a..399f992e 100644 --- a/lib/src/html_renderer.dart +++ b/lib/src/html_renderer.dart @@ -4,6 +4,8 @@ library markdown.src.html_renderer; +import 'dart:collection'; + import 'ast.dart'; import 'document.dart'; import 'extension_set.dart'; @@ -40,11 +42,13 @@ class HtmlRenderer implements NodeVisitor { static final _blockTags = new RegExp('blockquote|h1|h2|h3|h4|h5|h6|hr|p|pre'); StringBuffer buffer; + Set uniqueIds; HtmlRenderer(); String render(List nodes) { buffer = new StringBuffer(); + uniqueIds = new LinkedHashSet(); for (final node in nodes) node.accept(this); @@ -71,6 +75,11 @@ class HtmlRenderer implements NodeVisitor { buffer.write(' $name="${element.attributes[name]}"'); } + // attach header anchor ids generated from text + if (element.generatedId != null) { + buffer.write(' id="${uniquifyId(element.generatedId)}"'); + } + if (element.isEmpty) { // Empty element like
. buffer.write(' />'); @@ -84,4 +93,20 @@ class HtmlRenderer implements NodeVisitor { void visitElementAfter(Element element) { buffer.write(''); } + + /// Uniquifies an id generated from text. + String uniquifyId(String id) { + if (!uniqueIds.contains(id)) { + uniqueIds.add(id); + return id; + } + + int suffix = 2; + String suffixedId = '$id-$suffix'; + while (uniqueIds.contains(suffixedId)) { + suffixedId = '$id-${suffix++}'; + } + uniqueIds.add(suffixedId); + return suffixedId; + } } diff --git a/test/extensions/headers_with_ids.unit b/test/extensions/headers_with_ids.unit new file mode 100644 index 00000000..80bad95a --- /dev/null +++ b/test/extensions/headers_with_ids.unit @@ -0,0 +1,27 @@ +>>> simple header +# header + +<<< +

<h1 id="header">header</h1>

+>>> header that starts with garbage +## 2. header again + +<<< +

<h2 id="header-again">2. header again</h2>

+>>> header with inline syntaxes +### headers **rock** `etc.` + +<<< +

<h3 id="headers-rock-etc">headers <strong>rock</strong> <code>etc.</code></h3>

+>>> non-unique headers +# header + +## header + +<<< +

<h1 id="header">header</h1>

+

<h2 id="header-2">header</h2>

+>>> header starts with inline syntax +# *headers* etc. +<<< +

<h1 id="headers-etc"><em>headers</em> etc.</h1>

diff --git a/test/extensions/setext_headers_with_ids.unit b/test/extensions/setext_headers_with_ids.unit new file mode 100644 index 00000000..b8ef0224 --- /dev/null +++ b/test/extensions/setext_headers_with_ids.unit @@ -0,0 +1,18 @@ +>>> h1 +text +=== + +<<< +

<h1 id="text">text</h1>

+>>> h2 +text +--- + +<<< +

<h2 id="text">text</h2>

+>>> header with inline syntax +header *emphasised* +=== + +<<< +

<h1 id="header-emphasised">header <em>emphasised</em></h1>

diff --git a/test/markdown_test.dart b/test/markdown_test.dart index 4bfee805..6eb9fefe 100644 --- a/test/markdown_test.dart +++ b/test/markdown_test.dart @@ -105,4 +105,10 @@ nyan''', '''

~=[,,_,,]:3

testFile('extensions/inline_html.unit', inlineSyntaxes: [new InlineHtmlSyntax()]); + + testFile('extensions/headers_with_ids.unit', + blockSyntaxes: [const HeaderWithIdSyntax()]); + + testFile('extensions/setext_headers_with_ids.unit', + blockSyntaxes: [const SetextHeaderWithIdSyntax()]); }