From 972020d5308a262932b16d2e235af9923168f079 Mon Sep 17 00:00:00 2001
From: Nik Nyby <nikolas@gnu.org>
Date: Wed, 4 Jan 2017 22:08:56 -0500
Subject: [PATCH] Update commonmark spec to 0.27

---
 CHANGELOG.md          |   3 +
 CommonMark/blocks.py  |   2 +-
 CommonMark/inlines.py |  60 ++++++++++++------
 spec.txt              | 142 +++++++++++++++++++++++++++---------------
 4 files changed, 135 insertions(+), 72 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23344ab..f12d075 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 0.7.3
+- The CommonMark spec has been updated to 0.27.
+
 ## 0.7.2 (2016-08-10)
 - Removed outdated files from distributed packages, reported by @hyperknot
 
diff --git a/CommonMark/blocks.py b/CommonMark/blocks.py
index 00fa4ae..ad4ff09 100644
--- a/CommonMark/blocks.py
+++ b/CommonMark/blocks.py
@@ -47,7 +47,7 @@
 reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')
 reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')
 reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')
-reSetextHeadingLine = re.compile(r'^(?:=+|-+) *$')
+reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')
 reLineEnding = re.compile(r'\r\n|\n|\r')
 
 
diff --git a/CommonMark/inlines.py b/CommonMark/inlines.py
index 54af6e5..ee56bd5 100644
--- a/CommonMark/inlines.py
+++ b/CommonMark/inlines.py
@@ -20,8 +20,6 @@
 # Some regexps used in inline parser:
 
 ESCAPED_CHAR = '\\\\' + common.ESCAPABLE
-REG_CHAR = '[^\\\\()\\x00-\\x20]'
-IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + '|\\\\)*\\)'
 
 rePunctuation = re.compile(
     r'^[\u2000-\u206F\u2E00-\u2E7F\\' + "'" + '!"#\$%&\(\)'
@@ -36,9 +34,6 @@
 reLinkDestinationBraces = re.compile(
     '^(?:[<](?:[^ <>\\t\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' +
     '\\\\)*[>])')
-reLinkDestination = re.compile(
-    '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|\\\\|' +
-    IN_PARENS_NOSP + ')*')
 
 reEscapable = re.compile('^' + common.ESCAPABLE)
 reEntityHere = re.compile('^' + common.ENTITY, re.IGNORECASE)
@@ -54,8 +49,9 @@
     r'^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>',
     re.IGNORECASE)
 reSpnl = re.compile(r'^ *(?:\n *)?')
-reWhitespaceChar = re.compile(r'^\s')
-reWhitespace = re.compile(r'\s+')
+reWhitespaceChar = re.compile(r'^[ \t\n\x0b\x0c\x0d]')  # ASCII only
+reWhitespace = re.compile(r'[ \t\n\x0b\x0c\x0d]+')
+reUnicodeWhitespaceChar = re.compile(r'^\s')
 reFinalSpace = re.compile(r' *$')
 reInitialSpace = re.compile(r'^ *')
 reSpaceAtEndOfLine = re.compile(r'^ *(?:\n|$)')
@@ -262,10 +258,10 @@ def scanDelims(self, c):
             c_after = '\n'
 
         # Python 2 doesn't recognize '\xa0' as whitespace
-        after_is_whitespace = re.match(reWhitespaceChar, c_after) or \
+        after_is_whitespace = re.match(reUnicodeWhitespaceChar, c_after) or \
             c_after == '\xa0'
         after_is_punctuation = re.match(rePunctuation, c_after)
-        before_is_whitespace = re.match(reWhitespaceChar, c_before) or \
+        before_is_whitespace = re.match(reUnicodeWhitespaceChar, c_before) or \
             c_before == '\xa0'
         before_is_punctuation = re.match(rePunctuation, c_before)
 
@@ -318,6 +314,7 @@ def handleDelim(self, cc, block):
         self.delimiters = {
             'cc': cc,
             'numdelims': numdelims,
+            'origdelims': numdelims,
             'node': node,
             'previous': self.delimiters,
             'next': None,
@@ -372,8 +369,8 @@ def processEmphasis(self, stack_bottom):
                        opener != openers_bottom[closercc]):
                     odd_match = (closer.get('can_open') or
                                  opener.get('can_close')) and \
-                                 (opener.get('numdelims') +
-                                  closer.get('numdelims')) % 3 == 0
+                                 (opener.get('origdelims') +
+                                  closer.get('origdelims')) % 3 == 0
                     if opener.get('cc') == closercc and \
                        opener.get('can_open') and \
                        not odd_match:
@@ -487,11 +484,31 @@ def parseLinkDestination(self):
         """
         res = self.match(reLinkDestinationBraces)
         if res is None:
-            res = self.match(reLinkDestination)
-            if res is None:
-                return None
-            else:
-                return normalize_uri(unescape_string(res))
+            # TODO handrolled parser; res should be None or the string
+            savepos = self.pos
+            openparens = 0
+            c = self.peek()
+            while c is not None:
+                if c == '\\':
+                    self.pos += 1
+                    if self.peek() is not None:
+                        self.pos += 1
+                elif c == '(':
+                    self.pos += 1
+                    openparens += 1
+                elif c == ')':
+                    if openparens < 1:
+                        break
+                    else:
+                        self.pos += 1
+                        openparens -= 1
+                elif re.match(reWhitespaceChar, c):
+                    break
+                else:
+                    self.pos += 1
+                c = self.peek()
+            res = self.subject[savepos:self.pos]
+            return normalize_uri(unescape_string(res))
         else:
             # chop off surrounding <..>:
             return normalize_uri(unescape_string(res[1:-1]))
@@ -575,22 +592,25 @@ def parseCloseBracket(self, block):
 
         # Check to see if we have a link/image
 
+        savepos = self.pos
+
         # Inline link?
         if self.peek() == '(':
             self.pos += 1
             self.spnl()
             dest = self.parseLinkDestination()
-            if dest is not None and \
-               self.spnl():
+            if dest is not None and self.spnl():
                 # make sure there's a space before the title
                 if re.match(reWhitespaceChar, self.subject[self.pos-1]):
                     title = self.parseLinkTitle()
                 if self.spnl() and self.peek() == ')':
                     self.pos += 1
                     matched = True
-        else:
+            else:
+                self.pos = savepos
+
+        if not matched:
             # Next, see if there's a link label
-            savepos = self.pos
             beforelabel = self.pos
             n = self.parseLinkLabel()
             if n > 2:
diff --git a/spec.txt b/spec.txt
index e2b6834..c49e85b 100644
--- a/spec.txt
+++ b/spec.txt
@@ -1,8 +1,8 @@
 ---
 title: CommonMark Spec
 author: John MacFarlane
-version: 0.26
-date: '2016-07-15'
+version: 0.27
+date: '2016-11-18'
 license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
 ...
 
@@ -1985,7 +1985,7 @@ by their start and end conditions.  The block begins with a line that
 meets a [start condition](@) (after up to three spaces
 optional indentation).  It ends with the first subsequent line that
 meets a matching [end condition](@), or the last line of
-the document or other [container block](@), if no line is encountered that meets the
+the document or other [container block], if no line is encountered that meets the
 [end condition].  If the first line meets both the [start condition]
 and the [end condition], the block will contain just that line.
 
@@ -2015,7 +2015,8 @@ followed by one of the strings (case-insensitive) `address`,
 `article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
 `caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
 `dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
-`footer`, `form`, `frame`, `frameset`, `h1`, `head`, `header`, `hr`,
+`footer`, `form`, `frame`, `frameset`,
+`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
 `html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
 `meta`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
 `section`, `source`, `summary`, `table`, `tbody`, `td`,
@@ -3636,11 +3637,11 @@ The following rules define [list items]:
     If the list item is ordered, then it is also assigned a start
     number, based on the ordered list marker.
 
-    Exceptions: When the list item interrupts a paragraph---that
-    is, when it starts on a line that would otherwise count as
-    [paragraph continuation text]---then (a) the lines *Ls* must
-    not begin with a blank line, and (b) if the list item is
-    ordered, the start number must be 1.
+    Exceptions: When the first list item in a [list] interrupts
+    a paragraph---that is, when it starts on a line that would
+    otherwise count as [paragraph continuation text]---then (a)
+    the lines *Ls* must not begin with a blank line, and (b) if
+    the list item is ordered, the start number must be 1.
 
 For example, let *Ls* be the lines
 
@@ -4730,8 +4731,7 @@ takes four spaces (a common case), but diverge in other cases.
 
 A [list](@) is a sequence of one or more
 list items [of the same type].  The list items
-may be separated by single [blank lines], but two
-blank lines end all containing lists.
+may be separated by any number of blank lines.
 
 Two list items are [of the same type](@)
 if they begin with a [list marker] of the same type.
@@ -4809,10 +4809,11 @@ Foo
 `Markdown.pl` does not allow this, through fear of triggering a list
 via a numeral in a hard-wrapped line:
 
-```````````````````````````````` markdown
+``` markdown
 The number of windows in my house is
 14.  The number of doors is 6.
-````````````````````````````````
+```
+
 Oddly, though, `Markdown.pl` *does* allow a blockquote to
 interrupt a paragraph, even though the same considerations might
 apply.
@@ -4821,10 +4822,12 @@ In CommonMark, we do allow lists to interrupt paragraphs, for
 two reasons.  First, it is natural and not uncommon for people
 to start lists without blank lines:
 
-    I need to buy
-    - new shoes
-    - a coat
-    - a plane ticket
+``` markdown
+I need to buy
+- new shoes
+- a coat
+- a plane ticket
+```
 
 Second, we are attracted to a
 
@@ -4836,20 +4839,24 @@ Second, we are attracted to a
 (Indeed, the spec for [list items] and [block quotes] presupposes
 this principle.) This principle implies that if
 
-      * I need to buy
-        - new shoes
-        - a coat
-        - a plane ticket
+``` markdown
+  * I need to buy
+    - new shoes
+    - a coat
+    - a plane ticket
+```
 
 is a list item containing a paragraph followed by a nested sublist,
 as all Markdown implementations agree it is (though the paragraph
 may be rendered without `<p>` tags, since the list is "tight"),
 then
 
-    I need to buy
-    - new shoes
-    - a coat
-    - a plane ticket
+``` markdown
+I need to buy
+- new shoes
+- a coat
+- a plane ticket
+```
 
 by itself should be a paragraph followed by a nested sublist.
 
@@ -5671,6 +5678,16 @@ single spaces, just as they would be by a browser:
 ````````````````````````````````
 
 
+Not all [Unicode whitespace] (for instance, non-breaking space) is
+collapsed, however:
+
+```````````````````````````````` example
+`a  b`
+.
+<p><code>a  b</code></p>
+````````````````````````````````
+
+
 Q: Why not just leave the spaces, since browsers will collapse them
 anyway?  A:  Because we might be targeting a non-HTML format, and we
 shouldn't rely on HTML-specific rendering assumptions.
@@ -6558,7 +6575,7 @@ Note that in the preceding case, the interpretation
 
 
 is precluded by the condition that a delimiter that
-can both open and close (like the `*` after `foo`
+can both open and close (like the `*` after `foo`)
 cannot form emphasis if the sum of the lengths of
 the delimiter runs containing the opening and
 closing delimiters is a multiple of 3.
@@ -6590,12 +6607,6 @@ omitted:
 ````````````````````````````````
 
 
-```````````````````````````````` example
-*foo**bar***
-.
-<p><em>foo<strong>bar</strong></em></p>
-````````````````````````````````
-
 Indefinite levels of nesting are possible:
 
 ```````````````````````````````` example
@@ -7137,8 +7148,7 @@ A [link destination](@) consists of either
 - a nonempty sequence of characters that does not include
   ASCII space or control characters, and includes parentheses
   only if (a) they are backslash-escaped or (b) they are part of
-  a balanced pair of unescaped parentheses that is not itself
-  inside a balanced pair of unescaped parentheses.
+  a balanced pair of unescaped parentheses.
 
 A [link title](@)  consists of either
 
@@ -7244,35 +7254,29 @@ Parentheses inside the link destination may be escaped:
 <p><a href="(foo)">link</a></p>
 ````````````````````````````````
 
-One level of balanced parentheses is allowed without escaping:
-
-```````````````````````````````` example
-[link]((foo)and(bar))
-.
-<p><a href="(foo)and(bar)">link</a></p>
-````````````````````````````````
-
-However, if you have parentheses within parentheses, you need to escape
-or use the `<...>` form:
+Any number of parentheses are allowed without escaping, as long as they are
+balanced:
 
 ```````````````````````````````` example
 [link](foo(and(bar)))
 .
-<p>[link](foo(and(bar)))</p>
+<p><a href="foo(and(bar))">link</a></p>
 ````````````````````````````````
 
+However, if you have unbalanced parentheses, you need to escape or use the
+`<...>` form:
 
 ```````````````````````````````` example
-[link](foo(and\(bar\)))
+[link](foo\(and\(bar\))
 .
-<p><a href="foo(and(bar))">link</a></p>
+<p><a href="foo(and(bar)">link</a></p>
 ````````````````````````````````
 
 
 ```````````````````````````````` example
-[link](<foo(and(bar))>)
+[link](<foo(and(bar)>)
 .
-<p><a href="foo(and(bar))">link</a></p>
+<p><a href="foo(and(bar)">link</a></p>
 ````````````````````````````````
 
 
@@ -7361,6 +7365,16 @@ may be used in titles:
 ````````````````````````````````
 
 
+Titles must be separated from the link using a [whitespace].
+Other [Unicode whitespace] like non-breaking space doesn't work.
+
+```````````````````````````````` example
+[link](/url "title")
+.
+<p><a href="/url%C2%A0%22title%22">link</a></p>
+````````````````````````````````
+
+
 Nested balanced quotes are not allowed without escaping:
 
 ```````````````````````````````` example
@@ -8025,7 +8039,8 @@ following closing bracket:
 ````````````````````````````````
 
 
-Full references take precedence over shortcut references:
+Full and compact references take precedence over shortcut
+references:
 
 ```````````````````````````````` example
 [foo][bar]
@@ -8036,6 +8051,31 @@ Full references take precedence over shortcut references:
 <p><a href="/url2">foo</a></p>
 ````````````````````````````````
 
+```````````````````````````````` example
+[foo][]
+
+[foo]: /url1
+.
+<p><a href="/url1">foo</a></p>
+````````````````````````````````
+
+Inline links also take precedence:
+
+```````````````````````````````` example
+[foo]()
+
+[foo]: /url1
+.
+<p><a href="">foo</a></p>
+````````````````````````````````
+
+```````````````````````````````` example
+[foo](not a link)
+
+[foo]: /url1
+.
+<p><a href="/url1">foo</a>(not a link)</p>
+````````````````````````````````
 
 In the following case `[bar][baz]` is parsed as a reference,
 `[foo]` as normal text:
@@ -9045,7 +9085,7 @@ blocks.  But we cannot close unmatched blocks yet, because we may have a
 [lazy continuation line].
 
 2.  Next, after consuming the continuation markers for existing
-blocks, we look for new block starts (e.g. `>` for a block quote.
+blocks, we look for new block starts (e.g. `>` for a block quote).
 If we encounter a new block start, we close any blocks unmatched
 in step 1 before creating the new block as a child of the last
 matched block.