|
20 | 20 | # Some regexps used in inline parser:
|
21 | 21 |
|
22 | 22 | ESCAPED_CHAR = '\\\\' + common.ESCAPABLE
|
23 |
| -REG_CHAR = '[^\\\\()\\x00-\\x20]' |
24 |
| -IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + '|\\\\)*\\)' |
25 | 23 |
|
26 | 24 | rePunctuation = re.compile(
|
27 | 25 | r'^[\u2000-\u206F\u2E00-\u2E7F\\' + "'" + '!"#\$%&\(\)'
|
|
36 | 34 | reLinkDestinationBraces = re.compile(
|
37 | 35 | '^(?:[<](?:[^ <>\\t\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' +
|
38 | 36 | '\\\\)*[>])')
|
39 |
| -reLinkDestination = re.compile( |
40 |
| - '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|\\\\|' + |
41 |
| - IN_PARENS_NOSP + ')*') |
42 | 37 |
|
43 | 38 | reEscapable = re.compile('^' + common.ESCAPABLE)
|
44 | 39 | reEntityHere = re.compile('^' + common.ENTITY, re.IGNORECASE)
|
|
54 | 49 | r'^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>',
|
55 | 50 | re.IGNORECASE)
|
# Optional spaces, at most one newline, then optional spaces.
reSpnl = re.compile(r'^ *(?:\n *)?')
# ASCII whitespace only (space, tab, LF, VT, FF, CR), listed explicitly
# rather than via \s so the set does not vary with the Python version or
# with Unicode matching rules.
reWhitespaceChar = re.compile(r'^[ \t\n\x0b\x0c\x0d]')
reWhitespace = re.compile(r'[ \t\n\x0b\x0c\x0d]+')
# Whitespace as defined by \s; on Python 3 str patterns this also covers
# Unicode whitespace characters.
reUnicodeWhitespaceChar = re.compile(r'^\s')
reFinalSpace = re.compile(r' *$')
reInitialSpace = re.compile(r'^ *')
reSpaceAtEndOfLine = re.compile(r'^ *(?:\n|$)')
|
@@ -262,10 +258,10 @@ def scanDelims(self, c):
|
262 | 258 | c_after = '\n'
|
263 | 259 |
|
264 | 260 | # Python 2 doesn't recognize '\xa0' as whitespace
|
265 |
| - after_is_whitespace = re.match(reWhitespaceChar, c_after) or \ |
| 261 | + after_is_whitespace = re.match(reUnicodeWhitespaceChar, c_after) or \ |
266 | 262 | c_after == '\xa0'
|
267 | 263 | after_is_punctuation = re.match(rePunctuation, c_after)
|
268 |
| - before_is_whitespace = re.match(reWhitespaceChar, c_before) or \ |
| 264 | + before_is_whitespace = re.match(reUnicodeWhitespaceChar, c_before) or \ |
269 | 265 | c_before == '\xa0'
|
270 | 266 | before_is_punctuation = re.match(rePunctuation, c_before)
|
271 | 267 |
|
@@ -318,6 +314,7 @@ def handleDelim(self, cc, block):
|
318 | 314 | self.delimiters = {
|
319 | 315 | 'cc': cc,
|
320 | 316 | 'numdelims': numdelims,
|
| 317 | + 'origdelims': numdelims, |
321 | 318 | 'node': node,
|
322 | 319 | 'previous': self.delimiters,
|
323 | 320 | 'next': None,
|
@@ -372,8 +369,8 @@ def processEmphasis(self, stack_bottom):
|
372 | 369 | opener != openers_bottom[closercc]):
|
373 | 370 | odd_match = (closer.get('can_open') or
|
374 | 371 | opener.get('can_close')) and \
|
375 |
| - (opener.get('numdelims') + |
376 |
| - closer.get('numdelims')) % 3 == 0 |
| 372 | + (opener.get('origdelims') + |
| 373 | + closer.get('origdelims')) % 3 == 0 |
377 | 374 | if opener.get('cc') == closercc and \
|
378 | 375 | opener.get('can_open') and \
|
379 | 376 | not odd_match:
|
def parseLinkDestination(self):
    """
    Attempt to parse a link destination at the current position.

    Two forms are tried in order: a destination wrapped in angle
    brackets (matched by reLinkDestinationBraces), then a bare run of
    characters that stops at ASCII whitespace or at a ')' that has no
    matching '(' inside the destination.

    Returns the destination after it has been passed through
    unescape_string and normalize_uri, or None if there is no valid
    destination here.  When the bare form is rejected, self.pos is
    put back to where the scan started.
    """
    res = self.match(reLinkDestinationBraces)
    if res is not None:
        # chop off surrounding <..>:
        return normalize_uri(unescape_string(res[1:-1]))

    # Bare form: scan by hand, tracking parenthesis depth.
    savepos = self.pos
    openparens = 0
    c = self.peek()
    while c is not None:
        if c == '\\':
            # Skip the backslash and the character after it (if any),
            # so an escaped parenthesis is not counted.
            self.pos += 1
            if self.peek() is not None:
                self.pos += 1
        elif c == '(':
            self.pos += 1
            openparens += 1
        elif c == ')':
            if openparens < 1:
                # This ')' belongs to the enclosing construct.
                break
            self.pos += 1
            openparens -= 1
        elif reWhitespaceChar.match(c):
            break
        else:
            self.pos += 1
        c = self.peek()

    # Reject a destination whose parentheses never balanced, and an
    # empty one unless it sits directly before a closing ')'.
    nothing_consumed = self.pos == savepos
    if openparens != 0 or (nothing_consumed and c != ')'):
        self.pos = savepos
        return None

    res = self.subject[savepos:self.pos]
    return normalize_uri(unescape_string(res))
|
@@ -575,22 +592,25 @@ def parseCloseBracket(self, block):
|
575 | 592 |
|
576 | 593 | # Check to see if we have a link/image
|
577 | 594 |
|
| 595 | + savepos = self.pos |
| 596 | + |
578 | 597 | # Inline link?
|
579 | 598 | if self.peek() == '(':
|
580 | 599 | self.pos += 1
|
581 | 600 | self.spnl()
|
582 | 601 | dest = self.parseLinkDestination()
|
583 |
| - if dest is not None and \ |
584 |
| - self.spnl(): |
| 602 | + if dest is not None and self.spnl(): |
585 | 603 | # make sure there's a space before the title
|
586 | 604 | if re.match(reWhitespaceChar, self.subject[self.pos-1]):
|
587 | 605 | title = self.parseLinkTitle()
|
588 | 606 | if self.spnl() and self.peek() == ')':
|
589 | 607 | self.pos += 1
|
590 | 608 | matched = True
|
591 |
| - else: |
| 609 | + else: |
| 610 | + self.pos = savepos |
| 611 | + |
| 612 | + if not matched: |
592 | 613 | # Next, see if there's a link label
|
593 |
| - savepos = self.pos |
594 | 614 | beforelabel = self.pos
|
595 | 615 | n = self.parseLinkLabel()
|
596 | 616 | if n > 2:
|
|
0 commit comments