Skip to content

[gyb] Python 2 or 3 compatible Generate Your Boilerplate #806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 31, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/ClangImporter/SortedCFDatabase.def.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
%{

import re
import sys
import codecs

prologueLines = ""
epilogueLines = ""
Expand All @@ -26,7 +28,7 @@ epilogueLines = ""
lineForName = {}

# Load the data file.
with open(CFDatabaseFile, 'rb') as f:
with codecs.open(CFDatabaseFile, encoding=sys.getfilesystemencoding(), errors='strict') as f:
for line in f:
# Pass through preprocessor directives literally.
# Assume that they all fall into either a strict prologue or epilogue.
Expand Down
27 changes: 16 additions & 11 deletions utils/GYBUnicodeDataUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
##===----------------------------------------------------------------------===##

import re
import sys
import codecs

class UnicodeProperty(object):
"""Abstract base class for Unicode properties."""
Expand Down Expand Up @@ -64,11 +66,11 @@ def __init__(self, grapheme_break_property_file_name):
# values to symbolic values.
self.symbolic_values = \
[ None ] * (max(self.numeric_value_table.values()) + 1)
for k,v in self.numeric_value_table.iteritems():
for k,v in self.numeric_value_table.items():
self.symbolic_values[v] = k

# Load the data file.
with open(grapheme_break_property_file_name, 'rb') as f:
with codecs.open(grapheme_break_property_file_name, encoding=sys.getfilesystemencoding(), errors='strict') as f:
for line in f:
# Strip comments.
line = re.sub('#.*', '', line)
Expand Down Expand Up @@ -329,7 +331,10 @@ def map_index(idx):
else:
return idx

return map(map_index, indexes)
# NOTE: Python 2's `map` function returns a list, whereas Python 3's
# `map` function returns an iterator. To work around this, the
# result of `map` is explicitly converted to a `list`.
return list(map(map_index, indexes))

# If self.BMP_data contains identical data blocks, keep the first one,
# remove duplicates and change the indexes in self.BMP_lookup to point to
Expand Down Expand Up @@ -514,9 +519,9 @@ def _convert_line(line):

# Match a list of code points.
for token in line.split(" "):
if token == "÷":
if token == u"÷":
boundaries += [ curr_bytes ]
elif token == "×":
elif token == u"×":
pass
else:
code_point = int(token, 16)
Expand All @@ -529,21 +534,21 @@ def _convert_line(line):
# and test separately that we handle ill-formed UTF-8 sequences.
if code_point >= 0xd800 and code_point <= 0xdfff:
code_point = 0x200b
code_point = ('\U%(cp)08x' % { 'cp': code_point }).decode('unicode_escape')
as_UTF8_bytes = code_point.encode('utf8')
as_UTF8_escaped = ''.join(['\\x%(byte)02x' % { 'byte': ord(byte) } for byte in as_UTF8_bytes])
code_point = (b'\U%(cp)08x' % { b'cp': code_point }).decode('unicode_escape', 'strict')
as_UTF8_bytes = bytearray(code_point.encode('utf8', 'strict'))
as_UTF8_escaped = ''.join(['\\x%(byte)02x' % { 'byte': byte } for byte in as_UTF8_bytes])
test += as_UTF8_escaped
curr_bytes += len(as_UTF8_bytes)

return (test, boundaries)

# Self-test.
assert(_convert_line('÷ 0903 × 0308 ÷ AC01 ÷ # abc') == ('\\xe0\\xa4\\x83\\xcc\\x88\\xea\\xb0\\x81', [ 0, 5, 8 ]))
assert(_convert_line('÷ D800 ÷ # abc') == ('\\xe2\\x80\\x8b', [ 0, 3 ]))
assert(_convert_line(u'÷ 0903 × 0308 ÷ AC01 ÷ # abc') == ('\\xe0\\xa4\\x83\\xcc\\x88\\xea\\xb0\\x81', [ 0, 5, 8 ]))
assert(_convert_line(u'÷ D800 ÷ # abc') == ('\\xe2\\x80\\x8b', [ 0, 3 ]))

result = []

with open(grapheme_break_test_file_name, 'rb') as f:
with codecs.open(grapheme_break_test_file_name, encoding=sys.getfilesystemencoding(), errors='strict') as f:
for line in f:
test = _convert_line(line)
if test:
Expand Down
14 changes: 9 additions & 5 deletions utils/gyb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from __future__ import print_function

import re
from cStringIO import StringIO
try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
import tokenize
import textwrap
from bisect import bisect
Expand Down Expand Up @@ -135,7 +138,8 @@ def tokenizePythonToUnmatchedCloseCurly(sourceText, start, lineStarts):
if nesting < 0:
return tokenPosToIndex(tokenStart, start, lineStarts)

except tokenize.TokenError, (message, errorPos):
except tokenize.TokenError as error:
(message, errorPos) = error.args
return tokenPosToIndex(errorPos, start, lineStarts)

return len(sourceText)
Expand Down Expand Up @@ -304,7 +308,7 @@ def splitGybLines(sourceLines):
dedents = 0
try:
for tokenKind, tokenText, tokenStart, (tokenEndLine, tokenEndCol), lineText \
in tokenize.generate_tokens(sourceLines.__iter__().next):
in tokenize.generate_tokens(lambda i = iter(sourceLines): next(i)):

if tokenKind in (tokenize.COMMENT, tokenize.ENDMARKER):
continue
Expand All @@ -324,7 +328,7 @@ def splitGybLines(sourceLines):

lastTokenText,lastTokenKind = tokenText,tokenKind

except tokenize.TokenError, (message, errorPos):
except tokenize.TokenError:
return [] # Let the later compile() call report the error

if lastTokenText == ':':
Expand All @@ -347,7 +351,7 @@ def codeStartsWithDedentKeyword(sourceLines):
"""
tokenText = None
for tokenKind, tokenText, _, _, _ \
in tokenize.generate_tokens(sourceLines.__iter__().next):
in tokenize.generate_tokens(lambda i = iter(sourceLines): next(i)):

if tokenKind != tokenize.COMMENT and tokenText.strip() != '':
break
Expand Down
5 changes: 4 additions & 1 deletion utils/line-directive
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ def run():
sources = sys.argv[1:dashes]

command = subprocess.Popen(
sys.argv[dashes + 1:], stderr = subprocess.STDOUT, stdout = subprocess.PIPE
sys.argv[dashes + 1:],
stderr = subprocess.STDOUT,
stdout = subprocess.PIPE,
universal_newlines = True
)

error_pattern = re.compile(
Expand Down