From 49d90e55b3615e6759e1233be8d2bae8684641fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Thu, 16 May 2019 17:06:46 +0200 Subject: [PATCH 1/9] Implement str.dedent --- Lib/collections/__init__.py | 1 + Lib/test/test_unicode.py | 118 ++++++++++++++++++++++++ Lib/textwrap.py | 35 +------- Objects/clinic/unicodeobject.c.h | 19 +++- Objects/unicodeobject.c | 149 +++++++++++++++++++++++++++++++ 5 files changed, 287 insertions(+), 35 deletions(-) diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 706907ad4a282d..8b7790109825ff 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1285,3 +1285,4 @@ def translate(self, *args): return self.__class__(self.data.translate(*args)) def upper(self): return self.__class__(self.data.upper()) def zfill(self, width): return self.__class__(self.data.zfill(width)) + def dedent(self): return self.__class__(self.data.dedent()) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 36b72e40c7e419..4347e45fe2d882 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2436,6 +2436,124 @@ def test_free_after_iterating(self): support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str) + def assertUnchanged(self, text): + """assert that dedent() has no effect on 'text'""" + self.assertEqual(text, text.dedent()) + + def test_dedent_nomargin(self): + # No lines indented. + text = "Hello there.\nHow are you?\nOh good, I'm glad." + self.assertUnchanged(text) + + # Similar, with a blank line. + text = "Hello there.\n\nBoo!" + self.assertUnchanged(text) + + # Some lines indented, but overall margin is still zero. + text = "Hello there.\n This is indented." + self.assertUnchanged(text) + + # Again, add a blank line. + text = "Hello there.\n\n Boo!\n" + self.assertUnchanged(text) + + def test_dedent_even(self): + # All lines indented by two spaces. 
+ text = " Hello there.\n How are ya?\n Oh good." + expect = "Hello there.\nHow are ya?\nOh good." + self.assertEqual(expect, text.dedent()) + + # Same, with blank lines. + text = " Hello there.\n\n How are ya?\n Oh good.\n" + expect = "Hello there.\n\nHow are ya?\nOh good.\n" + self.assertEqual(expect, text.dedent()) + + # Now indent one of the blank lines. + text = " Hello there.\n \n How are ya?\n Oh good.\n" + expect = "Hello there.\n\nHow are ya?\nOh good.\n" + self.assertEqual(expect, text.dedent()) + + def test_dedent_uneven(self): + # Lines indented unevenly. + text = '''\ + def foo(): + while 1: + return foo + ''' + expect = '''\ +def foo(): + while 1: + return foo +''' + self.assertEqual(expect, text.dedent()) + + # Uneven indentation with a blank line. + text = " Foo\n Bar\n\n Baz\n" + expect = "Foo\n Bar\n\n Baz\n" + self.assertEqual(expect, text.dedent()) + + # Uneven indentation with a whitespace-only line. + text = " Foo\n Bar\n \n Baz\n" + expect = "Foo\n Bar\n\n Baz\n" + self.assertEqual(expect, text.dedent()) + + def test_dedent_declining(self): + # Uneven indentation with declining indent level. + text = " Foo\n Bar\n" # 5 spaces, then 4 + expect = " Foo\nBar\n" + self.assertEqual(expect, text.dedent()) + + # Declining indent level with blank line. + text = " Foo\n\n Bar\n" # 5 spaces, blank, then 4 + expect = " Foo\n\nBar\n" + self.assertEqual(expect, text.dedent()) + + # Declining indent level with whitespace only line. + text = " Foo\n \n Bar\n" # 5 spaces, then 4, then 4 + expect = " Foo\n\nBar\n" + self.assertEqual(expect, text.dedent()) + + # dedent() should not mangle internal tabs + def test_dedent_preserve_internal_tabs(self): + text = " hello\tthere\n how are\tyou?" + expect = "hello\tthere\nhow are\tyou?" + self.assertEqual(expect, text.dedent()) + + # make sure that it preserves tabs when it's not making any + # changes at all + self.assertEqual(expect, expect.dedent()) + + # dedent() should not mangle tabs in the margin (i.e. 
+ # tabs and spaces both count as margin, but are *not* + # considered equivalent) + def test_dedent_preserve_margin_tabs(self): + text = " hello there\n\thow are you?" + self.assertUnchanged(text) + + # same effect even if we have 8 spaces + text = " hello there\n\thow are you?" + self.assertUnchanged(text) + + # dedent() only removes whitespace that can be uniformly removed! + text = "\thello there\n\thow are you?" + expect = "hello there\nhow are you?" + self.assertEqual(expect, text.dedent()) + + text = " \thello there\n \thow are you?" + self.assertEqual(expect, text.dedent()) + + text = " \t hello there\n \t how are you?" + self.assertEqual(expect, text.dedent()) + + text = " \thello there\n \t how are you?" + expect = "hello there\n how are you?" + self.assertEqual(expect, text.dedent()) + + # test margin is smaller than smallest indent + text = " \thello there\n \thow are you?\n \tI'm fine, thanks" + expect = " \thello there\n \thow are you?\n\tI'm fine, thanks" + self.assertEqual(expect, text.dedent()) + class CAPITest(unittest.TestCase): diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 8103f347452d35..f69a328ddea199 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -426,40 +426,7 @@ def dedent(text): """ # Look for the longest leading string of spaces and tabs common to # all lines. - margin = None - text = _whitespace_only_re.sub('', text) - indents = _leading_whitespace_re.findall(text) - for indent in indents: - if margin is None: - margin = indent - - # Current line more deeply indented than previous winner: - # no change (previous winner is still on top). - elif indent.startswith(margin): - pass - - # Current line consistent with and no deeper than previous winner: - # it's the new winner. - elif margin.startswith(indent): - margin = indent - - # Find the largest common whitespace between current line and previous - # winner. 
- else: - for i, (x, y) in enumerate(zip(margin, indent)): - if x != y: - margin = margin[:i] - break - - # sanity check (testing/debugging only) - if 0 and margin: - for line in text.split("\n"): - assert not line or line.startswith(margin), \ - "line = %r, margin = %r" % (line, margin) - - if margin: - text = re.sub(r'(?m)^' + margin, '', text) - return text + return text.dedent() def indent(text, prefix, predicate=None): diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 647507dea61a48..c004e6acea281c 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -1232,4 +1232,21 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return unicode_sizeof_impl(self); } -/*[clinic end generated code: output=d1541724cb4a0070 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(unicode_dedent__doc__, +"dedent($self, /)\n" +"--\n" +"\n"); + +#define UNICODE_DEDENT_METHODDEF \ + {"dedent", (PyCFunction)unicode_dedent, METH_NOARGS, unicode_dedent__doc__}, + +static PyObject * +unicode_dedent_impl(PyObject *self); + +static PyObject * +unicode_dedent(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return unicode_dedent_impl(self); +} +/*[clinic end generated code: output=631704de60a08efa input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eaba5836cb1c71..986971e72e981d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -227,6 +227,7 @@ static PyObject *unicode_empty = NULL; return unicode_empty; \ } while (0) + static inline void unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value, Py_ssize_t start, Py_ssize_t length) @@ -13885,6 +13886,153 @@ unicode_sizeof_impl(PyObject *self) return PyLong_FromSsize_t(size); } +/*[clinic input] +str.dedent as unicode_dedent + +[clinic start generated code]*/ + +static PyObject * +unicode_dedent_impl(PyObject *self) +/*[clinic end generated code: output=4d41f65b94304b63 input=032d062ea6d3d9f3]*/ 
+{ + // Look for the longest leading string of spaces and tabs common to + // all lines. + _Py_IDENTIFIER(startswith); + _Py_IDENTIFIER(compile); + _Py_IDENTIFIER(MULTILINE); + _Py_IDENTIFIER(sub); + _Py_IDENTIFIER(findall); + + PyObject *re, *compile, *multiline, *_whitespace_only_re, *_leading_whitespace_re, + *margin, *text, *indents, *iterator, *indent; + + re = compile = multiline = _whitespace_only_re = NULL; + _leading_whitespace_re = margin = text = indents = iterator = indent = NULL; + + re = PyImport_ImportModule("re"); + if (!re) { + goto fail; + } + compile = _PyObject_GetAttrId(re, &PyId_compile); + multiline = _PyObject_GetAttrId(re, &PyId_MULTILINE); + if (!multiline || !compile) { + goto fail; + } + _whitespace_only_re = PyObject_CallFunction(compile, "sO", "^[ \t]+$", multiline); + _leading_whitespace_re = PyObject_CallFunction(compile, "sO", "(^[ \t]*)(?:[^ \t\n])", multiline); + if (!_whitespace_only_re || !_leading_whitespace_re) { + goto fail; + } + + text = _PyObject_CallMethodId(_whitespace_only_re, &PyId_sub, "sO", "", self); + indents = _PyObject_CallMethodId(_leading_whitespace_re, &PyId_findall, "O", self); + iterator = PyObject_GetIter(indents); + if (!text || !indents || !iterator) { + goto fail; + } + + while ((indent = PyIter_Next(iterator)) != NULL) { + if (margin == NULL) { + margin = indent; + Py_INCREF(indent); + } + else { + PyObject *indent_startswith = _PyObject_CallMethodId(indent, &PyId_startswith, "O", margin); + if (!indent_startswith) { + goto fail; + } + if (PyObject_IsTrue(indent_startswith)) { + // Current line more deeply indented than previous winner: + // no change (previous winner is still on top). 
+ Py_DECREF(indent_startswith); + Py_DECREF(indent); + continue; + } + Py_DECREF(indent_startswith); + + PyObject *margin_startswith = _PyObject_CallMethodId(margin, &PyId_startswith, "O", indent); + if (!margin_startswith) { + goto fail; + } + if (PyObject_IsTrue(margin_startswith)) { + // Current line consistent with and no deeper than previous winner: + // it's the new winner. + Py_DECREF(margin_startswith); + Py_XDECREF(margin); + margin = indent; + // No need to incref here, since we should also decref before looping + continue; + } + Py_DECREF(margin_startswith); + + // Find the largest common whitespace between current line and previous + // winner. + PyObject *iter_margin = PyObject_GetIter(margin); + PyObject *iter_indent = PyObject_GetIter(indent); + if (!iter_margin || !iter_indent) { + Py_XDECREF(iter_margin); + goto fail; + } + size_t i = 0; + PyObject *x, *y; + while ((x = PyIter_Next(iter_margin)) && (y = PyIter_Next(iter_indent))) { + int comp = PyObject_RichCompareBool(x, y, Py_NE); + Py_CLEAR(x); + Py_DECREF(y); + if (comp) { + PyObject *oldmargin = margin; + margin = PySequence_GetSlice(oldmargin, 0, i); + Py_DECREF(oldmargin); + break; + } + i++; + } + Py_XDECREF(x); + Py_DECREF(iter_margin); + Py_DECREF(iter_indent); + } + Py_DECREF(indent); + } + + if (margin != NULL && PyObject_IsTrue(margin)) { + PyObject *sub = _PyObject_GetAttrId(re, &PyId_sub); + PyObject *format = PyUnicode_FromFormat("%s%U", "(?m)^", margin); + if (!sub || !format) { + Py_XDECREF(sub); + goto fail; + } + PyObject *result = PyObject_CallFunction(sub, "OsO", format, "", text); + Py_DECREF(sub); + Py_DECREF(format); + Py_DECREF(text); + text = result; + } + + Py_DECREF(re); + Py_DECREF(compile); + Py_DECREF(multiline); + Py_DECREF(_whitespace_only_re); + Py_DECREF(_leading_whitespace_re); + Py_DECREF(indents); + Py_DECREF(iterator); + Py_XDECREF(margin); + + return text; + +fail: + Py_XDECREF(re); + Py_XDECREF(compile); + Py_XDECREF(multiline); + 
Py_XDECREF(_whitespace_only_re); + Py_XDECREF(_leading_whitespace_re); + Py_XDECREF(margin); + Py_XDECREF(text); + Py_XDECREF(indents); + Py_XDECREF(iterator); + Py_XDECREF(indent); + return NULL; +} + static PyObject * unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) { @@ -13942,6 +14090,7 @@ static PyMethodDef unicode_methods[] = { UNICODE___FORMAT___METHODDEF UNICODE_MAKETRANS_METHODDEF UNICODE_SIZEOF_METHODDEF + UNICODE_DEDENT_METHODDEF #if 0 /* These methods are just used for debugging the implementation. */ {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS}, From 64fda8f1ecea90538ca7aaee6e9d2d66424ad9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Mon, 20 May 2019 15:28:01 +0200 Subject: [PATCH 2/9] Peephole str.dedent --- Lib/test/test_peepholer.py | 25 +++++++++++++++++++++++++ Python/peephole.c | 30 ++++++++++++++++++++++++------ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 794d104d5919bd..81c59197721cbc 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -335,6 +335,31 @@ def forloop(): pass self.assertEqual(count_instr_recursively(forloop, 'BUILD_LIST'), 0) + def test_fold_str_dedent(self): + exprs = [ + '"foo".dedent()', + ] + + for e in exprs: + code = compile(e, '', 'single') + self.assertNotInBytecode(code, 'LOAD_METHOD') + self.assertNotInBytecode(code, 'CALL_METHOD') + + # TODO: One constant replace all calls to dedent with the same constant + # expr = '" foo".dedent(); " foo".dedent()' + # code = compile(e, '', 'single') + # self.assertEqual(code.co_consts.count('foo'), 1) + + exprs = [ + # TODO: Multiple calls to dedent are not all removed yet + '"foo".dedent().dedent()', + # Calls to dedent with something else than a string are not removed + '3 .dedent()', + ] + for e in exprs: + code = compile(e, '', 'single') + self.assertInBytecode(code, 'LOAD_METHOD') + self.assertInBytecode(code, 
'CALL_METHOD') class TestBuglets(unittest.TestCase): diff --git a/Python/peephole.c b/Python/peephole.c index cc244aa433ee18..9e7ebb2f48ec83 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -240,6 +240,8 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, // Count runs of consecutive LOAD_CONSTs unsigned int cumlc = 0, lastlc = 0; unsigned int *blocks = NULL; + _Py_IDENTIFIER(dedent); + PyObject *dedent = PyUnicode_FromString("dedent"); /* Bail out if an exception is set */ if (PyErr_Occurred()) @@ -301,12 +303,28 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, "while 1" performance. */ case LOAD_CONST: cumlc = lastlc + 1; - if (nextop != POP_JUMP_IF_FALSE || - !ISBASICBLOCK(blocks, op_start, i + 1) || - !PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i)))) - break; - fill_nops(codestr, op_start, nexti + 1); - cumlc = 0; + if (nextop == POP_JUMP_IF_FALSE && + ISBASICBLOCK(blocks, op_start, i + 1) && + PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i)))) { + + fill_nops(codestr, op_start, nexti + 1); + cumlc = 0; + } + + if (nextop == LOAD_METHOD && + _Py_OPCODE(codestr[nexti+1]) == CALL_METHOD) { + + if (PyUnicode_CheckExact(PyList_GET_ITEM(consts, get_arg(codestr, i))) && + PyObject_RichCompareBool(dedent, PyTuple_GET_ITEM(names, get_arg(codestr, nexti)), Py_EQ)) { + + PyObject *text = PyList_GET_ITEM(consts, get_arg(codestr, i)); + text = _PyObject_CallMethodId(text, &PyId_dedent, NULL); + set_arg(codestr, i, PySequence_Length(consts)); + PyList_Append(consts, text); + + fill_nops(codestr, op_start + 1, nexti + 2); + } + } break; /* Try to fold tuples of constants. 
From 2f22dd152c85c259b06d47ca63f8f5314b82c72a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Mon, 20 May 2019 16:32:22 +0200 Subject: [PATCH 3/9] Add PendingDeprecationWarning --- Lib/test/test___all__.py | 1 + Lib/test/test_configparser.py | 69 ++++++++++----------- Lib/test/test_email/test_email.py | 100 ++++++++++++++---------------- Lib/test/test_logging.py | 13 ++-- Lib/test/test_mailbox.py | 9 ++- Lib/textwrap.py | 4 ++ Lib/unittest/test/test_result.py | 8 +-- 7 files changed, 101 insertions(+), 103 deletions(-) diff --git a/Lib/test/test___all__.py b/Lib/test/test___all__.py index f6e82eb64ab025..075f2f3fe53766 100644 --- a/Lib/test/test___all__.py +++ b/Lib/test/test___all__.py @@ -17,6 +17,7 @@ def check_all(self, modname): names = {} with support.check_warnings( (".* (module|package)", DeprecationWarning), + ("", PendingDeprecationWarning), ("", ResourceWarning), quiet=True): try: diff --git a/Lib/test/test_configparser.py b/Lib/test/test_configparser.py index f16da116a745f3..07e9602965a5fd 100644 --- a/Lib/test/test_configparser.py +++ b/Lib/test/test_configparser.py @@ -3,7 +3,6 @@ import io import os import pathlib -import textwrap import unittest import warnings @@ -329,31 +328,31 @@ def test_basic(self): self.basic_test(cf) if self.strict: with self.assertRaises(configparser.DuplicateOptionError): - cf.read_string(textwrap.dedent("""\ + cf.read_string("""\ [Duplicate Options Here] option {0[0]} with a value option {0[1]} with another value - """.format(self.delimiters))) + """.dedent().format(self.delimiters)) with self.assertRaises(configparser.DuplicateSectionError): - cf.read_string(textwrap.dedent("""\ + cf.read_string("""\ [And Now For Something] completely different {0[0]} True [And Now For Something] the larch {0[1]} 1 - """.format(self.delimiters))) + """.dedent().format(self.delimiters)) else: - cf.read_string(textwrap.dedent("""\ + cf.read_string("""\ [Duplicate Options Here] option {0[0]} with a value option {0[1]} 
with another value - """.format(self.delimiters))) + """.dedent().format(self.delimiters)) - cf.read_string(textwrap.dedent("""\ + cf.read_string("""\ [And Now For Something] completely different {0[0]} True [And Now For Something] the larch {0[1]} 1 - """.format(self.delimiters))) + """.dedent().format(self.delimiters)) def test_basic_from_dict(self): config = { @@ -619,14 +618,14 @@ def test_weird_errors(self): if self.strict: with self.assertRaises(configparser.DuplicateSectionError) as cm: - cf.read_string(textwrap.dedent("""\ + cf.read_string("""\ [Foo] will this be added{equals}True [Bar] what about this{equals}True [Foo] oops{equals}this won't - """.format(equals=self.delimiters[0])), source='') + """.dedent().format(equals=self.delimiters[0]), source='') e = cm.exception self.assertEqual(str(e), "While reading from '' " "[line 5]: section 'Foo' already exists") @@ -867,10 +866,10 @@ def test_invalid_multiline_value(self): if self.allow_no_value: self.skipTest('if no_value is allowed, ParsingError is not raised') - invalid = textwrap.dedent("""\ + invalid = """\ [DEFAULT] test {0} test - invalid""".format(self.delimiters[0]) + invalid""".dedent().format(self.delimiters[0] ) cf = self.newconfig() with self.assertRaises(configparser.ParsingError): @@ -985,7 +984,7 @@ def test_defaults_keyword(self): class ConfigParserTestCaseNoInterpolation(BasicTestCase, unittest.TestCase): config_class = configparser.ConfigParser interpolation = None - ini = textwrap.dedent(""" + ini = """ [numbers] one = 1 two = %(one)s * 2 @@ -993,7 +992,7 @@ class ConfigParserTestCaseNoInterpolation(BasicTestCase, unittest.TestCase): [hexen] sixteen = ${numbers:two} * 8 - """).strip() + """.dedent().strip() def assertMatchesIni(self, cf): self.assertEqual(cf['numbers']['one'], '1') @@ -1170,7 +1169,7 @@ def fromstring(self, string, defaults=None, optionxform=None): return cf def test_extended_interpolation(self): - cf = self.fromstring(textwrap.dedent(""" + cf = self.fromstring(""" 
[common] favourite Beatle = Paul favourite color = green @@ -1192,7 +1191,7 @@ def test_extended_interpolation(self): favourite state of mind = paranoid favourite movie = soylent ${common:favourite color} favourite song = ${favourite color} sabbath - ${favourite state of mind} - """).strip()) + """.dedent().strip()) eq = self.assertEqual eq(cf['common']['favourite Beatle'], 'Paul') @@ -1215,7 +1214,7 @@ def test_extended_interpolation(self): 'black sabbath - paranoid') def test_endless_loop(self): - cf = self.fromstring(textwrap.dedent(""" + cf = self.fromstring(""" [one for you] ping = ${one for me:pong} @@ -1224,7 +1223,7 @@ def test_endless_loop(self): [selfish] me = ${me} - """).strip()) + """.dedent().strip()) with self.assertRaises(configparser.InterpolationDepthError): cf['one for you']['ping'] @@ -1253,7 +1252,7 @@ def test_strange_options(self): self.assertEqual(cm.exception.args[2], '${dollars:${sick}}') #rawval def test_case_sensitivity_basic(self): - ini = textwrap.dedent(""" + ini = """ [common] optionlower = value OptionUpper = Value @@ -1265,7 +1264,7 @@ def test_case_sensitivity_basic(self): [random] foolower = ${common:optionlower} redefined FooUpper = ${Common:OptionUpper} Redefined - """).strip() + """.dedent().strip() cf = self.fromstring(ini) eq = self.assertEqual @@ -1277,7 +1276,7 @@ def test_case_sensitivity_basic(self): eq(cf['random']['FooUpper'], 'A Better Value Redefined') def test_case_sensitivity_conflicts(self): - ini = textwrap.dedent(""" + ini = """ [common] option = value Option = Value @@ -1289,7 +1288,7 @@ def test_case_sensitivity_conflicts(self): [random] foo = ${common:option} redefined Foo = ${Common:Option} Redefined - """).strip() + """.dedent().strip() with self.assertRaises(configparser.DuplicateOptionError): cf = self.fromstring(ini) @@ -1434,7 +1433,7 @@ class CompatibleTestCase(CfgParserTestCaseClass, unittest.TestCase): inline_comment_prefixes = ';' def test_comment_handling(self): - config_string = 
textwrap.dedent("""\ + config_string = """\ [Commented Bar] baz=qwe ; a comment foo: bar # not a comment! @@ -1442,7 +1441,7 @@ def test_comment_handling(self): ; another comment quirk: this;is not a comment ; a space must precede an inline comment - """) + """.dedent() cf = self.fromstring(config_string) self.assertEqual(cf.get('Commented Bar', 'foo'), 'bar # not a comment!') @@ -1506,9 +1505,9 @@ def test_file(self): self.assertEqual(parser["Foo Bar"]["foo"], "newbar") def test_iterable(self): - lines = textwrap.dedent(""" + lines = """ [Foo Bar] - foo=newbar""").strip().split('\n') + foo=newbar""".dedent().strip().split('\n') parser = configparser.ConfigParser() parser.read_file(lines) self.assertIn("Foo Bar", parser) @@ -1527,9 +1526,9 @@ def test_readline_generator(self): def test_source_as_bytes(self): """Issue #18260.""" - lines = textwrap.dedent(""" + lines = """ [badbad] - [badbad]""").strip().split('\n') + [badbad]""".dedent().strip().split('\n') parser = configparser.ConfigParser() with self.assertRaises(configparser.DuplicateSectionError) as dse: parser.read_file(lines, source=b"badbad") @@ -1538,10 +1537,10 @@ def test_source_as_bytes(self): "While reading from b'badbad' [line 2]: section 'badbad' " "already exists" ) - lines = textwrap.dedent(""" + lines = """ [badbad] bad = bad - bad = bad""").strip().split('\n') + bad = bad""".dedent().strip().split('\n') parser = configparser.ConfigParser() with self.assertRaises(configparser.DuplicateOptionError) as dse: parser.read_file(lines, source=b"badbad") @@ -1550,9 +1549,9 @@ def test_source_as_bytes(self): "While reading from b'badbad' [line 3]: option 'bad' in section " "'badbad' already exists" ) - lines = textwrap.dedent(""" + lines = """ [badbad] - = bad""").strip().split('\n') + = bad""".dedent().strip().split('\n') parser = configparser.ConfigParser() with self.assertRaises(configparser.ParsingError) as dse: parser.read_file(lines, source=b"badbad") @@ -1560,9 +1559,9 @@ def 
test_source_as_bytes(self): str(dse.exception), "Source contains parsing errors: b'badbad'\n\t[line 2]: '= bad'" ) - lines = textwrap.dedent(""" + lines = """ [badbad - bad = bad""").strip().split('\n') + bad = bad""".dedent().strip().split('\n') parser = configparser.ConfigParser() with self.assertRaises(configparser.MissingSectionHeaderError) as dse: parser.read_file(lines, source=b"badbad") diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 621754cf753daa..fb6b2b3109b53f 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -6,7 +6,6 @@ import time import base64 import unittest -import textwrap from io import StringIO, BytesIO from itertools import chain @@ -736,25 +735,25 @@ def test_unicode_header_defaults_to_utf8_encoding(self): # Issue 14291 m = MIMEText('abc\n') m['Subject'] = 'É test' - self.assertEqual(str(m),textwrap.dedent("""\ + self.assertEqual(str(m),"""\ Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: =?utf-8?q?=C3=89_test?= abc - """)) + """.dedent()) def test_unicode_body_defaults_to_utf8_encoding(self): # Issue 14291 m = MIMEText('É testabc\n') - self.assertEqual(str(m),textwrap.dedent("""\ + self.assertEqual(str(m),"""\ Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: base64 w4kgdGVzdGFiYwo= - """)) + """.dedent()) # Test the email.encoders module @@ -794,34 +793,34 @@ def test_encode7or8bit(self): eq = self.assertEqual msg = MIMEText('文\n', _charset='euc-jp') eq(msg['content-transfer-encoding'], '7bit') - eq(msg.as_string(), textwrap.dedent("""\ + eq(msg.as_string(), """\ MIME-Version: 1.0 Content-Type: text/plain; charset="iso-2022-jp" Content-Transfer-Encoding: 7bit \x1b$BJ8\x1b(B - """)) + """.dedent()) def test_qp_encode_latin1(self): msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') - self.assertEqual(str(msg), textwrap.dedent("""\ + self.assertEqual(str(msg), """\ 
MIME-Version: 1.0 Content-Type: text/text; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable =E1=F6 - """)) + """.dedent()) def test_qp_encode_non_latin1(self): # Issue 16948 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') - self.assertEqual(str(msg), textwrap.dedent("""\ + self.assertEqual(str(msg), """\ MIME-Version: 1.0 Content-Type: text/text; charset="iso-8859-2" Content-Transfer-Encoding: quoted-printable =BF - """)) + """.dedent()) # Test long header wrapping @@ -1383,17 +1382,17 @@ def test_long_lines_with_different_header(self): """) def test_long_rfc2047_header_with_embedded_fws(self): - h = Header(textwrap.dedent("""\ + h = Header("""\ We're going to pretend this header is in a non-ascii character set \tto see if line wrapping with encoded words and embedded - folding white space works"""), + folding white space works""".dedent(), charset='utf-8', header_name='Test') - self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ + self.assertEqual(h.encode()+'\n', """\ =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= =?utf-8?q?cter_set?= =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= - =?utf-8?q?_folding_white_space_works?=""")+'\n') + =?utf-8?q?_folding_white_space_works?=""".dedent()+'\n') @@ -1432,7 +1431,7 @@ def test_dont_mangle_from(self): def test_mangle_from_in_preamble_and_epilog(self): s = StringIO() g = Generator(s, mangle_from_=True) - msg = email.message_from_string(textwrap.dedent("""\ + msg = email.message_from_string("""\ From: foo@bar.com Mime-Version: 1.0 Content-Type: multipart/mixed; boundary=XXX @@ -1447,19 +1446,19 @@ def test_mangle_from_in_preamble_and_epilog(self): --XXX-- From somewhere unknowable - """)) + """.dedent()) g.flatten(msg) self.assertEqual(len([1 for x in s.getvalue().split('\n') if x.startswith('>From ')]), 2) def test_mangled_from_with_bad_bytes(self): - source = textwrap.dedent("""\ + source = """\ Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 8bit From: aaa@bbb.org - """).encode('utf-8') + """.dedent().encode('utf-8') msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n') b = BytesIO() g = BytesGenerator(b, mangle_from_=True) @@ -2114,7 +2113,7 @@ def test_multipart_no_boundary(self): self.assertIsInstance(msg.defects[1], errors.MultipartInvariantViolationDefect) - multipart_msg = textwrap.dedent("""\ + multipart_msg = """\ Date: Wed, 14 Nov 2007 12:56:23 GMT From: foo@bar.invalid To: foo@bar.invalid @@ -2135,7 +2134,7 @@ def test_multipart_no_boundary(self): YWJj --===============3344438784458119861==-- - """) + """.dedent() # test_defect_handling def test_multipart_invalid_cte(self): @@ -3742,7 +3741,7 @@ class Test8BitBytesHandling(TestEmailBase): # but it does allow us to parse and preserve them, and to decode body # parts that use an 8bit CTE. - bodytest_msg = textwrap.dedent("""\ + bodytest_msg = """\ From: foo@bar.com To: baz Mime-Version: 1.0 @@ -3750,7 +3749,7 @@ class Test8BitBytesHandling(TestEmailBase): Content-Transfer-Encoding: {cte} {bodyline} - """) + """.dedent() def test_known_8bit_CTE(self): m = self.bodytest_msg.format(charset='utf-8', @@ -3849,14 +3848,14 @@ def test_get_8bit_header(self): def test_print_8bit_headers(self): msg = email.message_from_bytes(self.headertest_msg) self.assertEqual(str(msg), - textwrap.dedent("""\ + """\ From: {} To: {} Subject: {} From: {} Yes, they are flying. 
- """).format(*[expected[1] for (_, expected) in + """.dedent().format(*[expected[1] for (_, expected) in self.headertest_headers])) def test_values_with_8bit_headers(self): @@ -3889,9 +3888,9 @@ def test_get_all_with_8bit_headers(self): 'g\uFFFD\uFFFDst']) def test_get_content_type_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ + msg = email.message_from_bytes("""\ Content-Type: text/pl\xA7in; charset=utf-8 - """).encode('latin-1')) + """.dedent().encode('latin-1')) self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") self.assertEqual(msg.get_content_maintype(), "text") self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") @@ -3908,41 +3907,38 @@ def test_get_params_with_8bit(self): # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value def test_get_rfc2231_params_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ + msg = email.message_from_bytes("""\ Content-Type: text/plain; charset=us-ascii; - title*=us-ascii'en'This%20is%20not%20f\xa7n""" - ).encode('latin-1')) + title*=us-ascii'en'This%20is%20not%20f\xa7n""".dedent().encode('latin-1')) self.assertEqual(msg.get_param('title'), ('us-ascii', 'en', 'This is not f\uFFFDn')) def test_set_rfc2231_params_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ + msg = email.message_from_bytes("""\ Content-Type: text/plain; charset=us-ascii; - title*=us-ascii'en'This%20is%20not%20f\xa7n""" - ).encode('latin-1')) + title*=us-ascii'en'This%20is%20not%20f\xa7n""".dedent().encode('latin-1')) msg.set_param('title', 'test') self.assertEqual(msg.get_param('title'), 'test') def test_del_rfc2231_params_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ + msg = email.message_from_bytes("""\ Content-Type: text/plain; charset=us-ascii; - title*=us-ascii'en'This%20is%20not%20f\xa7n""" - ).encode('latin-1')) + title*=us-ascii'en'This%20is%20not%20f\xa7n""".dedent().encode('latin-1')) msg.del_param('title') 
self.assertEqual(msg.get_param('title'), None) self.assertEqual(msg.get_content_maintype(), 'text') def test_get_payload_with_8bit_cte_header(self): - msg = email.message_from_bytes(textwrap.dedent("""\ + msg = email.message_from_bytes("""\ Content-Transfer-Encoding: b\xa7se64 Content-Type: text/plain; charset=latin-1 payload - """).encode('latin-1')) + """.dedent().encode('latin-1')) self.assertEqual(msg.get_payload(), 'payload\n') self.assertEqual(msg.get_payload(decode=True), b'payload\n') - non_latin_bin_msg = textwrap.dedent("""\ + non_latin_bin_msg = """\ From: foo@bar.com To: báz Subject: Maintenant je vous présente mon collègue, le pouf célèbre @@ -3952,7 +3948,7 @@ def test_get_payload_with_8bit_cte_header(self): Content-Transfer-Encoding: 8bit Да, они летят. - """).encode('utf-8') + """.dedent().encode('utf-8') def test_bytes_generator(self): msg = email.message_from_bytes(self.non_latin_bin_msg) @@ -3967,7 +3963,7 @@ def test_bytes_generator_handles_None_body(self): email.generator.BytesGenerator(out).flatten(msg) self.assertEqual(out.getvalue(), b"\n") - non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ + non_latin_bin_msg_as7bit_wrapped = """\ From: foo@bar.com To: =?unknown-8bit?q?b=C3=A1z?= Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= @@ -3978,7 +3974,7 @@ def test_bytes_generator_handles_None_body(self): Content-Transfer-Encoding: base64 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== - """) + """.dedent() def test_generator_handles_8bit(self): msg = email.message_from_bytes(self.non_latin_bin_msg) @@ -4022,7 +4018,7 @@ def test_message_from_binary_file(self): m = email.parser.BytesParser().parse(testfile) self.assertEqual(str(m), self.non_latin_bin_msg_as7bit) - latin_bin_msg = textwrap.dedent("""\ + latin_bin_msg = """\ From: foo@bar.com To: Dinsdale Subject: Nudge nudge, wink, wink @@ -4031,9 +4027,9 @@ def test_message_from_binary_file(self): Content-Transfer-Encoding: 8bit oh là là, know what I mean, know what I 
mean? - """).encode('latin-1') + """.dedent().encode('latin-1') - latin_bin_msg_as7bit = textwrap.dedent("""\ + latin_bin_msg_as7bit = """\ From: foo@bar.com To: Dinsdale Subject: Nudge nudge, wink, wink @@ -4042,7 +4038,7 @@ def test_message_from_binary_file(self): Content-Transfer-Encoding: quoted-printable oh l=E0 l=E0, know what I mean, know what I mean? - """) + """.dedent() def test_string_generator_reencodes_to_quopri_when_appropriate(self): m = email.message_from_bytes(self.latin_bin_msg) @@ -4076,7 +4072,7 @@ def test_crlf_flatten(self): def test_8bit_multipart(self): # Issue 11605 - source = textwrap.dedent("""\ + source = """\ Date: Fri, 18 Mar 2011 17:15:43 +0100 To: foo@example.com From: foodwatch-Newsletter @@ -4113,7 +4109,7 @@ def test_8bit_multipart(self): --b1_76a486bee62b0d200f33dc2ca08220ad-- - """).encode('utf-8') + """.dedent().encode('utf-8') msg = email.message_from_bytes(source) s = BytesIO() g = email.generator.BytesGenerator(s) @@ -5066,13 +5062,13 @@ def test_rfc2231_get_content_charset(self): # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes def test_rfc2231_parse_rfc_quoting(self): - m = textwrap.dedent('''\ + m = '''\ Content-Disposition: inline; \tfilename*0*=''This%20is%20even%20more%20; \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; \tfilename*2="is it not.pdf" - ''') + '''.dedent() msg = email.message_from_string(m) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf') @@ -5080,13 +5076,13 @@ def test_rfc2231_parse_rfc_quoting(self): # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes def test_rfc2231_parse_extra_quoting(self): - m = textwrap.dedent('''\ + m = '''\ Content-Disposition: inline; \tfilename*0*="''This%20is%20even%20more%20"; \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; \tfilename*2="is it not.pdf" - ''') + '''.dedent() msg = email.message_from_string(m) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf') diff --git 
a/Lib/test/test_logging.py b/Lib/test/test_logging.py index b884753ad39785..e6f99583eb8982 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -43,7 +43,6 @@ import tempfile from test.support.script_helper import assert_python_ok, assert_python_failure from test import support -import textwrap import threading import time import unittest @@ -1436,7 +1435,7 @@ class ConfigFileTest(BaseTest): """ def apply_config(self, conf, **kwargs): - file = io.StringIO(textwrap.dedent(conf)) + file = io.StringIO(conf.dedent()) logging.config.fileConfig(file, **kwargs) def test_config0_ok(self): @@ -1457,7 +1456,7 @@ def test_config0_ok(self): def test_config0_using_cp_ok(self): # A simple config file which overrides the default settings. with support.captured_stdout() as output: - file = io.StringIO(textwrap.dedent(self.config0)) + file = io.StringIO(self.config0.dedent()) cp = configparser.ConfigParser() cp.read_file(file) logging.config.fileConfig(cp) @@ -1593,7 +1592,7 @@ def test_logger_disabling(self): def test_defaults_do_no_interpolation(self): """bpo-33802 defaults should not get interpolated""" - ini = textwrap.dedent(""" + ini = """ [formatters] keys=default @@ -1612,7 +1611,7 @@ def test_defaults_do_no_interpolation(self): [logger_root] formatter=default handlers=console - """).strip() + """.dedent().strip() fd, fn = tempfile.mkstemp(prefix='test_logging_', suffix='.ini') try: os.write(fd, ini.encode('ascii')) @@ -3227,7 +3226,7 @@ def test_listen_config_10_ok(self): def test_listen_config_1_ok(self): with support.captured_stdout() as output: - self.setup_via_listener(textwrap.dedent(ConfigFileTest.config1)) + self.setup_via_listener(ConfigFileTest.config1.dedent()) logger = logging.getLogger("compiler.parser") # Both will output a message logger.info(self.next_message()) @@ -3248,7 +3247,7 @@ def verify_reverse(stuff): return stuff[::-1] logger = logging.getLogger("compiler.parser") - to_send = textwrap.dedent(ConfigFileTest.config1) + to_send = 
ConfigFileTest.config1.dedent() # First, specify a verification function that will fail. # We expect to see no output, since our configuration # never took effect. diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py index 0995b1e386d00b..a843d069d3480e 100644 --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -10,7 +10,6 @@ import tempfile from test import support import unittest -import textwrap import mailbox import glob @@ -82,12 +81,12 @@ def test_add(self): for i in (1, 2, 3, 4, 5, 6): self._check_sample(self._box[keys[i]]) - _nonascii_msg = textwrap.dedent("""\ + _nonascii_msg = """\ From: foo Subject: Falinaptár házhozszállítással. Már rendeltél? 0 - """) + """.dedent() def test_add_invalid_8bit_bytes_header(self): key = self._box.add(self._nonascii_msg.encode('latin-1')) @@ -119,7 +118,7 @@ def raiser(*args, **kw): self._box.close() self.assertMailboxEmpty() - _non_latin_bin_msg = textwrap.dedent("""\ + _non_latin_bin_msg = """\ From: foo@bar.com To: báz Subject: Maintenant je vous présente mon collègue, le pouf célèbre @@ -129,7 +128,7 @@ def raiser(*args, **kw): Content-Transfer-Encoding: 8bit Да, они летят. - """).encode('utf-8') + """.dedent().encode('utf-8') def test_add_8bit_body(self): key = self._box.add(self._non_latin_bin_msg) diff --git a/Lib/textwrap.py b/Lib/textwrap.py index f69a328ddea199..f5d404f02de54d 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -6,6 +6,7 @@ # Written by Greg Ward import re +import warnings __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten'] @@ -426,6 +427,9 @@ def dedent(text): """ # Look for the longest leading string of spaces and tabs common to # all lines. 
+ + warnings.warn("textwrap.dedent is deprecated, use the str.dedent method instead", + PendingDeprecationWarning) return text.dedent() diff --git a/Lib/unittest/test/test_result.py b/Lib/unittest/test/test_result.py index 0ffb87b40256cf..762a009dd35af6 100644 --- a/Lib/unittest/test/test_result.py +++ b/Lib/unittest/test/test_result.py @@ -615,16 +615,16 @@ def testBufferOutputAddErrorOrFailure(self): self.assertEqual(len(result_list), 1) test, message = result_list[0] - expectedOutMessage = textwrap.dedent(""" + expectedOutMessage = """ Stdout: foo - """) + """.dedent() expectedErrMessage = '' if include_error: - expectedErrMessage = textwrap.dedent(""" + expectedErrMessage = """ Stderr: bar - """) + """.dedent() expectedFullMessage = 'A traceback%s%s' % (expectedOutMessage, expectedErrMessage) From 3c1449c2bb2049ef989bb197c3469ec11b8948fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Mon, 20 May 2019 18:13:32 +0200 Subject: [PATCH 4/9] Move str.dedent optimization in the AST --- Lib/test/test_peepholer.py | 25 ------------------------- Python/ast_opt.c | 33 +++++++++++++++++++++++++++++++++ Python/peephole.c | 30 ++++++------------------------ 3 files changed, 39 insertions(+), 49 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 81c59197721cbc..794d104d5919bd 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -335,31 +335,6 @@ def forloop(): pass self.assertEqual(count_instr_recursively(forloop, 'BUILD_LIST'), 0) - def test_fold_str_dedent(self): - exprs = [ - '"foo".dedent()', - ] - - for e in exprs: - code = compile(e, '', 'single') - self.assertNotInBytecode(code, 'LOAD_METHOD') - self.assertNotInBytecode(code, 'CALL_METHOD') - - # TODO: One constant replace all calls to dedent with the same constant - # expr = '" foo".dedent(); " foo".dedent()' - # code = compile(e, '', 'single') - # self.assertEqual(code.co_consts.count('foo'), 1) - - exprs = [ - # TODO: Multiple calls to 
dedent are not all removed yet - '"foo".dedent().dedent()', - # Calls to dedent with something else than a string are not removed - '3 .dedent()', - ] - for e in exprs: - code = compile(e, '', 'single') - self.assertInBytecode(code, 'LOAD_METHOD') - self.assertInBytecode(code, 'CALL_METHOD') class TestBuglets(unittest.TestCase): diff --git a/Python/ast_opt.c b/Python/ast_opt.c index 96c766fc0957d4..13be95848d4f23 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -471,6 +471,38 @@ astfold_mod(mod_ty node_, PyArena *ctx_, int optimize_) return 1; } +static int +astfold_dedent(expr_ty node_, PyArena *arena, int optimize_) +{ + _Py_IDENTIFIER(dedent); + PyObject *dedent = PyUnicode_FromString("dedent"); + if (!dedent) { + return 0; + } + if (asdl_seq_LEN(node_->v.Call.args) != 0 || + asdl_seq_LEN(node_->v.Call.keywords) != 0 || + node_->v.Call.func->kind != Attribute_kind) { + return 1; + } + expr_ty attr = node_->v.Call.func; + if (attr->v.Attribute.value->kind != Constant_kind || + attr->v.Attribute.value->v.Constant.kind != NULL || + !PyUnicode_CheckExact(attr->v.Attribute.value->v.Constant.value) || + !PyObject_RichCompareBool(attr->v.Attribute.attr, dedent, Py_EQ)) { + + return 1; + } + + PyObject *value = attr->v.Attribute.value->v.Constant.value; + PyObject *newval = _PyObject_CallMethodId(value, &PyId_dedent, "", NULL); + if (!newval) { + Py_DECREF(dedent); + return 0; + } + + return make_const(node_, newval, arena); +} + static int astfold_expr(expr_ty node_, PyArena *ctx_, int optimize_) { @@ -538,6 +570,7 @@ astfold_expr(expr_ty node_, PyArena *ctx_, int optimize_) CALL(astfold_expr, expr_ty, node_->v.Call.func); CALL_SEQ(astfold_expr, expr_ty, node_->v.Call.args); CALL_SEQ(astfold_keyword, keyword_ty, node_->v.Call.keywords); + CALL(astfold_dedent, expr_ty, node_); break; case FormattedValue_kind: CALL(astfold_expr, expr_ty, node_->v.FormattedValue.value); diff --git a/Python/peephole.c b/Python/peephole.c index 9e7ebb2f48ec83..cc244aa433ee18 100644 
--- a/Python/peephole.c +++ b/Python/peephole.c @@ -240,8 +240,6 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, // Count runs of consecutive LOAD_CONSTs unsigned int cumlc = 0, lastlc = 0; unsigned int *blocks = NULL; - _Py_IDENTIFIER(dedent); - PyObject *dedent = PyUnicode_FromString("dedent"); /* Bail out if an exception is set */ if (PyErr_Occurred()) @@ -303,28 +301,12 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, "while 1" performance. */ case LOAD_CONST: cumlc = lastlc + 1; - if (nextop == POP_JUMP_IF_FALSE && - ISBASICBLOCK(blocks, op_start, i + 1) && - PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i)))) { - - fill_nops(codestr, op_start, nexti + 1); - cumlc = 0; - } - - if (nextop == LOAD_METHOD && - _Py_OPCODE(codestr[nexti+1]) == CALL_METHOD) { - - if (PyUnicode_CheckExact(PyList_GET_ITEM(consts, get_arg(codestr, i))) && - PyObject_RichCompareBool(dedent, PyTuple_GET_ITEM(names, get_arg(codestr, nexti)), Py_EQ)) { - - PyObject *text = PyList_GET_ITEM(consts, get_arg(codestr, i)); - text = _PyObject_CallMethodId(text, &PyId_dedent, NULL); - set_arg(codestr, i, PySequence_Length(consts)); - PyList_Append(consts, text); - - fill_nops(codestr, op_start + 1, nexti + 2); - } - } + if (nextop != POP_JUMP_IF_FALSE || + !ISBASICBLOCK(blocks, op_start, i + 1) || + !PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i)))) + break; + fill_nops(codestr, op_start, nexti + 1); + cumlc = 0; break; /* Try to fold tuples of constants. 
From 69eab29add2a9c3671ce9c3aa25196bda9d66757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Mon, 20 May 2019 18:30:22 +0200 Subject: [PATCH 5/9] Add missing DECREF --- Python/ast_opt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/ast_opt.c b/Python/ast_opt.c index 13be95848d4f23..b1d4fc19d8d7fe 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -500,6 +500,7 @@ astfold_dedent(expr_ty node_, PyArena *arena, int optimize_) return 0; } + Py_DECREF(dedent); return make_const(node_, newval, arena); } From 514571b77bf26389fc726eaf3ea261f438c132d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Tue, 21 May 2019 12:02:02 +0200 Subject: [PATCH 6/9] Fix review comments --- Lib/test/test_configparser.py | 3 +- Lib/test/test_unicode.py | 14 ++-- Lib/textwrap.py | 52 ++++++++++-- Objects/unicodeobject.c | 146 +++------------------------------- Python/ast_opt.c | 10 +-- 5 files changed, 71 insertions(+), 154 deletions(-) diff --git a/Lib/test/test_configparser.py b/Lib/test/test_configparser.py index 07e9602965a5fd..dea5b85896e83c 100644 --- a/Lib/test/test_configparser.py +++ b/Lib/test/test_configparser.py @@ -869,8 +869,7 @@ def test_invalid_multiline_value(self): invalid = """\ [DEFAULT] test {0} test - invalid""".dedent().format(self.delimiters[0] - ) + invalid""".dedent().format(self.delimiters[0]) cf = self.newconfig() with self.assertRaises(configparser.ParsingError): cf.read_string(invalid) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 4347e45fe2d882..a413a8897007a6 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2436,26 +2436,26 @@ def test_free_after_iterating(self): support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str) - def assertUnchanged(self, text): + def dedent_and_assert_unchanged(self, text): """assert that dedent() has no effect on 'text'""" self.assertEqual(text, text.dedent()) def 
test_dedent_nomargin(self): # No lines indented. text = "Hello there.\nHow are you?\nOh good, I'm glad." - self.assertUnchanged(text) + self.dedent_and_assert_unchanged(text) # Similar, with a blank line. text = "Hello there.\n\nBoo!" - self.assertUnchanged(text) + self.dedent_and_assert_unchanged(text) # Some lines indented, but overall margin is still zero. text = "Hello there.\n This is indented." - self.assertUnchanged(text) + self.dedent_and_assert_unchanged(text) # Again, add a blank line. text = "Hello there.\n\n Boo!\n" - self.assertUnchanged(text) + self.dedent_and_assert_unchanged(text) def test_dedent_even(self): # All lines indented by two spaces. @@ -2528,11 +2528,11 @@ def test_dedent_preserve_internal_tabs(self): # considered equivalent) def test_dedent_preserve_margin_tabs(self): text = " hello there\n\thow are you?" - self.assertUnchanged(text) + self.dedent_and_assert_unchanged(text) # same effect even if we have 8 spaces text = " hello there\n\thow are you?" - self.assertUnchanged(text) + self.dedent_and_assert_unchanged(text) # dedent() only removes whitespace that can be uniformly removed! text = "\thello there\n\thow are you?" diff --git a/Lib/textwrap.py b/Lib/textwrap.py index f5d404f02de54d..ae13578bac015f 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -412,6 +412,46 @@ def shorten(text, width, **kwargs): _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) + +def _dedent(text): + # Look for the longest leading string of spaces and tabs common to + # all lines. + margin = None + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + for indent in indents: + if margin is None: + margin = indent + + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). 
+ elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent + + # Find the largest common whitespace between current line and previous + # winner. + else: + for i, (x, y) in enumerate(zip(margin, indent)): + if x != y: + margin = margin[:i] + break + + # sanity check (testing/debugging only) + if 0 and margin: + for line in text.split("\n"): + assert not line or line.startswith(margin), \ + "line = %r, margin = %r" % (line, margin) + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text + + def dedent(text): """Remove any common leading whitespace from every line in `text`. @@ -425,12 +465,12 @@ def dedent(text): new in Python 2.5; older versions of this module incorrectly expanded tabs before searching for common leading whitespace.) """ - # Look for the longest leading string of spaces and tabs common to - # all lines. - - warnings.warn("textwrap.dedent is deprecated, use the str.dedent method instead", - PendingDeprecationWarning) - return text.dedent() + warnings.warn( + "textwrap.dedent is pending deprecation, use the str.dedent method instead", + PendingDeprecationWarning, + stacklevel=2 + ) + return _dedent(text) def indent(text, prefix, predicate=None): diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 986971e72e981d..d3cfee91af639b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13895,142 +13895,20 @@ static PyObject * unicode_dedent_impl(PyObject *self) /*[clinic end generated code: output=4d41f65b94304b63 input=032d062ea6d3d9f3]*/ { - // Look for the longest leading string of spaces and tabs common to - // all lines. 
- _Py_IDENTIFIER(startswith); - _Py_IDENTIFIER(compile); - _Py_IDENTIFIER(MULTILINE); - _Py_IDENTIFIER(sub); - _Py_IDENTIFIER(findall); - - PyObject *re, *compile, *multiline, *_whitespace_only_re, *_leading_whitespace_re, - *margin, *text, *indents, *iterator, *indent; - - re = compile = multiline = _whitespace_only_re = NULL; - _leading_whitespace_re = margin = text = indents = iterator = indent = NULL; - - re = PyImport_ImportModule("re"); - if (!re) { - goto fail; - } - compile = _PyObject_GetAttrId(re, &PyId_compile); - multiline = _PyObject_GetAttrId(re, &PyId_MULTILINE); - if (!multiline || !compile) { - goto fail; - } - _whitespace_only_re = PyObject_CallFunction(compile, "sO", "^[ \t]+$", multiline); - _leading_whitespace_re = PyObject_CallFunction(compile, "sO", "(^[ \t]*)(?:[^ \t\n])", multiline); - if (!_whitespace_only_re || !_leading_whitespace_re) { - goto fail; + _Py_IDENTIFIER(_dedent); + PyObject *textwrap = PyImport_ImportModule("textwrap"); + if (!textwrap) { + return NULL; } - - text = _PyObject_CallMethodId(_whitespace_only_re, &PyId_sub, "sO", "", self); - indents = _PyObject_CallMethodId(_leading_whitespace_re, &PyId_findall, "O", self); - iterator = PyObject_GetIter(indents); - if (!text || !indents || !iterator) { - goto fail; + PyObject *dedent = _PyObject_GetAttrId(textwrap, &PyId__dedent); + if (!dedent) { + Py_DECREF(textwrap); + return NULL; } - - while ((indent = PyIter_Next(iterator)) != NULL) { - if (margin == NULL) { - margin = indent; - Py_INCREF(indent); - } - else { - PyObject *indent_startswith = _PyObject_CallMethodId(indent, &PyId_startswith, "O", margin); - if (!indent_startswith) { - goto fail; - } - if (PyObject_IsTrue(indent_startswith)) { - // Current line more deeply indented than previous winner: - // no change (previous winner is still on top). 
- Py_DECREF(indent_startswith); - Py_DECREF(indent); - continue; - } - Py_DECREF(indent_startswith); - - PyObject *margin_startswith = _PyObject_CallMethodId(margin, &PyId_startswith, "O", indent); - if (!margin_startswith) { - goto fail; - } - if (PyObject_IsTrue(margin_startswith)) { - // Current line consistent with and no deeper than previous winner: - // it's the new winner. - Py_DECREF(margin_startswith); - Py_XDECREF(margin); - margin = indent; - // No need to incref here, since we should also decref before looping - continue; - } - Py_DECREF(margin_startswith); - - // Find the largest common whitespace between current line and previous - // winner. - PyObject *iter_margin = PyObject_GetIter(margin); - PyObject *iter_indent = PyObject_GetIter(indent); - if (!iter_margin || !iter_indent) { - Py_XDECREF(iter_margin); - goto fail; - } - size_t i = 0; - PyObject *x, *y; - while ((x = PyIter_Next(iter_margin)) && (y = PyIter_Next(iter_indent))) { - int comp = PyObject_RichCompareBool(x, y, Py_NE); - Py_CLEAR(x); - Py_DECREF(y); - if (comp) { - PyObject *oldmargin = margin; - margin = PySequence_GetSlice(oldmargin, 0, i); - Py_DECREF(oldmargin); - break; - } - i++; - } - Py_XDECREF(x); - Py_DECREF(iter_margin); - Py_DECREF(iter_indent); - } - Py_DECREF(indent); - } - - if (margin != NULL && PyObject_IsTrue(margin)) { - PyObject *sub = _PyObject_GetAttrId(re, &PyId_sub); - PyObject *format = PyUnicode_FromFormat("%s%U", "(?m)^", margin); - if (!sub || !format) { - Py_XDECREF(sub); - goto fail; - } - PyObject *result = PyObject_CallFunction(sub, "OsO", format, "", text); - Py_DECREF(sub); - Py_DECREF(format); - Py_DECREF(text); - text = result; - } - - Py_DECREF(re); - Py_DECREF(compile); - Py_DECREF(multiline); - Py_DECREF(_whitespace_only_re); - Py_DECREF(_leading_whitespace_re); - Py_DECREF(indents); - Py_DECREF(iterator); - Py_XDECREF(margin); - - return text; - -fail: - Py_XDECREF(re); - Py_XDECREF(compile); - Py_XDECREF(multiline); - 
Py_XDECREF(_whitespace_only_re); - Py_XDECREF(_leading_whitespace_re); - Py_XDECREF(margin); - Py_XDECREF(text); - Py_XDECREF(indents); - Py_XDECREF(iterator); - Py_XDECREF(indent); - return NULL; + PyObject *result = PyObject_CallFunction(dedent, "O", self); + Py_DECREF(textwrap); + Py_DECREF(dedent); + return result; } static PyObject * diff --git a/Python/ast_opt.c b/Python/ast_opt.c index b1d4fc19d8d7fe..d88424aee43713 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -474,16 +474,16 @@ astfold_mod(mod_ty node_, PyArena *ctx_, int optimize_) static int astfold_dedent(expr_ty node_, PyArena *arena, int optimize_) { - _Py_IDENTIFIER(dedent); - PyObject *dedent = PyUnicode_FromString("dedent"); - if (!dedent) { - return 0; - } if (asdl_seq_LEN(node_->v.Call.args) != 0 || asdl_seq_LEN(node_->v.Call.keywords) != 0 || node_->v.Call.func->kind != Attribute_kind) { return 1; } + _Py_IDENTIFIER(dedent); + PyObject *dedent = PyUnicode_FromString("dedent"); + if (!dedent) { + return 0; + } expr_ty attr = node_->v.Call.func; if (attr->v.Attribute.value->kind != Constant_kind || attr->v.Attribute.value->v.Constant.kind != NULL || From c984b7ad89e4b210b773b339649a56cee503438f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Tue, 21 May 2019 12:05:06 +0200 Subject: [PATCH 7/9] Document str.dedent() --- Doc/library/stdtypes.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 53337291dd39ce..275097664ebddf 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1546,6 +1546,18 @@ expression support in the :mod:`re` module). interpreted as in slice notation. +.. method:: str.dedent() + + Return a copy of the string with any common leading whitespace removed from every line. + + This can be used to make triple-quoted strings line up with the left edge of the + display, while still presenting them in the source code in indented form. 
+ + Note that tabs and spaces are both treated as whitespace, but they are not + equal: the lines ``" hello"`` and ``"\thello"`` are considered to have no + common leading whitespace. + + .. method:: str.encode(encoding="utf-8", errors="strict") Return an encoded version of the string as a bytes object. Default encoding From 886cdf9a99f7c1cee50152d650203f4590f71332 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Tue, 21 May 2019 12:08:50 +0200 Subject: [PATCH 8/9] Add blurb --- .../Core and Builtins/2019-05-21-12-08-40.bpo-36906.5_cnlN.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-05-21-12-08-40.bpo-36906.5_cnlN.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-05-21-12-08-40.bpo-36906.5_cnlN.rst b/Misc/NEWS.d/next/Core and Builtins/2019-05-21-12-08-40.bpo-36906.5_cnlN.rst new file mode 100644 index 00000000000000..edeaf90823d0ac --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-21-12-08-40.bpo-36906.5_cnlN.rst @@ -0,0 +1,2 @@ +Add new method ``str.dedent()``, equivalent to ``textwrap.dedent`` but with calls +on string literals evaluated at compile time. Patch contributed by Rémi Lapeyre. 
From 7d0d82d14a029f8a8831e44c2cbbaa29f200b50f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Mon, 27 Apr 2020 01:27:14 +0200 Subject: [PATCH 9/9] Remove useless textwrap imports --- Lib/distutils/tests/test_build_ext.py | 1 - Lib/distutils/tests/test_check.py | 1 - Lib/distutils/tests/test_dist.py | 1 - Lib/distutils/tests/test_sysconfig.py | 1 - Lib/idlelib/idle_test/htest.py | 1 - Lib/site.py | 1 - Lib/test/test__xxsubinterpreters.py | 2 +- Lib/test/test_argparse.py | 1 - Lib/test/test_cmd_line_script.py | 1 - Lib/test/test_peg_generator/test_pegen.py | 1 - Lib/test/test_pkg.py | 1 - Lib/test/test_regrtest.py | 1 - Lib/test/test_subprocess.py | 1 - Lib/test/test_tools/test_pindent.py | 1 - Lib/test/test_trace.py | 1 - Lib/unittest/test/test_result.py | 1 - 16 files changed, 1 insertion(+), 16 deletions(-) diff --git a/Lib/distutils/tests/test_build_ext.py b/Lib/distutils/tests/test_build_ext.py index 3cbcb10afa449a..16ff78125cefcc 100644 --- a/Lib/distutils/tests/test_build_ext.py +++ b/Lib/distutils/tests/test_build_ext.py @@ -1,7 +1,6 @@ import sys import os from io import StringIO -import textwrap from distutils.core import Distribution from distutils.command.build_ext import build_ext diff --git a/Lib/distutils/tests/test_check.py b/Lib/distutils/tests/test_check.py index 770d7a217605e1..61d12a7ce387fd 100644 --- a/Lib/distutils/tests/test_check.py +++ b/Lib/distutils/tests/test_check.py @@ -1,6 +1,5 @@ """Tests for distutils.command.check.""" import os -import textwrap import unittest from test.support import run_unittest diff --git a/Lib/distutils/tests/test_dist.py b/Lib/distutils/tests/test_dist.py index 37dc23f6759f30..575c8e3c2ea850 100644 --- a/Lib/distutils/tests/test_dist.py +++ b/Lib/distutils/tests/test_dist.py @@ -4,7 +4,6 @@ import sys import unittest import warnings -import textwrap from unittest import mock diff --git a/Lib/distutils/tests/test_sysconfig.py b/Lib/distutils/tests/test_sysconfig.py index 
98ebdd9d7f34a7..c73a51ec94b757 100644 --- a/Lib/distutils/tests/test_sysconfig.py +++ b/Lib/distutils/tests/test_sysconfig.py @@ -4,7 +4,6 @@ import shutil import subprocess import sys -import textwrap import unittest from distutils import sysconfig diff --git a/Lib/idlelib/idle_test/htest.py b/Lib/idlelib/idle_test/htest.py index a18b42837d1d9e..53711fe4daa62a 100644 --- a/Lib/idlelib/idle_test/htest.py +++ b/Lib/idlelib/idle_test/htest.py @@ -67,7 +67,6 @@ def _wrapper(parent): # htest # import idlelib.pyshell # Set Windows DPI awareness before Tk(). from importlib import import_module -import textwrap import tkinter as tk from tkinter.ttk import Scrollbar tk.NoDefaultRoot() diff --git a/Lib/site.py b/Lib/site.py index 4a1196823d277e..062a5fbebbda77 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -635,7 +635,6 @@ def _script(): else: sys.exit(3) else: - import textwrap print((help % (sys.argv[0], os.pathsep)).dedent()) sys.exit(10) diff --git a/Lib/test/test__xxsubinterpreters.py b/Lib/test/test__xxsubinterpreters.py index b606ae60df110e..c2f016fa9e148e 100644 --- a/Lib/test/test__xxsubinterpreters.py +++ b/Lib/test/test__xxsubinterpreters.py @@ -1056,7 +1056,7 @@ def test_execution_namespace_is_main(self): @unittest.skip('blocking forever') def test_still_running_at_exit(self): script = f""" - from textwrap import import threading + import threading import _xxsubinterpreters as _interpreters id = _interpreters.create() def f(): diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 7e81dbf90b844c..1f285b52dd7aaa 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -5,7 +5,6 @@ import shutil import stat import sys -import textwrap import tempfile import unittest import argparse diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 4d5fab1323c94b..e8845a82ce2c5b 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -12,7 +12,6 @@ import subprocess import io 
-import textwrap from test import support from test.support.script_helper import ( make_pkg, make_script, make_zip_pkg, make_zip_script, diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index 26a7408cbf880e..fefb4f9079535c 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -1,5 +1,4 @@ import io -import textwrap import unittest from test import test_tools diff --git a/Lib/test/test_pkg.py b/Lib/test/test_pkg.py index bfa0301a9b375e..9c512d33c8adfd 100644 --- a/Lib/test/test_pkg.py +++ b/Lib/test/test_pkg.py @@ -3,7 +3,6 @@ import sys import os import tempfile -import textwrap import unittest diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index e0269dacc91637..1c3b72ae00334c 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -15,7 +15,6 @@ import sys import sysconfig import tempfile -import textwrap import unittest from test import libregrtest from test import support diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 260a817c17ccd3..d298648e873de6 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -18,7 +18,6 @@ import shutil import threading import gc -import textwrap import json from test.support import FakePath diff --git a/Lib/test/test_tools/test_pindent.py b/Lib/test/test_tools/test_pindent.py index 6dd1b15b9f7aa3..fb6735fca04e6a 100644 --- a/Lib/test/test_tools/test_pindent.py +++ b/Lib/test/test_tools/test_pindent.py @@ -4,7 +4,6 @@ import sys import unittest import subprocess -import textwrap from test import support from test.support.script_helper import assert_python_ok diff --git a/Lib/test/test_trace.py b/Lib/test/test_trace.py index 71a8852d4ac19c..dbf3a3e8d52841 100644 --- a/Lib/test/test_trace.py +++ b/Lib/test/test_trace.py @@ -2,7 +2,6 @@ import sys from test.support import TESTFN, rmtree, unlink, captured_stdout from test.support.script_helper 
import assert_python_ok, assert_python_failure -import textwrap import unittest import trace diff --git a/Lib/unittest/test/test_result.py b/Lib/unittest/test/test_result.py index 762a009dd35af6..0bb8f7029389b6 100644 --- a/Lib/unittest/test/test_result.py +++ b/Lib/unittest/test/test_result.py @@ -1,6 +1,5 @@ import io import sys -import textwrap from test import support