Skip to content

gh-103997: Automatically dedent the argument to "-c" #103998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 49 commits into from
Apr 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
16be08f
Very rough proof-of-concept
Erotemic Apr 29, 2023
e88216b
Cleanups and comments
Erotemic Apr 29, 2023
bcb7c77
Fix bad decref, only trigger if command starts with a newline
Erotemic Apr 29, 2023
fb8985a
wchar dedent
Erotemic Apr 30, 2023
26f27a8
tweaks
Erotemic Apr 30, 2023
417eff8
Use new char* implementation
Erotemic Apr 30, 2023
924e0a6
Rename function
Erotemic Apr 30, 2023
9f95672
tweaks
Erotemic Apr 30, 2023
3f4a78b
More tweaks
Erotemic May 1, 2023
97f2079
Replace strncmp with direct char comparison
Erotemic May 1, 2023
04435eb
Remove debug code
Erotemic May 1, 2023
4c4eca9
Made new function static
Erotemic May 1, 2023
f9c969b
Handwritten char iter and _PyBytesWriter_
Erotemic May 1, 2023
674f1e0
reimplement it to imitate `textwrap.dedent`
sunmy2019 May 1, 2023
05d4169
fix missing initialization
sunmy2019 May 1, 2023
9d53c4e
fix ref leak
sunmy2019 May 1, 2023
689a13a
fix empty string
sunmy2019 May 1, 2023
f0ac7ea
nit: remove unnecessary variable
sunmy2019 May 1, 2023
71cad01
remove unnecessary include
sunmy2019 May 1, 2023
4549de8
Add test cases
Erotemic May 1, 2023
0c3b90b
Fix test on windows
Erotemic May 1, 2023
1f5b746
normalize windows line endings
Erotemic May 1, 2023
ca40589
Merge branch 'main' into dedent_pymain_command
Erotemic May 1, 2023
1f17e23
Update Modules/main.c
sunmy2019 May 3, 2023
2de2e1e
Merge branch 'python:main' into dedent_pymain_command
sunmy2019 Jul 23, 2023
c84616c
refactor code
sunmy2019 Jul 23, 2023
a19b675
Apply suggestions from code review
sunmy2019 Jul 23, 2023
7ce411f
Update Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issu…
sunmy2019 Jul 23, 2023
dea4301
resolve comments
sunmy2019 Jul 23, 2023
e06d40c
Update Modules/main.c
sunmy2019 Jul 23, 2023
a40d028
rename `out` to `dest`
sunmy2019 Jul 23, 2023
9569655
move to _PyUnicode_Dedent
sunmy2019 Jul 24, 2023
1735d0f
Apply suggestions from code review
sunmy2019 Jul 24, 2023
d3681b7
clean up things
sunmy2019 Jul 25, 2023
3b4a7bc
Merge branch 'main' into dedent_pymain_command
Erotemic Apr 2, 2024
f355760
Merge branch 'main' into dedent_pymain_command
Erotemic Apr 19, 2024
b1e89c9
Merge branch 'main' into dedent_pymain_command
AA-Turner Apr 9, 2025
d1b4cd1
Update Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issu…
Erotemic Apr 10, 2025
e556bbf
lint: space in folder name
Erotemic Apr 10, 2025
136c8b0
Explicit include of pycore_unicodeobject.h
Erotemic Apr 10, 2025
8e5cc7f
Merge branch 'main' into dedent_pymain_command
Erotemic Apr 10, 2025
cd14a00
Apply suggestions from code review
sunmy2019 Apr 17, 2025
07d2273
Resolve Comments
sunmy2019 Apr 17, 2025
ed6e17b
Refactor implementation
sunmy2019 Apr 18, 2025
d1edb1b
Merge branch 'main' into dedent_pymain_command
sunmy2019 Apr 18, 2025
4c78c57
Apply suggestions from code review
methane Apr 18, 2025
38d2a4e
add what's new entry
methane Apr 18, 2025
42b6330
Document dedentation of command in version 3.14
methane Apr 18, 2025
98c17e5
Apply suggestions from code review
picnixz Apr 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Doc/using/cmdline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ source.

.. audit-event:: cpython.run_command command cmdoption-c

.. versionchanged:: next
*command* is automatically dedented before execution.

.. option:: -m <module-name>

Search :data:`sys.path` for the named module and execute its contents as
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,12 @@ Other language changes
explicitly overridden in the subclass.
(Contributed by Tomasz Pytel in :gh:`132329`.)

* The command line option :option:`-c` now automatically dedents its code
argument before execution. The auto-dedentation behavior mirrors
:func:`textwrap.dedent`.
(Contributed by Jon Crall and Steven Sun in :gh:`103998`.)


.. _whatsnew314-pep765:

PEP 765: Disallow return/break/continue that exit a finally block
Expand Down
6 changes: 6 additions & 0 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
Py_UCS4 *maxchar,
int forward);

/* Dedent a string.
Behaviour is expected to be an exact match of `textwrap.dedent`.
Return a new reference on success, NULL with exception set on error.
*/
extern PyObject* _PyUnicode_Dedent(PyObject *unicode);

/* --- Misc functions ----------------------------------------------------- */

extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);
Expand Down
84 changes: 84 additions & 0 deletions Lib/test/test_cmd_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
spawn_python, kill_python, assert_python_ok, assert_python_failure,
interpreter_requires_environment
)
from textwrap import dedent


if not support.has_subprocess_support:
raise unittest.SkipTest("test module requires subprocess")
Expand Down Expand Up @@ -1051,6 +1053,88 @@ def test_int_max_str_digits(self):
)
self.assertEqual(res2int(res), (6000, 6000))

def test_cmd_dedent(self):
# test that -c auto-dedents its arguments
# Each entry is a (code, expected) pair: `code` is handed to "-c" and
# `expected` is compared with the stripped, newline-normalized stdout.
test_cases = [
(
"""
print('space-auto-dedent')
""",
"space-auto-dedent",
),
(
# '^' is a placeholder; every occurrence becomes one tab.
dedent(
"""
^^^print('tab-auto-dedent')
"""
).replace("^", "\t"),
"tab-auto-dedent",
),
(
# Here each '^' becomes the mixed sequence tab-space-tab.
dedent(
"""
^^if 1:
^^^^print('mixed-auto-dedent-1')
^^print('mixed-auto-dedent-2')
"""
).replace("^", "\t \t"),
"mixed-auto-dedent-1\nmixed-auto-dedent-2",
),
(
# The '$' markers are stripped by the replace() below; presumably
# they mark line ends so trailing whitespace stays visible.
'''
data = """$

this data has an empty newline above and a newline with spaces below $
$
"""$
if 1: $
print(repr(data))$
'''.replace(
"$", ""
),
# Note: entirely blank lines are normalized to \n, even if they
# are part of a data string. This is consistent with
# textwrap.dedent behavior, but might not be intuitive.
"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
),
(
# Empty command: expected to produce no output.
'',
'',
),
(
# Command made only of spaces, tabs and newlines: no output expected.
' \t\n\t\n \t\t\t \t\t \t\n\t\t \n\n\n\t\t\t ',
'',
),
]
for code, expected in test_cases:
# Run the auto-dedent case
args1 = sys.executable, '-c', code
proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
self.assertEqual(proc1.returncode, 0, proc1)
output1 = proc1.stdout.strip().decode(encoding='utf-8')

# Manually dedent beforehand, check the result is the same.
args2 = sys.executable, '-c', dedent(code)
proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
self.assertEqual(proc2.returncode, 0, proc2)
output2 = proc2.stdout.strip().decode(encoding='utf-8')

# The interpreter's own dedent must agree with textwrap.dedent, and
# the output (with "\r\n" folded to "\n") must match `expected`.
self.assertEqual(output1, output2)
self.assertEqual(output1.replace('\r\n', '\n'), expected)

def test_cmd_dedent_failcase(self):
# Mixing tabs and spaces is not allowed
from textwrap import dedent
# '-' and '+' are placeholders for the two indentation characters.
template = dedent(
'''
-+if 1:
+-++ print('will fail')
''')
# First mapping: '-' -> space, '+' -> tab; the interpreter must fail.
code = template.replace('-', ' ').replace('+', '\t')
assert_python_failure('-c', code)
# Swapped mapping: '-' -> tab, '+' -> space; it must fail as well.
code = template.replace('-', '\t').replace('+', ' ')
assert_python_failure('-c', code)

def test_cpu_count(self):
code = "import os; print(os.cpu_count(), os.process_cpu_count())"
res = assert_python_ok('-X', 'cpu_count=4321', '-c', code)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
String arguments passed to "-c" are now automatically dedented as if by
:func:`textwrap.dedent`. This allows "python -c" invocations to be indented
in shell scripts without causing indentation errors. (Patch by Jon Crall and
Steven Sun)
6 changes: 6 additions & 0 deletions Modules/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "pycore_pylifecycle.h" // _Py_PreInitializeFromPyArgv()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_pythonrun.h" // _PyRun_AnyFileObject()
#include "pycore_unicodeobject.h" // _PyUnicode_Dedent()

/* Includes for exit_sigint() */
#include <stdio.h> // perror()
Expand Down Expand Up @@ -244,6 +245,11 @@ pymain_run_command(wchar_t *command)
return pymain_exit_err_print();
}

Py_SETREF(unicode, _PyUnicode_Dedent(unicode));
if (unicode == NULL) {
goto error;
}

bytes = PyUnicode_AsUTF8String(unicode);
Py_DECREF(unicode);
if (bytes == NULL) {
Expand Down
157 changes: 157 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -14270,6 +14270,163 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
return Py_BuildValue("(N)", copy);
}

/*
This function searches for the longest run of leading whitespace (spaces
and tabs) shared by every non-blank line in [src, end).

Lines that contain nothing but spaces and tabs are ignored.  The search stops
early with 0 as soon as a non-blank line has no indentation at all, or as
soon as two non-blank lines share no leading character.

It returns the length of the common leading whitespace.  When that length is
greater than zero, `*output` is set to the start of the first non-blank line,
whose first `length` bytes are the common whitespace.  `*output` is left
untouched when 0 is returned.
*/
static Py_ssize_t
search_longest_common_leading_whitespace(
    const char *const src,
    const char *const end,
    const char **output)
{
    // [prefix, prefix + prefix_len) is the common indentation found so far
    const char *prefix = NULL;
    Py_ssize_t prefix_len = 0;

    const char *cursor = src;
    while (cursor < end) {
        const char *const line = cursor;

        // find where the indentation of this line stops
        const char *indent_end = line;
        while (indent_end < end
               && (*indent_end == ' ' || *indent_end == '\t'))
        {
            ++indent_end;
        }
        const Py_ssize_t indent_len = indent_end - line;
        const int is_blank = (indent_end == end || *indent_end == '\n');

        if (!is_blank && indent_len == 0) {
            // a non-blank line starts at column 0: nothing to strip
            return 0;
        }

        // move the cursor to the first byte of the next line (or to `end`)
        const char *line_end = indent_end;
        while (line_end < end && *line_end != '\n') {
            ++line_end;
        }
        cursor = (line_end < end) ? line_end + 1 : end;

        if (is_blank) {
            // whitespace-only lines never constrain the result
            continue;
        }

        if (prefix == NULL) {
            // first non-blank line: its whole indentation is the candidate
            assert(indent_len > 0);
            prefix = line;
            prefix_len = indent_len;
            continue;
        }

        // shrink the candidate to what it shares with this line's indentation
        const Py_ssize_t limit =
            (indent_len < prefix_len) ? indent_len : prefix_len;
        Py_ssize_t shared = 0;
        while (shared < limit && prefix[shared] == line[shared]) {
            ++shared;
        }
        if (shared == 0) {
            // the indentations differ from the very first character
            return 0;
        }
        prefix_len = shared;
    }

    assert(prefix_len >= 0);
    if (prefix_len > 0) {
        *output = prefix;
    }
    return prefix_len;
}

/* Dedent a string.
   Behaviour is expected to be an exact match of `textwrap.dedent`:
   the longest run of spaces/tabs common to all non-blank lines is removed
   from the start of every line, and lines consisting only of spaces and
   tabs are reduced to their line terminator (or to nothing when the line
   is the last one and has no trailing '\n').

   If there is nothing to strip, a new reference to `unicode` itself is
   returned.

   Return a new reference on success, NULL with exception set on error.
*/
PyObject *
_PyUnicode_Dedent(PyObject *unicode)
{
    Py_ssize_t src_len = 0;
    const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
    if (!src) {
        return NULL;
    }
    assert(src_len >= 0);
    if (src_len == 0) {
        return Py_NewRef(unicode);
    }

    const char *const end = src + src_len;

    // [whitespace_start, whitespace_start + whitespace_len)
    // describes the current longest common leading whitespace
    const char *whitespace_start = NULL;
    Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
        src, end, &whitespace_start);

    if (whitespace_len == 0) {
        return Py_NewRef(unicode);
    }

    // now we should trigger a dedent
    // The output is never longer than the input, so src_len bytes suffice.
    char *dest = PyMem_Malloc(src_len);
    if (!dest) {
        PyErr_NoMemory();
        return NULL;
    }
    char *dest_iter = dest;

    for (const char *iter = src; iter < end; ++iter) {
        const char *line_start = iter;
        bool in_leading_space = true;

        // iterate over a line to find the end of a line
        while (iter < end && *iter != '\n') {
            if (in_leading_space && *iter != ' ' && *iter != '\t') {
                in_leading_space = false;
            }
            ++iter;
        }

        // invariant: *iter == '\n' or iter == end
        bool append_newline = iter < end;

        /* If this line is all whitespace, emit only its '\n' (if it has
           one) and continue.  This must also cover a whitespace-only last
           line without a trailing newline: such a line may be shorter than
           the common indentation, so sending it through the copy below
           would compute a negative length. */
        if (in_leading_space) {
            if (append_newline) {
                *dest_iter++ = '\n';
            }
            continue;
        }

        /* copy [line_start + whitespace_len, iter) to buffer, then
           conditionally append '\n'.  A non-blank line always starts with
           the common whitespace, so the length cannot be negative. */

        Py_ssize_t new_line_len = iter - line_start - whitespace_len;
        assert(new_line_len >= 0);
        memcpy(dest_iter, line_start + whitespace_len, new_line_len);

        dest_iter += new_line_len;

        if (append_newline) {
            *dest_iter++ = '\n';
        }
    }

    PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest);
    PyMem_Free(dest);
    return res;
}

static PyMethodDef unicode_methods[] = {
UNICODE_ENCODE_METHODDEF
UNICODE_REPLACE_METHODDEF
Expand Down
Loading