From 208f03caa7035d711d086ba29546adaf8f1b90cd Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 27 May 2022 10:52:11 +0900 Subject: [PATCH 1/3] gh-85308: argparse: Use filesystem encoding for arguments file --- Doc/library/argparse.rst | 12 +++++++++++- Lib/argparse.py | 4 +++- .../2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst | 4 ++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 0e62e99d706d4c..15b2ac54625bd4 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -562,7 +562,7 @@ at the command line. If the ``fromfile_prefix_chars=`` argument is given to the specified characters will be treated as files, and will be replaced by the arguments they contain. For example:: - >>> with open('args.txt', 'w') as fp: + >>> with open('args.txt', 'w', encoding=sys.getfilesystemencoding()) as fp: ... fp.write('-f\nbar') >>> parser = argparse.ArgumentParser(fromfile_prefix_chars='@') >>> parser.add_argument('-f') @@ -575,9 +575,19 @@ were in the same place as the original file referencing argument on the command line. So in the example above, the expression ``['-f', 'foo', '@args.txt']`` is considered equivalent to the expression ``['-f', 'foo', '-f', 'bar']``. +:class:`ArgumentParser` uses :term:`filesystem encoding and error handler` +to read the file containing arguments. + The ``fromfile_prefix_chars=`` argument defaults to ``None``, meaning that arguments will never be treated as file references. +.. versionchanged:: 3.12 + :class:`ArgumentParser` changed encoding and errors to read arguments files + from default text encoding (e.g. :func:`locale.getpreferredencoding(False)` + and `"strict"`) to :term:`filesystem encoding and error handler`. + This change affects Windows; argument file should be encoded with UTF-8 + instead of ANSI Codepage. 
+ argument_default ^^^^^^^^^^^^^^^^ diff --git a/Lib/argparse.py b/Lib/argparse.py index 1c5520c4b41bd1..02e98bbf920cf1 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -2161,7 +2161,9 @@ def _read_args_from_files(self, arg_strings): # replace arguments referencing files with the file content else: try: - with open(arg_string[1:]) as args_file: + with open(arg_string[1:], + encoding=_sys.getfilesystemencoding(), + errors=_sys.getfilesystemencodeerrors()) as args_file: arg_strings = [] for arg_line in args_file.read().splitlines(): for arg in self.convert_arg_line_to_args(arg_line): diff --git a/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst b/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst new file mode 100644 index 00000000000000..4574264dd4d433 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst @@ -0,0 +1,4 @@ +Changed :class:`argparse.ArgumentParser` to use :term:`filesystem encoding +and error handler` instead of default text encoding to read arguments from +file (e.g. ``fromfile_prefix_chars`` option). This change affects Windows; +argument file should be encoded with UTF-8 instead of ANSI Codepage. From 5283ca58148d2e52280c5a50b5e8b7b82e76859a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 27 May 2022 11:40:51 +0900 Subject: [PATCH 2/3] Add what's new entry for backward incompatibility. --- Doc/whatsnew/3.12.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 033de1780b3d18..98ec27fd3a59e2 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -140,6 +140,13 @@ Changes in the Python API select from a larger range than ``randrange(10**25)``. (Originally suggested by Serhiy Storchaka gh-86388.) +* :class:`argparse.ArgumentParser` changed encoding and error handler + for reading arguments from file (e.g. ``fromfile_prefix_chars`` option) + from default text encoding (e.g. 
:func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`) + to :term:`filesystem encoding and error handler`. + Argument files should be encoded in UTF-8 instead of ANSI Codepage on Windows + unless :envvar:`PYTHONLEGACYWINDOWSFSENCODING` is set. + Build Changes ============= From 6c6bffb31286e0d8b5f38c4d526ce068371659e6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 31 May 2022 18:14:37 +0900 Subject: [PATCH 3/3] update docs --- Doc/library/argparse.rst | 7 +++---- Doc/whatsnew/3.12.rst | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 15b2ac54625bd4..b2fa0b3c23c3a1 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -583,10 +583,9 @@ arguments will never be treated as file references. .. versionchanged:: 3.12 :class:`ArgumentParser` changed encoding and errors to read arguments files - from default text encoding (e.g. :func:`locale.getpreferredencoding(False)` - and `"strict"`) to :term:`filesystem encoding and error handler`. - This change affects Windows; argument file should be encoded with UTF-8 - instead of ANSI Codepage. + from default (e.g. :func:`locale.getpreferredencoding(False)` and + ``"strict"``) to :term:`filesystem encoding and error handler`. + Arguments file should be encoded in UTF-8 instead of ANSI Codepage on Windows. argument_default diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 98ec27fd3a59e2..88013117564965 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -144,8 +144,7 @@ Changes in the Python API for reading arguments from file (e.g. ``fromfile_prefix_chars`` option) from default text encoding (e.g. :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`) to :term:`filesystem encoding and error handler`. - Argument files should be encoded in UTF-8 instead of ANSI Codepage on Windows - unless :envvar:`PYTHONLEGACYWINDOWSFSENCODING` is set. + Argument files should be encoded in UTF-8 instead of ANSI Codepage on Windows. 
Build Changes