Skip to content

Commit e38d0af

Browse files
authored
GH-120754: Add a strace helper and test set of syscalls for open().read() (#121143)
1 parent 86f06cb commit e38d0af

File tree

4 files changed

+280
-33
lines changed

4 files changed

+280
-33
lines changed

Lib/test/support/strace_helper.py

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import re
2+
import sys
3+
import textwrap
4+
import unittest
5+
from dataclasses import dataclass
6+
from functools import cache
7+
from test import support
8+
from test.support.script_helper import run_python_until_end
9+
10+
_strace_binary = "/usr/bin/strace"
11+
_syscall_regex = re.compile(
12+
r"(?P<syscall>[^(]*)\((?P<args>[^)]*)\)\s*[=]\s*(?P<returncode>.+)")
13+
_returncode_regex = re.compile(
14+
br"\+\+\+ exited with (?P<returncode>\d+) \+\+\+")
15+
16+
17+
@dataclass
18+
class StraceEvent:
19+
syscall: str
20+
args: list[str]
21+
returncode: str
22+
23+
24+
@dataclass
25+
class StraceResult:
26+
strace_returncode: int
27+
python_returncode: int
28+
29+
"""The event messages generated by strace. This is very similar to the
30+
stderr strace produces with returncode marker section removed."""
31+
event_bytes: bytes
32+
stdout: bytes
33+
stderr: bytes
34+
35+
def events(self):
36+
"""Parse event_bytes data into system calls for easier processing.
37+
38+
This assumes the program under inspection doesn't print any non-utf8
39+
strings which would mix into the strace output."""
40+
decoded_events = self.event_bytes.decode('utf-8')
41+
matches = [
42+
_syscall_regex.match(event)
43+
for event in decoded_events.splitlines()
44+
]
45+
return [
46+
StraceEvent(match["syscall"],
47+
[arg.strip() for arg in (match["args"].split(","))],
48+
match["returncode"]) for match in matches if match
49+
]
50+
51+
def sections(self):
52+
"""Find all "MARK <X>" writes and use them to make groups of events.
53+
54+
This is useful to avoid variable / overhead events, like those at
55+
interpreter startup or when opening a file so a test can verify just
56+
the small case under study."""
57+
current_section = "__startup"
58+
sections = {current_section: []}
59+
for event in self.events():
60+
if event.syscall == 'write' and len(
61+
event.args) > 2 and event.args[1].startswith("\"MARK "):
62+
# Found a new section, don't include the write in the section
63+
# but all events until next mark should be in that section
64+
current_section = event.args[1].split(
65+
" ", 1)[1].removesuffix('\\n"')
66+
if current_section not in sections:
67+
sections[current_section] = list()
68+
else:
69+
sections[current_section].append(event)
70+
71+
return sections
72+
73+
74+
@support.requires_subprocess()
75+
def strace_python(code, strace_flags, check=True):
76+
"""Run strace and return the trace.
77+
78+
Sets strace_returncode and python_returncode to `-1` on error."""
79+
res = None
80+
81+
def _make_error(reason, details):
82+
return StraceResult(
83+
strace_returncode=-1,
84+
python_returncode=-1,
85+
event_bytes=f"error({reason},details={details}) = -1".encode('utf-8'),
86+
stdout=res.out if res else "",
87+
stderr=res.err if res else "")
88+
89+
# Run strace, and get out the raw text
90+
try:
91+
res, cmd_line = run_python_until_end(
92+
"-c",
93+
textwrap.dedent(code),
94+
__run_using_command=[_strace_binary] + strace_flags)
95+
except OSError as err:
96+
return _make_error("Caught OSError", err)
97+
98+
if check and res.rc:
99+
res.fail(cmd_line)
100+
101+
# Get out program returncode
102+
stripped = res.err.strip()
103+
output = stripped.rsplit(b"\n", 1)
104+
if len(output) != 2:
105+
return _make_error("Expected strace events and exit code line",
106+
stripped[-50:])
107+
108+
returncode_match = _returncode_regex.match(output[1])
109+
if not returncode_match:
110+
return _make_error("Expected to find returncode in last line.",
111+
output[1][:50])
112+
113+
python_returncode = int(returncode_match["returncode"])
114+
if check and python_returncode:
115+
res.fail(cmd_line)
116+
117+
return StraceResult(strace_returncode=res.rc,
118+
python_returncode=python_returncode,
119+
event_bytes=output[0],
120+
stdout=res.out,
121+
stderr=res.err)
122+
123+
124+
def _get_events(code, strace_flags, prelude, cleanup):
125+
# NOTE: The flush is currently required to prevent the prints from getting
126+
# buffered and done all at once at exit
127+
prelude = textwrap.dedent(prelude)
128+
code = textwrap.dedent(code)
129+
cleanup = textwrap.dedent(cleanup)
130+
to_run = f"""
131+
print("MARK prelude", flush=True)
132+
{prelude}
133+
print("MARK code", flush=True)
134+
{code}
135+
print("MARK cleanup", flush=True)
136+
{cleanup}
137+
print("MARK __shutdown", flush=True)
138+
"""
139+
trace = strace_python(to_run, strace_flags)
140+
all_sections = trace.sections()
141+
return all_sections['code']
142+
143+
144+
def get_syscalls(code, strace_flags, prelude="", cleanup=""):
145+
"""Get the syscalls which a given chunk of python code generates"""
146+
events = _get_events(code, strace_flags, prelude=prelude, cleanup=cleanup)
147+
return [ev.syscall for ev in events]
148+
149+
150+
# Moderately expensive (spawns a subprocess), so share results when possible.
151+
@cache
152+
def _can_strace():
153+
res = strace_python("import sys; sys.exit(0)", [], check=False)
154+
assert res.events(), "Should have parsed multiple calls"
155+
156+
return res.strace_returncode == 0 and res.python_returncode == 0
157+
158+
159+
def requires_strace():
160+
if sys.platform != "linux":
161+
return unittest.skip("Linux only, requires strace.")
162+
163+
return unittest.skipUnless(_can_strace(), "Requires working strace")
164+
165+
166+
__all__ = ["requires_strace", "strace_python", "StraceEvent", "StraceResult"]

Lib/test/test_fileio.py

+92-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from test.support import (
1313
cpython_only, swap_attr, gc_collect, is_emscripten, is_wasi,
14-
infinite_recursion,
14+
infinite_recursion, strace_helper
1515
)
1616
from test.support.os_helper import (
1717
TESTFN, TESTFN_ASCII, TESTFN_UNICODE, make_bad_fd,
@@ -24,6 +24,9 @@
2424
import _pyio # Python implementation of io
2525

2626

27+
_strace_flags=["--trace=%file,%desc"]
28+
29+
2730
class AutoFileTests:
2831
# file tests for which a test file is automatically set up
2932

@@ -359,6 +362,94 @@ def testErrnoOnClosedReadinto(self, f):
359362
a = array('b', b'x'*10)
360363
f.readinto(a)
361364

365+
@strace_helper.requires_strace()
366+
def test_syscalls_read(self):
367+
"""Check that the set of system calls produced by the I/O stack is what
368+
is expected for various read cases.
369+
370+
It's expected as bits of the I/O implementation change, this will need
371+
to change. The goal is to catch changes that unintentionally add
372+
additional systemcalls (ex. additional calls have been looked at in
373+
bpo-21679 and gh-120754).
374+
"""
375+
self.f.write(b"Hello, World!")
376+
self.f.close()
377+
378+
379+
def check_readall(name, code, prelude="", cleanup=""):
380+
with self.subTest(name=name):
381+
syscalls = strace_helper.get_syscalls(code, _strace_flags,
382+
prelude=prelude,
383+
cleanup=cleanup)
384+
385+
# There are a number of related syscalls used to implement
386+
# behaviors in a libc (ex. fstat, newfstatat, open, openat).
387+
# Allow any that use the same substring.
388+
def count_similarname(name):
389+
return len([sc for sc in syscalls if name in sc])
390+
391+
# Should open and close the file exactly once
392+
self.assertEqual(count_similarname('open'), 1)
393+
self.assertEqual(count_similarname('close'), 1)
394+
395+
# Should only have one fstat (bpo-21679, gh-120754)
396+
self.assertEqual(count_similarname('fstat'), 1)
397+
398+
399+
# "open, read, close" file using different common patterns.
400+
check_readall(
401+
"open builtin with default options",
402+
f"""
403+
f = open('{TESTFN}')
404+
f.read()
405+
f.close()
406+
"""
407+
)
408+
409+
check_readall(
410+
"open in binary mode",
411+
f"""
412+
f = open('{TESTFN}', 'rb')
413+
f.read()
414+
f.close()
415+
"""
416+
)
417+
418+
check_readall(
419+
"open in text mode",
420+
f"""
421+
f = open('{TESTFN}', 'rt')
422+
f.read()
423+
f.close()
424+
"""
425+
)
426+
427+
check_readall(
428+
"pathlib read_bytes",
429+
"p.read_bytes()",
430+
prelude=f"""from pathlib import Path; p = Path("{TESTFN}")"""
431+
432+
)
433+
434+
check_readall(
435+
"pathlib read_text",
436+
"p.read_text()",
437+
prelude=f"""from pathlib import Path; p = Path("{TESTFN}")"""
438+
)
439+
440+
# Focus on just `read()`.
441+
calls = strace_helper.get_syscalls(
442+
prelude=f"f = open('{TESTFN}')",
443+
code="f.read()",
444+
cleanup="f.close()",
445+
strace_flags=_strace_flags
446+
)
447+
# One to read all the bytes
448+
# One to read the EOF and get a size 0 return.
449+
self.assertEqual(calls.count("read"), 2)
450+
451+
452+
362453
class CAutoFileTests(AutoFileTests, unittest.TestCase):
363454
FileIO = _io.FileIO
364455
modulename = '_io'

Lib/test/test_subprocess.py

+21-32
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from test.support import check_sanitizer
55
from test.support import import_helper
66
from test.support import os_helper
7+
from test.support import strace_helper
78
from test.support import warnings_helper
89
from test.support.script_helper import assert_python_ok
910
import subprocess
@@ -3415,44 +3416,33 @@ def __del__(self):
34153416

34163417
@unittest.skipIf(not sysconfig.get_config_var("HAVE_VFORK"),
34173418
"vfork() not enabled by configure.")
3418-
@unittest.skipIf(sys.platform != "linux", "Linux only, requires strace.")
3419+
@strace_helper.requires_strace()
34193420
@mock.patch("subprocess._USE_POSIX_SPAWN", new=False)
34203421
def test_vfork_used_when_expected(self):
34213422
# This is a performance regression test to ensure we default to using
34223423
# vfork() when possible.
34233424
# Technically this test could pass when posix_spawn is used as well
34243425
# because libc tends to implement that internally using vfork. But
34253426
# that'd just be testing a libc+kernel implementation detail.
3426-
strace_binary = "/usr/bin/strace"
3427-
# The only system calls we are interested in.
3428-
strace_filter = "--trace=clone,clone2,clone3,fork,vfork,exit,exit_group"
3429-
true_binary = "/bin/true"
3430-
strace_command = [strace_binary, strace_filter]
34313427

3432-
try:
3433-
does_strace_work_process = subprocess.run(
3434-
strace_command + [true_binary],
3435-
stderr=subprocess.PIPE,
3436-
stdout=subprocess.DEVNULL,
3437-
)
3438-
rc = does_strace_work_process.returncode
3439-
stderr = does_strace_work_process.stderr
3440-
except OSError:
3441-
rc = -1
3442-
stderr = ""
3443-
if rc or (b"+++ exited with 0 +++" not in stderr):
3444-
self.skipTest("strace not found or not working as expected.")
3428+
# Are intersted in the system calls:
3429+
# clone,clone2,clone3,fork,vfork,exit,exit_group
3430+
# Unfortunately using `--trace` with that list to strace fails because
3431+
# not all are supported on all platforms (ex. clone2 is ia64 only...)
3432+
# So instead use `%process` which is recommended by strace, and contains
3433+
# the above.
3434+
true_binary = "/bin/true"
3435+
strace_args = ["--trace=%process"]
34453436

34463437
with self.subTest(name="default_is_vfork"):
3447-
vfork_result = assert_python_ok(
3448-
"-c",
3449-
textwrap.dedent(f"""\
3450-
import subprocess
3451-
subprocess.check_call([{true_binary!r}])"""),
3452-
__run_using_command=strace_command,
3438+
vfork_result = strace_helper.strace_python(
3439+
f"""\
3440+
import subprocess
3441+
subprocess.check_call([{true_binary!r}])""",
3442+
strace_args
34533443
)
34543444
# Match both vfork() and clone(..., flags=...|CLONE_VFORK|...)
3455-
self.assertRegex(vfork_result.err, br"(?i)vfork")
3445+
self.assertRegex(vfork_result.event_bytes, br"(?i)vfork")
34563446
# Do NOT check that fork() or other clones did not happen.
34573447
# If the OS denys the vfork it'll fallback to plain fork().
34583448

@@ -3465,21 +3455,20 @@ def test_vfork_used_when_expected(self):
34653455
("setgroups", "", "extra_groups=[]", True),
34663456
):
34673457
with self.subTest(name=sub_name):
3468-
non_vfork_result = assert_python_ok(
3469-
"-c",
3470-
textwrap.dedent(f"""\
3458+
non_vfork_result = strace_helper.strace_python(
3459+
f"""\
34713460
import subprocess
34723461
{preamble}
34733462
try:
34743463
subprocess.check_call(
34753464
[{true_binary!r}], **dict({sp_kwarg}))
34763465
except PermissionError:
34773466
if not {expect_permission_error}:
3478-
raise"""),
3479-
__run_using_command=strace_command,
3467+
raise""",
3468+
strace_args
34803469
)
34813470
# Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...).
3482-
self.assertNotRegex(non_vfork_result.err, br"(?i)vfork")
3471+
self.assertNotRegex(non_vfork_result.event_bytes, br"(?i)vfork")
34833472

34843473

34853474
@unittest.skipUnless(mswindows, "Windows specific tests")

Misc/ACKS

+1
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,7 @@ Grzegorz Makarewicz
11641164
David Malcolm
11651165
Greg Malcolm
11661166
William Mallard
1167+
Cody Maloney
11671168
Ken Manheimer
11681169
Vladimir Marangozov
11691170
Colin Marc

0 commit comments

Comments
 (0)