Skip to content

Commit 15aadf9

Browse files
committed
Fixed bugs with drive-relative paths and NTFS ADS paths in pathlib
1 parent 838f264 commit 15aadf9

File tree

3 files changed

+89
-30
lines changed

3 files changed

+89
-30
lines changed

Lib/pathlib.py

+52-28
Original file line number | Diff line number | Diff line change
@@ -60,38 +60,49 @@ class _Flavour(object):
6060
def __init__(self):
6161
self.join = self.sep.join
6262

63+
def _split_part(self, part):
    """
    Break one path argument into a (drive, root, components) triple.

    A tuple is taken to already be such a triple and is handed back
    unchanged.  A string first has the flavour's alternate separator
    (when one is set) replaced by the main separator, is then passed
    through splitroot(), and the remaining relative portion is split
    on the separator.  Any other type raises TypeError.
    """
    # Already-parsed input: nothing to do.
    if isinstance(part, tuple):
        return part
    if not isinstance(part, str):
        raise TypeError(f'argument should be a tuple or an str object, not {type(part)}')

    sep, altsep = self.sep, self.altsep
    normalized = part.replace(altsep, sep) if altsep else part
    drive, anchor, remainder = self.splitroot(normalized)
    # The split may yield empty strings (e.g. for an empty remainder);
    # they are returned as-is.
    return drive, anchor, remainder.split(sep)
78+
6379
def parse_parts(self, parts):
80+
"""
81+
Parse and join multiple path strings, and
82+
return a tuple of the final drive, root and path parts.
83+
The given parts can be either strings of paths,
84+
or tuples that represent paths, containing the drive, root and list of path parts.
85+
The option for passing a tuple is needed, as the part 'a:b' could be interpreted
86+
either as the relative path 'b' with the drive 'a:',
87+
or as a file 'a' with the NTFS data-stream 'b'.
88+
For example, passing either ('a:', '', ['b']) or ('', '', ['a:b']) instead of 'a:b'
89+
will allow parse_parts to behave properly in these cases.
90+
"""
6491
parsed = []
65-
sep = self.sep
66-
altsep = self.altsep
6792
drv = root = ''
6893
it = reversed(parts)
6994
for part in it:
7095
if not part:
7196
continue
72-
if altsep:
73-
part = part.replace(altsep, sep)
74-
drv, root, rel = self.splitroot(part)
75-
if sep in rel:
76-
for x in reversed(rel.split(sep)):
97+
current_drv, current_root, rel_parts = self._split_part(part)
98+
if not drv:
99+
drv = current_drv
100+
if not root:
101+
root = current_root
102+
for x in reversed(rel_parts):
77103
if x and x != '.':
78104
parsed.append(sys.intern(x))
79-
else:
80-
if rel and rel != '.':
81-
parsed.append(sys.intern(rel))
82-
if drv or root:
83-
if not drv:
84-
# If no drive is present, try to find one in the previous
85-
# parts. This makes the result of parsing e.g.
86-
# ("C:", "/", "a") reasonably intuitive.
87-
for part in it:
88-
if not part:
89-
continue
90-
if altsep:
91-
part = part.replace(altsep, sep)
92-
drv = self.splitroot(part)[0]
93-
if drv:
94-
break
105+
if root and drv:
95106
break
96107
if drv or root:
97108
parsed.append(drv + root)
@@ -115,6 +126,9 @@ def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2):
115126
return drv, root, parts + parts2
116127
return drv2, root2, parts2
117128

129+
def has_drive(self, part):
    """
    Tell whether splitroot() reports a drive for *part*.

    Returns True when the first element returned by splitroot(part)
    is not the empty string, False otherwise.
    """
    drive = self.splitroot(part)[0]
    return drive != ''
131+
118132

119133
class _WindowsFlavour(_Flavour):
120134
# Reference for Windows paths can be found at
@@ -191,18 +205,21 @@ def resolve(self, path, strict=False):
191205
s = str(path)
192206
if not s:
193207
return os.getcwd()
194-
previous_s = None
195208
if _getfinalpathname is not None:
196209
if strict:
197210
return self._ext_to_normal(_getfinalpathname(s))
198211
else:
212+
previous_s = None
199213
tail_parts = [] # End of the path after the first one not found
200214
while True:
201215
try:
202216
s = self._ext_to_normal(_getfinalpathname(s))
203217
except FileNotFoundError:
204218
previous_s = s
205219
s, tail = os.path.split(s)
220+
if self.has_drive(tail):
221+
# Avoid confusing a file name that has a data stream with a drive letter
222+
tail = f'.{self.sep}{tail}'
206223
tail_parts.append(tail)
207224
if previous_s == s:
208225
return path
@@ -646,7 +663,10 @@ def _parse_args(cls, args):
646663
parts = []
647664
for a in args:
648665
if isinstance(a, PurePath):
649-
parts += a._parts
666+
path_parts = a._parts
667+
if a._drv or a._root:
668+
path_parts = path_parts[1:]
669+
parts.append((a._drv, a._root, path_parts))
650670
else:
651671
a = os.fspath(a)
652672
if isinstance(a, str):
@@ -684,6 +704,10 @@ def _from_parsed_parts(cls, drv, root, parts, init=True):
684704

685705
@classmethod
686706
def _format_parsed_parts(cls, drv, root, parts):
707+
if parts and not drv and cls._flavour.has_drive(parts[0]):
708+
# In case there is no drive, and the first part might be interpreted as a drive,
709+
# we add a dot to clarify the first part is not a drive.
710+
parts = ['.'] + parts
687711
if drv or root:
688712
return drv + root + cls._flavour.join(parts[1:])
689713
else:
@@ -910,7 +934,7 @@ def __truediv__(self, key):
910934
return self._make_child((key,))
911935

912936
def __rtruediv__(self, key):
913-
return self._from_parts([key] + self._parts)
937+
return self._from_parts([key, self])
914938

915939
@property
916940
def parent(self):
@@ -1138,7 +1162,7 @@ def absolute(self):
11381162
return self
11391163
# FIXME this must defer to the specific flavour (and, under Windows,
11401164
# use nt._getfullpathname())
1141-
obj = self._from_parts([os.getcwd()] + self._parts, init=False)
1165+
obj = self._from_parts([os.getcwd(), self], init=False)
11421166
obj._init(template=self)
11431167
return obj
11441168

@@ -1507,7 +1531,7 @@ def expanduser(self):
15071531
if (not (self._drv or self._root) and
15081532
self._parts and self._parts[0][:1] == '~'):
15091533
homedir = self._flavour.gethomedir(self._parts[0][1:])
1510-
return self._from_parts([homedir] + self._parts[1:])
1534+
return self._from_parts([homedir, self.relative_to(self._parts[0])])
15111535

15121536
return self
15131537

Lib/test/test_pathlib.py

+36-2
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,6 @@ def test_parse_parts(self):
114114
check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
115115
check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c']))
116116
# Second part is anchored, so that the first part is ignored.
117-
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c']))
118117
check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
119118
# UNC paths.
120119
check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
@@ -133,6 +132,16 @@ def test_parse_parts(self):
133132
check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c']))
134133
check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
135134
check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c']))
135+
# Second part has a drive but not root.
136+
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c']))
137+
check(['Y:a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c']))
138+
# Paths to files with NTFS alternate data streams
139+
check(['./c:s'], ('', '', ['c:s']))
140+
check(['cc:s'], ('', '', ['cc:s']))
141+
check(['C:c:s'], ('C:', '', ['C:', 'c:s']))
142+
check(['C:/c:s'], ('C:', '\\', ['C:\\', 'c:s']))
143+
check(['D:a', './c:b'], ('D:', '', ['D:', 'a', 'c:b']))
144+
check(['D:/a', './c:b'], ('D:', '\\', ['D:\\', 'a', 'c:b']))
136145

137146
def test_splitroot(self):
138147
f = self.flavour.splitroot
@@ -201,6 +210,7 @@ def test_constructor_common(self):
201210
self.assertEqual(P(P('a'), 'b'), P('a/b'))
202211
self.assertEqual(P(P('a'), P('b')), P('a/b'))
203212
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
213+
self.assertEqual(P(P('./a:b')), P('./a:b'))
204214

205215
def _check_str_subclass(self, *args):
206216
# Issue #21127: it should be possible to construct a PurePath object
@@ -712,7 +722,9 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase):
712722

713723
equivalences = _BasePurePathTest.equivalences.copy()
714724
equivalences.update({
715-
'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('/', 'c:', 'a') ],
725+
'./a:b': [ ('./a:b',) ],
726+
'a:b:c': [ ('./b:c', 'a:'), ('b:', 'a:b:c') ],
727+
'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ],
716728
'c:/a': [
717729
('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'),
718730
('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'),
@@ -736,6 +748,7 @@ def test_str(self):
736748
self.assertEqual(str(p), '\\\\a\\b\\c\\d')
737749

738750
def test_str_subclass(self):
751+
self._check_str_subclass('.\\a:b')
739752
self._check_str_subclass('c:')
740753
self._check_str_subclass('c:a')
741754
self._check_str_subclass('c:a\\b.txt')
@@ -882,6 +895,7 @@ def test_drive(self):
882895
self.assertEqual(P('//a/b').drive, '\\\\a\\b')
883896
self.assertEqual(P('//a/b/').drive, '\\\\a\\b')
884897
self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b')
898+
self.assertEqual(P('./c:a').drive, '')
885899

886900
def test_root(self):
887901
P = self.cls
@@ -1104,6 +1118,14 @@ def test_join(self):
11041118
self.assertEqual(pp, P('C:/a/b/x/y'))
11051119
pp = p.joinpath('c:/x/y')
11061120
self.assertEqual(pp, P('C:/x/y'))
1121+
# Joining with files with NTFS data streams => the filename should
1122+
# not be parsed as a drive letter
1123+
pp = p.joinpath(P('./d:s'))
1124+
self.assertEqual(pp, P('C:/a/b/d:s'))
1125+
pp = p.joinpath(P('./dd:s'))
1126+
self.assertEqual(pp, P('C:/a/b/dd:s'))
1127+
pp = p.joinpath(P('E:d:s'))
1128+
self.assertEqual(pp, P('E:d:s'))
11071129

11081130
def test_div(self):
11091131
# Basically the same as joinpath().
@@ -1124,6 +1146,11 @@ def test_div(self):
11241146
# the second path is relative.
11251147
self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y'))
11261148
self.assertEqual(p / 'c:/x/y', P('C:/x/y'))
1149+
# Joining with files with NTFS data streams => the filename should
1150+
# not be parsed as a drive letter
1151+
self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s'))
1152+
self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s'))
1153+
self.assertEqual(p / P('E:d:s'), P('E:d:s'))
11271154

11281155
def test_is_reserved(self):
11291156
P = self.cls
@@ -1333,6 +1360,8 @@ def test_expanduser_common(self):
13331360
self.assertEqual(p.expanduser(), p)
13341361
p = P(P('').absolute().anchor) / '~'
13351362
self.assertEqual(p.expanduser(), p)
1363+
p = P('~/a:b')
1364+
self.assertEqual(p.expanduser(), P(os.path.expanduser('~'), './a:b'))
13361365

13371366
def test_exists(self):
13381367
P = self.cls
@@ -2328,6 +2357,11 @@ def check():
23282357
env['USERPROFILE'] = 'C:\\Users\\alice'
23292358
check()
23302359

2360+
def test_resolve(self):
    # A non-strict resolve() of BASE joined with './a:b' must stringify
    # to BASE followed by '\a:b', i.e. the colon-containing component
    # stays a plain file name.
    stream_path = self.cls(BASE, './a:b')
    resolved = stream_path.resolve(strict=False)
    self.assertEqual(str(resolved), f'{BASE}\\a:b')
2364+
23312365

23322366
if __name__ == "__main__":
23332367
unittest.main()
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fix a ``pathlib`` inconsistency in handling of paths containing colons.

0 commit comments

Comments
 (0)