Skip to content

Commit

Permalink
GH-112855: Speed up pathlib.PurePath pickling
Browse files Browse the repository at this point in the history
The second item in the tuple returned from `__reduce__()` is a tuple of
arguments to supply to the path constructor. Previously we returned the
`parts` tuple here, which entailed joining, parsing and normalising the path
object, but produced a compact pickle representation.

With this patch, we instead return a tuple of the paths that were originally
given to the path constructor. This makes pickling much faster (at the
expense of compactness). By also no longer calling `sys.intern()` on the path
parts, we slightly speed up path parsing/normalization more generally.
  • Loading branch information
barneygale committed Dec 7, 2023
1 parent 304a1b3 commit 5b08580
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 26 deletions.
4 changes: 2 additions & 2 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def _parse_path(cls, path):
elif len(drv_parts) == 6:
# e.g. //?/unc/server/share
root = sep
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
parsed = [x for x in rel.split(sep) if x and x != '.']
return drv, root, parsed

def _load_parts(self):
Expand Down Expand Up @@ -606,7 +606,7 @@ def __init__(self, *args):
def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
return (self.__class__, self.parts)
return (self.__class__, tuple(self._raw_paths))

def __fspath__(self):
return str(self)
Expand Down
37 changes: 13 additions & 24 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,14 +695,19 @@ def test_is_relative_to_common(self):

def test_pickling_common(self):
P = self.cls
p = P('/a/b')
for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
dumped = pickle.dumps(p, proto)
pp = pickle.loads(dumped)
self.assertIs(pp.__class__, p.__class__)
self.assertEqual(pp, p)
self.assertEqual(hash(pp), hash(p))
self.assertEqual(str(pp), str(p))
paths = [
P('a'), P('a', 'b'), P('a/b'), P('a', 'b', 'c'), P('a/b/c'),
P('/'), P('/a', 'b'), P('/a/b'), P('/a', 'b', 'c'), P('/a/b/c'),
]
for p in paths:
with self.subTest(path=p):
for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
dumped = pickle.dumps(p, proto)
pp = pickle.loads(dumped)
self.assertIs(pp.__class__, p.__class__)
self.assertEqual(pp, p)
self.assertEqual(hash(pp), hash(p))
self.assertEqual(str(pp), str(p))

def test_fspath_common(self):
P = self.cls
Expand Down Expand Up @@ -2545,22 +2550,6 @@ def test_is_char_device_false(self):
self.assertIs((P / 'fileA\udfff').is_char_device(), False)
self.assertIs((P / 'fileA\x00').is_char_device(), False)

def test_pickling_common(self):
p = self.cls(BASE, 'fileA')
for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
dumped = pickle.dumps(p, proto)
pp = pickle.loads(dumped)
self.assertEqual(pp.stat(), p.stat())

def test_parts_interning(self):
P = self.cls
p = P('/usr/bin/foo')
q = P('/usr/local/bin')
# 'usr'
self.assertIs(p.parts[1], q.parts[1])
# 'bin'
self.assertIs(p.parts[2], q.parts[3])

def _check_complex_symlinks(self, link0_target):
if not self.can_symlink:
self.skipTest("symlinks required")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up pickling of :class:`pathlib.PurePath` objects. Patch by Barney
Gale.

0 comments on commit 5b08580

Please sign in to comment.