-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathutils.py
152 lines (139 loc) · 4.23 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#
import re
# The less helpful version of os.path.join, for two arguments only, and
# if the second argument is an absolute path we do not discard the first.
# Sort of. This one is explicitly for URLs only, so we always use '/'
# as the path separator.
def pjoin(a, b):
    """Join two URL path fragments with a single '/'.

    If either fragment is empty (falsy), the other one is returned
    untouched and no separator is added. If a is empty, b is returned
    even when b is empty too. Unlike os.path.join, b is never treated
    as absolute: a is always kept.
    """
    if a and b:
        return a + '/' + b
    # At least one side is empty; a non-empty a wins, otherwise b.
    return a if a else b
# Canonicalize a path into a normalized form, returning a tuple of
# the 'name' (the last component) and the normalized path, in that order.
# Paths returned are absolute paths but do not start with a '/';
# the root is ''.
def canon_path(path):
    """Strip leading and trailing '/' from path and split off its name.

    Returns a (name, path) tuple: name is the last '/'-separated
    component of the stripped path, and path is the stripped path
    itself. The root (nothing left after stripping) yields ('', '').
    """
    stripped = path.strip('/')
    if not stripped:
        return ('', '')
    # Only the final component is wanted, so split once from the right.
    return (stripped.rsplit('/', 1)[-1], stripped)
# The parent is the path minus the name, and necessarily '' at the root.
def parent_path(path):
    """Return path with its last '/'-separated component removed.

    A path containing no '/' (including the root, '') has the root as
    its parent, so '' is returned.
    """
    # rpartition gives ('', '', path) when there is no '/', so the
    # head is '' in exactly the single-component case.
    head, _sep, _name = path.rpartition('/')
    return head
def name_path(path):
    """Return the last '/'-separated component of path.

    A path without any '/' is returned unchanged.
    """
    return path.rsplit('/', 1)[-1]
# Like os.walk, but on pages and does not return directories.
# TODO: unused and worth removing?
def walk(page):
    """Collect every non-directory page found beneath page.

    Children are taken from page.children(); any child whose .type is
    "dir" is descended into recursively instead of being returned
    itself. The result is a flat list in depth-first order.
    """
    found = []
    for kid in page.children():
        if kid.type == "dir":
            # Directories contribute their contents, never themselves.
            found += walk(kid)
        else:
            found.append(kid)
    return found
# We are in dirn, with (relative) path path. Canonicalize the
# result in the presence of '..' and other fun and games.
# If the result would climb above the root, return None.
def canonpath(dirn, path):
    """Resolve the relative path against the directory dirn.

    '.' elements in path are dropped and each '..' element removes the
    most recent component. Returns the resulting '/'-joined path with
    no leading '/', or None if a '..' climbs above the root.

    Previously a path whose '..' elements consumed the fake root on
    the very last element (for example canonpath('a', '../..')) raised
    IndexError; it now returns None like every other escape.
    """
    # First, take out all '/./' cases in the path.
    elems = [x for x in path.split('/') if x != '.']
    stack = dirn.split('/')
    # We insert a fake root, because all of our canonical paths
    # don't start with a '/'.
    # NOTE(review): for dirn == '' the split already yields [''], so
    # the stack starts with two empty elements and results come back
    # with a leading '/'. Left as-is; confirm whether callers rely on it.
    stack.insert(0, '')
    for elem in elems:
        if elem == '..':
            stack.pop()
            if not stack:
                # Even the fake root is gone: '..' ran off the top.
                return None
        else:
            stack.append(elem)
    # Drop the fake root again before joining.
    stack.pop(0)
    return '/'.join(stack)
# Is a path a good path?
# This is called SO OFTEN that it is worth some micro optimizations.
# Note: good_path_elem() beats an RE-based matcher.
# Path elements that are never acceptable; a dict so that membership
# tests are a hash lookup.
badElem = {'.': None, '..': None, '': None, 'RCS': None}
def good_path_elem(pelem):
    """Return True if the single path element pelem is acceptable.

    An element is rejected when it is one of the badElem entries
    ('.', '..', '' or 'RCS'), starts with '.', ends with '~', or ends
    with ',v'.
    """
    # The membership test must stay first: it rejects '' before the
    # indexing below could raise IndexError on an empty string.
    return (pelem not in badElem
            and pelem[0] != '.'
            and pelem[-1] != '~'
            and pelem[-2:] != ",v")
def goodpath_old(path):
    """Return True if every '/'-separated element of path is good.

    The empty path (the root) is always good. Otherwise each element
    is checked with good_path_elem() and a single bad one makes the
    whole path bad.
    """
    if path == '':
        return True
    return all(good_path_elem(elem) for elem in path.split('/'))
# note that the '.'-at-start pattern takes out '..' as well.
# a path that starts with a / is not good, but this is tricky
# in the re; it matches '^<empty>/'.
# This beats goodpath_old().
# Matches any path element that is empty, starts with '.', is exactly
# 'RCS', ends with '~' or ends with ',v'. The empty alternative is
# what rejects leading, trailing and doubled '/'.
badpath_re = re.compile(r"(^|/)(\.[^/]*|RCS||[^/]*~|[^/]*,v)(/|$)")
def goodpath(path):
    """Return True if path contains no element matched by badpath_re.

    The empty path (the root) is explicitly accepted, even though the
    regular expression on its own would match it.
    """
    return path == "" or not badpath_re.search(path)
# A bogus path is one that has directory motion elements in it that
# make us grind our teeth.
# Note that split's behavior means that disallowing '' as a path
# element also disallows paths starting with '/'.
# As surprising as it might be, this implementation beats a regexp.
def boguspath(path):
    """Return True if path contains a '.', '..' or empty element.

    The empty path (the root) is not bogus. Because of how split()
    works, a leading, trailing or doubled '/' produces an empty
    element and so makes the path bogus.
    """
    if path == '':
        return False
    return any(elem in ('.', '..', '') for elem in path.split('/'))
#
def yield_names(plist):
    """Yield the last '/'-separated component of each path in plist.

    Empty (falsy) entries are yielded back unchanged rather than
    split.
    """
    for entry in plist:
        yield entry.split("/")[-1] if entry else entry
# Walk up to the root, yielding everything going.
def walk_to_root(page):
    """Yield page and then each ancestor in turn, ending with the root.

    Ancestors are found by calling .parent() repeatedly; the root is
    the page whose .path is '' and it is always the last item yielded.
    """
    current = page
    while True:
        yield current
        if current.path == '':
            # The root has been yielded; nothing further up.
            return
        current = current.parent()
# Return the common prefix of paths a and b.
def common_prefix(a, b):
    """Return the longest leading run of path components shared by a and b.

    The comparison is made on whole '/'-separated components, not on
    characters, and the shared components are re-joined with '/'.
    """
    shared = []
    for left, right in zip(a.split('/'), b.split('/')):
        if left != right:
            break
        shared.append(left)
    return '/'.join(shared)
# This is a stable sort for 'time lists', as returned from eg
# page_children(). It sorts most recent first and breaks ties by
# sorting on the page path.
def timelist_sorter(a, b):
    """Three-way comparison function for timelist entries.

    Entries are indexable, with the timestamp at index 0 and the page
    path at index 1. Returns a negative number if a sorts before b, a
    positive number if it sorts after, and 0 if they tie: larger
    timestamps first, ties broken by ascending path.

    The builtin cmp() no longer exists in Python 3, so a local
    equivalent is used; results are the same on Python 2.
    """
    def _cmp(x, y):
        # Standard replacement for the removed builtin cmp().
        return (x > y) - (x < y)
    # Arguments are swapped for the timestamp to get descending order.
    return _cmp(b[0], a[0]) or _cmp(a[1], b[1])
# Sort a timelist like thing into what we consider order.
# There are two approaches to this: .sort(timelist_sorter) or
# just .sort() + .reverse. The latter is much faster, but
# reverses the alphabetical order for things with the same
# timestamp (you get (10, "ghi"), (10, "def"), (10, "abc")
# instead of vice versa). Arguably this is the right thing,
# and it certainly is faster, and identical timestamps are
# unlikely anyways.
def sort_timelist(lst):
    """Sort lst in place into descending order; returns None.

    Entries are compared as whole values, so entries sharing a
    timestamp end up in reverse order of their remaining fields.
    """
    # Alternatives considered: lst.sort() followed by lst.reverse(),
    # or a sort driven by timelist_sorter.
    lst[:] = sorted(lst, reverse=True)