-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathatomgen.py
634 lines (583 loc) · 22 KB
/
atomgen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
#
# Generate an Atom format feed for some portion of the dwiki hierarchy.
#
# See http://www.intertwingly.net/wiki/pie/FrontPage
# http://www.atomenabled.org/developers/syndication/atom-format-spec.php
#
# Atom feeds are generated from special templates using a view that
# knows that the result is not text/html. The renderers only have
# to worry about variable portions of the feed.
#
# See also http://www.xml.com/pub/a/2004/04/14/atomwiki.html
# Also http://feedvalidator.org/
# Also http://diveintomark.org/, for a parsing library.
#
# The complication with XML is that it requires escaping pretty
# much everything in the text, which fits badly with reusing
# standard renderers (which do not).
#
# Open issue: the live standard calls for <updated>...</updated>,
# and the validator doesn't. Right now I have all of updated,
# modified, and issued in there. (The validator can tentatively
# bite me.)
#
# Atom feed readers that intuit the port for us, so far:
# - liferea, NetNewsWire (OSX) & NNW Lite.
#
# Charset issues: see http://skew.org/xml/tutorial/
# Not only do I need UTF-8, but I need 'UTF-8 stripped of bad characters'.
# Or I need to throw errors on bad characters. Or just let the user blow
# their foot off. Or ... something.
import time
import urllib
import utils
import htmlrends, httputil, pageranges, wikirend, template
import comments
import views
# Atom is served as application/atom+xml.
# This is used both as the content type of the Atom views and as the
# type attribute of the links that advertise the feeds.
atomCType = "application/atom+xml"
# This is arbitrary.
defCutoff = 100
def get_cutpoint(context):
    """Return the maximum number of entries to put in a feed.

    The 'atomfeed-display-howmany' setting is used when the context
    has it; otherwise we fall back on defCutoff."""
    if 'atomfeed-display-howmany' not in context:
        return defCutoff
    return context['atomfeed-display-howmany']
defFeedMax = False
def get_feedmax(context):
    """Return the feed size limit for this request, or defFeedMax
    (a false value) if no limit applies.

    With only 'feed-max-size' set, the limit applies to everyone.
    If 'feed-max-size-ips' is also set, the limit only applies to
    requests whose 'remote-ip' matches it."""
    if 'feed-max-size' in context:
        limit = context['feed-max-size']
        if 'feed-max-size-ips' not in context:
            return limit
        # The limit is restricted to some IPs; see if we are one.
        if httputil.matchIP(context['remote-ip'],
                            context['feed-max-size-ips']):
            return limit
    return defFeedMax
# The minimum file timestamp to be included in the feed.
# This is somewhat of a hack feature, since it uses a Unix timestamp.
def get_cuttime(context):
    """Return the 'feed-start-time' setting, or 0 if it is not set.
    Feed entries with timestamps below this are left out."""
    if 'feed-start-time' in context:
        return context['feed-start-time']
    return 0
# Joy, yet another time string format.
def atomtimestr(ts):
    """Format the Unix timestamp ts as an Atom time string in UTC,
    eg '2004-05-28T12:00:00Z'."""
    utc = time.gmtime(ts)
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc)
def rss2timestr(ts):
    """Format the Unix timestamp ts as an RSS 2.0 (RFC 822 style)
    date in UTC, eg 'Thu, 01 Jan 1970 00:00:00 +0000'."""
    # We spell out %H:%M:%S because %T is not a portable strftime
    # directive. The zone is a literal +0000 because the time tuple
    # comes from gmtime(), while %z can report the *local* zone's
    # offset, which would make the stamp wrong on any non-UTC host.
    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(ts))
def feedtitle(context):
    """Generate an Atom feed title for the current page."""
    # The title is the wiki's title (or failing that its name), with
    # the page path tacked on for everything except the root.
    base = context.getfirst("wikititle", "wikiname")
    where = context.page.path
    full = base if where == "" else base + " :: " + where
    return httputil.quotehtml(full)
htmlrends.register("atom::feedtitle", feedtitle)
#
# I think this is another case like that of redirection: you can't
# supply an explicit port because everyone will fix it up for you
# and thereby *double* the port.
# How this works when people pass feeds around I have no <censored>
# idea. Hopefully they rewrite the URL.
# NOTE: this makes it dangerous for use in <id> ... </id> context.
# Fix later. Head hurts.
# Apparently feed readers that do this are buggy (it should be an
# absolute URI). The question is 'how common are they?', because
# liferea is at least one of them.
def atomurl(context):
    """Generate the URL of this page in its normal view."""
    # We build the URL and then have the web layer turn it into a URI;
    # context.nuri(context.page) is the alternative we are not using.
    page_url = context.nurl(context.page)
    return context.web.uri_from_url(page_url, context)
htmlrends.register("atom::pageurl", atomurl)
# Generate a page ID. This is either atom::pageurl or something
# generated from 'atomfeed-tag' plus the page path. See
# http://diveintomark.org/archives/2004/05/28/howto-atom-id for
# a discussion of Atom IDs and also
# http://atomenabled.org/developers/syndication/.
# The page path is quoted; the tag is considered to not need quoting.
# As a bonus feature, we have 'atomfeed-tag-time'; if this is defined,
# any entries from before this time return atom::pageurl and only entries
# after it return real atom::pagetags.
def atomtag(context):
    """Generate an Atom entry ID. If the _atomfeed-tag_
    configuration option is not defined, this is the same as
    atom::pageurl. If _atomfeed-tag_ is defined, the entry ID is
    <tag value>:/<page path>. If _atomfeed-tag-time_ is defined,
    only pages from after this time are given tag-based IDs; for
    pages before then, this is the same as atom::pageurl."""
    # We use the page URL as the ID if there is no tag configured at
    # all, or if the page's timestamp is before the configured
    # tag cutoff time.
    use_url = 'atomfeed-tag' not in context or \
              ('atomfeed-tag-time' in context and
               context['atomfeed-tag-time'] > context.page.timestamp)
    if use_url:
        return atomurl(context)
    # Only the page path is quoted; the tag is used as is.
    return "%s:/%s" % (context['atomfeed-tag'],
                       urllib.quote(context.page.path))
htmlrends.register("atom::pagetag", atomtag)
def atomfeedurl(context):
    """Generate the URL of this page for the current feed."""
    # Unlike atom::pageurl this starts from context.url(), not
    # context.nurl().
    feed_url = context.url(context.page)
    return context.web.uri_from_url(feed_url, context)
htmlrends.register("atom::feedurl", atomfeedurl)
def atomstamp(context):
    """Generate an Atom timestamp for the current page."""
    # A page without a usable timestamp is stamped with right now.
    raw = context.page.timestamp
    when = time.time() if raw <= 0 else raw
    return atomtimestr(when)
htmlrends.register("atom::timestamp", atomstamp)
def atomctime(context):
    """Generate an Atom timestamp for the current page based on its
    change time."""
    # As with atom::timestamp, no usable modstamp means 'right now'.
    raw = context.page.modstamp
    when = time.time() if raw <= 0 else raw
    return atomtimestr(when)
htmlrends.register("atom::modstamp", atomctime)
def rss2stamp(context):
    """Generate a RSS 2.0 timestamp for the current page."""
    # Same rules as atom::timestamp, different time string format.
    raw = context.page.timestamp
    when = time.time() if raw <= 0 else raw
    return rss2timestr(when)
htmlrends.register("rss2::timestamp", rss2stamp)
# We have this because the <updated> entry is required to come
# *before* the <entry> entries, and we only know the most recent
# time after we generate all of the latter. And we are not
# generating them, stashing them, and then recreating them
# for you. Updated is when we generated this feed, and that's
# *right now*, baby.
#
# This causes people to pull the feed all the time, but it costs
# us less CPU time (I think), *especially* if we are restricted.
# At the moment I feel I have more bandwidth than CPU time, so.
def atomnow(context):
    """Generate an Atom timestamp for right now."""
    # The context is not used; it is here only because this is
    # called like every other renderer.
    __pychecker__ = "no-argsused"
    now = time.time()
    return atomtimestr(now)
htmlrends.register("atom::now", atomnow)
# Generate and cache the list of pages that we are going to
# process.
atom_cachekey = "atom:pagelist"
def _fillpages(context):
r = context.getcache(atom_cachekey)
if r is not None:
return r
if context.page.type != "dir":
return []
cutpoint = get_cutpoint(context)
cuttime = get_cuttime(context)
#dl = context.page.descendants(context)
# We deliberately use this context routine because it uses the
# disk cache (if that exists).
dl = context.cache_page_children(context.page)
# Force the generator to be expanded to a full list so we can use
# .sort on it.
dl = list(dl)
utils.sort_timelist(dl)
if not dl:
context.setcache(atom_cachekey, [])
return []
res = []
count = 0
dupDict = {}
for ent in dl:
if count >= cutpoint:
break
# Drop pages older than our cut time.
if ent[0] < cuttime:
continue
np = context.model.get_page(ent[1])
# We explicitly don't check access permissions here,
# because what to show for forbidden pages is a policy
# decision that is inappropriate to make here.
if np.is_util() or np.is_redirect() or not np.displayable():
continue
# Suppress duplicate pages; these might happen through,
# eg, hardlinks. When this happens we put only the first
# one encountered in the Atom feed. Our sorting process
# means that this is the lexically first, which may not
# actually be the same one that was in the *last* Atom
# feed generation run, but that's life.
# Tricky issue: we assume that all versions of the page
# have the same access permissions. If they don't, this
# may suppress readable pages in favour of earlier
# unreadable ones.
pageid = np.identity()
if pageid in dupDict:
continue
else:
dupDict[pageid] = True
count += 1
res.append(ent)
context.setcache(atom_cachekey, res)
return res
def synd_gen(context, tmpl):
    """Render every page in the feed page list through the template
    named tmpl and return the results joined together as one string.

    Each page is rendered in its own cloned context with 'relname'
    (and 'reldir', if not empty) set to the page's path and directory
    relative to the feed's root. Rendering stops early once the feed
    size limit, if any, is reached."""
    entry_tmpl = context.model.get_template(tmpl)
    size_limit = get_feedmax(context)
    root = context.page.me().path
    # How much to chop off the front of paths to make them relative
    # to the root: the root path plus its trailing '/', or nothing
    # if we are at the top.
    strip = len(root) + 1 if root != '' else 0

    chunks = []
    total = 0
    for _stamp, path in _fillpages(context):
        page = context.model.get_page(path)
        pctx = context.clone_to_page(page)
        pctx.setvar('relname', path[strip:])
        reldir = page.curdir().path[strip:]
        if reldir:
            pctx.setvar('reldir', reldir)
        text = template.Template(entry_tmpl).render(pctx)
        total += len(text)
        chunks.append(text)
        context.newtime(pctx.modtime)
        # Update for directory parent timestamp too.
        context.newtime(page.parent().modstamp)
        # And look for size limits. Note that we may go over
        # them, because we allow one entry's worth of slop;
        # this simplifies some cases.
        if size_limit and total >= size_limit:
            break
    return ''.join(chunks)
def atompages(context):
    """Generate an Atom feed of the current directory and all its
    descendants (showing only the most recent so many entries, newest
    first). Each page is rendered through _syndication/atomentry.tmpl_,
    which should result in a valid Atom feed entry. Supports
    VirtualDirectory restrictions."""
    # All of the work is done by the generic syndication generator.
    entry_template = "syndication/atomentry.tmpl"
    return synd_gen(context, entry_template)
htmlrends.register("atom::pages", atompages)
def rss2pages(context):
    """Generate a RSS 2.0 feed of the current directory and all its
    descendants (showing only the most recent so many entries, newest
    first). Each page is rendered through _syndication/rss2entry.tmpl_,
    which should result in a valid RSS 2.0 feed entry. Supports
    VirtualDirectory restrictions."""
    # The same as atom::pages except for the entry template.
    entry_template = "syndication/rss2entry.tmpl"
    return synd_gen(context, entry_template)
htmlrends.register("rss2::pages", rss2pages)
def atompagestamp(context):
    """Generate an Atom format timestamp for an Atom page feed for
    the current directory (and all its descendants)."""
    entries = _fillpages(context)
    if not entries:
        return atomnow(context)
    # The feed's time is the most recent modstamp of any page in it
    # or of any such page's parent.
    stamps = []
    for _stamp, path in entries:
        page = context.model.get_page(path)
        parent = page.parent()
        stamps.extend((page.modstamp, parent.modstamp))
    return atomtimestr(max(stamps))
htmlrends.register("atom::recentpage", atompagestamp)
def rss2pagestamp(context):
    """Generate an RSS 2.0 format timestamp for an RSS 2.0 page feed for
    the current directory (and all its descendants)."""
    entries = _fillpages(context)
    if not entries:
        return rss2timestr(time.time())
    # As with atom::recentpage, this is the most recent modstamp of
    # any page in the feed or of any such page's parent.
    stamps = []
    for _stamp, path in entries:
        page = context.model.get_page(path)
        parent = page.parent()
        stamps.extend((page.modstamp, parent.modstamp))
    return rss2timestr(max(stamps))
htmlrends.register("rss2::recentpage", rss2pagestamp)
def pageterse(context):
    """Generate wikitext:terse run through a HTML entity quoter,
    thus suitable for use in Atom feeds."""
    rendered = wikirend.terserend(context)
    return httputil.quotehtml(rendered)
htmlrends.register("atom::pageterse", pageterse)
def pagetersenotitle(context):
    """Generate wikitext:terse:notitle run through a HTML entity quoter,
    thus suitable for use in Atom feeds."""
    rendered = wikirend.tersenotitle(context)
    return httputil.quotehtml(rendered)
htmlrends.register("atom::pageterse:notitle", pagetersenotitle)
#
# Generate and cache the list of comments that we are going to
# process.
atom_comkey = "atom:commentlist"
def _fillcomments(context):
r = context.getcache(atom_comkey)
if r is not None:
return r
cutpoint = get_cutpoint(context)
#dl = context.model.comments_children(context.page.me())
dl = comments.cached_comments_children(context, context.page.me())
# Force the generator to be expanded to a full list, so we can
# sort it.
dl = list(dl)
utils.sort_timelist(dl)
if not dl:
context.setcache(atom_comkey, [])
return []
# Virtualization of comments means that we restrict the pages
# that the comments are on to be within the virtualization
# range. We cannot simply use pageranges.filter_files() on
# the comments list itself, because the timestamps in that
# are the *comment* timestamps, not the *page* timestamps.
filterComments = False
filterD = {}
if pageranges.is_restriction(context):
filterComments = True
for ts, p in context.page.descendants(context):
filterD[p] = True
res = []
count = 0
for ts, path, cname in dl:
if count > cutpoint:
break
if filterComments and path not in filterD:
continue
np = context.model.get_page(path)
# We drop entirely pages that can't be accessed with
# the current (lack of) permissions, rather than
# insert a message about denied content; this seems
# better.
if not np.displayable() or np.is_redirect() or \
not np.access_ok(context):
continue
c = context.model.get_comment(np, cname)
if not c:
continue
count += 1
res.append((ts, path, cname, c))
context.setcache(atom_comkey, res)
return res
def atomcomments(context):
    """Generate an Atom feed of recent comments on or below the
    current page. Each comment is rendered through
    _syndication/atomcomment.tmpl_. Supports VirtualDirectory
    restrictions, which limit which pages the feed will include
    comments for."""
    comment_tmpl = context.model.get_template("syndication/atomcomment.tmpl")
    size_limit = get_feedmax(context)
    chunks = []
    total = 0
    for _stamp, path, cname, com in _fillcomments(context):
        # Each comment is rendered in a clone of our context that
        # is on the comment's page and has the comment variables.
        page = context.model.get_page(path)
        cctx = context.clone_to_page(page)
        comments.set_com_vars(cctx, com)
        cctx.setvar(":comment:name", cname)
        text = template.Template(comment_tmpl).render(cctx)
        total += len(text)
        chunks.append(text)
        context.newtime(cctx.modtime)
        # clip maximum feed entry size (this is approximate, since
        # we don't know how much prefix and postfix we have).
        if size_limit and total >= size_limit:
            break
    return ''.join(chunks)
htmlrends.register("atom::comments", atomcomments)
def atomcommentstamp(context):
    """Generate an Atom format timestamp for the most recent comment
    that will be displayed in a comment syndication feed."""
    feed = _fillcomments(context)
    if not feed:
        # No comments at all; the best we can do is right now.
        return atomnow(context)
    # The first entry is used as the most recent one; the comment
    # object is the fourth element of each entry.
    newest = feed[0][3]
    return atomtimestr(newest.time)
htmlrends.register("atom::recentcomment", atomcommentstamp)
def atomcomment(context):
    """Display the current comment in a way suitable for inclusion in
    an Atom feed."""
    stash = comments.com_stash_var
    if stash not in context:
        return ''
    com = context[stash]
    # Rendered with absolute links, then entity-quoted for the feed.
    shown = comments.show_comment(com.data, context, wikirend.ABSLINKS)
    context.newtime(com.time)
    return httputil.quotehtml(shown)
htmlrends.register("atom::comment", atomcomment)
def commentstamp(context):
    """Generate an Atom feed format timestamp for the current comment."""
    stash = comments.com_stash_var
    if stash not in context:
        return ''
    return atomtimestr(context[stash].time)
htmlrends.register("atom::commentstamp", commentstamp)
def commenturl(context):
    """Generate the URL for the current comment."""
    stash = comments.com_stash_var
    if stash not in context:
        return ''
    com = context[stash]
    # This is the page in the comment view, plus the comment's
    # anchor as the fragment.
    base = context.uri(context.page, context.comment_view())
    return base + '#%s' % comments.anchor_for(com)
htmlrends.register("atom::commenturl", commenturl)
# Note that this is not a resolvable URL (the fragment isn't valid),
# *but* it is a *unique* one. However, it changes based on different
# hostnames used. Sigh. We can't goddamn win, apparently.
# The old default scheme of
# tag:${wikiname}:${page}:${:comment:name}
# isn't necessarily global, and doesn't pass theoretical tag
# validation because a) it needs a ',1970-01-01' there, and
# b) it theoretically should be a domain and c) feedvalidator
# gets in a snit if the wikiname is not in all lower case.
#
# However, the hostname switching is no worse than regular
# atom feeds, for which the entry ID has always been the
# page's URL.
def commentid(context):
    """Generate a hopefully unique ID for the current comment."""
    if comments.com_stash_var not in context:
        return ''
    # The page's normal URI with the comment's name as the fragment.
    base = context.nuri(context.page)
    return base + '#%s' % context[':comment:name']
htmlrends.register("atom::commentid", commentid)
def hasatomfeed(context):
    """Return True if the current page has an Atom feed, which is
    to say if it is a directory or the wiki root."""
    if context.page.type == "dir":
        return True
    return context.page.path == context.wiki_root()
# Return the true page for an atom feed on a virtual directory. This
# is only different from the vdir if atomfeed-virt-only-* stuff is
# set; if it's set and the vdir restriction type is not one of the
# listed allowed ones, we return the true directory instead of the
# virtual directory.
# only_in is true if we should only take restrictions from
# atomfeed-virt-only-in, instead of starting with a-f-only-adv.
# This is used for enforcing actual restrictions on what feeds exist
# instead of just which ones are advertised.
def true_atom_page(context, only_in = False):
    """Return the page whose Atom feed should be used for the
    current page.

    If only_in is true the allowed restriction types come only from
    'atomfeed-virt-only-in'; otherwise 'atomfeed-virt-only-adv' is
    preferred if it is set."""
    limited = context.page.virtual() and \
              pageranges.is_restriction(context) and \
              ('atomfeed-virt-only-in' in context or
               'atomfeed-virt-only-adv' in context)
    if not limited:
        # We must use .curdir() here because hasatomfeed() is
        # true for the root page as well as directories.
        return context.page.curdir()

    # tricky: .curdir() is not necessary now since virtual pages are
    # always directories.
    rtype = pageranges.restriction(context)
    # If we are called with only_in True, a-v-only-in is known to exist.
    if only_in:
        allowed = context['atomfeed-virt-only-in']
    else:
        allowed = context.get('atomfeed-virt-only-adv',
                              context.get('atomfeed-virt-only-in', None))
    # 'calendar' stands for all of the date-based restrictions.
    is_calendar = rtype in ('year', 'month', 'day')
    if rtype in allowed or (is_calendar and 'calendar' in allowed):
        return context.page
    return context.page.me()
# Atom view of a directory or a page.
def atomfeed(context):
    """Generate a link to the Atom feed for the current page if
    the current page is a directory or the wiki root."""
    if hasatomfeed(context):
        # The link goes to the 'atom' view of the true feed page.
        feed_url = context.url(true_atom_page(context), "atom")
        return htmlrends.makelink("Recent Pages", feed_url, True, atomCType)
    return ''
htmlrends.register("atom::dirfeed", atomfeed)
def atomcommentfeed(context):
    """Generate a link to the Atom comments feed for the current
    page, if comments are turned on."""
    if not context.model.comments_on():
        return ''
    target = context.page.me()
    # Special bonus hack: the wiki root page is replaced by its
    # parent.
    if target.path == context.wiki_root():
        target = target.parent()
    # .comments_on() can return True for pages no one can ever
    # comment on because the access restrictions to them are
    # impossible to pass. However, this is a lesser evil; it
    # just generates useless Atom feeds.
    # Directories always get a link; testing the type against "dir"
    # automatically excludes bad pages and so on from generating
    # that Atom comment link.
    if not (target.type == "dir" or target.comments_on(context)):
        return ''
    feed_url = context.url(target, 'atomcomments')
    return htmlrends.makelink("Recent Comments", feed_url, True, atomCType)
htmlrends.register("atom::commentfeed", atomcommentfeed)
atomfeeds = (atomfeed, atomcommentfeed, )
def atomtools(context):
    """Generate a comma-separated list of all Atom feed links, that
    are applicable for the current page."""
    links = []
    for gen in atomfeeds:
        link = gen(context)
        # Feeds that don't apply to this page generate nothing.
        if link:
            links.append(link)
    return ', '.join(links)
htmlrends.register("atom::feeds", atomtools)
# This generates things suitable for autodiscovery in the <head>
# section.
def gendisclink(url):
    """Return a <link> element advertising url as an alternate
    version of the page with the Atom content type."""
    fields = (atomCType, url)
    return '<link rel="alternate" type="%s" href="%s">' % fields
def atomdisc(context):
    """Generate a suitable Atom feed autodiscovery _<link>_ string,
    suitable for inclusion in the _<head>_ section. Generates nothing
    if there is no Atom recent changes feed."""
    if hasatomfeed(context):
        target = true_atom_page(context)
    else:
        # For a page in a blogdir view, we generate an autodiscovery
        # link for the blog's top level feed. I think this is probably
        # the most useful thing to do in general, since it lets people
        # who just landed on a blog page immediately grab a full feed.
        # This would clash if we ever wanted to do comments, but ennh.
        (pview, target) = context.pref_view_and_dir(context.page.curdir())
        if pview != 'blog':
            return ''
    return gendisclink(context.url(target, "atom"))
htmlrends.register("atom::autodisc", atomdisc)
# ---
# Register the 'atom' and 'atomcomments' views.
# Atom views are just like template views, except that they can be
# used on the root directory and that their content-type is
# application/atom+xml, not text/html.
class AtomView(views.AltType):
    # The only thing we change from our parent class is the
    # content type of the response.
    content_type = atomCType
#
# If the atomfeed-virt-only-in directive is set, we treat requests for
# disallowed atom feeds as some sort of problem. To be friendly, a
# disallowed request for a 'latest/' or 'range/' virtual directory is
# redirected to the base page's feed. Other requests produce 404's.
# This requires hooking into both page_ok() (for the 404's) and
# redirect_page() (for the redirections).
class RestrictedAtomView(AtomView):
    # An Atom view that enforces atomfeed-virt-only-in: a request
    # for a disallowed virtual directory feed is either redirected
    # to the feed of the true page or fails with a 'nopage' error.

    def _restrictable(self, optlist):
        # Return True if this request is for a restricted virtual
        # directory that atomfeed-virt-only-in applies to and the
        # type of the restriction is not in optlist.
        if 'atomfeed-virt-only-in' not in self.context or \
           not self.page.virtual() or \
           not pageranges.is_restriction(self.context) or \
           pageranges.restriction(self.context) in optlist:
            return False
        return True

    def page_ok(self):
        # super() must be anchored on this class, not on AtomView;
        # otherwise any page_ok() that AtomView itself has would be
        # silently skipped.
        r = super(RestrictedAtomView, self).page_ok()
        # 'latest' and 'range' restrictions are left alone here,
        # because redirect_page() deals with them.
        if not r or not self._restrictable(('latest', 'range')):
            return r
        tp = true_atom_page(self.context, True)
        if tp != self.page:
            self.error("nopage")
            return False
        return True

    def redirect_page(self):
        # As in page_ok(), super() is anchored on this class.
        r = super(RestrictedAtomView, self).redirect_page()
        if r or not self._restrictable(()):
            return r
        tp = true_atom_page(self.context, True)
        if tp == self.page:
            return False
        # Send people off to the same view of the true page.
        self.response.redirect(self.context.uri(tp, self.view))
        return True
# Generating an RSS 2.0 feed is a huge hack. We do not advertise it in
# page tools or anything else; it exists only for a few RSS2-only
# consumers that Chris (now) cares about and because existing feed
# conversion services either don't do it right or are undependable (in
# the long term) or both. It is deliberate that there is no RSS 2.0
# comments feed. RSS 2.0 feeds are just like Atom feeds and obey all
# of the same restrictions.
#
# All of the magic happens in renderers. We need a separate class here
# only to change the content-type.
class RestrictedRss2View(RestrictedAtomView):
    # We inherit from RestrictedAtomView, not plain AtomView, so that
    # RSS 2.0 feeds really do obey the same atomfeed-virt-only-in
    # restrictions that Atom feeds do; with AtomView as the parent,
    # requests for disallowed virtual directory feeds were simply
    # served.
    # NOTE(review): this changes what restricted 'rss2' requests
    # get (a redirect or an error instead of a feed); confirm that
    # this is what is wanted.

    # This is allegedly the right content-type. RSS being RSS,
    # there are apparently variations.
    content_type = "application/rss+xml"
# An atom view cannot be applied to a file, only a directory.
# Atom comments can be applied to anything.
# The 'atom' and 'rss2' views use the restricted view classes, while
# 'atomcomments' uses the plain AtomView.
views.register('atom', RestrictedAtomView, onDir = True, onFile = False)
views.register('atomcomments', AtomView, onDir = True)
views.register('rss2', RestrictedRss2View, onDir = True, onFile = False)