forked from imagej/list-of-update-sites
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate-legacy-pages.py
75 lines (62 loc) · 2.68 KB
/
generate-legacy-pages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import datetime, re
import jinja2, markdown2, yaml
from bs4 import BeautifulSoup
from xml.sax.saxutils import escape
# NB: Avoid annoying BeautifulSoup warnings of the following kind:
#
#   UserWarning: "https://.../" looks like a URL. Beautiful Soup is not an
#   HTTP client. You should probably use an HTTP client like requests to get
#   the document behind the URL, and feed that document to Beautiful Soup.
#
# This script deliberately hands short strings (including site URLs) to
# BeautifulSoup in order to strip markup from them, so the warning is noise
# here. Only UserWarning raised from the bs4 module is silenced.
#
# See: https://stackoverflow.com/a/41496131/1207769
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')
def html(markdown_string):
    """Convert a Markdown snippet to an HTML fragment.

    The rendered output has trailing whitespace removed. When the result
    is exactly one paragraph (it opens with ``<p>``, closes with ``</p>``
    and contains a single ``</p>``), the wrapping paragraph tags are
    dropped so the fragment can be embedded inline. Anything else is
    returned as rendered.
    """
    rendered = markdown2.markdown(markdown_string).rstrip()
    wrapped_in_single_paragraph = (
        rendered.startswith('<p>')
        and rendered.endswith('</p>')
        and rendered.count('</p>') == 1
    )
    if not wrapped_in_single_paragraph:
        return rendered
    # Drop the leading '<p>' and the trailing '</p>'.
    return rendered[len('<p>'):-len('</p>')]
def plain(html_string):
    """Return the text content of an HTML fragment.

    The fragment is parsed with BeautifulSoup's ``html.parser`` backend,
    all markup is discarded, and trailing whitespace is stripped from the
    remaining text.
    """
    soup = BeautifulSoup(html_string, features="html.parser")
    text = soup.get_text()
    return text.rstrip()
# Templates are looked up relative to the current working directory.
template_loader = jinja2.FileSystemLoader(searchpath="./")
template_env = jinja2.Environment(loader=template_loader)

# Take a single timestamp so that every generated file shows the same
# date and time.
now = datetime.datetime.now()
date = format(now, "%d %B %Y")
time = format(now, "%H:%M")
# Parse the YAML source to a sites data structure.
# The file is read explicitly as UTF-8 so that non-ASCII text (for example
# in maintainer names) is decoded the same way on every platform, rather
# than with whatever the locale's default encoding happens to be.
with open('sites.yml', 'r', encoding='utf-8') as stream:
    sites = yaml.safe_load(stream)
# Open the XML template.
xml_template = template_env.get_template('sites.xml.template')

# Render sites.xml from the sites data structure.
# Every field goes Markdown -> HTML -> plain text, and is then passed through
# xml.sax.saxutils.escape so that '&', '<' and '>' cannot break the XML.
xml_data = xml_template.render(sites=[{
    'name': escape(plain(html(site['name']))),
    'url': escape(plain(html(site['url']))),
    'description': escape(plain(html(site['description']))),
    'maintainer': escape(', '.join(plain(html(m)) for m in site['maintainers']))
} for site in sites['sites']], date=date, time=time)

# Write explicitly as UTF-8 instead of the locale's default encoding, so the
# bytes on disk do not vary by platform. UTF-8 is what an XML parser assumes
# when a document carries no encoding declaration.
# NOTE(review): assumes sites.xml.template does not declare a different
# encoding -- confirm.
with open('sites.xml', 'w', encoding='utf-8') as sites_xml_file:
    sites_xml_file.write(xml_data)
# Tweak the XML: HTTPS -> HTTP for select URLs.
# And write the tweaked result to sites_insecure.xml.
#
# The dots in the host names are escaped so that they match a literal '.'
# only; unescaped, each '.' would match any character. Raw strings keep the
# backslashes in the pattern and the '\1' backreference readable.
xml_data_insecure = re.sub(
    r'https://(update\.imagej\.net|update\.fiji\.sc|sites\.imagej\.net)/',
    r'http://\1/', xml_data)

# Write explicitly as UTF-8 instead of the locale's default encoding, so the
# bytes on disk do not vary by platform.
# NOTE(review): assumes the XML template does not declare a different
# encoding -- confirm.
with open('sites_insecure.xml', 'w', encoding='utf-8') as sites_xml_file_insecure:
    sites_xml_file_insecure.write(xml_data_insecure)
# Validate the resulting XML files by parsing each one; the parsed documents
# themselves are discarded.
import xml.dom.minidom as dom
for generated_xml in ('sites_insecure.xml', 'sites.xml'):
    dom.parse(generated_xml)
# Open the HTML template.
html_template = template_env.get_template('sites.html.template')

# Render sites.html from the sites data structure.
# Unlike the XML output, the fields stay as rendered HTML (Markdown -> HTML)
# and are not reduced to plain text or escaped.
result = html_template.render(sites=[{
    'name': html(site['name']),
    'url': html(site['url']),
    'description': html(site['description']),
    'maintainer': ', '.join(html(m) for m in site['maintainers'])
} for site in sites['sites']], date=date, time=time)

# Write explicitly as UTF-8 instead of the locale's default encoding, so the
# bytes on disk do not vary by platform.
# NOTE(review): assumes sites.html.template does not declare a different
# charset -- confirm.
with open('sites.html', 'w', encoding='utf-8') as sites_html_file:
    sites_html_file.write(result)