-
Notifications
You must be signed in to change notification settings - Fork 0
/
roam2tex.py
157 lines (129 loc) · 6.38 KB
/
roam2tex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#######################################
# A converter from Roam's markdown to LaTeX, highly inspired by roam2tex.js
#
# Meant for mostly mathematical work where I expect copious use of environments
#
# Replaces markup (bold, italics, highlight) and hyperlinks, which use hyperref
# inserts citations in place of Roam page links ala roam2tex.js
#
# environments delineated using \begin{environment} and indenting; environments are automatically closed.
# enumerate and itemize environments will automatically \item only the blocks one indent level after them.
# all environments can be nested.
# equations are LaTeX on their own line, automatically placed into equation* environment. inline LaTeX is converted accordingly.
# refs are not supported yet! But it would be great to add support for block refs when Roam exports properly.
# customizable preamble and indent size (if your Roam exporting is weird, I don't expect this to be used)
#
# usage: roam2tex.py infile_name [-o outfile_name] [-p preamble] [-i indent_size]
# if outfile_name is not given, outfile is infile + .tex
#
# Serina Hu (github.com/Cathrach)
# License: WTFPL
#######################################
import re
import argparse
import os
def render_citation(link_match):
link_text = link_match.group(1)
if not re.match(r"^[a-zA-Z]+[0-9]+[a-zA-Z]*$", link_text):
return link_text
return "\\cite{link_text}"
def replace_markup_via(text, delims, fun):
chunks = text.split(delims)
for i in range(1, len(chunks), 2):
chunks[i] = fun(chunks[i])
return "".join(chunks)
def convert_to_tex(infile_name, outfile_name, preamble, indent_size, ignore_properties):
infile = open(infile_name, "r")
outfile = open(outfile_name, "w")
outfile.write(preamble)
# TODO: make properties additional commands in the preamble
outfile.write("\\begin{document}")
# track indent levels for sections and paragraphs
section_indent = None
par_indent = None
# track enumeration
environments = []
enum_indents = []
for line in infile:
# remove indentations and markdown bullets
indent = len(re.match(r" *", line).group())
line = re.sub(r"^ *(- )?", "", line)
# skip any lines corresponding to properties we want to ignore
if line.startswith(tuple([f"{s}::" for s in ignore_properties])):
line = ""
# skip any {{ functions
line = re.sub(r"^\{\{(.+?)\}\}", "", line)
# indentation management
is_section = re.search(r"^#", line) is not None
if is_section:
section_indent = indent
par_indent = None
elif par_indent is None:
# corresponds to previous line being a section header or no header
par_indent = indent
# close any active environments (which would only happen if we went back in indent, but whatever)
# enumerate backwards over the `environments` list until we have closed everything appropriate
length = len(environments)
add_text = ""
for i, (ind, env) in enumerate(environments[::-1]):
if indent <= ind:
add_text += f"\\end{{{env}}}\n"
environments.pop(length - i - 1)
if env in ["enumerate", "itemize"]:
enum_indents.remove(ind + indent_size)
else:
break
if indent <= par_indent:
add_text += "\n"
par_indent = indent
# turn section headings into section, subsection, subsubsection
line = re.sub(r"^# (.*)", lambda m : f"\n\\section{{{m.group(1)}}}", line)
line = re.sub(r"^## (.*)", lambda m : f"\n\\subsection{{{m.group(1)}}}", line)
line = re.sub(r"^### (.*)", lambda m : f"\n\\subsubsection{{{m.group(1)}}}", line)
# check if the line begins an environment
env_begin = re.match(r"\\begin\{(.*?)\}", line)
if env_begin:
environment = env_begin.group(1)
environments.append((indent, environment))
if environment in ["enumerate", "itemize"]:
enum_indents.append(indent + indent_size)
# replace latex equations
latex_eq = re.match(r"^\$\$([^$]*?)\$\$$", line)
if latex_eq:
line = f"\\begin{{equation*}}\n{latex_eq.group(1)}\n\\end{{equation*}}"
else:
# replace markup
line = replace_markup_via(line, "__", lambda t : f"\\emph{{{t}}}")
line = replace_markup_via(line, "**", lambda t : f"\\textbf{{{t}}}")
line = replace_markup_via(line, "^^", lambda t : f"\\hl{{{t}}}")
# replace inline latex
line = line.replace("$$", "$")
# replace Roam links
line = re.sub(r"\[\[(.*?)\]\]", render_citation, line)
# replace inline links
line = re.sub(r"\[(.*?)\]\((.*)\)", lambda m : f"\\href{{{m.group(2)}}}{{{m.group(1)}}}", line)
if indent in enum_indents:
line = "\\item " + line
line = add_text + line
outfile.write(line + "\n")
if environments != []:
outfile.write("\n".join([f"\\end{{{env}}}\n" for _, env in environments[::-1]]))
outfile.write("\n\\end{document}")
infile.close()
outfile.close()
def main():
default_preamble = "\\documentclass[12pt]{article}\n\\usepackage{serina}\n"
parser = argparse.ArgumentParser()
parser.add_argument("infile", help="name of input file")
parser.add_argument("-o", "--outfile", help="name of output file if different from input")
parser.add_argument("-i", "--indent-size", help="number of spaces in indent", default=4)
parser.add_argument("-p", "--preamble", help="preamble for document", default=default_preamble)
args = parser.parse_args()
infile_name = args.infile
outfile_name = args.outfile if args.outfile is not None else os.path.splitext(infile_name)[0] + ".tex"
indent_size = args.indent_size
preamble = args.preamble
ignore_properties = ["tags", "do", "due"] #TODO: make this an option
convert_to_tex(infile_name, outfile_name, preamble, indent_size, ignore_properties)
if __name__ == "__main__":
main()