-
Notifications
You must be signed in to change notification settings - Fork 0
/
bibyml.py
125 lines (99 loc) · 3.36 KB
/
bibyml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""
Library for parsing bibyml format.
bibyml is a variant of a subset YAML for bibtex values.
It can only represent dictionnaries and values are dictionnaries or bibtex values (or both)
In addition, tabulations are not supported.
Example:
eurocrypt: test
2013: aa
2015: bb
corresponds to the dictionnary:
{
"eurocrypt": {
"": test,
"2013": aa,
"2015": bb
}
}
"""
import re
import io
from collections import OrderedDict
import typing
# Matches one "key: value" line: group 1 is the leading whitespace (the
# indentation), group 2 is the key (one or more characters other than ':'),
# and group 3 is whatever follows the first ':' on the line.
_parser_re = re.compile(r'^(\s*)([^:]+):(.*)$')
# Matches a string made only of space characters (possibly empty); used to
# reject indentation that contains other whitespace such as tabs.
_spaces_re = re.compile(r'^ *$')
class ParserError(Exception):
    """Error raised when a bibyml line cannot be parsed.

    The exception message embeds the offending line (without its line
    terminator) and an optional explanation.
    """

    def __init__(self, line, msg=""):
        """Build the error message.

        Args:
            line: the raw input line that failed to parse; it may or may not
                end with a newline (the last line of a file often does not).
            msg: short description of the problem.
        """
        # Strip only trailing line terminators. Blindly slicing off the last
        # character would eat a real character when the line has no newline.
        shown = line.rstrip("\r\n")
        message = """BibYml parsing error:\n line: "{}"\n message: {}""".format(
            shown, msg)
        super().__init__(message)
def dict_get_path(d: dict, p: list, make=False):
    """Walk nested dictionaries along a list of keys and return the target.

    Starting from ``d``, each key of ``p`` is looked up in turn in the
    dictionary reached so far. With ``make=True`` a key that is absent is
    first bound to a new empty ``OrderedDict``; with ``make=False`` a missing
    key makes the lookup fail (``KeyError`` for regular dictionaries).
    An empty ``p`` returns ``d`` itself.
    """
    node = d
    for step in p:
        must_create = make and step not in node
        if must_create:
            node[step] = OrderedDict()
        node = node[step]
    return node
def parse(f: typing.TextIO) -> dict:
    """Parse a bibyml file into nested ordered dictionaries.

    Every ``key: value`` line becomes an ``OrderedDict`` stored under ``key``
    in the dictionary of its parent line. A non-empty value is stored in that
    new dictionary under the empty-string key ``""``. Nesting is expressed by
    indentation made of spaces only. Blank lines are skipped.

    Args:
        f: iterable of text lines, e.g. an open text file.

    Returns:
        The top-level ``OrderedDict``.

    Raises:
        ParserError: if a line does not have the ``key: value`` shape, has an
            empty key, is indented with characters other than spaces, or has
            an indentation that matches no currently open level.
    """
    res = OrderedDict()

    # `path` is the path (list of keys) of the current element
    path = []

    # `path_indent` stores the indentation of all the elements along the path
    # + the indentation of the children of the last element of the path.
    # It can end with -1 if the indentation of the children is not yet known
    # (i.e., before the first child has been seen).
    path_indent = [0]

    for line in f:
        if line.strip() == "":
            continue

        r = _parser_re.match(line)
        if r is None:
            raise ParserError(line)

        (spaces_indent, key, value) = r.groups()
        if _spaces_re.match(spaces_indent) is None:
            raise ParserError(line, "only spaces are accepted")
        value = value.strip()
        key = key.strip()
        if key == "":
            # A whitespace-only key (e.g. the line "  : x") would be stored
            # under "", the slot reserved for an element's own value, and
            # would silently replace the parent's value with a dictionary.
            raise ParserError(line, "empty key")
        indent = len(spaces_indent)

        if path_indent[-1] == -1:
            if indent > path_indent[-2]:
                # new indentation level: this line is the first child of the
                # previous line and fixes the indentation of its siblings
                path_indent[-1] = indent
            else:
                # no new indentation level: the previous line has no children
                path_indent.pop()
                path.pop()

        # find indentation level: close every level deeper than this line
        while len(path_indent) > 1 and path_indent[-1] > indent:
            path_indent.pop()
            path.pop()

        if indent != path_indent[-1]:
            raise ParserError(line, "indentation problem")

        d = dict_get_path(res, path)
        d[key] = OrderedDict([("", value)]) if value != "" else OrderedDict()

        path_indent.append(-1)  # we do not know the next indentation level
        path.append(key)

    return res
def write(out: typing.TextIO, d: dict, indent_key=4, indent_value=24, cur_indent=0) -> None:
    """Write a nested bibyml dictionary to ``out`` in bibyml text form.

    Each key other than ``""`` is written on its own line as ``key: value``,
    where the value is the entry stored under ``""`` in that key's dictionary
    (nothing is written after ``": "`` when it is absent or empty). The
    key's dictionary is then written recursively one nesting level deeper.

    Args:
        out: text stream receiving the output.
        d: dictionary mapping keys to nested dictionaries.
        indent_key: number of spaces per nesting level.
        indent_value: column at which values are aligned; no padding is added
            once the key already reaches past that column.
        cur_indent: current nesting level (0 for top-level keys).
    """
    prefix = " " * indent_key * cur_indent
    for (k, v) in d.items():
        if k == "":
            # The "" entry is the value of the enclosing key; it is emitted
            # on that key's own line, not as a child.
            continue
        if "" in v and v[""] != "":
            padding = " " * max(0, indent_value - (len(k) + 2 + indent_key * cur_indent))
            out.write("{}{}: {}{}\n".format(prefix, k, padding, v[""]))
        else:
            out.write("{}{}: \n".format(prefix, k))
        # Forward the layout settings so nested levels use the caller's
        # indentation instead of silently falling back to the defaults.
        write(
            out,
            v,
            indent_key=indent_key,
            indent_value=indent_value,
            cur_indent=cur_indent + 1,
        )
def write_str(d: dict, *args, **kwargs) -> str:
    """Return the bibyml text for ``d`` as a string.

    Extra positional and keyword arguments are passed through to ``write``
    unchanged.
    """
    with io.StringIO() as buffer:
        write(buffer, d, *args, **kwargs)
        return buffer.getvalue()