-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconverter.py
executable file
·82 lines (63 loc) · 3.02 KB
/
converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3.6
import csv
import re
from lxml import etree
import click
class TEI:
def __init__(self, data):
self.ns = {'t':'http://www.tei-c.org/ns/1.0'}
self.data = data
self.tree = etree.parse('template.xml', etree.XMLParser(remove_blank_text=True))
self.root = self.tree.getroot()
extent = self.root.xpath('//t:extent/t:measure', namespaces=self.ns)[0]
extent.attrib['quantity'] = str(len(self.data))
extent.text = "{} words".format(len(self.data))
def generate(self):
for row in self.data:
entry = etree.Element('entry')
form = etree.SubElement(entry, 'form', type='lemma')
orth = etree.SubElement(form, 'orth')
form.text = row[0]
gramgrp = etree.SubElement(entry, 'gramGrp')
pos = etree.SubElement(gramgrp, 'pos')
pos.text = row[1]
sense = etree.SubElement(entry, 'sense')
for meaning in row[2].split(';'):
cit = etree.SubElement(sense, 'cit', type='translation', attrib={"{http://www.w3.org/XML/1998/namespace}lang": "hu"})
translation = etree.SubElement(cit, 'quote')
translation.text = meaning
if row[3] != '' or row[5] != '':
etym = etree.SubElement(entry, 'etym')
if row[3] != '':
etym_lang = etree.SubElement(etym, 'lang')
etym_form = etree.SubElement(etym, 'mentioned')
etym_lang.text = 'Root'
etym_form.text = row[3]
if row[4] != '':
etym_gloss = etree.SubElement(etym, 'gloss')
etym_gloss.text = row[4]
if row[5] != '':
vals = re.match(r'([A-Z]+) ([\w\s]+\w)', row[5])
if vals:
etym_lang = etree.SubElement(etym, 'lang')
etym_form = etree.SubElement(etym, 'mentioned')
etym_lang.text = vals[1]
etym_form.text = vals[2]
if row[6] != '':
etym_gloss = etree.SubElement(etym, 'gloss')
glossval = row[6]
glossval = re.sub(r'([A-Z]+) ([\w\s]+\w)', '<lang>\\1</lang> <mentioned>\\2</mentioned>', glossval)
etym_gloss.text = glossval
self.root.xpath('//t:body', namespaces=self.ns)[0].append(entry)
return self
def write(self, output_file):
self.tree.write(output_file, pretty_print=True, xml_declaration = True, encoding = 'UTF-8')
@click.command()
@click.argument('file')
@click.option('--out', help='Output file', default='tei-dict.xml')
def generate(file, out):
"""Simple program that greets NAME for a total of COUNT times."""
with open(file + '.csv') as csvfile:
TEI(list(csv.reader(csvfile, delimiter=',', quotechar='"'))).generate().write(out)
if __name__ == '__main__':
generate()