-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexibank_suntb.py
64 lines (53 loc) · 2.06 KB
/
lexibank_suntb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from pathlib import Path
import attr
import pylexibank
from clldutils.misc import slug
@attr.s
class CustomLanguage(pylexibank.Language):
    """Language record for this dataset, layered on ``pylexibank.Language``.

    Declares four optional attrs fields; each falls back to its default
    when no value is supplied at construction time.
    """

    # Coordinates; None unless a value is given.
    Latitude = attr.ib(default=None)
    Longitude = attr.ib(default=None)
    # Subgroup label; None unless a value is given.
    SubGroup = attr.ib(default=None)
    # Family name; defaults to "Sino-Tibetan" for every language.
    Family = attr.ib(default="Sino-Tibetan")
@attr.s
class CustomConcept(pylexibank.Concept):
    """Concept record for this dataset, layered on ``pylexibank.Concept``.

    Adds two optional attrs fields, both defaulting to None.
    """

    # Chinese gloss of the concept; None unless a value is given.
    Chinese_Gloss = attr.ib(default=None)
    # Concept number; None unless a value is given.
    Number = attr.ib(default=None)
class Dataset(pylexibank.Dataset):
    """Lexibank dataset definition with the id ``suntb``.

    ``cmd_download`` stores the bibliography entry in the raw directory and
    ``cmd_makecldf`` feeds sources, languages, concepts and forms to the
    CLDF writer handed in through ``args``.
    """

    # Dataset root is the directory that contains this module.
    dir = Path(__file__).parent
    id = "suntb"
    writer_options = {"keep_languages": False, "keep_parameters": False}
    language_class = CustomLanguage
    concept_class = CustomConcept
    # Settings handed to pylexibank's FormSpec; see its documentation for
    # the exact meaning of each option.
    form_spec = pylexibank.FormSpec(
        brackets={"[": "]", "(": ")"},
        separators=";,/",
        missing_data=("*", "---", "-", "--"),
        replacements=[(" ", "_")],
    )

    def cmd_download(self, args):
        """Write the BibTeX for ``Sun1991`` to ``sources.bib`` in the raw directory.

        ``args`` is accepted but not used by this method.
        """
        bibtex = pylexibank.util.getEvoBibAsBibtex("Sun1991")
        self.raw_dir.write("sources.bib", bibtex)

    def cmd_makecldf(self, args):
        """Add sources, languages, concepts and forms via ``args.writer``.

        Concepts are taken from the first concept list of the dataset; forms
        are read from the tab-delimited ``ZMYYC.csv`` in the raw directory.
        """
        writer = args.writer
        writer.add_sources()
        # Indexed below by the ``language`` value of each raw row.
        languages_by_name = writer.add_languages(lookup_factory="Name")

        # Every concept id is registered twice: under the concept's English
        # gloss and under its number.
        parameter_ids = {}
        for concept in self.conceptlists[0].concepts.values():
            id_suffix = concept.id.split("-")[-1]
            concept_id = "_".join((id_suffix, slug(concept.english)))
            parameter_ids[concept.english] = concept_id
            parameter_ids[concept.number] = concept_id
            writer.add_concept(
                ID=concept_id,
                Name=concept.english,
                Concepticon_ID=concept.concepticon_id,
                Concepticon_Gloss=concept.concepticon_gloss,
                Chinese_Gloss=concept.attributes["chinese"],
            )

        rows = self.raw_dir.read_csv("ZMYYC.csv", delimiter="\t", dicts=True)
        for row in pylexibank.progressbar(rows):
            writer.add_forms_from_value(
                Language_ID=languages_by_name[row["language"]],
                # Only the part of ``srcid`` before the first "." is used as
                # the lookup key; an unknown key yields None here.
                # NOTE(review): confirm how the writer treats a None
                # Parameter_ID.
                Parameter_ID=parameter_ids.get(row["srcid"].split(".")[0]),
                Local_ID=row["rn"],
                Value=row["reflex"],
                Source=["Sun1991"],
            )