-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexibank_castroyi.py
54 lines (42 loc) · 1.65 KB
/
lexibank_castroyi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from pathlib import Path
import attr
from clldutils.misc import slug
import lingpy
import pylexibank
@attr.s
class CustomConcept(pylexibank.Concept):
    """Concept record extended with a ``Chinese_Gloss`` field."""

    # Chinese gloss of the concept; ``None`` unless a value is supplied.
    # ``Dataset.cmd_makecldf`` fills it from ``concept.attributes["chinese"]``.
    Chinese_Gloss = attr.ib(default=None)
@attr.s
class CustomLanguage(pylexibank.Language):
    """Language record extended with ``SubGroup`` and ``Family`` fields."""

    # Value used when no SubGroup is supplied for a language.
    SubGroup = attr.ib(default="Nesu")
    # Value used when no Family is supplied for a language.
    Family = attr.ib(default="Sino-Tibetan")
class Dataset(pylexibank.Dataset):
    """Lexibank dataset definition with the identifier ``castroyi``.

    Uses ``CustomConcept`` and ``CustomLanguage`` as record classes and
    converts the raw wordlist ``yi-wl.tsv`` into CLDF in ``cmd_makecldf``.
    """

    id = "castroyi"
    dir = Path(__file__).parent
    concept_class = CustomConcept
    language_class = CustomLanguage

    def cmd_makecldf(self, args):
        """Write sources, languages, concepts and forms through ``args.writer``.

        Args:
            args: Object exposing the CLDF ``writer`` used for all output.

        Raises:
            KeyError: If the concept list has no entry whose English gloss is
                ``"daughter-in-law"``, or if a wordlist row refers to a
                concept gloss that was not registered.
        """
        # Load the raw wordlist first, before anything is written.
        wordlist_path = self.raw_dir.joinpath("yi-wl.tsv")
        wordlist = lingpy.Wordlist(wordlist_path.as_posix())

        writer = args.writer
        writer.add_sources()

        # Lookup keyed by language name, as requested via ``lookup_factory``.
        language_ids = writer.add_languages(lookup_factory="Name")

        # Map each English gloss to the concept ID registered with the writer.
        concept_ids = {}
        for entry in self.conceptlists[0].concepts.values():
            # ID = last "-"-separated part of the concept-list ID + "_" + slug.
            number = entry.id.split("-")[-1]
            concept_id = "_".join([number, slug(entry.english)])
            writer.add_concept(
                ID=concept_id,
                Name=entry.english,
                Concepticon_ID=entry.concepticon_id,
                Concepticon_Gloss=entry.concepticon_gloss,
                Chinese_Gloss=entry.attributes["chinese"],
            )
            concept_ids[entry.english] = concept_id

        # Alias for the capitalised spelling.
        # NOTE(review): presumably the wordlist uses "Daughter-in-law" while
        # the concept list uses lower case - confirm against the raw data.
        concept_ids["Daughter-in-law"] = concept_ids["daughter-in-law"]

        rows = pylexibank.progressbar(wordlist, desc="cldfify", total=len(wordlist))
        for row_id in rows:
            language_id = language_ids[wordlist[row_id, "doculect"]]
            parameter_id = concept_ids[wordlist[row_id, "concept"]]
            writer.add_form_with_segments(
                Language_ID=language_id,
                Parameter_ID=parameter_id,
                Value=wordlist[row_id, "value"],
                Form=wordlist[row_id, "form"],
                Segments=wordlist[row_id, "tokens"],
                Source=["Castro2010"],
            )