Code cleanup, Glottolog 4.0, release 2.0.1.
chrzyki committed Jul 3, 2019
1 parent fcef1c4 commit 1474ca2
Showing 5 changed files with 47 additions and 42 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -24,4 +24,4 @@ Conceptlists in Concepticon:
- **Invalid lexemes:** 0
- **Tokens:** 168,052
- **Segments:** 150 (0 BIPA errors, 0 CTLS sound class errors, 150 CLTS modified)
- **Inventory size (avg):** 40.02
- **Inventory size (avg):** 40.02
9 changes: 7 additions & 2 deletions cldf/cldf-metadata.json
@@ -1,13 +1,18 @@
{
"@context": "http://www.w3.org/ns/csvw",
"aboutUrl": null,
"dc:bibliographicCitation": "Z'graggen, J A. (1980) A comparative word list of the Northern Adelbert Range Languages, Madang Province, Papua New Guinea. Canberra: Pacific Linguistics. and others from the same author.",
"dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#Wordlist",
"dc:description": null,
"dc:format": [
"http://concepticon.clld.org/contributions/Zgraggen-1980-380"
],
"dc:identifier": null,
"dc:isVersionOf": null,
"dc:license": null,
"dc:related": null,
"dc:source": "sources.bib",
"dc:title": "Z'graggen Madang",
"dcat:accessURL": "https://github.com/lexibank/zgraggenmadang",
"rdf:ID": "zgraggenmadang",
"rdf:type": "http://www.w3.org/ns/dcat#Distribution",
"dialect": {
@@ -17,7 +22,7 @@
{
"dc:title": "environment",
"properties": {
"glottolog_version": "v3.4",
"glottolog_version": "v4.0",
"concepticon_version": "v2.0"
}
}
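The substantive change in this file is the bump of `glottolog_version` from `v3.4` to `v4.0` in the `environment` block. As a minimal sketch (not part of the commit), the recorded catalog versions can be read back out of `cldf/cldf-metadata.json` without assuming exactly where pylexibank nests that block:

```python
# Minimal sketch, not part of the commit: locate the "environment" block in
# cldf/cldf-metadata.json and print the catalog versions it records.
import json


def find_environment(node):
    """Depth-first search for the object whose dc:title is 'environment'."""
    if isinstance(node, dict):
        if node.get("dc:title") == "environment":
            return node.get("properties", {})
        node = list(node.values())
    if isinstance(node, list):
        for item in node:
            found = find_environment(item)
            if found is not None:
                return found
    return None


with open("cldf/cldf-metadata.json", encoding="utf-8") as fp:
    metadata = json.load(fp)

print(find_environment(metadata))
# expected after this commit: {'glottolog_version': 'v4.0', 'concepticon_version': 'v2.0'}
```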
63 changes: 32 additions & 31 deletions lexibank_zgraggenmadang.py
@@ -1,13 +1,8 @@
# coding=utf-8
from __future__ import unicode_literals, print_function
import re

import attr
from clldutils.path import Path
from pylexibank.dataset import Metadata
from clldutils.misc import slug
from clldutils.path import Path
from pylexibank.dataset import Dataset as BaseDataset, Language
from pylexibank.util import getEvoBibAsBibtex, pb
from pylexibank.util import getEvoBibAsBibtex


@attr.s
@@ -17,63 +12,69 @@ class Variety(Language):

class Dataset(BaseDataset):
dir = Path(__file__).parent
id = 'zgraggenmadang'
id = "zgraggenmadang"
language_class = Variety

def cmd_install(self, **kw):
#wl_data = {}
wl_data = []
log = kw['log']
log.info('building wordlist ...')
for idx, row in enumerate(self.raw.read_tsv('madang.csv')):
log = kw["log"]
log.info("building wordlist ...")
for idx, row in enumerate(self.raw.read_tsv("madang.csv")):
if idx > 1:
# get fields for the entry, correcting it if necessary
_, doculect, concept, counterpart = row
if counterpart in self.lexemes:
counterpart = self.lexemes[counterpart]

# add to wordlist data
wl_data.append({
'doculect' : doculect,
'concept' : concept,
'counterpart' : counterpart,
})
wl_data.append(
{"doculect": doculect, "concept": concept, "counterpart": counterpart}
)

log.info('... data assembled ...')
log.info("... data assembled ...")

# build CLDF data
with self.cldf as ds:
ds.add_sources()

# add languages, and build dictionary of sources
ds.add_languages(id_factory=lambda l: l['Name'])
lang_source = {l['Name']: l['Source'] for l in self.languages}
ds.add_languages(id_factory=lambda l: l["Name"])
lang_source = {l["Name"]: l["Source"] for l in self.languages}

for concept in self.concepts:
ds.add_concept(
ID=slug(concept['ENGLISH']),
Concepticon_ID=concept['CONCEPTICON_ID'],
Name=concept['ENGLISH']
)
ID=slug(concept["ENGLISH"]),
Concepticon_ID=concept["CONCEPTICON_ID"],
Name=concept["ENGLISH"],
)

for concept in self.conceptlist.concepts.values():
ds.add_concept(
ID=slug(concept.english),
Concepticon_ID=concept.concepticon_id,
Name=concept.concepticon_gloss
Name=concept.concepticon_gloss,
)

# add lexemes
for row in wl_data:
ds.add_lexemes(
Language_ID=row['doculect'],
Parameter_ID=slug(row['concept']),
Value=row['counterpart'],
Source=lang_source[row['doculect']],
Language_ID=row["doculect"],
Parameter_ID=slug(row["concept"]),
Value=row["counterpart"],
Source=lang_source[row["doculect"]],
)


def cmd_download(self, **kw):
if not self.raw.exists():
self.raw.mkdir()
self.raw.write('sources.bib', getEvoBibAsBibtex('Zgraggen1980NA', 'Zgraggen1980RC', 'Zgraggen1980SA', 'Zgraggen1980MA', 'List2014e', **kw))
self.raw.write(
"sources.bib",
getEvoBibAsBibtex(
"Zgraggen1980NA",
"Zgraggen1980RC",
"Zgraggen1980SA",
"Zgraggen1980MA",
"List2014e",
**kw
),
)
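For reference, the raw input consumed by `cmd_install` above is `madang.csv` in the dataset's raw directory, read as tab-separated values with the first two rows skipped and four columns per data row. A minimal stand-alone sketch (not part of the commit, assuming the conventional `raw/` location) that mirrors that loop:

```python
# Minimal sketch, not part of the commit: read raw/madang.csv the way
# cmd_install does -- tab-separated, skipping the first two rows, with each
# data row holding (index, doculect, concept, counterpart).
import csv

with open("raw/madang.csv", encoding="utf-8", newline="") as fp:
    for idx, row in enumerate(csv.reader(fp, delimiter="\t")):
        if idx > 1:  # skip the two header rows, as cmd_install does
            _, doculect, concept, counterpart = row
            print(doculect, concept, counterpart)
```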
10 changes: 7 additions & 3 deletions setup.py
@@ -1,9 +1,8 @@
from setuptools import setup
import sys
import json


with open('metadata.json', encoding='utf-8') as fp:
with open('metadata.json') as fp:
metadata = json.load(fp)


@@ -22,6 +21,11 @@
},
install_requires=[
'pylexibank==1.1.1',
'segments==2.0.2'
],
extras_require={'test': 'pytest-cldf>=0.2'}
extras_require={
'test': [
'pytest-cldf',
],
},
)
5 changes: 0 additions & 5 deletions test.py
@@ -1,8 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals


def test_valid(cldf_dataset, cldf_logger):
assert any(r['Value'] == 'ezandə kɨ-' for r in cldf_dataset['FormTable'])
assert cldf_dataset.validate(log=cldf_logger)
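
The test relies on the `cldf_dataset` and `cldf_logger` fixtures from `pytest-cldf`. Roughly the same checks can be run against the built dataset directly with `pycldf`; a minimal sketch (not part of the commit, assuming `pycldf` is installed and the dataset has been built):

```python
# Minimal sketch, not part of the commit: validate the built CLDF wordlist
# directly with pycldf instead of going through the pytest-cldf fixtures.
from pycldf import Wordlist

ds = Wordlist.from_metadata("cldf/cldf-metadata.json")
assert ds.validate()

# Same spot-check as test.py, iterating the FormTable rows as dicts.
assert any(row["Value"] == "ezandə kɨ-" for row in ds["FormTable"].iterdicts())
```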
