diff --git a/.github/workflows/cldf-validation.yml b/.github/workflows/cldf-validation.yml index 62c167f..f107ba4 100644 --- a/.github/workflows/cldf-validation.yml +++ b/.github/workflows/cldf-validation.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 diff --git a/.zenodo.json b/.zenodo.json index 5224158..2b8e6b1 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -8,20 +8,15 @@ "creators": [ { "name": "Timotheus A. Bodt" + }, + { + "name": "Johann-Mattis List" } ], "contributors": [ { "name": "Timotheus A. Bodt", "type": "RightsHolder" - }, - { - "name": "Timotheus A. Bodt", - "type": "DataCurator" - }, - { - "name": "Johann-Mattis List", - "type": "Other" } ], "communities": [ diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 4e6df7e..5e4045f 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -2,6 +2,5 @@ Name | GitHub user | Description |Role --- | --- | --- | --- -Timotheus A. Bodt | | maintainer | Author, RightsHolder, DataCurator -Johann-Mattis List | @LinguList | maintainer | Other - +Timotheus A. 
Bodt | @monpasang | Data curation | Author, RightsHolder +Johann-Mattis List | @LinguList | maintainer | Author diff --git a/README.md b/README.md index 2da8f71..e45383e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # CLDF dataset derived from Bodt's "Lexical Cognates in Western Kho-Bwa" from 2019 -[![CLDF validation](https://github.com/lingpy/bodtkhobwa//workflows/CLDF-validation/badge.svg)](https://github.com/lingpy/bodtkhobwa//actions?query=workflow%3ACLDF-validation) +[![CLDF validation](https://github.com/lexibank/bodtkhobwa/workflows/CLDF-validation/badge.svg)](https://github.com/lexibank/bodtkhobwa/actions?query=workflow%3ACLDF-validation) ## How to cite @@ -50,15 +50,15 @@ The collection of the data was funded by: ## Statistics -[![CLDF validation](https://github.com/lingpy/bodtkhobwa//workflows/CLDF-validation/badge.svg)](https://github.com/lingpy/bodtkhobwa//actions?query=workflow%3ACLDF-validation) +[![CLDF validation](https://github.com/lexibank/bodtkhobwa/workflows/CLDF-validation/badge.svg)](https://github.com/lexibank/bodtkhobwa/actions?query=workflow%3ACLDF-validation) ![Glottolog: 100%](https://img.shields.io/badge/Glottolog-100%25-brightgreen.svg "Glottolog: 100%") ![Concepticon: 90%](https://img.shields.io/badge/Concepticon-90%25-green.svg "Concepticon: 90%") ![Source: 100%](https://img.shields.io/badge/Source-100%25-brightgreen.svg "Source: 100%") ![BIPA: 100%](https://img.shields.io/badge/BIPA-100%25-brightgreen.svg "BIPA: 100%") ![CLTS SoundClass: 100%](https://img.shields.io/badge/CLTS%20SoundClass-100%25-brightgreen.svg "CLTS SoundClass: 100%") -- **Varieties:** 8 -- **Concepts:** 662 +- **Varieties:** 8 (linked to 8 different Glottocodes) +- **Concepts:** 662 (linked to 542 different Concepticon concept sets) - **Lexemes:** 4,720 - **Sources:** 1 - **Synonymy:** 1.14 @@ -66,16 +66,15 @@ The collection of the data was funded by: - **Cognate Diversity:** 0.48 - **Invalid lexemes:** 0 - **Tokens:** 20,415 -- **Segments:** 232 (0 BIPA 
errors, 0 CTLS sound class errors, 232 CLTS modified) +- **Segments:** 232 (0 BIPA errors, 0 CLTS sound class errors, 232 CLTS modified) - **Inventory size (avg):** 86.62 # Contributors Name | GitHub user | Description |Role --- | --- | --- | --- -Timotheus A. Bodt | | maintainer | Author, RightsHolder, DataCurator -Johann-Mattis List | @LinguList | maintainer | Other - +Timotheus A. Bodt | @monpasang | Data curation | Author, RightsHolder +Johann-Mattis List | @LinguList | maintainer | Author diff --git a/cldf/README.md b/cldf/README.md index 2047eba..4987ef5 100644 --- a/cldf/README.md +++ b/cldf/README.md @@ -12,9 +12,9 @@ property | value [dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF Wordlist](http://cldf.clld.org/v1.0/terms.rdf#Wordlist) [dc:format](http://purl.org/dc/terms/format) |
  1. http://concepticon.clld.org/contributions/Bodt-2019-664
[dc:license](http://purl.org/dc/terms/license) | https://creativecommons.org/licenses/by/4.0/ -[dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/lingpy/bodtkhobwa/ -[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. lingpy/bodtkhobwa/ v3.0.1
  2. Glottolog v4.5
  3. Concepticon v2.5.0
  4. CLTS v2.1.0-26-gb12a7df
-[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.9.9
  3. python-packages: requirements.txt
+[dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/lexibank/bodtkhobwa +[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. lexibank/bodtkhobwa v3.1.0
  2. Glottolog v5.0
  3. Concepticon v3.2.0
  4. CLTS v2.3.0
+[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.12.4
  3. python-packages: requirements.txt
[rdf:ID](http://www.w3.org/1999/02/22-rdf-syntax-ns#ID) | bodtkhobwa [rdf:type](http://www.w3.org/1999/02/22-rdf-syntax-ns#type) | http://www.w3.org/ns/dcat#Distribution @@ -64,8 +64,8 @@ Name/Property | Datatype | Description `Glottolog_Name` | `string` | [ISO639P3code](http://cldf.clld.org/v1.0/terms.rdf#iso639P3code) | `string` | [Macroarea](http://cldf.clld.org/v1.0/terms.rdf#macroarea) | `string` | -[Latitude](http://cldf.clld.org/v1.0/terms.rdf#latitude) | `decimal` | -[Longitude](http://cldf.clld.org/v1.0/terms.rdf#longitude) | `decimal` | +[Latitude](http://cldf.clld.org/v1.0/terms.rdf#latitude) | `decimal`
≥ -90
≤ 90 | +[Longitude](http://cldf.clld.org/v1.0/terms.rdf#longitude) | `decimal`
≥ -180
≤ 180 | `Family` | `string` | `SubGroup` | `string` | diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json index fc86288..ab680fd 100644 --- a/cldf/cldf-metadata.json +++ b/cldf/cldf-metadata.json @@ -11,30 +11,30 @@ "dc:related": null, "dc:source": "sources.bib", "dc:title": "CLDF dataset derived from Bodt's \"Lexical Cognates in Western Kho-Bwa\" from 2019", - "dcat:accessURL": "https://github.com/lingpy/bodtkhobwa/", + "dcat:accessURL": "https://github.com/lexibank/bodtkhobwa", "prov:wasDerivedFrom": [ { - "rdf:about": "https://github.com/lingpy/bodtkhobwa/", + "rdf:about": "https://github.com/lexibank/bodtkhobwa", "rdf:type": "prov:Entity", - "dc:created": "v3.0.1", + "dc:created": "v3.1.0", "dc:title": "Repository" }, { "rdf:about": "https://github.com/glottolog/glottolog", "rdf:type": "prov:Entity", - "dc:created": "v4.5", + "dc:created": "v5.0", "dc:title": "Glottolog" }, { "rdf:about": "https://github.com/concepticon/concepticon-data", "rdf:type": "prov:Entity", - "dc:created": "v2.5.0", + "dc:created": "v3.2.0", "dc:title": "Concepticon" }, { - "rdf:about": "https://github.com/cldf-clts/clts/", + "rdf:about": "https://github.com/cldf-clts/clts", "rdf:type": "prov:Entity", - "dc:created": "v2.1.0-26-gb12a7df", + "dc:created": "v2.3.0", "dc:title": "CLTS" } ], @@ -45,7 +45,7 @@ }, { "dc:title": "python", - "dc:description": "3.9.9" + "dc:description": "3.12.4" }, { "dc:title": "python-packages", @@ -54,9 +54,6 @@ ], "rdf:ID": "bodtkhobwa", "rdf:type": "http://www.w3.org/ns/dcat#Distribution", - "dialect": { - "commentPrefix": null - }, "tables": [ { "dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#FormTable", @@ -183,7 +180,7 @@ { "datatype": "string", "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#glottocode", - "valueUrl": "http://glottolog.org/resource/languoid/id/{glottolog_id}", + "valueUrl": "http://glottolog.org/resource/languoid/id/{Glottocode}", "name": "Glottocode" }, { @@ -253,7 +250,7 @@ { "datatype": "string", "propertyUrl": 
"http://cldf.clld.org/v1.0/terms.rdf#concepticonReference", - "valueUrl": "http://concepticon.clld.org/parameters/{concepticon_id}", + "valueUrl": "http://concepticon.clld.org/parameters/{Concepticon_ID}", "name": "Concepticon_ID" }, { diff --git a/cldf/lingpy-rcParams.json b/cldf/lingpy-rcParams.json index 63046ed..77198a7 100644 --- a/cldf/lingpy-rcParams.json +++ b/cldf/lingpy-rcParams.json @@ -64,7 +64,7 @@ 10, 10 ], - "filename": "lingpy-2022-03-16", + "filename": "lingpy-2024-07-08", "gap_symbol": "-", "gap_weight": 0.5, "gop": -2, @@ -123,7 +123,7 @@ "scorer": {}, "sonar": true, "stress": "\u02c8\u02cc'", - "timestamp": "2022-03-16 22:07", + "timestamp": "2024-07-08 14:08", "tones": "\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u2070\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089\u20800123456789\u02e5\u02e6\u02e7\u02e8\u02e9\u02ea\u02eb-\ua708-\ua709-\ua70a-\ua70b-\ua70c-\ua70d-\ua70e-\ua70f-\ua710-\ua711-\ua712-\ua713-\ua714-\ua715-\ua716-\ua717-\ua718-\ua719-\ua71a-\ua700-\ua701-\ua702-\ua703-\ua704-\ua705-\ua706-\ua707", "tree_calc": "neighbor", "unique_sequences": true, diff --git a/cldf/requirements.txt b/cldf/requirements.txt index cd6d108..ecf1a13 100644 --- a/cldf/requirements.txt +++ b/cldf/requirements.txt @@ -1,56 +1,52 @@ appdirs==1.4.4 -bs4==0.0.1 -certifi==2021.10.8 -cldfbench==1.9.0 -cldfcatalog==1.4.0 -cldfzenodo==0.2.0 -clldutils==3.10.1 -colorlog==6.6.0 -csvw==2.0.0 -gitdb==4.0.9 -greenlet==1.1.2 -html5lib==1.1 -idna==3.3 -igraph==0.9.9 -iniconfig==1.1.1 +attrs==23.2.0 +Babel==2.15.0 +bibtexparser==2.0.0b7 +bs4==0.0.2 +certifi==2024.7.4 +cldfbench==1.14.0 +cldfcatalog==1.5.1 +cldfzenodo==2.1.1 +clldutils==3.22.2 +colorama==0.4.6 +colorlog==6.8.2 +csvw==3.3.0 +gitdb==4.0.11 +idna==3.7 isodate==0.6.1 -lingpy==2.6.9 -Markdown==3.3.6 -nameparser==1.1.1 -networkx==2.6.3 -newick==1.3.2 -numpy==1.22.2 -openpyxl==3.0.9 -packaging==21.3 -pluggy==1.0.0 -purl==1.6 -py==1.11.0 +jsonschema==4.22.0 +lingpy==2.6.13 +lxml==5.2.2 
+Markdown==3.6 +nameparser==1.1.3 +networkx==3.3 +newick==1.9.0 +numpy==2.0.0 +openpyxl==3.1.5 +packaging==24.1 pybtex==0.24.0 -pycldf==1.25.1 -pyclts==3.1.1 -pyconcepticon==2.8.0 -pycountry==22.1.10 --e git+https://github.com/clld/pyglottolog.git@13af3b6f070bad186768c589c7b335edc43b7a7f#egg=pyglottolog -pylexibank==3.3.0 -pysem==0.4.0 -pytest==7.0.1 -python-dateutil==2.8.2 -python-igraph==0.9.9 -regex==2022.1.18 -requests==2.27.1 +pycldf==1.38.1 +pyclts==3.2.0 +pyconcepticon==3.1.0 +pycountry==24.6.1 +pyglottolog==3.13.0 +pylatexenc==2.10 +pylexibank==3.5.0 +python-dateutil==2.9.0.post0 +rdflib==7.0.0 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 rfc3986==1.5.0 -segments==2.2.0 +segments==2.2.1 six==1.16.0 -smmap==5.0.0 -soupsieve==2.3.1 -SQLAlchemy==1.4.31 -tabulate==0.8.9 -termcolor==1.1.0 -texttable==1.6.4 -tqdm==4.62.3 +smmap==5.0.1 +soupsieve==2.5 +SQLAlchemy==1.4.52 +tabulate==0.9.0 +termcolor==2.4.0 +tqdm==4.66.4 uritemplate==4.1.1 -urllib3==1.26.8 -webencodings==0.5.1 +urllib3==2.2.2 xlrd==2.0.1 -zenodoclient==0.4.1 -zipp==3.7.0 \ No newline at end of file +zenodoclient==0.5.1 \ No newline at end of file diff --git a/cldf/sources.bib b/cldf/sources.bib index 92b3eaa..5c9357d 100644 --- a/cldf/sources.bib +++ b/cldf/sources.bib @@ -2,7 +2,7 @@ @article{Bodt2019 author = {Bodt, Timotheus A. 
and List, Johann-Mattis}, journal = {Papers in Historical Phonology}, number = {1}, - pages = {22-44}, + pages = {22--44}, title = {Testing the predictive strength of the comparative method: An ongoing experiment on unattested words in Western Kho-Bwa langauges}, volume = {4}, year = {2019} diff --git a/lexibank_bodtkhobwa.py b/lexibank_bodtkhobwa.py index c055f93..83afd9d 100644 --- a/lexibank_bodtkhobwa.py +++ b/lexibank_bodtkhobwa.py @@ -31,6 +31,7 @@ class Dataset(pylexibank.Dataset): language_class = CustomLanguage cross_concept_cognates = True lexeme_class = CustomLexeme + writer_options = dict(keep_languages=False, keep_parameters=False) def cmd_makecldf(self, args): diff --git a/raw/bodt-khobwa-cleaned.tsv b/raw/bodt-khobwa-cleaned.tsv index 9662eac..6689129 100644 --- a/raw/bodt-khobwa-cleaned.tsv +++ b/raw/bodt-khobwa-cleaned.tsv @@ -4431,9 +4431,9 @@ ID ALIGNMENT COGIDS CONCEPT CONCEPT_IN_SOURCE CROSSIDS DOCULECT FORM MORPHEMES N # 4857 ʦ ɔ p step onto 938 Duhumbi ʦɔp STEP 938 i n c ʦ ɔ p ʦɔp # -3955 ts ɐ k 1075 stich stich 1075 Khoina ʦɐk STICH 1075 i n c ts ɐ k ʦɐk -3959 ts~ʨ/ʨ a k 1075 stich stich 1075 Rupa ʦak STICH 1075 i n c ts~ʨ/ʨ a k ʦak ~ ʨak -3960 tɕ a k 1075 stich stich 1075 Shergaon ʨak STICH 1075 i n c tɕ a k ʨak +3955 ts ɐ k 1075 stich stitch 1075 Khoina ʦɐk STITCH 1075 i n c ts ɐ k ʦɐk +3959 ts~ʨ/ʨ a k 1075 stich stitch 1075 Rupa ʦak STITCH 1075 i n c ts~ʨ/ʨ a k ʦak ~ ʨak +3960 tɕ a k 1075 stich stitch 1075 Shergaon ʨak STITCH 1075 i n c tɕ a k ʨak # 3961 kʰ aj 1076 stick stick 1076 Khispi kʰaj STICK 1076 i n kʰ aj kʰaj 3962 kʰ ɛj 1076 stick stick 1076 Duhumbi kʰɛj STICK 1076 i n kʰ ɛj kʰɛj diff --git a/raw/sources.bib b/raw/sources.bib index 4d029ca..e8c7f04 100644 --- a/raw/sources.bib +++ b/raw/sources.bib @@ -4,6 +4,6 @@ @Article{Bodt2019 journal = {Papers in Historical Phonology}, year = {2019}, number = {1}, - pages = {22-44}, + pages = {22--44}, volume = {4}, }