diff --git a/.github/workflows/cldf-validation.yml b/.github/workflows/cldf-validation.yml
index 62c167f..f107ba4 100644
--- a/.github/workflows/cldf-validation.yml
+++ b/.github/workflows/cldf-validation.yml
@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.6]
+ python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
diff --git a/.zenodo.json b/.zenodo.json
index 5224158..2b8e6b1 100644
--- a/.zenodo.json
+++ b/.zenodo.json
@@ -8,20 +8,15 @@
"creators": [
{
"name": "Timotheus A. Bodt"
+ },
+ {
+ "name": "Johann-Mattis List"
}
],
"contributors": [
{
"name": "Timotheus A. Bodt",
"type": "RightsHolder"
- },
- {
- "name": "Timotheus A. Bodt",
- "type": "DataCurator"
- },
- {
- "name": "Johann-Mattis List",
- "type": "Other"
}
],
"communities": [
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 4e6df7e..5e4045f 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -2,6 +2,5 @@
Name | GitHub user | Description |Role
--- | --- | --- | ---
-Timotheus A. Bodt | | maintainer | Author, RightsHolder, DataCurator
-Johann-Mattis List | @LinguList | maintainer | Other
-
+Timotheus A. Bodt | @monpasang | Data curation | Author, RightsHolder
+Johann-Mattis List | @LinguList | maintainer | Author
diff --git a/README.md b/README.md
index 2da8f71..e45383e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# CLDF dataset derived from Bodt's "Lexical Cognates in Western Kho-Bwa" from 2019
-[![CLDF validation](https://github.com/lingpy/bodtkhobwa//workflows/CLDF-validation/badge.svg)](https://github.com/lingpy/bodtkhobwa//actions?query=workflow%3ACLDF-validation)
+[![CLDF validation](https://github.com/lexibank/bodtkhobwa/workflows/CLDF-validation/badge.svg)](https://github.com/lexibank/bodtkhobwa/actions?query=workflow%3ACLDF-validation)
## How to cite
@@ -50,15 +50,15 @@ The collection of the data was funded by:
## Statistics
-[![CLDF validation](https://github.com/lingpy/bodtkhobwa//workflows/CLDF-validation/badge.svg)](https://github.com/lingpy/bodtkhobwa//actions?query=workflow%3ACLDF-validation)
+[![CLDF validation](https://github.com/lexibank/bodtkhobwa/workflows/CLDF-validation/badge.svg)](https://github.com/lexibank/bodtkhobwa/actions?query=workflow%3ACLDF-validation)
![Glottolog: 100%](https://img.shields.io/badge/Glottolog-100%25-brightgreen.svg "Glottolog: 100%")
![Concepticon: 90%](https://img.shields.io/badge/Concepticon-90%25-green.svg "Concepticon: 90%")
![Source: 100%](https://img.shields.io/badge/Source-100%25-brightgreen.svg "Source: 100%")
![BIPA: 100%](https://img.shields.io/badge/BIPA-100%25-brightgreen.svg "BIPA: 100%")
![CLTS SoundClass: 100%](https://img.shields.io/badge/CLTS%20SoundClass-100%25-brightgreen.svg "CLTS SoundClass: 100%")
-- **Varieties:** 8
-- **Concepts:** 662
+- **Varieties:** 8 (linked to 8 different Glottocodes)
+- **Concepts:** 662 (linked to 542 different Concepticon concept sets)
- **Lexemes:** 4,720
- **Sources:** 1
- **Synonymy:** 1.14
@@ -66,16 +66,15 @@ The collection of the data was funded by:
- **Cognate Diversity:** 0.48
- **Invalid lexemes:** 0
- **Tokens:** 20,415
-- **Segments:** 232 (0 BIPA errors, 0 CTLS sound class errors, 232 CLTS modified)
+- **Segments:** 232 (0 BIPA errors, 0 CLTS sound class errors, 232 CLTS modified)
- **Inventory size (avg):** 86.62
# Contributors
Name | GitHub user | Description |Role
--- | --- | --- | ---
-Timotheus A. Bodt | | maintainer | Author, RightsHolder, DataCurator
-Johann-Mattis List | @LinguList | maintainer | Other
-
+Timotheus A. Bodt | @monpasang | Data curation | Author, RightsHolder
+Johann-Mattis List | @LinguList | maintainer | Author
diff --git a/cldf/README.md b/cldf/README.md
index 2047eba..4987ef5 100644
--- a/cldf/README.md
+++ b/cldf/README.md
@@ -12,9 +12,9 @@ property | value
[dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF Wordlist](http://cldf.clld.org/v1.0/terms.rdf#Wordlist)
[dc:format](http://purl.org/dc/terms/format) |
- http://concepticon.clld.org/contributions/Bodt-2019-664
[dc:license](http://purl.org/dc/terms/license) | https://creativecommons.org/licenses/by/4.0/
-[dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/lingpy/bodtkhobwa/
-[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) | - lingpy/bodtkhobwa/ v3.0.1
- Glottolog v4.5
- Concepticon v2.5.0
- CLTS v2.1.0-26-gb12a7df
-[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) | - lingpy-rcParams: lingpy-rcParams.json
- python: 3.9.9
- python-packages: requirements.txt
+[dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/lexibank/bodtkhobwa
+[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) | - lexibank/bodtkhobwa v3.1.0
- Glottolog v5.0
- Concepticon v3.2.0
- CLTS v2.3.0
+[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) | - lingpy-rcParams: lingpy-rcParams.json
- python: 3.12.4
- python-packages: requirements.txt
[rdf:ID](http://www.w3.org/1999/02/22-rdf-syntax-ns#ID) | bodtkhobwa
[rdf:type](http://www.w3.org/1999/02/22-rdf-syntax-ns#type) | http://www.w3.org/ns/dcat#Distribution
@@ -64,8 +64,8 @@ Name/Property | Datatype | Description
`Glottolog_Name` | `string` |
[ISO639P3code](http://cldf.clld.org/v1.0/terms.rdf#iso639P3code) | `string` |
[Macroarea](http://cldf.clld.org/v1.0/terms.rdf#macroarea) | `string` |
-[Latitude](http://cldf.clld.org/v1.0/terms.rdf#latitude) | `decimal` |
-[Longitude](http://cldf.clld.org/v1.0/terms.rdf#longitude) | `decimal` |
+[Latitude](http://cldf.clld.org/v1.0/terms.rdf#latitude) | `decimal`
≥ -90
≤ 90 |
+[Longitude](http://cldf.clld.org/v1.0/terms.rdf#longitude) | `decimal`
≥ -180
≤ 180 |
`Family` | `string` |
`SubGroup` | `string` |
diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json
index fc86288..ab680fd 100644
--- a/cldf/cldf-metadata.json
+++ b/cldf/cldf-metadata.json
@@ -11,30 +11,30 @@
"dc:related": null,
"dc:source": "sources.bib",
"dc:title": "CLDF dataset derived from Bodt's \"Lexical Cognates in Western Kho-Bwa\" from 2019",
- "dcat:accessURL": "https://github.com/lingpy/bodtkhobwa/",
+ "dcat:accessURL": "https://github.com/lexibank/bodtkhobwa",
"prov:wasDerivedFrom": [
{
- "rdf:about": "https://github.com/lingpy/bodtkhobwa/",
+ "rdf:about": "https://github.com/lexibank/bodtkhobwa",
"rdf:type": "prov:Entity",
- "dc:created": "v3.0.1",
+ "dc:created": "v3.1.0",
"dc:title": "Repository"
},
{
"rdf:about": "https://github.com/glottolog/glottolog",
"rdf:type": "prov:Entity",
- "dc:created": "v4.5",
+ "dc:created": "v5.0",
"dc:title": "Glottolog"
},
{
"rdf:about": "https://github.com/concepticon/concepticon-data",
"rdf:type": "prov:Entity",
- "dc:created": "v2.5.0",
+ "dc:created": "v3.2.0",
"dc:title": "Concepticon"
},
{
- "rdf:about": "https://github.com/cldf-clts/clts/",
+ "rdf:about": "https://github.com/cldf-clts/clts",
"rdf:type": "prov:Entity",
- "dc:created": "v2.1.0-26-gb12a7df",
+ "dc:created": "v2.3.0",
"dc:title": "CLTS"
}
],
@@ -45,7 +45,7 @@
},
{
"dc:title": "python",
- "dc:description": "3.9.9"
+ "dc:description": "3.12.4"
},
{
"dc:title": "python-packages",
@@ -54,9 +54,6 @@
],
"rdf:ID": "bodtkhobwa",
"rdf:type": "http://www.w3.org/ns/dcat#Distribution",
- "dialect": {
- "commentPrefix": null
- },
"tables": [
{
"dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#FormTable",
@@ -183,7 +180,7 @@
{
"datatype": "string",
"propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#glottocode",
- "valueUrl": "http://glottolog.org/resource/languoid/id/{glottolog_id}",
+ "valueUrl": "http://glottolog.org/resource/languoid/id/{Glottocode}",
"name": "Glottocode"
},
{
@@ -253,7 +250,7 @@
{
"datatype": "string",
"propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference",
- "valueUrl": "http://concepticon.clld.org/parameters/{concepticon_id}",
+ "valueUrl": "http://concepticon.clld.org/parameters/{Concepticon_ID}",
"name": "Concepticon_ID"
},
{
diff --git a/cldf/lingpy-rcParams.json b/cldf/lingpy-rcParams.json
index 63046ed..77198a7 100644
--- a/cldf/lingpy-rcParams.json
+++ b/cldf/lingpy-rcParams.json
@@ -64,7 +64,7 @@
10,
10
],
- "filename": "lingpy-2022-03-16",
+ "filename": "lingpy-2024-07-08",
"gap_symbol": "-",
"gap_weight": 0.5,
"gop": -2,
@@ -123,7 +123,7 @@
"scorer": {},
"sonar": true,
"stress": "\u02c8\u02cc'",
- "timestamp": "2022-03-16 22:07",
+ "timestamp": "2024-07-08 14:08",
"tones": "\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u2070\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089\u20800123456789\u02e5\u02e6\u02e7\u02e8\u02e9\u02ea\u02eb-\ua708-\ua709-\ua70a-\ua70b-\ua70c-\ua70d-\ua70e-\ua70f-\ua710-\ua711-\ua712-\ua713-\ua714-\ua715-\ua716-\ua717-\ua718-\ua719-\ua71a-\ua700-\ua701-\ua702-\ua703-\ua704-\ua705-\ua706-\ua707",
"tree_calc": "neighbor",
"unique_sequences": true,
diff --git a/cldf/requirements.txt b/cldf/requirements.txt
index cd6d108..ecf1a13 100644
--- a/cldf/requirements.txt
+++ b/cldf/requirements.txt
@@ -1,56 +1,52 @@
appdirs==1.4.4
-bs4==0.0.1
-certifi==2021.10.8
-cldfbench==1.9.0
-cldfcatalog==1.4.0
-cldfzenodo==0.2.0
-clldutils==3.10.1
-colorlog==6.6.0
-csvw==2.0.0
-gitdb==4.0.9
-greenlet==1.1.2
-html5lib==1.1
-idna==3.3
-igraph==0.9.9
-iniconfig==1.1.1
+attrs==23.2.0
+Babel==2.15.0
+bibtexparser==2.0.0b7
+bs4==0.0.2
+certifi==2024.7.4
+cldfbench==1.14.0
+cldfcatalog==1.5.1
+cldfzenodo==2.1.1
+clldutils==3.22.2
+colorama==0.4.6
+colorlog==6.8.2
+csvw==3.3.0
+gitdb==4.0.11
+idna==3.7
isodate==0.6.1
-lingpy==2.6.9
-Markdown==3.3.6
-nameparser==1.1.1
-networkx==2.6.3
-newick==1.3.2
-numpy==1.22.2
-openpyxl==3.0.9
-packaging==21.3
-pluggy==1.0.0
-purl==1.6
-py==1.11.0
+jsonschema==4.22.0
+lingpy==2.6.13
+lxml==5.2.2
+Markdown==3.6
+nameparser==1.1.3
+networkx==3.3
+newick==1.9.0
+numpy==2.0.0
+openpyxl==3.1.5
+packaging==24.1
pybtex==0.24.0
-pycldf==1.25.1
-pyclts==3.1.1
-pyconcepticon==2.8.0
-pycountry==22.1.10
--e git+https://github.com/clld/pyglottolog.git@13af3b6f070bad186768c589c7b335edc43b7a7f#egg=pyglottolog
-pylexibank==3.3.0
-pysem==0.4.0
-pytest==7.0.1
-python-dateutil==2.8.2
-python-igraph==0.9.9
-regex==2022.1.18
-requests==2.27.1
+pycldf==1.38.1
+pyclts==3.2.0
+pyconcepticon==3.1.0
+pycountry==24.6.1
+pyglottolog==3.13.0
+pylatexenc==2.10
+pylexibank==3.5.0
+python-dateutil==2.9.0.post0
+rdflib==7.0.0
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
rfc3986==1.5.0
-segments==2.2.0
+segments==2.2.1
six==1.16.0
-smmap==5.0.0
-soupsieve==2.3.1
-SQLAlchemy==1.4.31
-tabulate==0.8.9
-termcolor==1.1.0
-texttable==1.6.4
-tqdm==4.62.3
+smmap==5.0.1
+soupsieve==2.5
+SQLAlchemy==1.4.52
+tabulate==0.9.0
+termcolor==2.4.0
+tqdm==4.66.4
uritemplate==4.1.1
-urllib3==1.26.8
-webencodings==0.5.1
+urllib3==2.2.2
xlrd==2.0.1
-zenodoclient==0.4.1
-zipp==3.7.0
\ No newline at end of file
+zenodoclient==0.5.1
\ No newline at end of file
diff --git a/cldf/sources.bib b/cldf/sources.bib
index 92b3eaa..5c9357d 100644
--- a/cldf/sources.bib
+++ b/cldf/sources.bib
@@ -2,7 +2,7 @@ @article{Bodt2019
author = {Bodt, Timotheus A. and List, Johann-Mattis},
journal = {Papers in Historical Phonology},
number = {1},
- pages = {22-44},
+ pages = {22--44},
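- title = {Testing the predictive strength of the comparative method: An ongoing experiment on unattested words in Western Kho-Bwa langauges},
+ title = {Testing the predictive strength of the comparative method: An ongoing experiment on unattested words in Western Kho-Bwa languages},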
volume = {4},
year = {2019}
diff --git a/lexibank_bodtkhobwa.py b/lexibank_bodtkhobwa.py
index c055f93..83afd9d 100644
--- a/lexibank_bodtkhobwa.py
+++ b/lexibank_bodtkhobwa.py
@@ -31,6 +31,7 @@ class Dataset(pylexibank.Dataset):
language_class = CustomLanguage
cross_concept_cognates = True
lexeme_class = CustomLexeme
+ writer_options = dict(keep_languages=False, keep_parameters=False)
def cmd_makecldf(self, args):
diff --git a/raw/bodt-khobwa-cleaned.tsv b/raw/bodt-khobwa-cleaned.tsv
index 9662eac..6689129 100644
--- a/raw/bodt-khobwa-cleaned.tsv
+++ b/raw/bodt-khobwa-cleaned.tsv
@@ -4431,9 +4431,9 @@ ID ALIGNMENT COGIDS CONCEPT CONCEPT_IN_SOURCE CROSSIDS DOCULECT FORM MORPHEMES N
#
4857 ʦ ɔ p step onto 938 Duhumbi ʦɔp STEP 938 i n c ʦ ɔ p ʦɔp
#
-3955 ts ɐ k 1075 stich stich 1075 Khoina ʦɐk STICH 1075 i n c ts ɐ k ʦɐk
-3959 ts~ʨ/ʨ a k 1075 stich stich 1075 Rupa ʦak STICH 1075 i n c ts~ʨ/ʨ a k ʦak ~ ʨak
-3960 tɕ a k 1075 stich stich 1075 Shergaon ʨak STICH 1075 i n c tɕ a k ʨak
+3955 ts ɐ k 1075 stich stitch 1075 Khoina ʦɐk STITCH 1075 i n c ts ɐ k ʦɐk
+3959 ts~ʨ/ʨ a k 1075 stich stitch 1075 Rupa ʦak STITCH 1075 i n c ts~ʨ/ʨ a k ʦak ~ ʨak
+3960 tɕ a k 1075 stich stitch 1075 Shergaon ʨak STITCH 1075 i n c tɕ a k ʨak
#
3961 kʰ aj 1076 stick stick 1076 Khispi kʰaj STICK 1076 i n kʰ aj kʰaj
3962 kʰ ɛj 1076 stick stick 1076 Duhumbi kʰɛj STICK 1076 i n kʰ ɛj kʰɛj
diff --git a/raw/sources.bib b/raw/sources.bib
index 4d029ca..e8c7f04 100644
--- a/raw/sources.bib
+++ b/raw/sources.bib
@@ -4,6 +4,6 @@ @Article{Bodt2019
journal = {Papers in Historical Phonology},
year = {2019},
number = {1},
- pages = {22-44},
+ pages = {22--44},
volume = {4},
}