From e959611be803921b8dfdb6db0aa934cda35478cd Mon Sep 17 00:00:00 2001 From: chrzyki Date: Thu, 1 Aug 2024 10:29:45 +0200 Subject: [PATCH] Prepare release --- .github/workflows/cldf-validation.yml | 2 +- .zenodo.json | 6 +- CONTRIBUTORS.md | 4 +- README.md | 12 +-- cldf/README.md | 120 ++++++++++++++++++++++++++ cldf/cldf-metadata.json | 17 ++-- cldf/languages.csv | 2 +- cldf/lingpy-rcParams.json | 4 +- cldf/parameters.csv | 8 +- cldf/requirements.txt | 94 +++++++++++--------- 10 files changed, 201 insertions(+), 68 deletions(-) create mode 100644 cldf/README.md diff --git a/.github/workflows/cldf-validation.yml b/.github/workflows/cldf-validation.yml index 62c167f..b2f938e 100644 --- a/.github/workflows/cldf-validation.yml +++ b/.github/workflows/cldf-validation.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6] + python-version: [3.12] steps: - uses: actions/checkout@v2 diff --git a/.zenodo.json b/.zenodo.json index 60bd00c..990902e 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -16,7 +16,11 @@ "contributors": [ { "name": "Christoph Rzymski", - "type": "Other" + "type": "Editor" + }, + { + "name": "Johann-Mattis List", + "type": "Editor" } ], "communities": [ diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 9e45ec8..2a189b6 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -4,4 +4,6 @@ Name | GitHub user | Description | Role --- | --- | --- | --- Robinson, Laura C. | | data collector | Author Holton, Gary | | data collector | Author -Christoph Rzymski | @chrzyki | maintainer, patron | Other +Christoph Rzymski | @chrzyki | maintainer, patron | Editor +Johann-Mattis List | @LinguList | maintainer, profile | Editor + diff --git a/README.md b/README.md index f671934..5039e5a 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,13 @@ Any dataset specific notes on lexibank decisions/mapping choices etc go in here. [![CLDF validation](https://github.com/lexibank/robinsonap/workflows/CLDF-validation/badge.svg)](https://github.com/lexibank/robinsonap/actions?query=workflow%3ACLDF-validation) ![Glottolog: 100%](https://img.shields.io/badge/Glottolog-100%25-brightgreen.svg "Glottolog: 100%") -![Concepticon: 98%](https://img.shields.io/badge/Concepticon-98%25-green.svg "Concepticon: 98%") +![Concepticon: 99%](https://img.shields.io/badge/Concepticon-99%25-green.svg "Concepticon: 99%") ![Source: 100%](https://img.shields.io/badge/Source-100%25-brightgreen.svg "Source: 100%") ![BIPA: 100%](https://img.shields.io/badge/BIPA-100%25-brightgreen.svg "BIPA: 100%") ![CLTS SoundClass: 100%](https://img.shields.io/badge/CLTS%20SoundClass-100%25-brightgreen.svg "CLTS SoundClass: 100%") -- **Varieties:** 13 -- **Concepts:** 398 +- **Varieties:** 13 (linked to 13 different Glottocodes) +- **Concepts:** 398 (linked to 392 different Concepticon concept sets) - **Lexemes:** 4,841 - **Sources:** 1 - **Synonymy:** 1.06 @@ -43,7 +43,7 @@ Any dataset specific notes on lexibank decisions/mapping choices etc go in here. - **Cognate Diversity:** 0.44 - **Invalid lexemes:** 0 - **Tokens:** 25,844 -- **Segments:** 49 (0 BIPA errors, 0 CTLS sound class errors, 49 CLTS modified) +- **Segments:** 49 (0 BIPA errors, 0 CLTS sound class errors, 49 CLTS modified) - **Inventory size (avg):** 27.31 # Contributors @@ -52,7 +52,9 @@ Name | GitHub user | Description | Role --- | --- | --- | --- Robinson, Laura C. | | data collector | Author Holton, Gary | | data collector | Author -Christoph Rzymski | @chrzyki | maintainer, patron | Other +Christoph Rzymski | @chrzyki | maintainer, patron | Editor +Johann-Mattis List | @LinguList | maintainer, profile | Editor + diff --git a/cldf/README.md b/cldf/README.md new file mode 100644 index 0000000..a6cb20e --- /dev/null +++ b/cldf/README.md @@ -0,0 +1,120 @@ + + +# Wordlist CLDF dataset derived from Robinson and Holton's "Internal Classification of the Alor-Pantar Language Family" from 2012 + +**CLDF Metadata**: [cldf-metadata.json](./cldf-metadata.json) + +**Sources**: [sources.bib](./sources.bib) + +property | value + --- | --- +[dc:bibliographicCitation](http://purl.org/dc/terms/bibliographicCitation) | Robinson, Laura C. and Holton, Gary (2012): Internal Classification of the Alor-Pantar Language Family Using Computational Methods Applied to the Lexicon. Language Dynamics and Change 2.2. 123-149. +[dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF Wordlist](http://cldf.clld.org/v1.0/terms.rdf#Wordlist) +[dc:format](http://purl.org/dc/terms/format) |
  1. http://concepticon.clld.org/contributions/Robinson-2012-398
+[dc:identifier](http://purl.org/dc/terms/identifier) | https://doi.org/10.1163/22105832-20120201 +[dc:license](http://purl.org/dc/terms/license) | https://creativecommons.org/licenses/by/4.0/ +[dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/lexibank/robinsonap +[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. lexibank/robinsonap v4.0
  2. Glottolog v5.0
  3. Concepticon v3.2.0
  4. CLTS v2.3.0
+[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.12.4
  3. python-packages: requirements.txt
+[rdf:ID](http://www.w3.org/1999/02/22-rdf-syntax-ns#ID) | robinsonap +[rdf:type](http://www.w3.org/1999/02/22-rdf-syntax-ns#type) | http://www.w3.org/ns/dcat#Distribution + + +## Table [forms.csv](./forms.csv) + + +Raw lexical data item as it can be pulled out of the original datasets. + +This is the basis for creating rows in CLDF representations of the data by +- splitting the lexical item into forms +- cleaning the forms +- potentially tokenizing the form + + +property | value + --- | --- +[dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF FormTable](http://cldf.clld.org/v1.0/terms.rdf#FormTable) +[dc:extent](http://purl.org/dc/terms/extent) | 4841 + + +### Columns + +Name/Property | Datatype | Description + --- | --- | --- +[ID](http://cldf.clld.org/v1.0/terms.rdf#id) | `string` | Primary key +[Local_ID](http://purl.org/dc/terms/identifier) | `string` | +[Language_ID](http://cldf.clld.org/v1.0/terms.rdf#languageReference) | `string` | References [languages.csv::ID](#table-languagescsv) +[Parameter_ID](http://cldf.clld.org/v1.0/terms.rdf#parameterReference) | `string` | References [parameters.csv::ID](#table-parameterscsv) +[Value](http://cldf.clld.org/v1.0/terms.rdf#value) | `string` | +[Form](http://cldf.clld.org/v1.0/terms.rdf#form) | `string` | +[Segments](http://cldf.clld.org/v1.0/terms.rdf#segments) | list of `string` (separated by ` `) | +[Comment](http://cldf.clld.org/v1.0/terms.rdf#comment) | `string` | +[Source](http://cldf.clld.org/v1.0/terms.rdf#source) | list of `string` (separated by `;`) | References [sources.bib::BibTeX-key](./sources.bib) +`Cognacy` | `string` | +`Loan` | `boolean` | +`Graphemes` | `string` | +`Profile` | `string` | + +## Table [languages.csv](./languages.csv) + +property | value + --- | --- +[dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF LanguageTable](http://cldf.clld.org/v1.0/terms.rdf#LanguageTable) +[dc:extent](http://purl.org/dc/terms/extent) | 13 + + +### Columns + +Name/Property | Datatype | Description + --- | --- | --- +[ID](http://cldf.clld.org/v1.0/terms.rdf#id) | `string` | Primary key +[Name](http://cldf.clld.org/v1.0/terms.rdf#name) | `string` | +[Glottocode](http://cldf.clld.org/v1.0/terms.rdf#glottocode) | `string` | +`Glottolog_Name` | `string` | +[ISO639P3code](http://cldf.clld.org/v1.0/terms.rdf#iso639P3code) | `string` | +[Macroarea](http://cldf.clld.org/v1.0/terms.rdf#macroarea) | `string` | +[Latitude](http://cldf.clld.org/v1.0/terms.rdf#latitude) | `decimal`
≥ -90
≤ 90 | +[Longitude](http://cldf.clld.org/v1.0/terms.rdf#longitude) | `decimal`
≥ -180
≤ 180 | +`Family` | `string` | +`Token` | `string` | + +## Table [parameters.csv](./parameters.csv) + +property | value + --- | --- +[dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF ParameterTable](http://cldf.clld.org/v1.0/terms.rdf#ParameterTable) +[dc:extent](http://purl.org/dc/terms/extent) | 398 + + +### Columns + +Name/Property | Datatype | Description + --- | --- | --- +[ID](http://cldf.clld.org/v1.0/terms.rdf#id) | `string` | Primary key +[Name](http://cldf.clld.org/v1.0/terms.rdf#name) | `string` | +[Concepticon_ID](http://cldf.clld.org/v1.0/terms.rdf#concepticonReference) | `string` | +`Concepticon_Gloss` | `string` | + +## Table [cognates.csv](./cognates.csv) + +property | value + --- | --- +[dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF CognateTable](http://cldf.clld.org/v1.0/terms.rdf#CognateTable) +[dc:extent](http://purl.org/dc/terms/extent) | 3902 + + +### Columns + +Name/Property | Datatype | Description + --- | --- | --- +[ID](http://cldf.clld.org/v1.0/terms.rdf#id) | `string` | Primary key +[Form_ID](http://cldf.clld.org/v1.0/terms.rdf#formReference) | `string` | References [forms.csv::ID](#table-formscsv) +[Form](http://linguistics-ontology.org/gold/2010/FormUnit) | `string` | +[Cognateset_ID](http://cldf.clld.org/v1.0/terms.rdf#cognatesetReference) | `string` | +`Doubt` | `boolean` | +`Cognate_Detection_Method` | `string` | +[Source](http://cldf.clld.org/v1.0/terms.rdf#source) | list of `string` (separated by `;`) | References [sources.bib::BibTeX-key](./sources.bib) +[Alignment](http://cldf.clld.org/v1.0/terms.rdf#alignment) | list of `string` (separated by ` `) | +`Alignment_Method` | `string` | +`Alignment_Source` | `string` | + diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json index 85931c4..644315f 100644 --- a/cldf/cldf-metadata.json +++ b/cldf/cldf-metadata.json @@ -17,25 +17,25 @@ { "rdf:about": "https://github.com/lexibank/robinsonap", "rdf:type": "prov:Entity", - "dc:created": "v3.0-16-g5f5b9db", + "dc:created": "v4.0", "dc:title": "Repository" }, { "rdf:about": "https://github.com/glottolog/glottolog", "rdf:type": "prov:Entity", - "dc:created": "v4.4", + "dc:created": "v5.0", "dc:title": "Glottolog" }, { "rdf:about": "https://github.com/concepticon/concepticon-data", "rdf:type": "prov:Entity", - "dc:created": "v2.5.0", + "dc:created": "v3.2.0", "dc:title": "Concepticon" }, { "rdf:about": "https://github.com/cldf-clts/clts", "rdf:type": "prov:Entity", - "dc:created": "v2.1.0", + "dc:created": "v2.3.0", "dc:title": "CLTS" } ], @@ -46,7 +46,7 @@ }, { "dc:title": "python", - "dc:description": "3.8.10" + "dc:description": "3.12.4" }, { "dc:title": "python-packages", @@ -55,9 +55,6 @@ ], "rdf:ID": "robinsonap", "rdf:type": "http://www.w3.org/ns/dcat#Distribution", - "dialect": { - "commentPrefix": null - }, "tables": [ { "dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#FormTable", @@ -181,7 +178,7 @@ { "datatype": "string", "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#glottocode", - "valueUrl": "http://glottolog.org/resource/languoid/id/{glottolog_id}", + "valueUrl": "http://glottolog.org/resource/languoid/id/{Glottocode}", "name": "Glottocode" }, { @@ -251,7 +248,7 @@ { "datatype": "string", "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference", - "valueUrl": "http://concepticon.clld.org/parameters/{concepticon_id}", + "valueUrl": "http://concepticon.clld.org/parameters/{Concepticon_ID}", "name": "Concepticon_ID" }, { diff --git a/cldf/languages.csv b/cldf/languages.csv index 70ce026..13898b3 100644 --- a/cldf/languages.csv +++ b/cldf/languages.csv @@ -10,5 +10,5 @@ westernpantar,Western Pantar,lamm1241,Western Pantar,lev,Papunesia,-8.52787,124. kui,Kui,kuii1254,Kui,kvd,Papunesia,,,Timor-Alor-Pantar,Ki adang,Adang,adan1251,Adang,adn,Papunesia,-8.18958,124.448,Timor-Alor-Pantar,Ad sawila,Sawila,sawi1256,Sawila,swt,Papunesia,-8.29105,125.078,Timor-Alor-Pantar,Sw -nedebang,Nedebang,nede1245,Klamu,nec,Papunesia,-8.28776,124.192,Timor-Alor-Pantar,Nd +nedebang,Nedebang,nede1245,Nedebang,nec,Papunesia,-8.28776,124.192,Timor-Alor-Pantar,Nd klon,Klon,kelo1247,Klon,kyo,Papunesia,-8.40688,124.429,Timor-Alor-Pantar,Kl diff --git a/cldf/lingpy-rcParams.json b/cldf/lingpy-rcParams.json index 4e08938..d707b91 100644 --- a/cldf/lingpy-rcParams.json +++ b/cldf/lingpy-rcParams.json @@ -64,7 +64,7 @@ 10, 10 ], - "filename": "lingpy-2021-07-22", + "filename": "lingpy-2024-08-01", "gap_symbol": "-", "gap_weight": 0.5, "gop": -2, @@ -123,7 +123,7 @@ "scorer": {}, "sonar": true, "stress": "\u02c8\u02cc'", - "timestamp": "2021-07-22 11:19", + "timestamp": "2024-08-01 10:28", "tones": "\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u2070\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089\u20800123456789\u02e5\u02e6\u02e7\u02e8\u02e9\u02ea\u02eb-\ua708-\ua709-\ua70a-\ua70b-\ua70c-\ua70d-\ua70e-\ua70f-\ua710-\ua711-\ua712-\ua713-\ua714-\ua715-\ua716-\ua717-\ua718-\ua719-\ua71a-\ua700-\ua701-\ua702-\ua703-\ua704-\ua705-\ua706-\ua707", "tree_calc": "neighbor", "unique_sequences": true, diff --git a/cldf/parameters.csv b/cldf/parameters.csv index 28993de..5bda6fe 100644 --- a/cldf/parameters.csv +++ b/cldf/parameters.csv @@ -44,7 +44,7 @@ ID,Name,Concepticon_ID,Concepticon_Gloss 43_burnshine,burn/shine,2102,BURN 44_butterfly,butterfly,1791,BUTTERFLY 45_buy,buy,1869,BUY -46_callout,call out,, +46_callout,call out,715,SHOUT 47_canoe,canoe,1970,CANOE 48_cassava,cassava,925,CASSAVA 49_chaseawayexpel,chase away/expel,30,DISPEL @@ -209,7 +209,7 @@ ID,Name,Concepticon_ID,Concepticon_Gloss 208_oldersibling,older sibling,405,OLDER SIBLING 209_one,one,1493,ONE 210_onehundred,one hundred,1634,HUNDRED -211_onehundredthousand,one hundred thousand,, +211_onehundredthousand,one hundred thousand,3532,ONE HUNDRED THOUSAND 212_oven,oven,1143,OVEN 213_papaya,papaya,2445,PAPAYA 214_penis,penis,1222,PENIS @@ -240,7 +240,7 @@ ID,Name,Concepticon_ID,Concepticon_Gloss 239_salt,salt,1274,SALT 240_salty,salty,1091,SALTY 241_sand,sand,671,SAND -242_scabies,scabies,2664,SCAB +242_scabies,scabies,3172,SCABIES 243_scared,scared,3033,SCARED 244_scorpion,scorpion,1538,SCORPION 245_scratch,scratch,1436,SCRATCH @@ -307,7 +307,7 @@ ID,Name,Concepticon_ID,Concepticon_Gloss 306_thousand,thousand,1843,THOUSAND 307_three,three,492,THREE 308_thunder,thunder,1150,THUNDER -309_tinea,tinea,1189,ULCER +309_tinea,tinea,3173,TINEA 310_tobark,to bark,1206,BARKING 311_tobathe,to bathe,138,BATHE 312_tobatheachild,to bathe a child,3170,BATHE (SOMEONE) diff --git a/cldf/requirements.txt b/cldf/requirements.txt index 0267e9b..19ecbc1 100644 --- a/cldf/requirements.txt +++ b/cldf/requirements.txt @@ -1,48 +1,56 @@ appdirs==1.4.4 -bs4==0.0.1 -certifi==2021.5.30 -chardet==4.0.0 -cldfbench==1.7.1 -cldfcatalog==1.3.2 -clldutils==3.9.0 -colorlog==5.0.1 -csvw==1.11.0 -gitdb==4.0.7 -greenlet==1.1.0 -idna==2.10 -iniconfig==1.1.1 -isodate==0.6.0 -lingpy==2.6.8 -Markdown==3.3.4 -networkx==2.6.1 -newick==1.3.0 -numpy==1.21.0 -openpyxl==3.0.7 -packaging==21.0 -pluggy==0.13.1 -purl==1.6 -py==1.10.0 +attrs==23.2.0 +Babel==2.15.0 +bibtexparser==2.0.0b7 +bs4==0.0.2 +certifi==2024.7.4 +cldfbench==1.14.0 +cldfcatalog==1.5.1 +cldfzenodo==2.1.1 +clldutils==3.22.2 +colorama==0.4.6 +colorlog==6.8.2 +csvw==3.3.0 +gitdb==4.0.11 +greenlet==3.0.3 +idna==3.7 +iniconfig==2.0.0 +isodate==0.6.1 +jsonschema==4.23.0 +lingpy==2.6.13 +lxml==5.2.2 +Markdown==3.6 +nameparser==1.1.3 +networkx==3.3 +newick==1.9.0 +numpy==2.0.1 +openpyxl==3.1.5 +packaging==24.1 +pluggy==1.5.0 pybtex==0.24.0 -pycldf==1.22.0 -pyclts==3.1.1 -pyconcepticon==2.8.0 -pycountry==20.7.3 -pyglottolog==3.6.0 -pylexibank==3.2.0 -pytest==6.2.4 -regex==2021.7.6 -requests==2.25.1 +pycldf==1.38.1 +pyclts==3.2.0 +pyconcepticon==3.1.0 +pycountry==24.6.1 +pyglottolog==3.13.0 +pylatexenc==2.10 +pylexibank==3.5.0 +pytest==8.3.2 +python-dateutil==2.9.0.post0 +rdflib==7.0.0 +referencing==0.35.1 +regex==2024.7.24 +requests==2.32.3 rfc3986==1.5.0 -scipy==1.7.0 -segments==2.2.0 +segments==2.2.1 six==1.16.0 -smmap==4.0.0 -soupsieve==2.2.1 -SQLAlchemy==1.4.20 -tabulate==0.8.9 -termcolor==1.1.0 -tqdm==4.61.2 -uritemplate==3.0.1 -urllib3==1.26.6 +smmap==5.0.1 +soupsieve==2.5 +SQLAlchemy==1.4.53 +tabulate==0.9.0 +termcolor==2.4.0 +tqdm==4.66.4 +uritemplate==4.1.1 +urllib3==2.2.2 xlrd==2.0.1 -zenodoclient==0.4.1 \ No newline at end of file +zenodoclient==0.5.1 \ No newline at end of file