From b2c3487b75aab3f98606d486143d451f237a62b9 Mon Sep 17 00:00:00 2001 From: chrzyki Date: Tue, 6 Aug 2024 12:29:33 +0200 Subject: [PATCH] Prepare release --- .github/workflows/cldf-validation.yml | 2 +- .zenodo.json | 8 +-- CONTRIBUTORS.md | 14 ++-- README.md | 20 +++--- cldf/README.md | 8 +-- cldf/cldf-metadata.json | 17 ++--- cldf/languages.csv | 2 +- cldf/lingpy-rcParams.json | 4 +- cldf/requirements.txt | 94 +++++++++++++++------------ etc/orthography.tsv | 2 +- lexibank_marrisonnaga.py | 2 + 11 files changed, 88 insertions(+), 85 deletions(-) diff --git a/.github/workflows/cldf-validation.yml b/.github/workflows/cldf-validation.yml index 62c167f..b2f938e 100644 --- a/.github/workflows/cldf-validation.yml +++ b/.github/workflows/cldf-validation.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6] + python-version: [3.12] steps: - uses: actions/checkout@v2 diff --git a/.zenodo.json b/.zenodo.json index 49a279d..aa36fee 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -7,13 +7,13 @@ ], "creators": [ { - "name": "Marrison, G. E." + "name": "Geoffrey E. Marrison" } ], "contributors": [ { "name": "Johann-Mattis List", - "type": "Other" + "type": "Editor" }, { "name": "Mei-Shin Wu", @@ -23,10 +23,6 @@ "name": "Tiago Tresoldi", "type": "Other" }, - { - "name": "STEDT", - "type": "Editor" - }, { "name": "STEDT", "type": "Distributor" diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index be1d8f7..f902834 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,9 +1,9 @@ # Contributors -Name | GitHub user | Description | Role ---- | --- | --- | --- -Johann-Mattis List | @LinguList | maintainer | Other -Mei-Shin Wu | @MacyL | maintainer | Other -Tiago Tresoldi | @tresoldi | help with coding | Other -STEDT | | digitization | Editor, Distributor -Marrison, G. E. | | original data collection | Author +Name | GitHub user | Description | Role +--- | --- | --- | --- +Johann-Mattis List | @LinguList | maintainer | Editor +Mei-Shin Wu | @MacyL | concepts, profile, language mapping | Other +Tiago Tresoldi | @tresoldi | help with coding | Other +STEDT | | digitization | Distributor +Geoffrey E. Marrison | | original data collection | Author diff --git a/README.md b/README.md index 218c606..11091c0 100644 --- a/README.md +++ b/README.md @@ -35,25 +35,25 @@ This dataset was digitized by the STEDT project. In order to provide a CLTS-base ![BIPA: 100%](https://img.shields.io/badge/BIPA-100%25-brightgreen.svg "BIPA: 100%") ![CLTS SoundClass: 100%](https://img.shields.io/badge/CLTS%20SoundClass-100%25-brightgreen.svg "CLTS SoundClass: 100%") -- **Varieties:** 40 -- **Concepts:** 884 +- **Varieties:** 40 (linked to 39 different Glottocodes) +- **Concepts:** 884 (linked to 827 different Concepticon concept sets) - **Lexemes:** 27,594 - **Sources:** 1 - **Synonymy:** 1.14 - **Invalid lexemes:** 0 - **Tokens:** 131,654 -- **Segments:** 123 (0 BIPA errors, 0 CTLS sound class errors, 123 CLTS modified) +- **Segments:** 123 (0 BIPA errors, 0 CLTS sound class errors, 123 CLTS modified) - **Inventory size (avg):** 40.40 # Contributors -Name | GitHub user | Description | Role ---- | --- | --- | --- -Johann-Mattis List | @LinguList | maintainer | Other -Mei-Shin Wu | @MacyL | maintainer | Other -Tiago Tresoldi | @tresoldi | help with coding | Other -STEDT | | digitization | Editor, Distributor -Marrison, G. E. | | original data collection | Author +Name | GitHub user | Description | Role +--- | --- | --- | --- +Johann-Mattis List | @LinguList | maintainer | Editor +Mei-Shin Wu | @MacyL | concepts, profile, language mapping | Other +Tiago Tresoldi | @tresoldi | help with coding | Other +STEDT | | digitization | Distributor +Geoffrey E. Marrison | | original data collection | Author diff --git a/cldf/README.md b/cldf/README.md index 85836bc..0fa9478 100644 --- a/cldf/README.md +++ b/cldf/README.md @@ -14,8 +14,8 @@ property | value [dc:identifier](http://purl.org/dc/terms/identifier) | http://stedt.berkeley.edu/~stedt-cgi/rootcanal.pl/source/GEM-CNL [dc:license](http://purl.org/dc/terms/license) | https://creativecommons.org/licenses/by/4.0/ [dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/lexibank/marrisonnaga -[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. lexibank/marrisonnaga v2.0-25-gd2ff315
  2. Glottolog v4.4
  3. Concepticon v2.5.0
  4. CLTS v2.1.0
-[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.8.10
  3. python-packages: requirements.txt
+[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. lexibank/marrisonnaga v3.0
  2. Glottolog v5.0
  3. Concepticon v3.2.0
  4. CLTS v2.3.0
+[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.12.4
  3. python-packages: requirements.txt
[rdf:ID](http://www.w3.org/1999/02/22-rdf-syntax-ns#ID) | marrisonnaga [rdf:type](http://www.w3.org/1999/02/22-rdf-syntax-ns#type) | http://www.w3.org/ns/dcat#Distribution @@ -73,8 +73,8 @@ Name/Property | Datatype | Description `Glottolog_Name` | `string` | [ISO639P3code](http://cldf.clld.org/v1.0/terms.rdf#iso639P3code) | `string` | [Macroarea](http://cldf.clld.org/v1.0/terms.rdf#macroarea) | `string` | -[Latitude](http://cldf.clld.org/v1.0/terms.rdf#latitude) | `decimal` | -[Longitude](http://cldf.clld.org/v1.0/terms.rdf#longitude) | `decimal` | +[Latitude](http://cldf.clld.org/v1.0/terms.rdf#latitude) | `decimal`
≥ -90
≤ 90 | +[Longitude](http://cldf.clld.org/v1.0/terms.rdf#longitude) | `decimal`
≥ -180
≤ 180 | `Family` | `string` | `STEDT_Name` | `string` | `SubGroup` | `string` | diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json index 48f2218..6cba06c 100644 --- a/cldf/cldf-metadata.json +++ b/cldf/cldf-metadata.json @@ -17,25 +17,25 @@ { "rdf:about": "https://github.com/lexibank/marrisonnaga", "rdf:type": "prov:Entity", - "dc:created": "v2.0-25-gd2ff315", + "dc:created": "v3.0", "dc:title": "Repository" }, { "rdf:about": "https://github.com/glottolog/glottolog", "rdf:type": "prov:Entity", - "dc:created": "v4.4", + "dc:created": "v5.0", "dc:title": "Glottolog" }, { "rdf:about": "https://github.com/concepticon/concepticon-data", "rdf:type": "prov:Entity", - "dc:created": "v2.5.0", + "dc:created": "v3.2.0", "dc:title": "Concepticon" }, { "rdf:about": "https://github.com/cldf-clts/clts", "rdf:type": "prov:Entity", - "dc:created": "v2.1.0", + "dc:created": "v2.3.0", "dc:title": "CLTS" } ], @@ -46,7 +46,7 @@ }, { "dc:title": "python", - "dc:description": "3.8.10" + "dc:description": "3.12.4" }, { "dc:title": "python-packages", @@ -55,9 +55,6 @@ ], "rdf:ID": "marrisonnaga", "rdf:type": "http://www.w3.org/ns/dcat#Distribution", - "dialect": { - "commentPrefix": null - }, "tables": [ { "dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#FormTable", @@ -181,7 +178,7 @@ { "datatype": "string", "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#glottocode", - "valueUrl": "http://glottolog.org/resource/languoid/id/{glottolog_id}", + "valueUrl": "http://glottolog.org/resource/languoid/id/{Glottocode}", "name": "Glottocode" }, { @@ -263,7 +260,7 @@ { "datatype": "string", "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#concepticonReference", - "valueUrl": "http://concepticon.clld.org/parameters/{concepticon_id}", + "valueUrl": "http://concepticon.clld.org/parameters/{Concepticon_ID}", "name": "Concepticon_ID" }, { diff --git a/cldf/languages.csv b/cldf/languages.csv index 54822fa..3c6477d 100644 --- a/cldf/languages.csv +++ b/cldf/languages.csv @@ -8,7 +8,7 @@ Kezhama,Kezhama,khez1235,Khezha Naga,nkh,Eurasia,25.5167,94.2,Sino-Tibetan,Khezh Khoirao,Khoirao,than1255,Thangal Naga,nki,Eurasia,25.2167,94.0333,Sino-Tibetan,Khoirao,Zemeic,406,India KhonomaAngami,Angami Khonoma,khon1248,Khonoma,njm,Eurasia,25.65,94.0333,Sino-Tibetan,Angami (Khonoma),Angami,842,India KohimaAngami,Angami Kohima,anga1288,Angami Naga,njm,Eurasia,25.55,94.1333,Sino-Tibetan,Angami (Kohima),Angami,971,India -Konyak,Konyak,kony1246,Konyak,nbe,Eurasia,26.55,95.05,Sino-Tibetan,Konyak,Konyak,979,India +Konyak,Konyak,kony1246,Patkaian,nbe,Eurasia,26.55,95.05,Sino-Tibetan,Konyak,Konyak,979,India Liangmai,Liangmai,lian1251,Liangmai Naga,njn,Eurasia,25.3667,93.6333,Sino-Tibetan,Liangmei,Zemeic,724,India Lotha,Lotha,loth1237,Lotha Naga,njh,Eurasia,26.1,94.2667,Sino-Tibetan,Lotha Naga,Lotha,1068,India Lushai,Lushai,lush1249,Mizo,lus,Eurasia,22.60535,92.629457,Sino-Tibetan,Lushai [Mizo],Kuki Chin-Central,1105,India diff --git a/cldf/lingpy-rcParams.json b/cldf/lingpy-rcParams.json index 7aa644c..6cfb511 100644 --- a/cldf/lingpy-rcParams.json +++ b/cldf/lingpy-rcParams.json @@ -64,7 +64,7 @@ 10, 10 ], - "filename": "lingpy-2021-07-22", + "filename": "lingpy-2024-08-06", "gap_symbol": "-", "gap_weight": 0.5, "gop": -2, @@ -123,7 +123,7 @@ "scorer": {}, "sonar": true, "stress": "\u02c8\u02cc'", - "timestamp": "2021-07-22 11:00", + "timestamp": "2024-08-06 12:28", "tones": "\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u2070\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089\u20800123456789\u02e5\u02e6\u02e7\u02e8\u02e9\u02ea\u02eb-\ua708-\ua709-\ua70a-\ua70b-\ua70c-\ua70d-\ua70e-\ua70f-\ua710-\ua711-\ua712-\ua713-\ua714-\ua715-\ua716-\ua717-\ua718-\ua719-\ua71a-\ua700-\ua701-\ua702-\ua703-\ua704-\ua705-\ua706-\ua707", "tree_calc": "neighbor", "unique_sequences": true, diff --git a/cldf/requirements.txt b/cldf/requirements.txt index 0267e9b..a96e206 100644 --- a/cldf/requirements.txt +++ b/cldf/requirements.txt @@ -1,48 +1,56 @@ appdirs==1.4.4 -bs4==0.0.1 -certifi==2021.5.30 -chardet==4.0.0 -cldfbench==1.7.1 -cldfcatalog==1.3.2 -clldutils==3.9.0 -colorlog==5.0.1 -csvw==1.11.0 -gitdb==4.0.7 -greenlet==1.1.0 -idna==2.10 -iniconfig==1.1.1 -isodate==0.6.0 -lingpy==2.6.8 -Markdown==3.3.4 -networkx==2.6.1 -newick==1.3.0 -numpy==1.21.0 -openpyxl==3.0.7 -packaging==21.0 -pluggy==0.13.1 -purl==1.6 -py==1.10.0 +attrs==24.1.0 +Babel==2.15.0 +bibtexparser==2.0.0b7 +bs4==0.0.2 +certifi==2024.7.4 +cldfbench==1.14.0 +cldfcatalog==1.5.1 +cldfzenodo==2.1.1 +clldutils==3.22.2 +colorama==0.4.6 +colorlog==6.8.2 +csvw==3.3.0 +gitdb==4.0.11 +greenlet==3.0.3 +idna==3.7 +iniconfig==2.0.0 +isodate==0.6.1 +jsonschema==4.23.0 +lingpy==2.6.13 +lxml==5.2.2 +Markdown==3.6 +nameparser==1.1.3 +networkx==3.3 +newick==1.9.0 +numpy==2.0.1 +openpyxl==3.1.5 +packaging==24.1 +pluggy==1.5.0 pybtex==0.24.0 -pycldf==1.22.0 -pyclts==3.1.1 -pyconcepticon==2.8.0 -pycountry==20.7.3 -pyglottolog==3.6.0 -pylexibank==3.2.0 -pytest==6.2.4 -regex==2021.7.6 -requests==2.25.1 +pycldf==1.38.1 +pyclts==3.2.0 +pyconcepticon==3.1.0 +pycountry==24.6.1 +pyglottolog==3.13.0 +pylatexenc==2.10 +pylexibank==3.5.0 +pytest==8.3.2 +python-dateutil==2.9.0.post0 +rdflib==7.0.0 +referencing==0.35.1 +regex==2024.7.24 +requests==2.32.3 rfc3986==1.5.0 -scipy==1.7.0 -segments==2.2.0 +segments==2.2.1 six==1.16.0 -smmap==4.0.0 -soupsieve==2.2.1 -SQLAlchemy==1.4.20 -tabulate==0.8.9 -termcolor==1.1.0 -tqdm==4.61.2 -uritemplate==3.0.1 -urllib3==1.26.6 +smmap==5.0.1 +soupsieve==2.5 +SQLAlchemy==1.4.53 +tabulate==0.9.0 +termcolor==2.4.0 +tqdm==4.66.5 +uritemplate==4.1.1 +urllib3==2.2.2 xlrd==2.0.1 -zenodoclient==0.4.1 \ No newline at end of file +zenodoclient==0.5.1 \ No newline at end of file diff --git a/etc/orthography.tsv b/etc/orthography.tsv index cd7f1c7..a5404c9 100644 --- a/etc/orthography.tsv +++ b/etc/orthography.tsv @@ -304,7 +304,7 @@ tsw ts w tt tː tw t w v v -V V/ə +V V/ə v$ v vh vh/v̥ vw v w diff --git a/lexibank_marrisonnaga.py b/lexibank_marrisonnaga.py index 44a2cfd..62a0f3c 100644 --- a/lexibank_marrisonnaga.py +++ b/lexibank_marrisonnaga.py @@ -19,6 +19,8 @@ class CustomLanguage(pylexibank.Language): class Dataset(pylexibank.Dataset): dir = Path(__file__).parent id = "marrisonnaga" + writer_options = dict(keep_languages=False, keep_parameters=False) + language_class = CustomLanguage form_spec = pylexibank.FormSpec(missing_data=("*", "---", ""), brackets={"[": "]", "(": ")"})