From 14d5ba27203721c9714c42837f3d2fc8454346f1 Mon Sep 17 00:00:00 2001 From: Michael Herstine Date: Tue, 2 Jul 2024 07:14:25 -0700 Subject: [PATCH] Prep the branch for merge. - Add additional test cases for Berghel & Roach - Update NEWS - Update ChangeLog --- ChangeLog | 27 ++++++++ NEWS | 33 ++++++++-- admin/make-dl-test-data/.gitignore | 1 + test/Makefile.am | 5 +- test/test-data-12-8 | 4 ++ test/test-data-12-8.txt | 100 +++++++++++++++++++++++++++++ test/test-data-5-6 | 2 + 7 files changed, 163 insertions(+), 9 deletions(-) create mode 100644 admin/make-dl-test-data/.gitignore create mode 100755 test/test-data-12-8 create mode 100644 test/test-data-12-8.txt diff --git a/ChangeLog b/ChangeLog index f6e9051..a84b25f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +2024-07-02 Michael Herstine + + Add additional test cases for Berghel & Roach. + +2024-07-02 Michael Herstine + + Fix issue #5. + Berghel's & Roach's implementation assumes that the source string + is shorter than or equal to the target in length. This patch will + enforce this, both by: + + - asserting this condition in `berghel_roach()`, and + - changing `read_corpus` to always sort the two strings + by length before returning them + +2024-07-01 Michael Herstine + + Start a CI workflow + + Add a test data generation program + + Fix broken link in the README + + Tidy-up the project. + This patch will fix a few typos, add test data and add a signoff + script. + 2021-07-18 Michael Herstine Fix issue #2. 
diff --git a/NEWS b/NEWS index 98593f2..3c0f246 100644 --- a/NEWS +++ b/NEWS @@ -1,13 +1,32 @@ damerau–levenshtein News -- history of user-visible changes -*- outline -*- -* 0.2.1 release +* 0.2 - - address https://github.com/sp1ff/damerau-levenshtein/issues/2 -* 0.2.0 release +** 0.2.2 - - remove the check on C vararrays - - just use flat arrays; do the two-dimensional indexing manually -* 0.1.4 release +*** Bugfixes - - initial release +**** Fix issue 5 + +Enforce the source string being shorter than or equal to the target in length for +Berghel & Roach. +*** User-visible Changes + +**** Add a test-generation program + +In `admin/make-dl-test-data`. +** 0.2.1 + +*** Bugfixes + +**** Fix https://github.com/sp1ff/damerau-levenshtein/issues/2 +** 0.2.0 + +*** Bugfixes + +**** Remove the check on C vararrays +**** Just use flat arrays; do the two-dimensional indexing manually +* 0.1 + +** 0.1.4 Initial Release diff --git a/admin/make-dl-test-data/.gitignore b/admin/make-dl-test-data/.gitignore new file mode 100644 index 0000000..48a004c --- /dev/null +++ b/admin/make-dl-test-data/.gitignore @@ -0,0 +1 @@ +dist-newstyle diff --git a/test/Makefile.am b/test/Makefile.am index b78cd52..b3b7a1e 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -17,12 +17,13 @@ EXTRA_DIST = lw-smoke uk-smoke br-smoke lw-br uk-br br-br \ lw-damerau uk-damerau br-damerau \ issue-2 issue-2.txt \ issue-3 issue-3.txt \ - test-data-5-6 test-data-5-6.txt + test-data-5-6 test-data-5-6.txt \ + test-data-12-8 test-data-12-8.txt AM_TESTS_ENVIRONMENT= \ srcdir=$(srcdir) \ builddir=$(builddir) TESTS = lw-smoke uk-smoke br-smoke lw-br uk-br br-br lw-damerau uk-damerau \ - br-damerau issue-2 issue-3 test-data-5-6 + br-damerau issue-2 issue-3 test-data-5-6 test-data-12-8 timing-tests: check-am FORCE builddir=$(builddir) srcdir=$(srcdir) ./timing diff --git a/test/test-data-12-8 b/test/test-data-12-8 new file mode 100755 index 0000000..adc59c0 --- /dev/null +++ b/test/test-data-12-8 @@ -0,0 +1,4 @@ 
+#!/usr/bin/env bash +# 100 test cases, generated with `make-dl-test-data`, using `--length=12` +# and `--num-edits=8` +${builddir}/../src/dl -a br ${srcdir}/test-data-12-8.txt diff --git a/test/test-data-12-8.txt b/test/test-data-12-8.txt new file mode 100644 index 0000000..2ae0db6 --- /dev/null +++ b/test/test-data-12-8.txt @@ -0,0 +1,100 @@ +GNWGJJSRNULN GNWGJJRNUN 2 +GNWGJJSRNULN HGWGMRNUFAN 7 +NWGJJSRNULNZ ENWJJNUNNL 6 +WGJJSRNULNZR WJJJSNRLUZNR 4 +GJJSRNULNZRF EGJJNLURN 7 +JJSRNULNZRFQ EJJRLZNFR 7 +JSRNULNZRFQX JJRNULNZRFQNX 2 +SRNULNZRFQXH SRNUNZRFQH 2 +RNULNZRFQXHE RNULZFRQXE 3 +NULNZRFQXHEA NULNZRFQXENA 2 +ULNZRFQXHEAA ULNZRQFHXAAEA 4 +LNZRFQXHEAAI LNZRFXQEHAAEI 3 +LNZRFQXHEAAI LNZRFXQEHAEI 3 +LNZRFQXHEAAI LNZRFXQEHAI 3 +NZRFQXHEAAIJ NZRFQHXAENIJE 4 +ZRFQXHEAAIJM MHZFQXHEAFAAIJM 5 +ZRFQXHEAAIJM MHZFQHEAFAAIJM 6 +RFQXHEAAIJMK RFQXHAEIAJMK 2 +RFQXHEAAIJMK MRFQXHAEIAAJMKM 5 +FQXHEAAIJMKM HFQXHEAAFIJMKM 2 +FQXHEAAIJMKM HFQXHEAAFIJAMM 4 +QXHEAAIJMKMU MQXHEAIMJAKMUM 5 +XHEAAIJMKMUA XHEAIAJMKMUA 1 +HEAAIJMKMUAS CHEHAIJMKAMUM 5 +EAAIJMKMUASF CEAAIMKMUAASM 4 +AAIJMKMUASFB MAAIJKMUAASAFM 5 +AIJMKMUASFBJ AIJMKMUASAFBJ 1 +AIJMKMUASFBJ IJMKUASAFBM 4 +AIJMKMUASFBJ IAJMKMUASAFBM 3 +IJMKMUASFBJM IJMKMUASFBM 1 +JMKMUASFBJMB JMKMUASFBJJMB 1 +MKMUASFBJMBC MKMUMAFBJMBC 2 +KMUASFBJMBCC KUAFAJQCBCC 5 +MUASFBJMBCCW MUASBJMBCCW 1 +MUASFBJMBCCW MUASBJCBCCW 2 +UASFBJMBCCWQ ASFBMBQCCWQ 3 +ASFBJMBCCWQM AFBJABCCWQM 2 +ASFBJMBCCWQM AFKJACBWQM 5 +SFBJMBCCWQMG SFBJMCCWQMG 1 +FBJMBCCWQMGT CBMBCCWQGT 3 +FBJMBCCWQMGT CBMMCCWQGZW 6 +BJMBCCWQMGTZ BMKCCWMGTZ 3 +JMBCCWQMGTZW JJMBCCCQMGTZW 2 +MBCCWQMGTZWM MBCCWQCGTZW 2 +BCCWQMGTZWMJ CCWMQMGTZZWMJ 3 +CCWQMGTZWMJW CCWQMGQCZWMJW 2 +CCWQMGTZWMJW CCWQMGZWMJW 1 +CWQMGTZWMJWK CWQMGTZWMZWK 1 +WQMGTZWMJWKG WQMGTZWMJWKWGW 2 +QMGTZWMJWKGC QMKTZJWWFC 5 +MGTZWMJWKGCA MGTMWMJWKGCA 1 +GTZWMJWKGCAT GTZWMJWKGWTWACT 4 +TZWMJWKGCATM TZKMJCWF 7 +ZWMJWKGCATMK GWMJWKGCAZTWMK 3 +WMJWKGCATMKN WMJWKGCATWWMKN 2 +MJWKGCATMKNF MJKGTCFF 6 +JWKGCATMKNFW GWKGCATMKFTNW 4
+WKGCATMKNFWN GKGCATMKNFWWN 2 +KGCATMKNFWNJ KGKATMKFFJ 4 +KGCATMKNFWNJ KGKATMKNFFJN 3 +GCATMKNFWNJF GCATMKNFWNTNF 2 +CATMKNFWNJFG CTACMKNFFJG 5 +ATMKNFWNJFGI ATMKNFWNJFIN 2 +TMKNFWNJFGIP TMKNFWNJFGTIP 1 +MKNFWNJFGIPR MKNFWNJFFPM 3 +KNFWNJFGIPRH KNFNJFFFR 5 +NFWNJFGIPRHA NFWNJFGIFFHN 3 +FWNJFGIPRHAF RFNWJFGIFFA 6 +FWNJFGIPRHAF RFNWJFGFFA 7 +FWNJFGIPRHAF FWNJFGIFFA 4 +WNJFGIPRHAFD WAGIPFFD 6 +NJFGIPRHAFDH RNFJGIPFDF 6 +JFGIPRHAFDHB RJIGPRHFDHB 4 +JFGIPRHAFDHB REIGPRHFDHB 4 +JFGIPRHAFDHB REIGPRHFDHB 4 +FGIPRHAFDHBR FGIPRHAFHBR 1 +GIPRHAFDHBRA RGEIGPRHAFDHBRA 3 +IPRHAFDHBRAC REHRAFHHBRAC 4 +PRHAFDHBRACZ PRHAFDHRACZ 1 +PRHAFDHBRACZ RPEHRAFDRAC 6 +RHAFDHBRACZX HXFDHBRAACXT 5 +HAFDHBRACZXE HFADHHBRACZXE 2 +AFDHBRACZXEM REDFHBRCXZMT 7 +FDHBRACZXEMS FDHFRACZXEMS 1 +DHBRACZXEMSN REBHRACXMESN 5 +DHBRACZXEMSN RDBHRACXEMSN 3 +DHBRACZXEMSN REBHRACXEMSN 4 +HBRACZXEMSNF HXACZXEMSAF 3 +BRACZXEMSNFB RCZXEMNTF 5 +RACZXEMSNFBX RRACZXESNFBX 2 +ACZXEMSNFBXL ACZXEMSNFAXLX 2 +ACZXEMSNFBXL ACZXESNFXALTX 5 +CZXEMSNFBXLY CZXEMSNBXLY 1 +ZXEMSNFBXLYT ZXEMNMFBLXYT 3 +XEMSNFBXLYTU XEMSNBXLYTUX 2 +EMSNFBXLYTUL EESNFBXLYL 3 +MSNFBXLYTULM MSNFBMLYTLTM 3 +SNFBXLYTULMY SNFBXYTULYT 3 +NFBXLYTULMYC NBXLYTULYMCT 3 +FBXLYTULMYCB FXBLYTULMCB 2 diff --git a/test/test-data-5-6 b/test/test-data-5-6 index c94ecfb..461292b 100755 --- a/test/test-data-5-6 +++ b/test/test-data-5-6 @@ -1,2 +1,4 @@ #!/usr/bin/env bash +# 100 test cases, generated with `make-dl-test-data`, using `--length=5` +# and `--num-edits=6` ${builddir}/../src/dl -a br ${srcdir}/test-data-5-6.txt