From 8f73d42c7957d242f4ea3dc6f58fae893f723815 Mon Sep 17 00:00:00 2001 From: Nishchal Bhandari Date: Wed, 17 Apr 2024 19:42:13 +0000 Subject: [PATCH] add test --- test/data/short.sbs.txt | 125 ++++++++++++++++++++++++++++++ test/data/short_punc.wer_tag.json | 8 ++ test/fstalign_Test.cc | 2 + 3 files changed, 135 insertions(+) create mode 100644 test/data/short.sbs.txt create mode 100644 test/data/short_punc.wer_tag.json diff --git a/test/data/short.sbs.txt b/test/data/short.sbs.txt new file mode 100644 index 0000000..bb1cc40 --- /dev/null +++ b/test/data/short.sbs.txt @@ -0,0 +1,125 @@ + ref_token hyp_token IsErr Class Wer_Tag_Entities + + yeah yeah + , ERR + yeah ERR + , ERR + right right + . ERR + yeah ERR + , ERR + all ERR + right ERR + , i'll ERR + probably do ERR + just just + that that + . ERR + are are + there there + any any + visuals visuals + that that + come come + to to + mind mind + or or + yeah yeah + , ERR + sure sure + . ERR + when when + i i + hear hear + foobar foobar ###1_PROPER_NOUN###|###2_SPACY>ORG###| + , ERR + i i + think think + about about + just just + that that + : ERR + foo foobar ERR + a a +------------------------------------------------------------ + Line Group + 4 , yeah , <-> *** + 8 . yeah , all right , probably <-> i'll do + 17 . <-> *** + 28 , <-> *** + 30 . <-> *** + 35 , <-> *** + 41 : foo <-> foobar +------------------------------------------------------------ + Unigram Prec. Recall + , 0/0 (0.0 %) 0/6 (0.0 %) + . 0/0 (0.0 %) 0/3 (0.0 %) + : 0/0 (0.0 %) 0/1 (0.0 %) + all 0/0 (0.0 %) 0/1 (0.0 %) + do 0/1 (0.0 %) 0/0 (0.0 %) + foo 0/0 (0.0 %) 0/1 (0.0 %) + i'll 0/1 (0.0 %) 0/0 (0.0 %) + probably 0/0 (0.0 %) 0/1 (0.0 %) + foobar 1/2 (50.0 %) 1/1 (100.0 %) + right 1/1 (100.0 %) 1/2 (50.0 %) + yeah 2/2 (100.0 %) 2/4 (50.0 %) + 1/1 (100.0 %) 1/1 (100.0 %) + a 1/1 (100.0 %) 1/1 (100.0 %) + about 1/1 (100.0 %) 1/1 (100.0 %) + any 1/1 (100.0 %) 1/1 (100.0 %) + are 1/1 (100.0 %) 1/1 (100.0 %) + come 1/1 (100.0 %) 1/1 (100.0 %) + hear 1/1 (100.0 %) 1/1 (100.0 %) + i 2/2 (100.0 %) 2/2 (100.0 %) + just 2/2 (100.0 %) 2/2 (100.0 %) + mind 1/1 (100.0 %) 1/1 (100.0 %) + or 1/1 (100.0 %) 1/1 (100.0 %) + sure 1/1 (100.0 %) 1/1 (100.0 %) + that 3/3 (100.0 %) 3/3 (100.0 %) + there 1/1 (100.0 %) 1/1 (100.0 %) + think 1/1 (100.0 %) 1/1 (100.0 %) + to 1/1 (100.0 %) 1/1 (100.0 %) + visuals 1/1 (100.0 %) 1/1 (100.0 %) + when 1/1 (100.0 %) 1/1 (100.0 %) +------------------------------------------------------------ + Bigram Precision Recall + , all 0/0 (0.0 %) 0/1 (0.0 %) + , i 0/0 (0.0 %) 0/1 (0.0 %) + , probably 0/0 (0.0 %) 0/1 (0.0 %) + , right 0/0 (0.0 %) 0/1 (0.0 %) + , sure 0/0 (0.0 %) 0/1 (0.0 %) + , yeah 0/0 (0.0 %) 0/1 (0.0 %) + . are 0/0 (0.0 %) 0/1 (0.0 %) + . when 0/0 (0.0 %) 0/1 (0.0 %) + . yeah 0/0 (0.0 %) 0/1 (0.0 %) + : foo 0/0 (0.0 %) 0/1 (0.0 %) + all right 0/0 (0.0 %) 0/1 (0.0 %) + do just 0/1 (0.0 %) 0/0 (0.0 %) + foo a 0/0 (0.0 %) 0/1 (0.0 %) + foobar , 0/0 (0.0 %) 0/1 (0.0 %) + foobar a 0/1 (0.0 %) 0/0 (0.0 %) + i'll do 0/1 (0.0 %) 0/0 (0.0 %) + probably just 0/0 (0.0 %) 0/1 (0.0 %) + right , 0/0 (0.0 %) 0/1 (0.0 %) + right . 0/0 (0.0 %) 0/1 (0.0 %) + sure . 0/0 (0.0 %) 0/1 (0.0 %) + that . 0/0 (0.0 %) 0/1 (0.0 %) + that : 0/0 (0.0 %) 0/1 (0.0 %) + yeah , 0/0 (0.0 %) 0/4 (0.0 %) + yeah 1/1 (100.0 %) 1/1 (100.0 %) + about just 1/1 (100.0 %) 1/1 (100.0 %) + any visuals 1/1 (100.0 %) 1/1 (100.0 %) + are there 1/1 (100.0 %) 1/1 (100.0 %) + come to 1/1 (100.0 %) 1/1 (100.0 %) + hear foobar 1/1 (100.0 %) 1/1 (100.0 %) + i hear 1/1 (100.0 %) 1/1 (100.0 %) + i think 1/1 (100.0 %) 1/1 (100.0 %) + just that 2/2 (100.0 %) 2/2 (100.0 %) + mind or 1/1 (100.0 %) 1/1 (100.0 %) + or yeah 1/1 (100.0 %) 1/1 (100.0 %) + that come 1/1 (100.0 %) 1/1 (100.0 %) + there any 1/1 (100.0 %) 1/1 (100.0 %) + think about 1/1 (100.0 %) 1/1 (100.0 %) + to mind 1/1 (100.0 %) 1/1 (100.0 %) + visuals that 1/1 (100.0 %) 1/1 (100.0 %) + when i 1/1 (100.0 %) 1/1 (100.0 %) diff --git a/test/data/short_punc.wer_tag.json b/test/data/short_punc.wer_tag.json new file mode 100644 index 0000000..24f8ba3 --- /dev/null +++ b/test/data/short_punc.wer_tag.json @@ -0,0 +1,8 @@ +{ + "1": { + "entity_type": "PROPER_NOUN" + }, + "2": { + "entity_type": "SPACY>ORG" + } +} diff --git a/test/fstalign_Test.cc b/test/fstalign_Test.cc index 1a182e3..e8ef9f1 100644 --- a/test/fstalign_Test.cc +++ b/test/fstalign_Test.cc @@ -683,8 +683,10 @@ TEST_CASE_METHOD(UniqueTestsFixture, "main-adapted-composition()") { const auto result = exec(command("wer", approach, "short_punc.ref.nlp", "short_punc.hyp.nlp", sbs_output, nlp_output, TEST_SYNONYMS)+" --use-punctuation --use-case --wer-sidecar short_punc.wer_tag.json"); const auto testFile = std::string{TEST_DATA} + "short.aligned.punc_case.nlp"; + const auto testSbsFile = std::string{TEST_DATA} + "short.sbs.txt"; REQUIRE(compareFiles(nlp_output.c_str(), testFile.c_str())); + REQUIRE(compareFiles(sbs_output.c_str(), testSbsFile.c_str())); REQUIRE_THAT(result, Contains("WER: 13/42 = 0.3095")); REQUIRE_THAT(result, Contains("WER: INS:2 DEL:7 SUB:4")); }