From 4bf12ec07e5d717b28d3d3d945a317f02b90451a Mon Sep 17 00:00:00 2001 From: cm-ayf Date: Mon, 15 Jan 2024 13:17:08 +0900 Subject: [PATCH 1/3] implement parser for label --- Cargo.lock | 58 +++++++++ Cargo.toml | 1 + src/parser.rs | 322 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 381 insertions(+) create mode 100644 src/parser.rs diff --git a/Cargo.lock b/Cargo.lock index 723fe90..320c40b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,3 +5,61 @@ version = 3 [[package]] name = "jlabel" version = "0.1.0" +dependencies = [ + "thiserror", +] + +[[package]] +name = "proc-macro2" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/Cargo.toml b/Cargo.toml index 4997399..d092e80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" rust-version = "1.65.0" [dependencies] +thiserror = "1.0.56" diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..97eaf2c --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,322 @@ +use std::{num::ParseIntError, str::FromStr}; + +use crate::fullcontext_label::{ + AccentPhraseCurrent, AccentPhrasePrevNext, BreathGroupCurrent, BreathGroupPrevNext, Label, + Mora, Phoneme, Utterance, Word, +}; + +#[derive(Debug, thiserror::Error)] +pub enum ParseError { + #[error("Symbol not found: expected {0}")] + SymbolNotFound(&'static str), + #[error("Parse int error: {0}")] + ParseIntError(#[from] ParseIntError), + #[error("Parse bool error")] + ParseBoolError, + #[error("Not undefined")] + NotUndefined, +} + +#[derive(Debug)] +struct LabelTokenizer<'a> { + input: &'a str, + index: usize, +} + +impl<'a> LabelTokenizer<'a> { + fn new(input: &'a str) -> Self { + Self { input, index: 0 } + } + + fn until(&mut self, symbol: &'static str) -> Result<&'a str, ParseError> { + match self.input[self.index..].find(symbol) { + Some(i) => { + let result = &self.input[self.index..(self.index + i)]; + self.index += i + symbol.len(); + Ok(result) + } + None => Err(ParseError::SymbolNotFound(symbol)), + } + } + + fn parse_or_xx(input: &'a str) -> Result, T::Err> { + if input == "xx" { + Ok(None) + } else { + input.parse().map(Some) + } + } + + fn parse_bool_or_xx(input: &'a str) -> Result, ParseError> { + match input { + "xx" => Ok(None), + "0" => Ok(Some(false)), + "1" => Ok(Some(true)), + _ => Err(ParseError::ParseBoolError), + } + } + + fn assert_xx(input: &'a str) -> Result<(), ParseError> { + if input == "xx" { + Ok(()) + } else { + Err(ParseError::NotUndefined) + } + } + + /// `p1ˆp2-p3+p4=p5` + fn p(&mut self) -> Result { + let p1 = self.until("^")?; + let p2 = 
self.until("-")?; + let p3 = self.until("+")?; + let p4 = self.until("=")?; + let p5 = self.until("/A:")?; + // Ok(Phoneme { + // p2: Self::parse_or_xx(p1)?, + // p1: Self::parse_or_xx(p2)?, + // c: Self::parse_or_xx(p3)?.ok_or(ParseError::ShouldBeUndefined)?, + // n1: Self::parse_or_xx(p4)?, + // n2: Self::parse_or_xx(p5)?, + // }) + Ok(Phoneme { + p2: p1.to_string(), + p1: p2.to_string(), + c: p3.to_string(), + n1: p4.to_string(), + n2: p5.to_string(), + }) + } + + /// `/A:a1+a2+a3` + fn a(&mut self) -> Result, ParseError> { + let a1 = Self::parse_or_xx(self.until("+")?)?; + let a2 = Self::parse_or_xx(self.until("+")?)?; + let a3 = Self::parse_or_xx(self.until("/B:")?)?; + + if let (Some(a1), Some(a2), Some(a3)) = (a1, a2, a3) { + Ok(Some(Mora { + relative_accent_position: a1, + position_forward: a2, + position_backward: a3, + })) + } else { + Ok(None) + } + } + + /// `/B:b1-b2_b3` + fn b(&mut self) -> Result, ParseError> { + let b1 = Self::parse_or_xx(self.until("-")?)?; + let b2 = Self::parse_or_xx(self.until("_")?)?; + let b3 = Self::parse_or_xx(self.until("/C:")?)?; + + if [b1, b2, b3].iter().all(Option::is_none) { + Ok(None) + } else { + Ok(Some(Word { + pos: b1, + ctype: b2, + cform: b3, + })) + } + } + + /// `/C:c1_c2+c3` + fn c(&mut self) -> Result, ParseError> { + let c1 = Self::parse_or_xx(self.until("_")?)?; + let c2 = Self::parse_or_xx(self.until("+")?)?; + let c3 = Self::parse_or_xx(self.until("/D:")?)?; + + if [c1, c2, c3].iter().all(Option::is_none) { + Ok(None) + } else { + Ok(Some(Word { + pos: c1, + ctype: c2, + cform: c3, + })) + } + } + + /// `/D:d1+d2_d3` + fn d(&mut self) -> Result, ParseError> { + let d1 = Self::parse_or_xx(self.until("+")?)?; + let d2 = Self::parse_or_xx(self.until("_")?)?; + let d3 = Self::parse_or_xx(self.until("/E:")?)?; + + if [d1, d2, d3].iter().all(Option::is_none) { + Ok(None) + } else { + Ok(Some(Word { + pos: d1, + ctype: d2, + cform: d3, + })) + } + } + + /// `/E:e1_e2!e3_e4-e5` + fn e(&mut self) -> Result, 
ParseError> { + let e1 = Self::parse_or_xx(self.until("_")?)?; + let e2 = Self::parse_or_xx(self.until("!")?)?; + let e3 = Self::parse_bool_or_xx(self.until("_")?)?; + Self::assert_xx(self.until("-")?)?; + let e5 = Self::parse_bool_or_xx(self.until("/F:")?)?; + + if let (Some(e1), Some(e2), Some(e3)) = (e1, e2, e3) { + Ok(Some(AccentPhrasePrevNext { + mora_count: e1, + accent_position: e2, + is_interrogative: e3, + is_pause_insertion: e5, + })) + } else { + Ok(None) + } + } + + /// `/F:f1_f2#f3_f4@f5_f6|f7_f8` + fn f(&mut self) -> Result, ParseError> { + let f1 = Self::parse_or_xx(self.until("_")?)?; + let f2 = Self::parse_or_xx(self.until("#")?)?; + let f3 = Self::parse_bool_or_xx(self.until("_")?)?; + Self::assert_xx(self.until("@")?)?; + let f5 = Self::parse_or_xx(self.until("_")?)?; + let f6 = Self::parse_or_xx(self.until("|")?)?; + let f7 = Self::parse_or_xx(self.until("_")?)?; + let f8 = Self::parse_or_xx(self.until("/G:")?)?; + + if let (Some(f1), Some(f2), Some(f3), Some(f5), Some(f6), Some(f7), Some(f8)) = + (f1, f2, f3, f5, f6, f7, f8) + { + Ok(Some(AccentPhraseCurrent { + mora_count: f1, + accent_position: f2, + is_interrogative: f3, + accent_phrase_position_forward: f5, + accent_phrase_position_backward: f6, + mora_position_forward: f7, + mora_position_backward: f8, + })) + } else { + Ok(None) + } + } + + /// `/G:g1_g2%g3_g4_g5` + fn g(&mut self) -> Result, ParseError> { + let g1 = Self::parse_or_xx(self.until("_")?)?; + let g2 = Self::parse_or_xx(self.until("%")?)?; + let g3 = Self::parse_bool_or_xx(self.until("_")?)?; + Self::assert_xx(self.until("_")?)?; + let g5 = Self::parse_bool_or_xx(self.until("/H:")?)?; + + if let (Some(g1), Some(g2), Some(g3)) = (g1, g2, g3) { + Ok(Some(AccentPhrasePrevNext { + mora_count: g1, + accent_position: g2, + is_interrogative: g3, + is_pause_insertion: g5, + })) + } else { + Ok(None) + } + } + + /// `/H:h1_h2` + fn h(&mut self) -> Result, ParseError> { + let h1 = Self::parse_or_xx(self.until("_")?)?; + let h2 = 
Self::parse_or_xx(self.until("/I:")?)?; + + if let (Some(h1), Some(h2)) = (h1, h2) { + Ok(Some(BreathGroupPrevNext { + accent_phrase_count: h1, + mora_count: h2, + })) + } else { + Ok(None) + } + } + + /// `/I:i1-i2@i3+i4&i5-i6|i7+i8` + fn i(&mut self) -> Result, ParseError> { + let i1 = Self::parse_or_xx(self.until("-")?)?; + let i2 = Self::parse_or_xx(self.until("@")?)?; + let i3 = Self::parse_or_xx(self.until("+")?)?; + let i4 = Self::parse_or_xx(self.until("&")?)?; + let i5 = Self::parse_or_xx(self.until("-")?)?; + let i6 = Self::parse_or_xx(self.until("|")?)?; + let i7 = Self::parse_or_xx(self.until("+")?)?; + let i8 = Self::parse_or_xx(self.until("/J:")?)?; + + if let (Some(i1), Some(i2), Some(i3), Some(i4), Some(i5), Some(i6), Some(i7), Some(i8)) = + (i1, i2, i3, i4, i5, i6, i7, i8) + { + Ok(Some(BreathGroupCurrent { + accent_phrase_count: i1, + mora_count: i2, + breath_group_position_forward: i3, + breath_group_position_backward: i4, + accent_phrase_position_forward: i5, + accent_phrase_position_backward: i6, + mora_position_forward: i7, + mora_position_backward: i8, + })) + } else { + Ok(None) + } + } + + /// `/J:j1_j2` + fn j(&mut self) -> Result, ParseError> { + let j1 = Self::parse_or_xx(self.until("_")?)?; + let j2 = Self::parse_or_xx(self.until("/K:")?)?; + + if let (Some(j1), Some(j2)) = (j1, j2) { + Ok(Some(BreathGroupPrevNext { + accent_phrase_count: j1, + mora_count: j2, + })) + } else { + Ok(None) + } + } + + /// `/K:k1+k2-k3` + fn k(&mut self) -> Result { + let k1 = self.until("+")?.parse()?; + let k2 = self.until("-")?.parse()?; + let k3 = self.input[self.index..].parse()?; + + Ok(Utterance { + breath_group_count: k1, + accent_phrase_count: k2, + mora_count: k3, + }) + } + + fn consume(mut self) -> Result { + Ok(Label { + phoneme: self.p()?, + mora: self.a()?, + word_prev: self.b()?, + word_curr: self.c()?, + word_next: self.d()?, + accent_phrase_prev: self.e()?, + accent_phrase_curr: self.f()?, + accent_phrase_next: self.g()?, + 
breath_group_prev: self.h()?, + breath_group_curr: self.i()?, + breath_group_next: self.j()?, + utterance: self.k()?, + }) + } +} + +impl FromStr for Label { + type Err = ParseError; + + fn from_str(s: &str) -> Result { + LabelTokenizer::new(s).consume() + } +} From ec7d6ed92a16e0007205172029be19341043676c Mon Sep 17 00:00:00 2001 From: cm-ayf Date: Mon, 15 Jan 2024 13:49:31 +0900 Subject: [PATCH 2/3] fix after rebase --- src/lib.rs | 1 + src/parser.rs | 35 ++++++++++++++++++----------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dc2bc1e..50f101a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,2 @@ mod fullcontext_label; +mod parser; diff --git a/src/parser.rs b/src/parser.rs index 97eaf2c..98c49d0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -39,6 +39,14 @@ impl<'a> LabelTokenizer<'a> { } } + fn string_or_xx(input: &'a str) -> Option { + if input == "xx" { + None + } else { + Some(input.to_string()) + } + } + fn parse_or_xx(input: &'a str) -> Result, T::Err> { if input == "xx" { Ok(None) @@ -66,24 +74,17 @@ impl<'a> LabelTokenizer<'a> { /// `p1ˆp2-p3+p4=p5` fn p(&mut self) -> Result { - let p1 = self.until("^")?; - let p2 = self.until("-")?; - let p3 = self.until("+")?; - let p4 = self.until("=")?; - let p5 = self.until("/A:")?; - // Ok(Phoneme { - // p2: Self::parse_or_xx(p1)?, - // p1: Self::parse_or_xx(p2)?, - // c: Self::parse_or_xx(p3)?.ok_or(ParseError::ShouldBeUndefined)?, - // n1: Self::parse_or_xx(p4)?, - // n2: Self::parse_or_xx(p5)?, - // }) + let p1 = Self::string_or_xx(self.until("^")?); + let p2 = Self::string_or_xx(self.until("-")?); + let p3 = Self::string_or_xx(self.until("+")?); + let p4 = Self::string_or_xx(self.until("=")?); + let p5 = Self::string_or_xx(self.until("/A:")?); Ok(Phoneme { - p2: p1.to_string(), - p1: p2.to_string(), - c: p3.to_string(), - n1: p4.to_string(), - n2: p5.to_string(), + p2: p1, + p1: p2, + c: p3, + n1: p4, + n2: p5, }) } From 
569156eda764fcf83af98071654ec39d37f31114 Mon Sep 17 00:00:00 2001 From: cm-ayf Date: Mon, 15 Jan 2024 14:05:37 +0900 Subject: [PATCH 3/3] add tests for parser --- src/{parser.rs => parser/mod.rs} | 3 + src/parser/test.rs | 550 +++++++++++++++++++++++++++++++ 2 files changed, 553 insertions(+) rename src/{parser.rs => parser/mod.rs} (99%) create mode 100644 src/parser/test.rs diff --git a/src/parser.rs b/src/parser/mod.rs similarity index 99% rename from src/parser.rs rename to src/parser/mod.rs index 98c49d0..57f5e04 100644 --- a/src/parser.rs +++ b/src/parser/mod.rs @@ -321,3 +321,6 @@ impl FromStr for Label { LabelTokenizer::new(s).consume() } } + +#[cfg(test)] +mod test; diff --git a/src/parser/test.rs b/src/parser/test.rs new file mode 100644 index 0000000..160970d --- /dev/null +++ b/src/parser/test.rs @@ -0,0 +1,550 @@ +use super::*; + +#[test] +fn test_parse() { + let tests = [ + ( + "xx^xx-sil+k=o/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:5_5%0_xx_xx/H:xx_xx/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_5/K:1+1-5", + Label { + phoneme: Phoneme { + p2: None, + p1: None, + c: Some("sil".to_string()), + n1: Some("k".to_string()), + n2: Some("o".to_string()), + }, + mora: None, + word_prev: None, + word_curr: None, + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: None, + accent_phrase_next: Some(AccentPhrasePrevNext { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + is_pause_insertion: None + }), + breath_group_prev: None, + breath_group_curr: None, + breath_group_next: Some(BreathGroupPrevNext { + accent_phrase_count: 1, + mora_count: 5, + }), + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "xx^sil-k+o=N/A:-4+1+5/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: None, + p1: Some("sil".to_string()), + c: 
Some("k".to_string()), + n1: Some("o".to_string()), + n2: Some("N".to_string()), + }, + mora: Some(Mora { + relative_accent_position: -4, + position_forward: 1, + position_backward: 5, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "sil^k-o+N=n/A:-4+1+5/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("sil".to_string()), + p1: Some("k".to_string()), + c: Some("o".to_string()), + n1: Some("N".to_string()), + n2: Some("n".to_string()), + }, + mora: Some(Mora { + relative_accent_position: -4, + position_forward: 1, + position_backward: 5, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: 
Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "k^o-N+n=i/A:-3+2+4/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("k".to_string()), + p1: Some("o".to_string()), + c: Some("N".to_string()), + n1: Some("n".to_string()), + n2: Some("i".to_string()), + }, + mora: Some(Mora { + relative_accent_position: -3, + position_forward: 2, + position_backward: 4, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "o^N-n+i=ch/A:-2+3+3/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("o".to_string()), + p1: Some("N".to_string()), + c: 
Some("n".to_string()), + n1: Some("i".to_string()), + n2: Some("ch".to_string()), + }, + mora: Some(Mora { + relative_accent_position: -2, + position_forward: 3, + position_backward: 3, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "N^n-i+ch=i/A:-2+3+3/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("N".to_string()), + p1: Some("n".to_string()), + c: Some("i".to_string()), + n1: Some("ch".to_string()), + n2: Some("i".to_string()), + }, + mora: Some(Mora { + relative_accent_position: -2, + position_forward: 3, + position_backward: 3, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: 
Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "n^i-ch+i=w/A:-1+4+2/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("n".to_string()), + p1: Some("i".to_string()), + c: Some("ch".to_string()), + n1: Some("i".to_string()), + n2: Some("w".to_string()), + }, + mora: Some(Mora { + relative_accent_position: -1, + position_forward: 4, + position_backward: 2, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "i^ch-i+w=a/A:-1+4+2/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("i".to_string()), + p1: Some("ch".to_string()), + c: 
Some("i".to_string()), + n1: Some("w".to_string()), + n2: Some("a".to_string()), + }, + mora: Some(Mora { + relative_accent_position: -1, + position_forward: 4, + position_backward: 2, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "ch^i-w+a=sil/A:0+5+1/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("ch".to_string()), + p1: Some("i".to_string()), + c: Some("w".to_string()), + n1: Some("a".to_string()), + n2: Some("sil".to_string()), + }, + mora: Some(Mora { + relative_accent_position: 0, + position_forward: 5, + position_backward: 1, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: 
Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "i^w-a+sil=xx/A:0+5+1/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:5_5#0_xx@1_1|1_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-5@1+1&1-1|1+5/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("i".to_string()), + p1: Some("w".to_string()), + c: Some("a".to_string()), + n1: Some("sil".to_string()), + n2: None, + }, + mora: Some(Mora { + relative_accent_position: 0, + position_forward: 5, + position_backward: 1, + }), + word_prev: None, + word_curr: Some(Word { + pos: Some(9), + ctype: None, + cform: None, + }), + word_next: None, + accent_phrase_prev: None, + accent_phrase_curr: Some(AccentPhraseCurrent { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + accent_phrase_next: None, + breath_group_prev: None, + breath_group_curr: Some(BreathGroupCurrent { + accent_phrase_count: 1, + mora_count: 5, + breath_group_position_forward: 1, + breath_group_position_backward: 1, + accent_phrase_position_forward: 1, + accent_phrase_position_backward: 1, + mora_position_forward: 1, + mora_position_backward: 5, + }), + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ( + "w^a-sil+xx=xx/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:5_5!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_5/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+1-5", + Label { + phoneme: Phoneme { + p2: Some("w".to_string()), + p1: Some("a".to_string()), + c: 
Some("sil".to_string()), + n1: None, + n2: None, + }, + mora: None, + word_prev: None, + word_curr: None, + word_next: None, + accent_phrase_prev: Some(AccentPhrasePrevNext { + mora_count: 5, + accent_position: 5, + is_interrogative: false, + is_pause_insertion: None, + }), + accent_phrase_curr: None, + accent_phrase_next: None, + breath_group_prev: Some(BreathGroupPrevNext { + accent_phrase_count: 1, + mora_count: 5, + }), + breath_group_curr: None, + breath_group_next: None, + utterance: Utterance { + breath_group_count: 1, + accent_phrase_count: 1, + mora_count: 5, + }, + }, + ), + ]; + + for (input, expected) in tests.iter() { + let actual = input.parse::