Skip to content

Commit

Permalink
Fix jlabel parser/serializer bugs (#25)
Browse files Browse the repository at this point in the history
* fix reversed logic of e5/g5

* remove colon

* fix word formatter

* add new test

* use warning class
  • Loading branch information
phenylshima authored Feb 6, 2024
1 parent a5ef2d6 commit 8ce3797
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 11 deletions.
7 changes: 7 additions & 0 deletions crates/jlabel/src/fullcontext_label.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ pub struct AccentPhrasePrevNext {
/// E3/G3: whether the accent phrase is interrogative or not
pub is_interrogative: bool,
/// E5/G5: whether a pause is inserted between this accent phrase and the current accent phrase
///
/// <div class="warning">
///
/// The logic of this field is reversed relative to E5/G5 of the full-context label:
/// a label value of "1" corresponds to `false` and "0" corresponds to `true`.
///
/// </div>
pub is_pause_insertion: Option<bool>,
}

Expand Down
4 changes: 2 additions & 2 deletions crates/jlabel/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ impl<'a> LabelTokenizer<'a> {
mora_count: e1,
accent_position: e2,
is_interrogative: e3,
is_pause_insertion: e5,
is_pause_insertion: e5.map(|e5| !e5),
}))
} else {
Ok(None)
Expand Down Expand Up @@ -222,7 +222,7 @@ impl<'a> LabelTokenizer<'a> {
mora_count: g1,
accent_position: g2,
is_interrogative: g3,
is_pause_insertion: g5,
is_pause_insertion: g5.map(|g5| !g5),
}))
} else {
Ok(None)
Expand Down
23 changes: 15 additions & 8 deletions crates/jlabel/src/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ impl<'a, 'b> Serializer<'a, 'b> {
}
}

/// Writes an optional value as a single field.
///
/// When `value` is `Some`, it is formatted with `{:01}` (zero-pad flag,
/// minimum width 1) into `self.f`. When it is `None`, the output is
/// delegated to `self.xx()`.
fn d01_or_xx<T: Display>(&mut self, value: &Option<T>) -> Result {
    if let Some(inner) = value {
        write!(self.f, "{:01}", inner)
    } else {
        // No value available: fall back to the placeholder writer.
        self.xx()
    }
}

fn d02_or_xx<T: Display>(&mut self, value: &Option<T>) -> Result {
match value {
Some(v) => write!(self.f, "{:02}", v),
Expand Down Expand Up @@ -95,9 +102,9 @@ impl<'a, 'b> Serializer<'a, 'b> {
if let Some(word_prev) = word_prev {
self.d02_or_xx(&word_prev.pos)?;
self.f.write_char('-')?;
self.d02_or_xx(&word_prev.ctype)?;
self.d01_or_xx(&word_prev.ctype)?;
self.f.write_char('_')?;
self.d02_or_xx(&word_prev.cform)?;
self.d01_or_xx(&word_prev.cform)?;
} else {
self.all_xx(&['-', '_'])?;
}
Expand All @@ -112,9 +119,9 @@ impl<'a, 'b> Serializer<'a, 'b> {
if let Some(word_curr) = word_curr {
self.d02_or_xx(&word_curr.pos)?;
self.f.write_char('_')?;
self.d02_or_xx(&word_curr.ctype)?;
self.d01_or_xx(&word_curr.ctype)?;
self.f.write_char('+')?;
self.d02_or_xx(&word_curr.cform)?;
self.d01_or_xx(&word_curr.cform)?;
} else {
self.all_xx(&['_', '+'])?;
}
Expand All @@ -129,9 +136,9 @@ impl<'a, 'b> Serializer<'a, 'b> {
if let Some(word_next) = word_next {
self.d02_or_xx(&word_next.pos)?;
self.f.write_char('+')?;
self.d02_or_xx(&word_next.ctype)?;
self.d01_or_xx(&word_next.ctype)?;
self.f.write_char('_')?;
self.d02_or_xx(&word_next.cform)?;
self.d01_or_xx(&word_next.cform)?;
} else {
self.all_xx(&['+', '_'])?;
}
Expand All @@ -152,7 +159,7 @@ impl<'a, 'b> Serializer<'a, 'b> {
self.f.write_char('_')?;
self.xx()?;
self.f.write_char('-')?;
self.bool_or_xx(&accent_phrase_prev.is_pause_insertion)?;
self.bool_or_xx(&accent_phrase_prev.is_pause_insertion.map(|value| !value))?;
} else {
self.all_xx(&['_', '!', '_', '-'])?;
}
Expand Down Expand Up @@ -204,7 +211,7 @@ impl<'a, 'b> Serializer<'a, 'b> {
self.f.write_char('_')?;
self.xx()?;
self.f.write_char('_')?;
self.bool_or_xx(&accent_phrase_next.is_pause_insertion)?;
self.bool_or_xx(&accent_phrase_next.is_pause_insertion.map(|value| !value))?;
} else {
self.all_xx(&['_', '%', '_', '_'])?;
}
Expand Down
73 changes: 72 additions & 1 deletion crates/jlabel/tests/fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ use jlabel::{
Mora, Phoneme, Utterance, Word,
};

pub fn fixtures() -> [(&'static str, Label); 11] {
pub fn fixtures() -> [(&'static str, Label); 12] {
[
// こんにちは
(
"xx^xx-sil+k=o/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:5_5%0_xx_xx/H:xx_xx/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_5/K:1+1-5",
Label {
Expand Down Expand Up @@ -543,5 +544,75 @@ pub fn fixtures() -> [(&'static str, Label); 11] {
},
},
),
// 「なにを言っているのですか,それはスマホですよ.」
// (partial; 6th phoneme including the first sil)
(
"n^i-o+i=cl/A:2+3+1/B:04-xx_xx/C:13_xx+xx/D:20+1_1/E:xx_xx!xx_xx-xx/F:3_1#0_xx@1_4|1_12/G:3_3%0_xx_1/H:xx_xx/I:4-12@1+2&1-6|1+21/J:2_9/K:2+6-21",
Label {
phoneme: Phoneme {
p2: Some("n".to_string()),
p1: Some("i".to_string()),
c: Some("o".to_string()),
n1: Some("i".to_string()),
n2: Some("cl".to_string()),
},
mora: Some(Mora {
relative_accent_position: 2,
position_forward: 3,
position_backward: 1,
}),
word_prev: Some(Word {
pos: Some(4),
ctype: None,
cform: None,
}),
word_curr: Some(Word {
pos: Some(13),
ctype: None,
cform: None,
}),
word_next: Some(Word {
pos: Some(20),
ctype: Some(1),
cform: Some(1),
}),
accent_phrase_prev: None,
accent_phrase_curr: Some(AccentPhraseCurrent {
mora_count: 3,
accent_position: 1,
is_interrogative: false,
accent_phrase_position_forward: 1,
accent_phrase_position_backward: 4,
mora_position_forward: 1,
mora_position_backward: 12,
}),
accent_phrase_next: Some(AccentPhrasePrevNext {
mora_count: 3,
accent_position: 3,
is_interrogative: false,
is_pause_insertion: Some(false),
}),
breath_group_prev: None,
breath_group_curr: Some(BreathGroupCurrent {
accent_phrase_count: 4,
mora_count: 12,
breath_group_position_forward: 1,
breath_group_position_backward: 2,
accent_phrase_position_forward: 1,
accent_phrase_position_backward: 6,
mora_position_forward: 1,
mora_position_backward: 21,
}),
breath_group_next: Some(BreathGroupPrevNext {
accent_phrase_count: 2,
mora_count: 9,
}),
utterance: Utterance {
breath_group_count: 2,
accent_phrase_count: 6,
mora_count: 21,
},
},
)
]
}

0 comments on commit 8ce3797

Please sign in to comment.