Skip to content

Commit

Permalink
implement parser for label
Browse files Browse the repository at this point in the history
  • Loading branch information
cm-ayf committed Jan 15, 2024
1 parent ec4265d commit c951f19
Show file tree
Hide file tree
Showing 3 changed files with 381 additions and 0 deletions.
58 changes: 58 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ edition = "2021"
rust-version = "1.65.0"

[dependencies]
thiserror = "1.0.56"
322 changes: 322 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
use std::{num::ParseIntError, str::FromStr};

use crate::fullcontext_label::{
AccentPhraseCurrent, AccentPhrasePrevNext, BreathGroupCurrent, BreathGroupPrevNext, Label,
Mora, Phoneme, Utterance, Word,
};

/// Errors produced while parsing a full-context label string.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
/// An expected delimiter was not found in the remaining input; the payload is that delimiter.
#[error("Symbol not found: expected {0}")]
SymbolNotFound(&'static str),
/// A numeric field failed to parse; wraps the underlying `ParseIntError` (converted via `#[from]`).
#[error("Parse int error: {0}")]
ParseIntError(#[from] ParseIntError),
/// A boolean field was something other than `xx`, `0` or `1`.
#[error("Parse bool error")]
ParseBoolError,
/// A field that is required to be `xx` held some other value.
#[error("Not undefined")]
NotUndefined,
}

/// Cursor over a label string that hands out the text between successive delimiters.
#[derive(Debug)]
struct LabelTokenizer<'a> {
// The complete label text being tokenized.
input: &'a str,
// Byte offset into `input` at which the next delimiter search starts.
index: usize,
}

impl<'a> LabelTokenizer<'a> {
fn new(input: &'a str) -> Self {

Check warning on line 27 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L27

Added line #L27 was not covered by tests
Self { input, index: 0 }
}

/// Returns the text between the cursor and the next occurrence of `symbol`,
/// then moves the cursor just past `symbol`.
///
/// # Errors
///
/// Returns `ParseError::SymbolNotFound` when `symbol` does not occur in the
/// remaining input; the cursor is left where it was in that case.
fn until(&mut self, symbol: &'static str) -> Result<&'a str, ParseError> {
    let rest = &self.input[self.index..];
    let offset = rest
        .find(symbol)
        .ok_or(ParseError::SymbolNotFound(symbol))?;
    // Skip both the returned token and the delimiter itself.
    self.index += offset + symbol.len();
    Ok(&rest[..offset])
}

fn parse_or_xx<T: FromStr>(input: &'a str) -> Result<Option<T>, T::Err> {
if input == "xx" {
Ok(None)

Check warning on line 44 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L42-L44

Added lines #L42 - L44 were not covered by tests
} else {
input.parse().map(Some)

Check warning on line 46 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L46

Added line #L46 was not covered by tests
}
}

/// Parses a flag field: `xx` is "no value", `0` is `false`, `1` is `true`.
///
/// # Errors
///
/// Returns `ParseError::ParseBoolError` for any other input.
fn parse_bool_or_xx(input: &'a str) -> Result<Option<bool>, ParseError> {
    if input == "xx" {
        return Ok(None);
    }
    let flag = match input {
        "0" => false,
        "1" => true,
        _ => return Err(ParseError::ParseBoolError),
    };
    Ok(Some(flag))
}

fn assert_xx(input: &'a str) -> Result<(), ParseError> {
if input == "xx" {
Ok(())

Check warning on line 61 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L59-L61

Added lines #L59 - L61 were not covered by tests
} else {
Err(ParseError::NotUndefined)

Check warning on line 63 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L63

Added line #L63 was not covered by tests
}
}

/// `p1ˆp2-p3+p4=p5`
fn p(&mut self) -> Result<Phoneme, ParseError> {
let p1 = self.until("^")?;
let p2 = self.until("-")?;
let p3 = self.until("+")?;
let p4 = self.until("=")?;
let p5 = self.until("/A:")?;

Check warning on line 73 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L68-L73

Added lines #L68 - L73 were not covered by tests
// Ok(Phoneme {
// p2: Self::parse_or_xx(p1)?,
// p1: Self::parse_or_xx(p2)?,
// c: Self::parse_or_xx(p3)?.ok_or(ParseError::ShouldBeUndefined)?,
// n1: Self::parse_or_xx(p4)?,
// n2: Self::parse_or_xx(p5)?,
// })
Ok(Phoneme {
p2: p1.to_string(),
p1: p2.to_string(),
c: p3.to_string(),
n1: p4.to_string(),
n2: p5.to_string(),

Check warning on line 86 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L81-L86

Added lines #L81 - L86 were not covered by tests
})
}

/// `/A:a1+a2+a3`
fn a(&mut self) -> Result<Option<Mora>, ParseError> {
let a1 = Self::parse_or_xx(self.until("+")?)?;
let a2 = Self::parse_or_xx(self.until("+")?)?;
let a3 = Self::parse_or_xx(self.until("/B:")?)?;

Check warning on line 94 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L91-L94

Added lines #L91 - L94 were not covered by tests

if let (Some(a1), Some(a2), Some(a3)) = (a1, a2, a3) {
Ok(Some(Mora {
relative_accent_position: a1,
position_forward: a2,
position_backward: a3,

Check warning on line 100 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L96-L100

Added lines #L96 - L100 were not covered by tests
}))
} else {
Ok(None)

Check warning on line 103 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L103

Added line #L103 was not covered by tests
}
}

/// `/B:b1-b2_b3`
fn b(&mut self) -> Result<Option<Word>, ParseError> {
let b1 = Self::parse_or_xx(self.until("-")?)?;
let b2 = Self::parse_or_xx(self.until("_")?)?;
let b3 = Self::parse_or_xx(self.until("/C:")?)?;

Check warning on line 111 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L108-L111

Added lines #L108 - L111 were not covered by tests

if [b1, b2, b3].iter().all(Option::is_none) {
Ok(None)

Check warning on line 114 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L113-L114

Added lines #L113 - L114 were not covered by tests
} else {
Ok(Some(Word {
pos: b1,
ctype: b2,
cform: b3,

Check warning on line 119 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L116-L119

Added lines #L116 - L119 were not covered by tests
}))
}
}

/// `/C:c1_c2+c3`
fn c(&mut self) -> Result<Option<Word>, ParseError> {
let c1 = Self::parse_or_xx(self.until("_")?)?;
let c2 = Self::parse_or_xx(self.until("+")?)?;
let c3 = Self::parse_or_xx(self.until("/D:")?)?;

Check warning on line 128 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L125-L128

Added lines #L125 - L128 were not covered by tests

if [c1, c2, c3].iter().all(Option::is_none) {
Ok(None)

Check warning on line 131 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L130-L131

Added lines #L130 - L131 were not covered by tests
} else {
Ok(Some(Word {
pos: c1,
ctype: c2,
cform: c3,

Check warning on line 136 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L133-L136

Added lines #L133 - L136 were not covered by tests
}))
}
}

/// `/D:d1+d2_d3`
fn d(&mut self) -> Result<Option<Word>, ParseError> {
let d1 = Self::parse_or_xx(self.until("+")?)?;
let d2 = Self::parse_or_xx(self.until("_")?)?;
let d3 = Self::parse_or_xx(self.until("/E:")?)?;

Check warning on line 145 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L142-L145

Added lines #L142 - L145 were not covered by tests

if [d1, d2, d3].iter().all(Option::is_none) {
Ok(None)

Check warning on line 148 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L147-L148

Added lines #L147 - L148 were not covered by tests
} else {
Ok(Some(Word {
pos: d1,
ctype: d2,
cform: d3,

Check warning on line 153 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L150-L153

Added lines #L150 - L153 were not covered by tests
}))
}
}

/// `/E:e1_e2!e3_e4-e5`
fn e(&mut self) -> Result<Option<AccentPhrasePrevNext>, ParseError> {
let e1 = Self::parse_or_xx(self.until("_")?)?;
let e2 = Self::parse_or_xx(self.until("!")?)?;
let e3 = Self::parse_bool_or_xx(self.until("_")?)?;
Self::assert_xx(self.until("-")?)?;
let e5 = Self::parse_bool_or_xx(self.until("/F:")?)?;

Check warning on line 164 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L159-L164

Added lines #L159 - L164 were not covered by tests

if let (Some(e1), Some(e2), Some(e3)) = (e1, e2, e3) {
Ok(Some(AccentPhrasePrevNext {
mora_count: e1,
accent_position: e2,
is_interrogative: e3,
is_pause_insertion: e5,

Check warning on line 171 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L166-L171

Added lines #L166 - L171 were not covered by tests
}))
} else {
Ok(None)

Check warning on line 174 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L174

Added line #L174 was not covered by tests
}
}

/// `/F:f1_f2#_f3_f4@_f5_f6|f7_f8`
fn f(&mut self) -> Result<Option<AccentPhraseCurrent>, ParseError> {
let f1 = Self::parse_or_xx(self.until("_")?)?;
let f2 = Self::parse_or_xx(self.until("#")?)?;
let f3 = Self::parse_bool_or_xx(self.until("_")?)?;
Self::assert_xx(self.until("@")?)?;
let f5 = Self::parse_or_xx(self.until("_")?)?;
let f6 = Self::parse_or_xx(self.until("|")?)?;
let f7 = Self::parse_or_xx(self.until("_")?)?;
let f8 = Self::parse_or_xx(self.until("/G:")?)?;

Check warning on line 187 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L179-L187

Added lines #L179 - L187 were not covered by tests

if let (Some(f1), Some(f2), Some(f3), Some(f5), Some(f6), Some(f7), Some(f8)) =
(f1, f2, f3, f5, f6, f7, f8)

Check warning on line 190 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L189-L190

Added lines #L189 - L190 were not covered by tests
{
Ok(Some(AccentPhraseCurrent {
mora_count: f1,
accent_position: f2,
is_interrogative: f3,
accent_phrase_position_forward: f5,
accent_phrase_position_backward: f6,
mora_position_forward: f7,
mora_position_backward: f8,

Check warning on line 199 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L192-L199

Added lines #L192 - L199 were not covered by tests
}))
} else {
Ok(None)

Check warning on line 202 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L202

Added line #L202 was not covered by tests
}
}

/// `/G:g1_g2%g3_g4_g5`
fn g(&mut self) -> Result<Option<AccentPhrasePrevNext>, ParseError> {
let g1 = Self::parse_or_xx(self.until("_")?)?;
let g2 = Self::parse_or_xx(self.until("%")?)?;
let g3 = Self::parse_bool_or_xx(self.until("_")?)?;
Self::assert_xx(self.until("_")?)?;
let g5 = Self::parse_bool_or_xx(self.until("/H:")?)?;

Check warning on line 212 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L207-L212

Added lines #L207 - L212 were not covered by tests

if let (Some(g1), Some(g2), Some(g3)) = (g1, g2, g3) {
Ok(Some(AccentPhrasePrevNext {
mora_count: g1,
accent_position: g2,
is_interrogative: g3,
is_pause_insertion: g5,

Check warning on line 219 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L214-L219

Added lines #L214 - L219 were not covered by tests
}))
} else {
Ok(None)

Check warning on line 222 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L222

Added line #L222 was not covered by tests
}
}

/// `/H:h1_h2`
fn h(&mut self) -> Result<Option<BreathGroupPrevNext>, ParseError> {
let h1 = Self::parse_or_xx(self.until("_")?)?;
let h2 = Self::parse_or_xx(self.until("/I:")?)?;

Check warning on line 229 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L227-L229

Added lines #L227 - L229 were not covered by tests

if let (Some(h1), Some(h2)) = (h1, h2) {
Ok(Some(BreathGroupPrevNext {
accent_phrase_count: h1,
mora_count: h2,

Check warning on line 234 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L231-L234

Added lines #L231 - L234 were not covered by tests
}))
} else {
Ok(None)

Check warning on line 237 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L237

Added line #L237 was not covered by tests
}
}

/// `/I:i1-i2@i3+i4&i5-i6|i7+i8`
fn i(&mut self) -> Result<Option<BreathGroupCurrent>, ParseError> {
let i1 = Self::parse_or_xx(self.until("-")?)?;
let i2 = Self::parse_or_xx(self.until("@")?)?;
let i3 = Self::parse_or_xx(self.until("+")?)?;
let i4 = Self::parse_or_xx(self.until("&")?)?;
let i5 = Self::parse_or_xx(self.until("-")?)?;
let i6 = Self::parse_or_xx(self.until("|")?)?;
let i7 = Self::parse_or_xx(self.until("+")?)?;
let i8 = Self::parse_or_xx(self.until("/J:")?)?;

Check warning on line 250 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L242-L250

Added lines #L242 - L250 were not covered by tests

if let (Some(i1), Some(i2), Some(i3), Some(i4), Some(i5), Some(i6), Some(i7), Some(i8)) =
(i1, i2, i3, i4, i5, i6, i7, i8)

Check warning on line 253 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L252-L253

Added lines #L252 - L253 were not covered by tests
{
Ok(Some(BreathGroupCurrent {
accent_phrase_count: i1,
mora_count: i2,
breath_group_position_forward: i3,
breath_group_position_backward: i4,
accent_phrase_position_forward: i5,
accent_phrase_position_backward: i6,
mora_position_forward: i7,
mora_position_backward: i8,

Check warning on line 263 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L255-L263

Added lines #L255 - L263 were not covered by tests
}))
} else {
Ok(None)

Check warning on line 266 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L266

Added line #L266 was not covered by tests
}
}

/// `/J:j1_j2`
fn j(&mut self) -> Result<Option<BreathGroupPrevNext>, ParseError> {
let j1 = Self::parse_or_xx(self.until("_")?)?;
let j2 = Self::parse_or_xx(self.until("/K:")?)?;

Check warning on line 273 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L271-L273

Added lines #L271 - L273 were not covered by tests

if let (Some(j1), Some(j2)) = (j1, j2) {
Ok(Some(BreathGroupPrevNext {
accent_phrase_count: j1,
mora_count: j2,

Check warning on line 278 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L275-L278

Added lines #L275 - L278 were not covered by tests
}))
} else {
Ok(None)

Check warning on line 281 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L281

Added line #L281 was not covered by tests
}
}

/// `/K:k1+k2-k3`
fn k(&mut self) -> Result<Utterance, ParseError> {
let k1 = self.until("+")?.parse()?;
let k2 = self.until("-")?.parse()?;
let k3 = self.input[self.index..].parse()?;

Check warning on line 289 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L286-L289

Added lines #L286 - L289 were not covered by tests

Ok(Utterance {
breath_group_count: k1,
accent_phrase_count: k2,
mora_count: k3,

Check warning on line 294 in src/parser.rs

View check run for this annotation

Codecov / codecov/patch

src/parser.rs#L291-L294

Added lines #L291 - L294 were not covered by tests
})
}

/// Runs every section parser once, in label order, and assembles the `Label`.
///
/// The order of the calls matters: each section parser continues from the
/// cursor position left behind by the previous one. The first error aborts
/// parsing.
fn consume(mut self) -> Result<Label, ParseError> {
    let phoneme = self.p()?;
    let mora = self.a()?;
    let word_prev = self.b()?;
    let word_curr = self.c()?;
    let word_next = self.d()?;
    let accent_phrase_prev = self.e()?;
    let accent_phrase_curr = self.f()?;
    let accent_phrase_next = self.g()?;
    let breath_group_prev = self.h()?;
    let breath_group_curr = self.i()?;
    let breath_group_next = self.j()?;
    let utterance = self.k()?;

    Ok(Label {
        phoneme,
        mora,
        word_prev,
        word_curr,
        word_next,
        accent_phrase_prev,
        accent_phrase_curr,
        accent_phrase_next,
        breath_group_prev,
        breath_group_curr,
        breath_group_next,
        utterance,
    })
}
}

impl FromStr for Label {
    type Err = ParseError;

    /// Parses a full-context label string into a `Label` by running a
    /// `LabelTokenizer` over `s` from start to end.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let tokenizer = LabelTokenizer::new(s);
        tokenizer.consume()
    }
}

0 comments on commit c951f19

Please sign in to comment.