diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ea12f9a..ec4e423 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,6 +16,8 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: stable + - name: Lint + run: cargo fmt --check - name: Build run: cargo build --verbose - name: Test diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d75e615 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "editor.tabSize": 4, + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "editor.insertSpaces": true +} diff --git a/Makefile b/Makefile index 946c59c..10c1a0c 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +lint: + cargo clippy + release-patch: cargo release patch --no-publish --execute @@ -5,4 +8,4 @@ release-minor: cargo release minor --no-publish --execute release-major: - cargo release major --no-publish --execute \ No newline at end of file + cargo release major --no-publish --execute diff --git a/src/ast.rs b/src/ast.rs index 6738737..7e237ed 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,128 +1,127 @@ use std::ops::Range; -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; pub type Span = Range; pub type Spanned = (Span, T); #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Stmt { - SoundChange { - source: Spanned, - target: Spanned, - environment: Option>, - description: Option>, - }, - Import { - path: Vec>, - absolute: bool, - names: Vec>, - }, - Language { - id: Spanned, - parent: Option>, - name: Option>, - }, - Word { - gloss: Spanned, - pronunciation: Spanned>, - definitions: Vec, - }, - Class { - label: Spanned, - encodes: Vec>, - annotates: Vec>, - phonemes: Vec>, - }, - Series { - label: Spanned, - series: Spanned, - }, - Trait { - label: Spanned, - members: Vec>, - }, - Milestone { - time: Option>, - language: Option>, - } + SoundChange { + source: Spanned, + target: Spanned, + environment: Option>, + description: Option>, + }, + Import { + path: Vec>, + absolute: bool, + names: Vec>, + }, + Language { + id: Spanned, + parent: Option>, + name: Option>, + }, + Word { + gloss: Spanned, + pronunciation: Spanned>, + definitions: Vec, + }, + Class { + label: Spanned, + encodes: Vec>, + annotates: Vec>, + phonemes: Vec>, + }, + Series { + label: Spanned, + series: Spanned, + }, + Trait { + label: Spanned, + members: Vec>, + }, + Milestone { + time: Option>, + language: Option>, + }, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Source { - Pattern(Pattern), - Empty, + Pattern(Pattern), + Empty, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Target { - Modification(Vec>), - Pattern(Pattern), - Empty, + Modification(Vec>), + Pattern(Pattern), + Empty, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Feature { - Positive(String), - Negative(String), + Positive(String), + Negative(String), } pub type Pattern = Vec; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Segment { - Category(Category), - Phonemes(String), + Category(Category), + Phonemes(String), } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Category { - pub base_class: Option>, - pub features: Vec>, + pub base_class: Option>, + pub features: Vec>, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Environment { - pub before: Option, - pub after: Option, + pub before: Option, + pub after: Option, } pub type EnvPattern = Vec; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum EnvElement { - Segment(Segment), - SyllableBoundary, - WordBoundary, + Segment(Segment), + SyllableBoundary, + WordBoundary, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Definition { - pub pos: Option>, - pub definition: Spanned, + pub pos: Option>, + pub definition: Spanned, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Series { - Category(Category), - List(Vec>), + Category(Category), + List(Vec>), } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct PhonemeDef { - pub label: Spanned, - pub traits: Vec>, + pub label: Spanned, + pub traits: Vec>, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct TraitMember { - pub labels: Vec>, - pub notation: Option>, - pub default: bool, + pub labels: Vec>, + pub notation: Option>, + pub default: bool, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Time { - Instant(i64), - Range(i64, i64), + Instant(i64), + Range(i64, i64), } - diff --git a/src/lib.rs b/src/lib.rs index 3b66481..da38500 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,57 +5,56 @@ pub mod ast; #[cfg(test)] mod test { - use super::*; + use super::*; - use crate::ast::{Stmt, Spanned}; + use crate::ast::{Spanned, Stmt}; - use ariadne::{ - Report, - Label, - Source as SourceCode, - ReportKind, Color, - }; - - use chumsky::{error::SimpleReason, prelude::Simple}; + use ariadne::{Color, Label, Report, ReportKind, Source as SourceCode}; - fn display_errs(src: &str, errs: &Vec>) { - let start = errs.iter() - .map(|err| err.span()) - .fold(src.len(), |min, cur| if cur.start < min { cur.start } else { min }); + use chumsky::{error::SimpleReason, prelude::Simple}; - Report::build(ReportKind::Error, (), start) - .with_labels( - errs.iter() - .map(|err| { - Label::new(err.span()) - .with_message(match err.reason() { - SimpleReason::Unexpected => err.to_string(), - SimpleReason::Unclosed { span: _, delimiter } => format!("Unmatched delimited {}", delimiter), - SimpleReason::Custom(msg) => msg.clone(), - }) - .with_color(Color::Red) - }) - ) - .finish() - .eprint(SourceCode::from(src.clone())) - .unwrap(); - } + fn display_errs(src: &str, errs: &Vec>) { + let start = errs + .iter() + .map(|err| err.span()) + .fold( + src.len(), + |min, cur| if cur.start < min { cur.start } else { min }, + ); - fn _parse(src: &str) -> Result>, Vec>> { - let res = parse(src); + Report::build(ReportKind::Error, (), start) + .with_labels(errs.iter().map(|err| { + Label::new(err.span()) + .with_message(match err.reason() { + SimpleReason::Unexpected => err.to_string(), + SimpleReason::Unclosed { span: _, delimiter } => { + format!("Unmatched delimited {}", delimiter) + } + SimpleReason::Custom(msg) => msg.clone(), + }) + .with_color(Color::Red) + })) + .finish() + .eprint(SourceCode::from(src.clone())) + .unwrap(); + } + + fn _parse(src: &str) -> Result>, Vec>> { + let res = parse(src); - match res { - Ok(ast) => Ok(ast), - Err(errs) => { - display_errs(&src, &errs); - Err(errs) - }, + match res { + Ok(ast) => Ok(ast), + Err(errs) => { + display_errs(&src, &errs); + Err(errs) + } + } } - } - #[test] - fn it_works() { - let res = _parse(" + #[test] + fn it_works() { + let res = _parse( + " import * from @core/ipa series F = { i, e, ε, æ } @@ -79,10 +78,11 @@ mod test { @ 1940, AmEng $ [C+alveolar+stop] > [+flap] / V_V : Alveolar stops lenite to flaps intervocallically - "); + ", + ); - println!("{:#?}", res); + println!("{:#?}", res); - assert!(res.is_ok()) - } + assert!(res.is_ok()) + } } diff --git a/src/parser.rs b/src/parser.rs index e5a42c5..59724ff 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,40 +1,42 @@ -use chumsky::{prelude::*, text::{newline, whitespace}}; +use chumsky::{ + prelude::*, + text::{newline, whitespace}, +}; -use crate::ast::{Stmt, Spanned}; +use crate::ast::{Spanned, Stmt}; mod class_definition; -mod series_definition; mod common; mod import; mod lang_definition; +mod milestone; +mod series_definition; mod sound_change; mod trait_definition; mod word_definition; -mod milestone; fn stmt() -> impl Parser, Error = Simple> { - choice([ - sound_change::parser().boxed(), - import::parser().boxed(), - lang_definition::parser().boxed(), - word_definition::parser().boxed(), - trait_definition::parser().boxed(), - class_definition::parser().boxed(), - series_definition::parser().boxed(), - milestone::parser().boxed(), - ]) + choice([ + sound_change::parser().boxed(), + import::parser().boxed(), + lang_definition::parser().boxed(), + word_definition::parser().boxed(), + trait_definition::parser().boxed(), + class_definition::parser().boxed(), + series_definition::parser().boxed(), + milestone::parser().boxed(), + ]) .map_with_span(|stmt, span| (span, stmt)) .then_ignore(newline().repeated().at_least(1).ignored().or(end())) .then_ignore(whitespace()) } fn root() -> impl Parser>, Error = Simple> { - stmt() - .repeated() - .padded() - .then_ignore(end()) + stmt().repeated().padded().then_ignore(end()) } -pub fn parse(source: &str) -> Result, Stmt)>, Vec>> { - root().parse(source) +pub fn parse( + source: &str, +) -> Result, Stmt)>, Vec>> { + root().parse(source) } diff --git a/src/parser/class_definition.rs b/src/parser/class_definition.rs index cd321c8..f7cf8a2 100644 --- a/src/parser/class_definition.rs +++ b/src/parser/class_definition.rs @@ -1,98 +1,126 @@ -use chumsky::{prelude::*, text::{ident, whitespace}}; +use crate::ast::{PhonemeDef, Stmt}; use crate::parser::common::*; -use crate::ast::{ - Stmt, - PhonemeDef, +use chumsky::{ + prelude::*, + text::{ident, whitespace}, }; pub fn parser() -> impl Parser> { - let label = class() - .map_with_span(|c, span| (span, c.to_string())); + let label = class().map_with_span(|c, span| (span, c.to_string())); - let start = just("class") - .padded(); + let start = just("class").padded(); - let encodes = just("encodes") - .padded() - .ignore_then( - ident() - .map_with_span(|id, span| (span, id)) - .separated_by(whitespace()) - .allow_leading() - .allow_trailing() - .delimited_by(just("("), just(")")) + let encodes = just("encodes").padded().ignore_then( + ident() + .map_with_span(|id, span| (span, id)) + .separated_by(whitespace()) + .allow_leading() + .allow_trailing() + .delimited_by(just("("), just(")")), ); - - let annotates = just("annotates") - .padded() - .ignore_then( - ident() - .map_with_span(|id, span| (span, id)) - .separated_by(whitespace()) - .allow_leading() - .allow_trailing() - .delimited_by(just("("), just(")")) - ) - .or_not() - .map(|a| match a { - Some(v) => v, - None => vec![], - }); - let phoneme_definition = word_chars() - .map_with_span(|phoneme, span| (span, phoneme)) - .then_ignore(just("=").padded()) - .then( - ident() - .map_with_span(|traits, span| (span, traits)) - .separated_by(inline_whitespace()) - .allow_leading() + let annotates = just("annotates") + .padded() + .ignore_then( + ident() + .map_with_span(|id, span| (span, id)) + .separated_by(whitespace()) + .allow_leading() + .allow_trailing() + .delimited_by(just("("), just(")")), + ) + .or_not() + .map(|a| match a { + Some(v) => v, + None => vec![], + }); + + let phoneme_definition = word_chars() + .map_with_span(|phoneme, span| (span, phoneme)) + .then_ignore(just("=").padded()) + .then( + ident() + .map_with_span(|traits, span| (span, traits)) + .separated_by(inline_whitespace()) + .allow_leading() + .allow_trailing(), + ) + .map(|(label, traits)| PhonemeDef { label, traits }); + + let body = phoneme_definition + .map_with_span(|ph, span| (span, ph)) + .separated_by(just(",").padded()) .allow_trailing() - ) - .map(|(label, traits)| PhonemeDef { label, traits }); - - let body = phoneme_definition - .map_with_span(|ph, span| (span, ph)) - .separated_by(just(",").padded()) - .allow_trailing() - .at_least(1) - .then_ignore(whitespace()) - .delimited_by(just("{").padded(), just("}")); + .at_least(1) + .then_ignore(whitespace()) + .delimited_by(just("{").padded(), just("}")); - start - .ignore_then(label) - .then(encodes) - .then(annotates) - .then(body) - .map(|(((label, encodes), annotates), phonemes)| Stmt::Class { label, encodes, annotates, phonemes }) + start + .ignore_then(label) + .then(encodes) + .then(annotates) + .then(body) + .map(|(((label, encodes), annotates), phonemes)| Stmt::Class { + label, + encodes, + annotates, + phonemes, + }) } #[cfg(test)] mod test { - use super::*; + use super::*; - #[test] - fn it_parses_a_full_class_definition() { - assert_eq!( - parser().parse( - "class C encodes (place manner) { + #[test] + fn it_parses_a_full_class_definition() { + assert_eq!( + parser().parse( + "class C encodes (place manner) { p = bilabial plosive, t = alveolar plosive, k = velar plosive, t͡s = alveolar affricate, }" - ), - Ok(Stmt::Class { - label: (6..7, "C".into()), - encodes: vec![(17..22, "place".into()), (23..29, "manner".into())], - annotates: vec![], - phonemes: vec![ - (43..63, PhonemeDef { label: (43..44, "p".into()), traits: vec![(47..55, "bilabial".into()), (56..63, "plosive".into())] }), - (75..95, PhonemeDef { label: (75..76, "t".into()), traits: vec![(79..87, "alveolar".into()), (88..95, "plosive".into())] }), - (107..124, PhonemeDef { label: (107..108, "k".into()), traits: vec![(111..116, "velar".into()), (117..124, "plosive".into())] }), - (136..160, PhonemeDef { label: (136..139, "t͡s".into()), traits: vec![(142..150, "alveolar".into()), (151..160, "affricate".into())] }), - ] - }) - ) - } + ), + Ok(Stmt::Class { + label: (6..7, "C".into()), + encodes: vec![(17..22, "place".into()), (23..29, "manner".into())], + annotates: vec![], + phonemes: vec![ + ( + 43..63, + PhonemeDef { + label: (43..44, "p".into()), + traits: vec![(47..55, "bilabial".into()), (56..63, "plosive".into())] + } + ), + ( + 75..95, + PhonemeDef { + label: (75..76, "t".into()), + traits: vec![(79..87, "alveolar".into()), (88..95, "plosive".into())] + } + ), + ( + 107..124, + PhonemeDef { + label: (107..108, "k".into()), + traits: vec![(111..116, "velar".into()), (117..124, "plosive".into())] + } + ), + ( + 136..160, + PhonemeDef { + label: (136..139, "t͡s".into()), + traits: vec![ + (142..150, "alveolar".into()), + (151..160, "affricate".into()) + ] + } + ), + ] + }) + ) + } } diff --git a/src/parser/common.rs b/src/parser/common.rs index 87e0f09..3920f43 100644 --- a/src/parser/common.rs +++ b/src/parser/common.rs @@ -1,79 +1,80 @@ -use chumsky::{prelude::*, text::{Character, ident} }; -use crate::ast::{ - Feature, - Category, +use crate::ast::{Category, Feature}; +use chumsky::{ + prelude::*, + text::{ident, Character}, }; pub fn inline_whitespace() -> impl Parser> { - filter(|c: &char| c.is_inline_whitespace()) - .repeated() - .ignored() + filter(|c: &char| c.is_inline_whitespace()) + .repeated() + .ignored() } pub fn word_char() -> impl Parser> { - filter(|c: &char| !"/.,[]{}()>+-_#".contains(*c) && !c.is_whitespace()) + filter(|c: &char| !"/.,[]{}()>+-_#".contains(*c) && !c.is_whitespace()) } pub fn word_chars() -> impl Parser> { - word_char() - .repeated() - .at_least(1) - .map(|cs| cs.iter().collect()) + word_char() + .repeated() + .at_least(1) + .map(|cs| cs.iter().collect()) } pub fn class() -> impl Parser> { - filter(|c: &char| c.is_ascii_uppercase()) + filter(|c: &char| c.is_ascii_uppercase()) } pub fn description() -> impl Parser> { - filter(|c: &char| !"\r\n{}".contains(*c)) - .repeated() - .at_least(1) - .map(|cs| cs.iter().collect()) + filter(|c: &char| !"\r\n{}".contains(*c)) + .repeated() + .at_least(1) + .map(|cs| cs.iter().collect()) } pub fn syllable() -> impl Parser> { - word_char() - .repeated() - .at_least(1) - .map(|cs| cs.iter().collect()) + word_char() + .repeated() + .at_least(1) + .map(|cs| cs.iter().collect()) } pub fn feature() -> impl Parser> { - let sign = filter(|c: &char| "+-".contains(*c)); - let ident = ident(); - - sign - .then(ident) - .padded() - .map(|(s, i)| match s { - '+' => Feature::Positive(i), - _ => Feature::Negative(i), + let sign = filter(|c: &char| "+-".contains(*c)); + let ident = ident(); + + sign.then(ident).padded().map(|(s, i)| match s { + '+' => Feature::Positive(i), + _ => Feature::Negative(i), }) } pub fn category() -> impl Parser> { - class() - .map_with_span(|class, span| (span, class)) - .padded() - .or_not() - .then( - feature() - .map_with_span(|feat, span| (span, feat)) - .repeated() - .at_least(1) - ) - .delimited_by(just("["), just("]")) - .map(|(base_class, features)| Category { base_class, features }) + class() + .map_with_span(|class, span| (span, class)) + .padded() + .or_not() + .then( + feature() + .map_with_span(|feat, span| (span, feat)) + .repeated() + .at_least(1), + ) + .delimited_by(just("["), just("]")) + .map(|(base_class, features)| Category { + base_class, + features, + }) } pub fn integer() -> impl Parser> { - filter(|c: &char| c.is_numeric()) - .repeated() - .at_least(1) - .try_map(|cs, span| cs - .iter() - .collect::() - .parse::() - .map_err(|e| Simple::custom(span, format!("{}", e)))) + filter(|c: &char| c.is_numeric()) + .repeated() + .at_least(1) + .try_map(|cs, span| { + cs.iter() + .collect::() + .parse::() + .map_err(|e| Simple::custom(span, format!("{}", e))) + }) } diff --git a/src/parser/import.rs b/src/parser/import.rs index 3b7d2f4..fff7eb0 100644 --- a/src/parser/import.rs +++ b/src/parser/import.rs @@ -3,126 +3,117 @@ use chumsky::{prelude::*, text::ident}; use crate::ast::Stmt; pub fn parser() -> impl Parser> { + let name = just("@") + .then(ident::>()) + .map(|(at, id)| at.to_owned() + &id); - let name = just("@") - .then(ident::>()) - .map(|(at, id)| at.to_owned() + &id); + let segment = name + .or(ident()) + .or(just(".").map(|s| s.to_string())) + .or(just("..").map(|s| s.to_string())); - let segment = name - .or(ident()) - .or(just(".").map(|s| s.to_string())) - .or(just("..").map(|s| s.to_string())); + let path = just("/") + .or_not() + .then( + segment + .map_with_span(|seg, span| (span, seg)) + .separated_by(just("/")) + .at_least(1), + ) + .map(|(slash, segs)| (slash.is_some(), segs)); - let path = just("/") - .or_not() - .then( - segment - .map_with_span(|seg, span| (span, seg)) - .separated_by(just("/")) - .at_least(1) - ) - .map(|(slash, segs)| (slash.is_some(), segs)); + let names = ident() + .map_with_span(|name, span| (span, name)) + .separated_by(just(",").padded()) + .allow_trailing() + .delimited_by(just("(").padded(), just(")").padded()) + .or(just("*").map_with_span(|star, span| vec![(span, star.to_string())])); - let names = ident() - .map_with_span(|name, span| (span, name)) - .separated_by(just(",").padded()) - .allow_trailing() - .delimited_by(just("(").padded(), just(")").padded()) - .or(just("*").map_with_span(|star, span| vec![(span, star.to_string())])); - - just("import") - .padded() - .ignore_then(names) - .then_ignore(just("from").padded()) - .then(path) - .map(|(names, (absolute, path))| Stmt::Import { path, absolute, names }) + just("import") + .padded() + .ignore_then(names) + .then_ignore(just("from").padded()) + .then(path) + .map(|(names, (absolute, path))| Stmt::Import { + path, + absolute, + names, + }) } #[cfg(test)] mod test { - use super::*; + use super::*; - #[test] - fn it_parses_a_relative_star_import() { - let src = "import * from ./my/phonology"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Import { - path: vec![ - (14..15, ".".into()), - (16..18, "my".into()), - (19..28, "phonology".into()), - ], - absolute: false, - names: vec![(7..8, "*".into())] - }) - ) - } + #[test] + fn it_parses_a_relative_star_import() { + let src = "import * from ./my/phonology"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Import { + path: vec![ + (14..15, ".".into()), + (16..18, "my".into()), + (19..28, "phonology".into()), + ], + absolute: false, + names: vec![(7..8, "*".into())] + }) + ) + } - #[test] - fn it_parses_an_absolute_star_import() { - let src = "import * from /my/phonology"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Import { - path: vec![ - (15..17, "my".into()), - (18..27, "phonology".into()), - ], - absolute: true, - names: vec![(7..8, "*".into())] - }) - ) - } + #[test] + fn it_parses_an_absolute_star_import() { + let src = "import * from /my/phonology"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Import { + path: vec![(15..17, "my".into()), (18..27, "phonology".into()),], + absolute: true, + names: vec![(7..8, "*".into())] + }) + ) + } - #[test] - fn it_parses_an_external_import() { - let src = "import * from @core/ipa"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Import { - path: vec![ - (14..19, "@core".into()), - (20..23, "ipa".into()), - ], - absolute: false, - names: vec![(7..8, "*".into())] - }) - ) - } + #[test] + fn it_parses_an_external_import() { + let src = "import * from @core/ipa"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Import { + path: vec![(14..19, "@core".into()), (20..23, "ipa".into()),], + absolute: false, + names: vec![(7..8, "*".into())] + }) + ) + } - #[test] - fn it_parses_named_imports() { - let src = "import (C, V) from @core/ipa"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Import { - path: vec![ - (19..24, "@core".into()), - (25..28, "ipa".into()), - ], - absolute: false, - names: vec![(8..9, "C".into()), (11..12, "V".into())] - }) - ) - } + #[test] + fn it_parses_named_imports() { + let src = "import (C, V) from @core/ipa"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Import { + path: vec![(19..24, "@core".into()), (25..28, "ipa".into()),], + absolute: false, + names: vec![(8..9, "C".into()), (11..12, "V".into())] + }) + ) + } - #[test] - fn it_parses_multiline_imports() { - let src = "import ( + #[test] + fn it_parses_multiline_imports() { + let src = "import ( Place, Manner, ) from @core/ipa"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Import { - path: vec![ - (47..52, "@core".into()), - (53..56, "ipa".into()), - ], - absolute: false, - names: vec![(15..20, "Place".into()), (28..34, "Manner".into())] - }) - ) - } + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Import { + path: vec![(47..52, "@core".into()), (53..56, "ipa".into()),], + absolute: false, + names: vec![(15..20, "Place".into()), (28..34, "Manner".into())] + }) + ) + } } diff --git a/src/parser/lang_definition.rs b/src/parser/lang_definition.rs index c324020..d297128 100644 --- a/src/parser/lang_definition.rs +++ b/src/parser/lang_definition.rs @@ -1,74 +1,91 @@ -use chumsky::{prelude::*, text::{ident, Character}}; use crate::ast::Stmt; +use chumsky::{ + prelude::*, + text::{ident, Character}, +}; pub fn parser() -> impl Parser> { - let start = just("lang").padded(); + let start = just("lang").padded(); - let id = ident() - .map_with_span(|id, span| (span, id)); + let id = ident().map_with_span(|id, span| (span, id)); - let parent = just("<") - .padded() - .ignore_then( - ident() - .map_with_span(|p, span| (span, p)) - ) - .or_not(); + let parent = just("<") + .padded() + .ignore_then(ident().map_with_span(|p, span| (span, p))) + .or_not(); - let name = just(":") - .padded() - .ignore_then( - filter(|c: &char| c.is_alphanumeric() || c.is_inline_whitespace() || "-()".contains(*c)) - .repeated() - .at_least(1) - .map_with_span(|cs, span| (span, cs.iter().collect())) - ) - .or_not(); + let name = just(":") + .padded() + .ignore_then( + filter(|c: &char| { + c.is_alphanumeric() || c.is_inline_whitespace() || "-()".contains(*c) + }) + .repeated() + .at_least(1) + .map_with_span(|cs, span| (span, cs.iter().collect())), + ) + .or_not(); - start - .ignore_then(id) - .then(parent) - .then(name) - .map(|((id, parent), name)| Stmt::Language { id, parent, name }) + start + .ignore_then(id) + .then(parent) + .then(name) + .map(|((id, parent), name)| Stmt::Language { id, parent, name }) } #[cfg(test)] mod test { - use super::*; + use super::*; - #[test] - fn it_parses_a_language() { - let src = "lang PA"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Language { id: (5..7, "PA".to_string()), parent: None, name: None }) - ) - } + #[test] + fn it_parses_a_language() { + let src = "lang PA"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Language { + id: (5..7, "PA".to_string()), + parent: None, + name: None + }) + ) + } - #[test] - fn it_parses_a_language_with_a_parent() { - let src = "lang OA < PA"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Language { id: (5..7, "OA".to_string()), parent: Some((10..12, "PA".to_string())), name: None }) - ) - } + #[test] + fn it_parses_a_language_with_a_parent() { + let src = "lang OA < PA"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Language { + id: (5..7, "OA".to_string()), + parent: Some((10..12, "PA".to_string())), + name: None + }) + ) + } - #[test] - fn it_parses_a_language_with_a_name() { - let src = "lang PA: Proto-A"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Language { id: (5..7, "PA".to_string()), parent: None, name: Some((9..16, "Proto-A".to_string())) }) - ) - } + #[test] + fn it_parses_a_language_with_a_name() { + let src = "lang PA: Proto-A"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Language { + id: (5..7, "PA".to_string()), + parent: None, + name: Some((9..16, "Proto-A".to_string())) + }) + ) + } - #[test] - fn it_parses_a_language_with_a_parent_and_a_name() { - let src = "lang OA < PA: Old A"; - assert_eq!( - parser().parse(src.to_string()), - Ok(Stmt::Language { id: (5..7, "OA".to_string()), parent: Some((10..12, "PA".to_string())), name: Some((14..19, "Old A".to_string())) }) - ) - } + #[test] + fn it_parses_a_language_with_a_parent_and_a_name() { + let src = "lang OA < PA: Old A"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Language { + id: (5..7, "OA".to_string()), + parent: Some((10..12, "PA".to_string())), + name: Some((14..19, "Old A".to_string())) + }) + ) + } } diff --git a/src/parser/milestone.rs b/src/parser/milestone.rs index 3699d21..4be3972 100644 --- a/src/parser/milestone.rs +++ b/src/parser/milestone.rs @@ -1,126 +1,114 @@ use chumsky::{prelude::*, text::ident}; -use crate::ast::{Stmt, Time, Spanned}; +use crate::ast::{Spanned, Stmt, Time}; use super::common::integer; -fn time() -> impl Parser, Error = Simple> { - integer() - .then( - just("-") - .padded() - .ignore_then(integer()) - .or_not() - ) - .map_with_span(|(from, to), span| (span, match to { - Some(to) => Time::Range(from, to), - None => Time::Instant(from), - })) +fn time() -> impl Parser, Error = Simple> { + integer() + .then(just("-").padded().ignore_then(integer()).or_not()) + .map_with_span(|(from, to), span| { + ( + span, + match to { + Some(to) => Time::Range(from, to), + None => Time::Instant(from), + }, + ) + }) } fn language() -> impl Parser, Error = Simple> { - ident().map_with_span(|lang, span| (span, lang)) + ident().map_with_span(|lang, span| (span, lang)) } pub fn parser() -> impl Parser> { - let start = just("@") - .padded(); - - let time_only = start - .ignore_then(time()) - .map(|t| Stmt::Milestone { time: Some(t), language: None }); - - let language_only = start - .ignore_then(language()) - .map(|l| Stmt::Milestone { time: None, language: Some(l) }); - - let both = start - .ignore_then(time()) - .then_ignore(just(",").padded()) - .then(language()) - .map(|(t, l)| Stmt::Milestone { time: Some(t), language: Some(l) }); - - choice([ - both.boxed(), - time_only.boxed(), - language_only.boxed(), - ]) - + let start = just("@").padded(); + + let time_only = start.ignore_then(time()).map(|t| Stmt::Milestone { + time: Some(t), + language: None, + }); + + let language_only = start.ignore_then(language()).map(|l| Stmt::Milestone { + time: None, + language: Some(l), + }); + + let both = start + .ignore_then(time()) + .then_ignore(just(",").padded()) + .then(language()) + .map(|(t, l)| Stmt::Milestone { + time: Some(t), + language: Some(l), + }); + + choice([both.boxed(), time_only.boxed(), language_only.boxed()]) } #[cfg(test)] mod test { - use super::*; - - use crate::ast::{ - Stmt, - Time, - }; - - #[test] - fn it_parses_an_instant_milestone() { - let src = "@ 42"; - let res = parser().parse(src.to_string()); - assert_eq!( - res, - Ok( - Stmt::Milestone { - time: Some((2..4, Time::Instant(42))), - language: None - } - ) - ) - } - - #[test] - fn it_parses_a_range_milestone() { - let src = "@ 0-100"; - let res = parser().parse(src.to_string()); - assert_eq!( - res, - Ok( - Stmt::Milestone { - time: Some((2..7, Time::Range(0, 100))), - language: None - } - ) - ) - } - - #[test] - fn it_parses_a_milestone_with_a_language() { - let src = "@ 42, TokiPona"; - let res = parser().parse(src.to_string()); - assert_eq!( - res, - Ok( - Stmt::Milestone { - time: Some((2..4, Time::Instant(42))), - language: Some((6..14, "TokiPona".into())), - } - ) - ) - } - - #[test] - fn it_parses_a_milestone_with_only_a_language() { - let src = "@ TokiPona"; - let res = parser().parse(src.to_string()); - assert_eq!( - res, - Ok( - Stmt::Milestone { - time: None, - language: Some((2..10, "TokiPona".into())), - } - ) - ) - } - - #[test] - fn it_does_not_parse_a_milestone_without_a_time_or_a_language() { - let src = "@ "; - let res = parser().parse(src.to_string()); - assert!(res.is_err()) - } + use super::*; + + use crate::ast::{Stmt, Time}; + + #[test] + fn it_parses_an_instant_milestone() { + let src = "@ 42"; + let res = parser().parse(src.to_string()); + assert_eq!( + res, + Ok(Stmt::Milestone { + time: Some((2..4, Time::Instant(42))), + language: None + }) + ) + } + + #[test] + fn it_parses_a_range_milestone() { + let src = "@ 0-100"; + let res = parser().parse(src.to_string()); + assert_eq!( + res, + Ok(Stmt::Milestone { + time: Some((2..7, Time::Range(0, 100))), + language: None + }) + ) + } + + #[test] + fn it_parses_a_milestone_with_a_language() { + let src = "@ 42, TokiPona"; + let res = parser().parse(src.to_string()); + assert_eq!( + res, + Ok(Stmt::Milestone { + time: Some((2..4, Time::Instant(42))), + language: Some((6..14, "TokiPona".into())), + }) + ) + } + + #[test] + fn it_parses_a_milestone_with_only_a_language() { + let src = "@ TokiPona"; + let res = parser().parse(src.to_string()); + assert_eq!( + res, + Ok(Stmt::Milestone { + time: None, + language: Some((2..10, "TokiPona".into())), + }) + ) + } + + #[test] + fn it_does_not_parse_a_milestone_without_a_time_or_a_language() { + let src = "@ "; + let res = parser().parse(src.to_string()); + assert!(res.is_err()) + } } diff --git a/src/parser/series_definition.rs b/src/parser/series_definition.rs index 76fde0c..106d85b 100644 --- a/src/parser/series_definition.rs +++ b/src/parser/series_definition.rs @@ -1,74 +1,76 @@ -use chumsky::{prelude::*, text::whitespace}; -use crate::{parser::common::*, ast::{Spanned, Series}}; use crate::ast::Stmt; +use crate::{ + ast::{Series, Spanned}, + parser::common::*, +}; +use chumsky::{prelude::*, text::whitespace}; fn label() -> impl Parser, Error = Simple> { - class() - .map_with_span(|c, span| (span, c.to_string())) + class().map_with_span(|c, span| (span, c.to_string())) } pub fn parser() -> impl Parser> { - let start = just("series") - .padded(); + let start = just("series").padded(); + + let list = word_chars() + .map_with_span(|phoneme, span| (span, phoneme)) + .separated_by(just(",").padded()) + .allow_trailing() + .at_least(1) + .then_ignore(whitespace()) + .delimited_by(just("{").padded(), just("}")) + .map(Series::List); - let list = - word_chars() - .map_with_span(|phoneme, span| (span, phoneme)) - .separated_by(just(",").padded()) - .allow_trailing() - .at_least(1) - .then_ignore(whitespace()) - .delimited_by(just("{").padded(), just("}")) - .map(|phonemes| Series::List(phonemes)); - - let category = category() - .map(|cat| Series::Category(cat)); + let category = category().map(Series::Category); - let body = - choice([ - list.boxed(), - category.boxed(), - ]).map_with_span(|series, span| (span, series)); + let body = + choice([list.boxed(), category.boxed()]).map_with_span(|series, span| (span, series)); - start - .ignore_then(label()) - .then_ignore(just("=").padded()) - .then(body) - .map(|(label, series)| Stmt::Series { label, series }) + start + .ignore_then(label()) + .then_ignore(just("=").padded()) + .then(body) + .map(|(label, series)| Stmt::Series { label, series }) } #[cfg(test)] mod test { - use crate::ast::{Category, Feature}; + use crate::ast::{Category, Feature}; - use super::*; + use super::*; - #[test] - fn it_parses_a_list_class_definition() { - assert_eq!( - parser().parse("series C = { a, b, c }"), - Ok(Stmt::Series { - label: (7..8, "C".into()), - series: (11..22, Series::List(vec![ - (13..14, "a".into()), - (16..17, "b".into()), - (19..20, "c".into()), - ])), - }) - ) - } + #[test] + fn it_parses_a_list_class_definition() { + assert_eq!( + parser().parse("series C = { a, b, c }"), + Ok(Stmt::Series { + label: (7..8, "C".into()), + series: ( + 11..22, + Series::List(vec![ + (13..14, "a".into()), + (16..17, "b".into()), + (19..20, "c".into()), + ]) + ), + }) + ) + } - #[test] - fn it_parses_a_category_class_definition() { - assert_eq!( - parser().parse("series F = [C+fricative]"), - Ok(Stmt::Series { - label: (7..8, "F".into()), - series: (11..24, Series::Category(Category { - base_class: Some((12..13, 'C')), - features: vec![(13..23, Feature::Positive("fricative".into()))] - })), - }) - ) - } + #[test] + fn it_parses_a_category_class_definition() { + assert_eq!( + parser().parse("series F = [C+fricative]"), + Ok(Stmt::Series { + label: (7..8, "F".into()), + series: ( + 11..24, + Series::Category(Category { + base_class: Some((12..13, 'C')), + features: vec![(13..23, Feature::Positive("fricative".into()))] + }) + ), + }) + ) + } } diff --git a/src/parser/sound_change.rs b/src/parser/sound_change.rs index 08adf98..af0f7ad 100644 --- a/src/parser/sound_change.rs +++ b/src/parser/sound_change.rs @@ -3,237 +3,217 @@ use chumsky::prelude::*; use crate::parser::common::*; use crate::ast::{ - Segment, - Pattern, - Spanned, - Source, - Target, - EnvPattern, - EnvElement, - Environment, - Stmt, + EnvElement, EnvPattern, Environment, Pattern, Segment, Source, Spanned, Stmt, Target, }; fn segment() -> impl Parser> { - choice([ - word_chars().map(|cs| Segment::Phonemes(cs)).boxed(), - category().map(|c| Segment::Category(c)).boxed(), - ]) + choice([ + word_chars().map(Segment::Phonemes).boxed(), + category().map(Segment::Category).boxed(), + ]) } fn pattern() -> impl Parser> { - segment().repeated().at_least(1) + segment().repeated().at_least(1) } fn empty_source() -> impl Parser> { - just("[]").map(|_| Source::Empty) + just("[]").map(|_| Source::Empty) } fn source() -> impl Parser, Error = Simple> { - choice([ - pattern().map(Source::Pattern).boxed(), - empty_source().boxed(), - ]) - .map_with_span(|source, span| (span, source)) + choice([ + pattern().map(Source::Pattern).boxed(), + empty_source().boxed(), + ]) + .map_with_span(|source, span| (span, source)) } fn modification() -> impl Parser> { - feature() - .map_with_span(|feat, span| (span, feat)) - .repeated() - .at_least(1) - .delimited_by(just("["), just("]")) - .map(Target::Modification) + feature() + .map_with_span(|feat, span| (span, feat)) + .repeated() + .at_least(1) + .delimited_by(just("["), just("]")) + .map(Target::Modification) } fn empty_target() -> impl Parser> { - just("[]").map(|_| Target::Empty) + just("[]").map(|_| Target::Empty) } fn target() -> impl Parser, Error = Simple> { - choice([ - modification().boxed(), - pattern().map(Target::Pattern).boxed(), - empty_target().boxed(), - ]) - .map_with_span(|target, span| (span, target)) + choice([ + modification().boxed(), + pattern().map(Target::Pattern).boxed(), + empty_target().boxed(), + ]) + .map_with_span(|target, span| (span, target)) } fn env_pattern() -> impl Parser> { - segment().map(EnvElement::Segment) - .or(just(".").map(|_| EnvElement::SyllableBoundary)) - .or(just("#").map(|_| EnvElement::WordBoundary)) - .repeated().at_least(1) + segment() + .map(EnvElement::Segment) + .or(just(".").map(|_| EnvElement::SyllableBoundary)) + .or(just("#").map(|_| EnvElement::WordBoundary)) + .repeated() + .at_least(1) } fn environment() -> impl Parser, Error = Simple> { - env_pattern().or_not() - .then_ignore(just("_")) - .then(env_pattern().or_not()) - .map_with_span(|(before, after), span| (span, Environment { before, after })) + env_pattern() + .or_not() + .then_ignore(just("_")) + .then(env_pattern().or_not()) + .map_with_span(|(before, after), span| (span, Environment { before, after })) } pub fn parser() -> impl Parser> { - let start = just("$").padded(); - - let source = source(); - - let target = - just(">") - .padded() - .ignore_then(target()); - - let environment = - just("/") - .padded() - .ignore_then(environment()) - .or_not(); - - let description = - just(":") - .padded() - .ignore_then( - description() - .map_with_span(|desc, span| (span, desc)) - ) - .or_not(); - - start - .ignore_then(source) - .then(target) - .then(environment) - .then(description) - .map(|(((source, target), environment), description)| Stmt::SoundChange { source, target, environment, description }) + let start = just("$").padded(); + + let source = source(); + + let target = just(">").padded().ignore_then(target()); + + let environment = just("/").padded().ignore_then(environment()).or_not(); + + let description = just(":") + .padded() + .ignore_then(description().map_with_span(|desc, span| (span, desc))) + .or_not(); + + start + .ignore_then(source) + .then(target) + .then(environment) + .then(description) + .map( + |(((source, target), environment), description)| Stmt::SoundChange { + source, + target, + environment, + description, + }, + ) } #[cfg(test)] mod test { - use super::*; - - use crate::ast::{ - Category, - Feature, - }; - - #[test] - fn it_parses_a_sound_change() { - let src = "$ ɢ > g"; - let res = parser().parse(src.to_string()); - assert_eq!( - res, - Ok( - Stmt::SoundChange { - source: (2..3, Source::Pattern(vec![Segment::Phonemes("ɢ".into())])), - target: (6..7, Target::Pattern(vec![Segment::Phonemes("g".into())])), - environment: None, - description: None, - } - ) - ) - } - - #[test] - fn it_parses_a_sound_change_with_an_environment() { - let src = "$ k > c / _[V+close]"; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::SoundChange { - source: (2..3, Source::Pattern(vec![Segment::Phonemes("k".into())])), - target: (6..7, Target::Pattern(vec![Segment::Phonemes("c".into())])), - environment: Some((10..20, Environment { - before: None, - after: Some(vec![ - EnvElement::Segment(Segment::Category(Category { - base_class: Some((12..13, 'V')), - features: vec![(13..19, Feature::Positive("close".to_string()))] - })) - ]) - })), - description: None, - } - ) - ) - } - - #[test] - fn it_parses_a_sound_change_with_an_environment_and_description() { - let src = "$ k > c / #_i : Word-initial k lenites to c before i"; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::SoundChange { - source: (2..3, Source::Pattern(vec![Segment::Phonemes("k".into())])), - target: (6..7, Target::Pattern(vec![Segment::Phonemes("c".into())])), - environment: Some((10..13, Environment { - before: Some(vec![EnvElement::WordBoundary]), - after: Some(vec![EnvElement::Segment(Segment::Phonemes("i".into()))]) - })), - description: Some((16..52, "Word-initial k lenites to c before i".to_string())), - } - ) - ) - } - - #[test] - fn it_parses_a_sound_change_with_categories_and_modifications() { - let src = "$ [C+stop+alveolar] > [+flap]"; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::SoundChange { - source: ( - 2..19, - Source::Pattern(vec![Segment::Category(Category { - base_class: Some((3..4, 'C')), - features: vec![ - (4..9, Feature::Positive("stop".to_string())), - (9..18, Feature::Positive("alveolar".to_string())), - ], - })]) - ), - target: ( - 22..29, - Target::Modification(vec![ - (23..28, Feature::Positive("flap".to_string())), - ]) - ), - environment: None, - description: None, - } - ) - ) - } - - #[test] - fn it_parses_a_sound_change_with_empty_source_or_target() { - let src = "$ [] > []"; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::SoundChange { - source: (2..4, Source::Empty), - target: (7..9, Target::Empty), - environment: None, - description: None, - } - ) - ) - } - - #[test] - fn it_does_not_parse_a_sound_change_with_no_source() { - let src = "$ > [] / _"; - let res= parser().parse(src.to_string()); - - assert!(res.is_err()); - } - - #[test] - fn it_does_not_parse_a_sound_change_with_no_target() { - let src = "$ [] > / _"; - let res= parser().parse(src.to_string()); - - assert!(res.is_err()); - } + use super::*; + + use crate::ast::{Category, Feature}; + + #[test] + fn it_parses_a_sound_change() { + let src = "$ ɢ > g"; + let res = parser().parse(src.to_string()); + assert_eq!( + res, + Ok(Stmt::SoundChange { + source: (2..3, Source::Pattern(vec![Segment::Phonemes("ɢ".into())])), + target: (6..7, Target::Pattern(vec![Segment::Phonemes("g".into())])), + environment: None, + description: None, + }) + ) + } + + #[test] + fn it_parses_a_sound_change_with_an_environment() { + let src = "$ k > c / _[V+close]"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::SoundChange { + source: (2..3, Source::Pattern(vec![Segment::Phonemes("k".into())])), + target: (6..7, Target::Pattern(vec![Segment::Phonemes("c".into())])), + environment: Some(( + 10..20, + Environment { + before: None, + after: Some(vec![EnvElement::Segment(Segment::Category(Category { + base_class: Some((12..13, 'V')), + features: vec![(13..19, Feature::Positive("close".to_string()))] + }))]) + } + )), + description: None, + }) + ) + } + + #[test] + fn it_parses_a_sound_change_with_an_environment_and_description() { + let src = "$ k > c / #_i : Word-initial k lenites to c before i"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::SoundChange { + source: (2..3, Source::Pattern(vec![Segment::Phonemes("k".into())])), + target: (6..7, Target::Pattern(vec![Segment::Phonemes("c".into())])), + environment: Some(( + 10..13, + Environment { + before: Some(vec![EnvElement::WordBoundary]), + after: Some(vec![EnvElement::Segment(Segment::Phonemes("i".into()))]) + } + )), + description: Some((16..52, "Word-initial k lenites to c before i".to_string())), + }) + ) + } + + #[test] + fn it_parses_a_sound_change_with_categories_and_modifications() { + let src = "$ [C+stop+alveolar] > [+flap]"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::SoundChange { + source: ( + 2..19, + Source::Pattern(vec![Segment::Category(Category { + base_class: Some((3..4, 'C')), + features: vec![ + (4..9, Feature::Positive("stop".to_string())), + (9..18, Feature::Positive("alveolar".to_string())), + ], + })]) + ), + target: ( + 22..29, + Target::Modification(vec![(23..28, Feature::Positive("flap".to_string())),]) + ), + environment: None, + description: None, + }) + ) + } + + #[test] + fn it_parses_a_sound_change_with_empty_source_or_target() { + let src = "$ [] > []"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::SoundChange { + source: (2..4, Source::Empty), + target: (7..9, Target::Empty), + environment: None, + description: None, + }) + ) + } + + #[test] + fn it_does_not_parse_a_sound_change_with_no_source() { + let src = "$ > [] / _"; + let res = parser().parse(src.to_string()); + + assert!(res.is_err()); + } + + #[test] + fn it_does_not_parse_a_sound_change_with_no_target() { + let src = "$ [] > / _"; + let res = parser().parse(src.to_string()); + + assert!(res.is_err()); + } } diff --git a/src/parser/trait_definition.rs b/src/parser/trait_definition.rs index c5bfa17..4e3b6f7 100644 --- a/src/parser/trait_definition.rs +++ b/src/parser/trait_definition.rs @@ -1,129 +1,182 @@ -use chumsky::{prelude::*, text::{ident, whitespace}}; +use crate::ast::{Stmt, TraitMember}; use crate::parser::common::*; -use crate::ast::{ - Stmt, - TraitMember, +use chumsky::{ + prelude::*, + text::{ident, whitespace}, }; pub fn parser() -> impl Parser> { - let start = just("trait") - .padded(); + let start = just("trait").padded(); - let sequence = || word_chars() - .or_not() - .map(|opt| match opt { - Some(ch) => ch, - None => "".to_string(), - }); + let sequence = || { + word_chars().or_not().map(|opt| match opt { + Some(ch) => ch, + None => "".to_string(), + }) + }; - let notation = just("=") - .padded() - .ignore_then( - sequence() - .then_ignore(just("_")) - .then(sequence()) - .map_with_span(|(before, after), span| (span, before + "_" + &after)) - ) - .or_not(); + let notation = just("=") + .padded() + .ignore_then( + sequence() + .then_ignore(just("_")) + .then(sequence()) + .map_with_span(|(before, after), span| (span, before + "_" + &after)), + ) + .or_not(); - let member = just("default") - .padded() - .or_not() - .map(|d| d.is_some()) - .then( - ident() - .map_with_span(|id, span| (span, id)) - .separated_by(just("|").padded()) - .at_least(1) - ) - .then(notation) - .map(|((default, labels), notation)| TraitMember { default, labels, notation }); + let member = just("default") + .padded() + .or_not() + .map(|d| d.is_some()) + .then( + ident() + .map_with_span(|id, span| (span, id)) + .separated_by(just("|").padded()) + .at_least(1), + ) + .then(notation) + .map(|((default, labels), notation)| TraitMember { + default, + labels, + notation, + }); - let body = member - .map_with_span(|m, span| (span, m)) - .separated_by(just(",").padded()) - .allow_trailing() - .at_least(1) - .then_ignore(whitespace()) - .delimited_by(just("{").padded(), just("}")); + let body = member + .map_with_span(|m, span| (span, m)) + .separated_by(just(",").padded()) + .allow_trailing() + .at_least(1) + .then_ignore(whitespace()) + .delimited_by(just("{").padded(), just("}")); - start - .ignore_then(ident().map_with_span(|id, span| (span, id))) - .then(body) - .map(|(label, members)| Stmt::Trait { label, members }) + start + .ignore_then(ident().map_with_span(|id, span| (span, id))) + .then(body) + .map(|(label, members)| Stmt::Trait { label, members }) } #[cfg(test)] mod test { - use super::*; + use super::*; - #[test] - fn it_parses_a_trait() { - let src = " + #[test] + fn it_parses_a_trait() { + let src = " trait Place { labial, alveolar, velar, } "; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::Trait { - label: (13..18, "Place".into()), - members: vec![ - (29..35, TraitMember { labels: vec![(29..35, "labial".into())], notation: None, default: false }), - (45..53, TraitMember { labels: vec![(45..53, "alveolar".into())], notation: None, default: false }), - (63..68, TraitMember { labels: vec![(63..68, "velar".into())], notation: None, default: false }), - ], - }, - ) - ) - } + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Trait { + label: (13..18, "Place".into()), + members: vec![ + ( + 29..35, + TraitMember { + labels: vec![(29..35, "labial".into())], + notation: None, + default: false + } + ), + ( + 45..53, + TraitMember { + labels: vec![(45..53, "alveolar".into())], + notation: None, + default: false + } + ), + ( + 63..68, + TraitMember { + labels: vec![(63..68, "velar".into())], + notation: None, + default: false + } + ), + ], + },) + ) + } - #[test] - fn it_parses_a_trait_with_annotations() { - let src = " + #[test] + fn it_parses_a_trait_with_annotations() { + let src = " trait Stress { primary = ˈ_, secondary = ˌ_, } "; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::Trait { - label: (13..19, "Stress".into()), - members: vec![ - (30..42, TraitMember { labels: vec![(30..37, "primary".into())], notation: Some((40..42, "ˈ_".into())), default: false }), - (52..66, TraitMember { labels: vec![(52..61, "secondary".into())], notation: Some((64..66, "ˌ_".into())), default: false }), - ], - }, - ) - ) - } + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Trait { + label: (13..19, "Stress".into()), + members: vec![ + ( + 30..42, + TraitMember { + labels: vec![(30..37, "primary".into())], + notation: Some((40..42, "ˈ_".into())), + default: false + } + ), + ( + 52..66, + TraitMember { + labels: vec![(52..61, "secondary".into())], + notation: Some((64..66, "ˌ_".into())), + default: false + } + ), + ], + },) + ) + } - #[test] - fn it_parses_a_trait_with_a_default() { - let src = " + #[test] + fn it_parses_a_trait_with_a_default() { + let src = " trait Length { default short, long = _:, overlong = _::, } "; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::Trait { - label: (13..19, "Length".into()), - members: vec![ - (30..43, TraitMember { labels: vec![(38..43, "short".into())], notation: None, default: true }), - (53..62, TraitMember { labels: vec![(53..57, "long".into())], notation: Some((60..62, "_:".into())), default: false }), - (72..86, TraitMember { labels: vec![(72..80, "overlong".into())], notation: Some((83..86, "_::".into())), default: false }), - ], - }, - ) - ) - } + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Trait { + label: (13..19, "Length".into()), + members: vec![ + ( + 30..43, + TraitMember { + labels: vec![(38..43, "short".into())], + notation: None, + default: true + } + ), + ( + 53..62, + TraitMember { + labels: vec![(53..57, "long".into())], + notation: Some((60..62, "_:".into())), + default: false + } + ), + ( + 72..86, + TraitMember { + labels: vec![(72..80, "overlong".into())], + notation: Some((83..86, "_::".into())), + default: false + } + ), + ], + },) + ) + } } diff --git a/src/parser/word_definition.rs b/src/parser/word_definition.rs index afbebed..9b37358 100644 --- a/src/parser/word_definition.rs +++ b/src/parser/word_definition.rs @@ -1,132 +1,123 @@ -use chumsky::{prelude::*, text::{newline, ident, whitespace}}; use crate::parser::common::*; - -use crate::ast::{ - Definition, - Stmt, +use chumsky::{ + prelude::*, + text::{ident, newline, whitespace}, }; +use crate::ast::{Definition, Stmt}; + fn definition() -> impl Parser> { - ident() - .map_with_span(|pos, span| (span, pos)) - .then_ignore(just(".")) - .padded() - .or_not() - .then( - description() - .map_with_span(|def, span| (span, def)) - ) - .map(|(pos, definition)| Definition { pos, definition }) + ident() + .map_with_span(|pos, span| (span, pos)) + .then_ignore(just(".")) + .padded() + .or_not() + .then(description().map_with_span(|def, span| (span, def))) + .map(|(pos, definition)| Definition { pos, definition }) } fn definition_block() -> impl Parser, Error = Simple> { - definition() - .separated_by(newline().then(whitespace())) - .allow_leading() - .allow_trailing() - .at_least(1) - .delimited_by(just("{"), just("}")) + definition() + .separated_by(newline().then(whitespace())) + .allow_leading() + .allow_trailing() + .at_least(1) + .delimited_by(just("{"), just("}")) } pub fn parser() -> impl Parser> { - let start = just("-").padded(); + let start = just("-").padded(); - let gloss = ident() - .map_with_span(|g, span| (span, g)); + let gloss = ident().map_with_span(|g, span| (span, g)); - let pronunciation = syllable() - .separated_by(just(".")) - .at_least(1) - .delimited_by(just("/"), just("/")) - .map_with_span(|p, span| (span, p)) - .padded(); + let pronunciation = syllable() + .separated_by(just(".")) + .at_least(1) + .delimited_by(just("/"), just("/")) + .map_with_span(|p, span| (span, p)) + .padded(); - let definitions = whitespace().ignore_then( - definition_block() - .or(definition().map(|d| vec![d])) - ); + let definitions = + whitespace().ignore_then(definition_block().or(definition().map(|d| vec![d]))); - start - .ignore_then(gloss) - .then(pronunciation) - .then(definitions) - .map(|((gloss, pronunciation), definitions)| Stmt::Word { gloss, pronunciation, definitions }) + start + .ignore_then(gloss) + .then(pronunciation) + .then(definitions) + .map(|((gloss, pronunciation), definitions)| Stmt::Word { + gloss, + pronunciation, + definitions, + }) } #[cfg(test)] mod test { - use super::*; + use super::*; - #[test] - fn it_parses_a_word_with_an_inline_definition() { - let src = "- water /'wa.ter/ noun. the liquid state of H20"; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::Word { - gloss: (2..7, "water".to_string()), - pronunciation: (8..17, vec!["'wa".to_string(), "ter".to_string()]), - definitions: vec![ - Definition { - pos: Some((18..22, "noun".to_string())), - definition: (24..47, "the liquid state of H20".to_string()), - } - ], - }, - ) - ) - } + #[test] + fn it_parses_a_word_with_an_inline_definition() { + let src = "- water /'wa.ter/ noun. the liquid state of H20"; + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Word { + gloss: (2..7, "water".to_string()), + pronunciation: (8..17, vec!["'wa".to_string(), "ter".to_string()]), + definitions: vec![Definition { + pos: Some((18..22, "noun".to_string())), + definition: (24..47, "the liquid state of H20".to_string()), + }], + },) + ) + } - #[test] - fn it_parses_a_word_with_one_definition() { - let src = " + #[test] + fn it_parses_a_word_with_one_definition() { + let src = " - water /'wa.ter/ { noun. the liquid state of H20 } "; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::Word { - gloss: (9..14, "water".to_string()), - pronunciation: (15..24, vec!["'wa".to_string(), "ter".to_string()]), - definitions: vec![ - Definition { - pos: Some((35..39, "noun".to_string())), - definition: (41..64, "the liquid state of H20".to_string()), - } - ], - }, - ) - ) - } + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Word { + gloss: (9..14, "water".to_string()), + pronunciation: (15..24, vec!["'wa".to_string(), "ter".to_string()]), + definitions: vec![Definition { + pos: Some((35..39, "noun".to_string())), + definition: (41..64, "the liquid state of H20".to_string()), + }], + },) + ) + } - #[test] - fn it_parses_a_word_with_multiple_definitions() { - let src = " + #[test] + fn it_parses_a_word_with_multiple_definitions() { + let src = " - water /'wa.ter/ { noun. the liquid state of H20 verb. to pour water over a plant or area of land } "; - assert_eq!( - parser().parse(src.to_string()), - Ok( - Stmt::Word { - gloss: (9..14, "water".to_string()), - pronunciation: (15..24, vec!["'wa".to_string(), "ter".to_string()]), - definitions: vec![ - Definition { - pos: Some((35..39, "noun".to_string())), - definition: (41..64, "the liquid state of H20".to_string()), - }, - Definition { - pos: Some((73..77, "verb".to_string())), - definition: (79..121, "to pour water over a plant or area of land".to_string()), - }, - ], - }, - ) - ) - } + assert_eq!( + parser().parse(src.to_string()), + Ok(Stmt::Word { + gloss: (9..14, "water".to_string()), + pronunciation: (15..24, vec!["'wa".to_string(), "ter".to_string()]), + definitions: vec![ + Definition { + pos: Some((35..39, "noun".to_string())), + definition: (41..64, "the liquid state of H20".to_string()), + }, + Definition { + pos: Some((73..77, "verb".to_string())), + definition: ( + 79..121, + "to pour water over a plant or area of land".to_string() + ), + }, + ], + },) + ) + } }