From d69f7f31e4dd19ea69407ee44910bf6db81a03ef Mon Sep 17 00:00:00 2001
From: Aster
Date: Fri, 21 Jun 2024 18:30:52 +0800
Subject: [PATCH] Check the paragraph break

---
 projects/pex-core/src/helpers/comment/mod.rs | 11 +++++++
 projects/pex-core/src/helpers/mod.rs         |  3 +-
 projects/pex-core/src/helpers/string/mod.rs  | 32 ++++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/projects/pex-core/src/helpers/comment/mod.rs b/projects/pex-core/src/helpers/comment/mod.rs
index 376b613..d65ad70 100644
--- a/projects/pex-core/src/helpers/comment/mod.rs
+++ b/projects/pex-core/src/helpers/comment/mod.rs
@@ -51,6 +51,17 @@ impl<'i> FnOnce<(ParseState<'i>,)> for CommentLine {
     }
 }
 
+/// Parse the rest of the current line.
+///
+/// Consumes everything up to, but not including, the first `\r` or `\n`; when no
+/// line terminator is present the whole residual input is consumed.
+pub fn rest_of_line(input: ParseState) -> ParseResult {
+    // `find` returns a char-boundary byte index, so the safe slice below cannot panic.
+    let offset = input.residual.find(&['\r', '\n']).unwrap_or(input.residual.len());
+    let body = &input.residual[..offset];
+    input.advance(offset).finish(StringView::new(body, input.start_offset))
+}
+
 /// Parse the comment block
 ///
 /// # Patterns
diff --git a/projects/pex-core/src/helpers/mod.rs b/projects/pex-core/src/helpers/mod.rs
index 2d881cf..65887fe 100644
--- a/projects/pex-core/src/helpers/mod.rs
+++ b/projects/pex-core/src/helpers/mod.rs
@@ -14,7 +14,8 @@ pub use self::{
     comment::{CommentBlock, CommentLine},
     number::*,
     string::{
-        quotation_pair, quotation_pair_escaped, quotation_pair_nested, surround_pair_with_escaper, unescape_us, UnicodeUnescape,
+        paragraph_break, quotation_pair, quotation_pair_escaped, quotation_pair_nested, surround_pair_with_escaper,
+        unescape_us, UnicodeUnescape,
     },
     surround_pair::{SurroundPair, SurroundPattern},
     trie_set::CharactersTrie,
diff --git a/projects/pex-core/src/helpers/string/mod.rs b/projects/pex-core/src/helpers/string/mod.rs
index 64a515a..4fa945f 100644
--- a/projects/pex-core/src/helpers/string/mod.rs
+++ b/projects/pex-core/src/helpers/string/mod.rs
@@ -219,3 +219,35 @@ pub fn unescape_us(input: ParseState) -> ParseResult {
         None => StopBecause::custom_error("Characters must not beyond U+10FFFF", start.start_offset, state.start_offset)?,
     }
 }
+
+/// Parse a paragraph break: a run of whitespace that contains at least two newlines.
+///
+/// The run starts at the current position and covers every leading character for which
+/// [`char::is_whitespace`] holds. Trailing `' '` characters are then given back, so space
+/// indentation in front of the next paragraph is not consumed; other trailing whitespace
+/// (for example tabs) stays part of the match. The remaining byte length is handed to
+/// `ParseState::advance_view` to produce the result.
+///
+/// # Errors
+///
+/// - Stops with a missing `PARAGRAPH_LINE` when nothing is left to consume, i.e. the
+///   whitespace run is empty or consists of spaces only.
+/// - Stops with a missing `PARAGRAPH_BREAK` when the run holds fewer than two `\n`.
+pub fn paragraph_break<'i>(input: ParseState<'i>) -> ParseResult<&'i str> {
+    let residual = input.residual;
+    // Byte length of the leading whitespace run; `find` always returns a char boundary,
+    // so the safe slice below cannot panic.
+    let run = residual.find(|c: char| !c.is_whitespace()).unwrap_or(residual.len());
+    let text = &residual[..run];
+    // Give back trailing spaces that don't have to be captured.
+    let offset = text.trim_end_matches(' ').len();
+    if offset == 0 {
+        StopBecause::missing_string("PARAGRAPH_LINE", input.start_offset)?;
+    }
+    // Count on the whole run: the spaces given back cannot contain a newline.
+    let newlines = text.chars().filter(|c| *c == '\n').count();
+    if newlines <= 1 {
+        StopBecause::missing_string("PARAGRAPH_BREAK", input.start_offset)?;
+    }
+    input.advance_view(offset)
+}