From 22fd82396abd1a52a6edaef5bc707d087856d4d3 Mon Sep 17 00:00:00 2001 From: Charles Edward Gagnon Date: Wed, 4 Sep 2024 17:06:59 -0400 Subject: [PATCH] fix comments --- README.md | 1 + src/parser.rs | 6 +++++- src/parser/error.rs | 46 ++++---------------------------------------- src/parser/lex.rs | 18 +++++++---------- src/parser/macros.rs | 4 ++-- 5 files changed, 19 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index c43ffc8..06fae81 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ the `mathml` output may be tweaked to make it resemble what `pdflatex`, `KaTeX` ### TODO's/Known Bugs - [ ] array `\hline` and `\hdashline` before any content. +- [ ] Test comments - [ ] raise and lower boxes. - [ ] `\sideset` - [ ] `\mathop` and `\mathbin`, etc. diff --git a/src/parser.rs b/src/parser.rs index 1abe8c0..d952c20 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -223,7 +223,11 @@ impl<'b, 'store> InnerParser<'b, 'store> { fn parse(&mut self) -> InnerResult, ScriptDescriptor)>> { // 1. Parse the next token and output everything to the staging stack. let original_content = self.content.trim_start(); - let token = lex::token(&mut self.content)?; + let token = match lex::token(&mut self.content) { + Ok(token) => token, + Err(ErrorKind::Token) => return Ok(None), + Err(e) => return Err(e), + }; match token { Token::ControlSequence(cs) => { if let Some(result) = diff --git a/src/parser/error.rs b/src/parser/error.rs index dcd9d8c..e84d16c 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -22,7 +22,6 @@ struct Inner { context: Box, } -// TODO: add arrows: ^^^ to show where in the context the error occurred. impl ParserError { pub(super) fn new(error: ErrorKind, place: *const u8, span_stack: &mut SpanStack) -> Self { const CONTEXT_SIZE: usize = 12; @@ -121,45 +120,6 @@ fn write_context_str(context: &str, out: &mut String, last: bool, has_previous_c } } -// fn reach_original_call_site(&mut self, substr_start: *const u8) -> usize { -// let mut ptr_val = substr_start as isize; -// -// dbg!(&self, ptr_val); -// -// while let Some(expansion) = self.expansions.last() { -// let expansion_ptr = expansion.full_expansion.as_ptr() as isize; -// -// if ptr_val >= expansion_ptr -// && ptr_val <= expansion_ptr + expansion.full_expansion.len() as isize -// { -// let index = if ptr_val <= expansion_ptr + expansion.expansion_length as isize { -// (ptr_val - expansion_ptr) as usize -// } else { -// dbg!("we are here"); -// let distance_from_effective_stop = -// ptr_val - expansion_ptr - expansion.expansion_length as isize; -// self.expansions.pop(); -// ptr_val = self -// .expansions -// .last() -// .map(|exp| exp.full_expansion) -// .unwrap_or(self.input) -// .as_ptr() as isize -// + distance_from_effective_stop; -// continue; -// }; -// return index; -// } -// self.expansions.pop(); -// } -// let input_start = self.input.as_ptr() as isize; -// -// dbg!(&self, ptr_val, input_start, self.input, self.input.len()); -// -// assert!(ptr_val > input_start && ptr_val <= input_start + self.input.len() as isize); -// (ptr_val - input_start) as usize -// } - impl Error for ParserError { fn source(&self) -> Option<&(dyn Error + 'static)> { Some(&self.inner.error) @@ -184,7 +144,6 @@ pub(crate) enum ErrorKind { Environment, MathShift, HashSign, - EndOfInput, DimensionArgument, DimensionUnit, MathUnit, @@ -210,12 +169,14 @@ pub(crate) enum ErrorKind { TooManyParams, StandaloneHashSign, IncorrectMacroPrefix, + MacroSuffixNotFound, MacroAlreadyDefined, MacroNotDefined, Alignment, NewLine, ArrayNoColumns, MissingExpansion, + Token } impl Display for ErrorKind { @@ -231,7 +192,6 @@ impl Display for ErrorKind { ErrorKind::HashSign => f.write_str( "unexpected hash sign `#` character - this character can only be used in macro definitions" ), - ErrorKind::EndOfInput => f.write_str("unexpected end of input"), ErrorKind::MathUnit => f.write_str("expected mathematical units (mu) in dimension specification"), ErrorKind::Delimiter => f.write_str("expected a delimiter token"), ErrorKind::ControlSequence => f.write_str("expected a control sequence"), @@ -259,6 +219,7 @@ impl Display for ErrorKind { ErrorKind::TooManyParams => f.write_str("macro definition contains too many parameters, the maximum is 9"), ErrorKind::StandaloneHashSign => f.write_str("macro definition contains a standalone '#'"), ErrorKind::IncorrectMacroPrefix => f.write_str("macro use does not match its definition, expected it to begin with a prefix string as specified in the definition"), + ErrorKind::MacroSuffixNotFound => f.write_str("macro use does not match its definition, expected its argument(s) to end with a suffix string as specified in the definition"), ErrorKind::MacroAlreadyDefined => f.write_str("macro already defined"), ErrorKind::MacroNotDefined => f.write_str("macro not defined"), ErrorKind::DimensionArgument => f.write_str("expected a dimension or glue argument"), @@ -267,6 +228,7 @@ impl Display for ErrorKind { ErrorKind::NewLine => f.write_str("new line command not allowed in current environment"), ErrorKind::ArrayNoColumns => f.write_str("array must have at least one column of the type `c`, `l` or `r`"), ErrorKind::MissingExpansion => f.write_str("The macro definition is missing an expansion"), + ErrorKind::Token => f.write_str("expected a token"), } } } diff --git a/src/parser/lex.rs b/src/parser/lex.rs index cd40ce4..e085ffd 100644 --- a/src/parser/lex.rs +++ b/src/parser/lex.rs @@ -113,7 +113,7 @@ pub fn content_with_suffix<'a>(input: &mut &'a str, suffix: &str) -> InnerResult while escaped || !bytes[index..].starts_with(suffix.as_bytes()) { if index + suffix.len() > input.len() { *input = &input[input.len()..]; - return Err(ErrorKind::EndOfInput); + return Err(ErrorKind::MacroSuffixNotFound); } match bytes[index] { b'\\' => escaped = !escaped, @@ -181,7 +181,7 @@ pub fn control_sequence<'a>(input: &mut &'a str) -> InnerResult<&'a str> { input .chars() .next() - .map_or(Err(ErrorKind::EndOfInput), |_| { + .map_or(Err(ErrorKind::EmptyControlSequence), |_| { Err(ErrorKind::ControlSequence) }) } @@ -249,10 +249,6 @@ pub fn dimension(input: &mut &str) -> InnerResult { /// Parse a dimension unit (TeXBook p. 266). pub fn dimension_unit(input: &mut &str) -> InnerResult { *input = input.trim_start(); - if input.len() < 2 { - return Err(ErrorKind::EndOfInput); - } - let unit = input.get(0..2).ok_or(ErrorKind::DimensionUnit)?; let unit = match unit { "em" => DimensionUnit::Em, @@ -290,17 +286,17 @@ pub fn integer(input: &mut &str) -> InnerResult { pub fn unsigned_integer(input: &mut &str) -> InnerResult { // The following character must be ascii. - let next_char = input.chars().next().ok_or(ErrorKind::EndOfInput)?; + let next_char = input.chars().next().ok_or(ErrorKind::Number)?; if next_char.is_ascii_digit() { return Ok(decimal(input)); } *input = &input[1..]; match next_char { '`' => { - let mut next_byte = *input.as_bytes().first().ok_or(ErrorKind::EndOfInput)?; + let mut next_byte = *input.as_bytes().first().ok_or(ErrorKind::Number)?; if next_byte == b'\\' { *input = &input[1..]; - next_byte = *input.as_bytes().first().ok_or(ErrorKind::EndOfInput)?; + next_byte = *input.as_bytes().first().ok_or(ErrorKind::Number)?; } if next_byte.is_ascii() { *input = &input[1..]; @@ -434,7 +430,7 @@ pub fn token<'a>(input: &mut &'a str) -> InnerResult> { Ok(Token::ControlSequence(rhs_control_sequence(input)?)) } Some('%') => { - let (_, rest) = input.split_once('\n').ok_or(ErrorKind::EndOfInput)?; + let (_, rest) = input.split_once('\n').unwrap_or(("", &input[input.len()..])); *input = rest; token(input) } @@ -443,7 +439,7 @@ pub fn token<'a>(input: &mut &'a str) -> InnerResult> { *input = input.split_at(c.len_utf8()).1; Ok(Token::Character(CharToken::from_str(context))) } - None => Err(ErrorKind::EndOfInput), + None => Err(ErrorKind::Token), } } diff --git a/src/parser/macros.rs b/src/parser/macros.rs index fe70148..6bed268 100644 --- a/src/parser/macros.rs +++ b/src/parser/macros.rs @@ -160,12 +160,12 @@ impl<'input> MacroContext<'input> { let full_suffix = format!("{}{{", suffix); let (before, _) = input_rest .split_once(&full_suffix) - .ok_or(ErrorKind::EndOfInput)?; + .ok_or(ErrorKind::MacroSuffixNotFound)?; arguments.push(Err(before)); input_rest = &input_rest[before.len()..]; } else { let (before, _) = - input_rest.split_once('{').ok_or(ErrorKind::EndOfInput)?; + input_rest.split_once('{').ok_or(ErrorKind::MacroSuffixNotFound)?; arguments.push(Err(before)); input_rest = &input_rest[before.len()..]; }