From 3c3f4943cbbda350a26e82f4b33ace36c4b1f038 Mon Sep 17 00:00:00 2001 From: Eric Scouten Date: Sun, 10 Dec 2023 18:10:36 -0800 Subject: [PATCH] Add primitive parser for normalized, non-empty line (#6) --- src/primitives/line.rs | 35 ++++++++++++++++++- src/primitives/mod.rs | 2 +- src/tests/primitives/line.rs | 66 ++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 2 deletions(-) diff --git a/src/primitives/line.rs b/src/primitives/line.rs index db2bcec..07f8da2 100644 --- a/src/primitives/line.rs +++ b/src/primitives/line.rs @@ -1,4 +1,7 @@ -use nom::{bytes::complete::take_till, IResult}; +use nom::{ + bytes::complete::{take_till, take_till1}, + IResult, +}; /// Return a single line from the source. /// @@ -27,6 +30,36 @@ pub(crate) fn normalized_line(input: &str) -> IResult<&str, &str> { .map(trim_trailing_spaces) } +/// Return a single _normalized, non-empty_ line from the source. +/// +/// A line is terminated by end-of-input or a single `\n` character +/// or a single `\r\n` sequence. The end of line sequence is consumed +/// but not included in the returned line. +/// +/// All trailing spaces are removed from the line. +/// +/// Returns an error if the line becomes empty after trailing spaces have been +/// removed. 
+#[allow(dead_code)] // TEMPORARY +pub(crate) fn non_empty_line(input: &str) -> IResult<&str, &str> { + use nom::{ + error::{Error, ErrorKind}, + Err, + }; + + take_till1(|c| c == '\n')(input) + .map(|ri| trim_rem_start_matches(ri, '\n')) + .map(|ri| trim_rem_end_matches(ri, '\r')) + .map(trim_trailing_spaces) + .and_then(|(rem, inp)| { + if inp.is_empty() { + Err(Err::Error(Error::new(input, ErrorKind::TakeTill1))) + } else { + Ok((rem, inp)) + } + }) +} + #[allow(dead_code)] // TEMPORARY fn trim_rem_start_matches<'a>(rem_inp: (&'a str, &'a str), c: char) -> (&'a str, &'a str) { if let Some(rem) = rem_inp.0.strip_prefix(c) { diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs index e91fe93..afeadd7 100644 --- a/src/primitives/mod.rs +++ b/src/primitives/mod.rs @@ -3,4 +3,4 @@ mod line; #[allow(unused_imports)] -pub(crate) use line::{line, normalized_line}; +pub(crate) use line::{line, non_empty_line, normalized_line}; diff --git a/src/tests/primitives/line.rs b/src/tests/primitives/line.rs index 922f3b3..9d92834 100644 --- a/src/tests/primitives/line.rs +++ b/src/tests/primitives/line.rs @@ -88,3 +88,69 @@ mod normalized_line { assert_eq!(normalized_line("abc\rdef"), Ok(("", "abc\rdef"))); } } + +mod non_empty_line { + use nom::{ + error::{Error, ErrorKind}, + Err, + }; + + use crate::primitives::non_empty_line; + + #[test] + fn empty_source() { + let expected_err: Err<Error<&str>> = Err::Error(Error::new("", ErrorKind::TakeTill1)); + + let actual_err = non_empty_line("").unwrap_err(); + + assert_eq!(expected_err, actual_err); + } + + #[test] + fn only_spaces() { + let expected_err: Err<Error<&str>> = Err::Error(Error::new(" ", ErrorKind::TakeTill1)); + + let actual_err = non_empty_line(" ").unwrap_err(); + + assert_eq!(expected_err, actual_err); + } + + #[test] + fn simple_line() { + assert_eq!(non_empty_line("abc"), Ok(("", "abc"))); + } + + #[test] + fn trailing_space() { + assert_eq!(non_empty_line("abc "), Ok(("", "abc"))); + } + + #[test] + fn trailing_spaces() { 
assert_eq!(non_empty_line("abc "), Ok(("", "abc"))); + } + + #[test] + fn consumes_lf() { + // Should consume but not return \n. + assert_eq!(non_empty_line("abc \ndef"), Ok(("def", "abc"))); + } + + #[test] + fn consumes_crlf() { + // Should consume but not return \r\n. + assert_eq!(non_empty_line("abc\r\ndef"), Ok(("def", "abc"))); + } + + #[test] + fn doesnt_consume_lfcr() { + // Should consume \n but not a subsequent \r. + assert_eq!(non_empty_line("abc\n\rdef"), Ok(("\rdef", "abc"))); + } + + #[test] + fn doesnt_consume_standalone_cr() { + // Shouldn't terminate non_empty_line at \r without \n. + assert_eq!(non_empty_line("abc\rdef"), Ok(("", "abc\rdef"))); + } +}