From 467daf81489533b301a41da6ffea36f5f612db9f Mon Sep 17 00:00:00 2001
From: Eric Scouten
Date: Sun, 10 Dec 2023 17:05:15 -0800
Subject: [PATCH] Add primitive parser for normalized line

Normalized is defined as having any trailing spaces removed.
---

Review notes (free text after the `---` separator; not part of the commit
message and not part of any hunk):

  * `normalized_line` runs the chain `take_till(|c| c == '\n')` ->
    `trim_rem_start_matches(ri, '\n')` -> `trim_rem_end_matches(ri, '\r')`
    and then applies the new `trim_trailing_spaces` step to the result.
    The visible parts of `line` use the same `take_till` / `trim_rem_*`
    steps, so `normalized_line` presumably equals `line` followed by
    `trim_trailing_spaces` -- the middle of `line` falls between hunks
    and is not shown here, so confirm before deduplicating.
  * `trim_trailing_spaces` leaves the remaining input (tuple field 0)
    untouched and calls `trim_end_matches(' ')` on the parsed line (tuple
    field 1). Only the space character is stripped; tabs and other
    whitespace at the end of a line are kept, which matches the wording
    "All trailing spaces are removed".
  * Spaces are trimmed after the `'\r'` step. None of the tests in this
    patch feeds spaces directly before `\r\n`; consider adding one.
  * In this copy of the patch the `trailing_space` and `trailing_spaces`
    tests in `mod normalized_line` carry the same input literal.
    NOTE(review): runs of spaces may have been collapsed in transit --
    verify against the repository.

 src/primitives/line.rs       | 25 +++++++++++++++++
 src/primitives/mod.rs        |  2 +-
 src/tests/primitives/line.rs | 53 ++++++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/src/primitives/line.rs b/src/primitives/line.rs
index 9fe0975..db2bcec 100644
--- a/src/primitives/line.rs
+++ b/src/primitives/line.rs
@@ -1,5 +1,10 @@
 use nom::{bytes::complete::take_till, IResult};
 
+/// Return a single line from the source.
+///
+/// A line is terminated by end-of-input or a single `\n` character
+/// or a single `\r\n` sequence. The end of line sequence is consumed
+/// but not included in the returned line.
 #[allow(dead_code)] // TEMPORARY
 pub(crate) fn line(input: &str) -> IResult<&str, &str> {
     take_till(|c| c == '\n')(input)
@@ -7,6 +12,21 @@ pub(crate) fn line(input: &str) -> IResult<&str, &str> {
         .map(|ri| trim_rem_end_matches(ri, '\r'))
 }
 
+/// Return a single _normalized_ line from the source.
+///
+/// A line is terminated by end-of-input or a single `\n` character
+/// or a single `\r\n` sequence. The end of line sequence is consumed
+/// but not included in the returned line.
+///
+/// All trailing spaces are removed from the line.
+#[allow(dead_code)] // TEMPORARY
+pub(crate) fn normalized_line(input: &str) -> IResult<&str, &str> {
+    take_till(|c| c == '\n')(input)
+        .map(|ri| trim_rem_start_matches(ri, '\n'))
+        .map(|ri| trim_rem_end_matches(ri, '\r'))
+        .map(trim_trailing_spaces)
+}
+
 #[allow(dead_code)] // TEMPORARY
 fn trim_rem_start_matches<'a>(rem_inp: (&'a str, &'a str), c: char) -> (&'a str, &'a str) {
     if let Some(rem) = rem_inp.0.strip_prefix(c) {
@@ -24,3 +44,8 @@ fn trim_rem_end_matches<'a>(rem_inp: (&'a str, &'a str), c: char) -> (&'a str, &
         rem_inp
     }
 }
+
+#[allow(dead_code)] // TEMPORARY
+fn trim_trailing_spaces<'a>(rem_inp: (&'a str, &'a str)) -> (&'a str, &'a str) {
+    (rem_inp.0, rem_inp.1.trim_end_matches(' '))
+}
diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs
index bfaed2a..e91fe93 100644
--- a/src/primitives/mod.rs
+++ b/src/primitives/mod.rs
@@ -3,4 +3,4 @@
 mod line;
 
 #[allow(unused_imports)]
-pub(crate) use line::line;
+pub(crate) use line::{line, normalized_line};
diff --git a/src/tests/primitives/line.rs b/src/tests/primitives/line.rs
index 2d6288c..922f3b3 100644
--- a/src/tests/primitives/line.rs
+++ b/src/tests/primitives/line.rs
@@ -11,6 +11,11 @@ mod fn_line {
         assert_eq!(line("abc"), Ok(("", "abc")));
     }
 
+    #[test]
+    fn trailing_space() {
+        assert_eq!(line("abc "), Ok(("", "abc ")));
+    }
+
     #[test]
     fn consumes_lf() {
         // Should consume but not return \n.
@@ -35,3 +40,51 @@ mod fn_line {
         assert_eq!(line("abc\rdef"), Ok(("", "abc\rdef")));
     }
 }
+
+mod normalized_line {
+    use crate::primitives::normalized_line;
+
+    #[test]
+    fn empty_source() {
+        assert_eq!(normalized_line(""), Ok(("", "")));
+    }
+
+    #[test]
+    fn simple_line() {
+        assert_eq!(normalized_line("abc"), Ok(("", "abc")));
+    }
+
+    #[test]
+    fn trailing_space() {
+        assert_eq!(normalized_line("abc "), Ok(("", "abc")));
+    }
+
+    #[test]
+    fn trailing_spaces() {
+        assert_eq!(normalized_line("abc "), Ok(("", "abc")));
+    }
+
+    #[test]
+    fn consumes_lf() {
+        // Should consume but not return \n.
+        assert_eq!(normalized_line("abc \ndef"), Ok(("def", "abc")));
+    }
+
+    #[test]
+    fn consumes_crlf() {
+        // Should consume but not return \r\n.
+        assert_eq!(normalized_line("abc\r\ndef"), Ok(("def", "abc")));
+    }
+
+    #[test]
+    fn doesnt_consume_lfcr() {
+        // Should consume \n but not a subsequent \r.
+        assert_eq!(normalized_line("abc\n\rdef"), Ok(("\rdef", "abc")));
+    }
+
+    #[test]
+    fn doesnt_consume_standalone_cr() {
+        // Shouldn't terminate normalized_line at \r without \n.
+        assert_eq!(normalized_line("abc\rdef"), Ok(("", "abc\rdef")));
+    }
+}