diff --git a/src/de/mod.rs b/src/de/mod.rs index 673ad781..b7f07dd1 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2168,31 +2168,6 @@ struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolve entity_resolver: E, } -fn trim_cow<'a, F>(value: Cow<'a, str>, trim: F) -> Cow<'a, str> -where - F: FnOnce(&str) -> &str, -{ - match value { - Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)), - Cow::Owned(mut bytes) => { - let trimmed = trim(&bytes); - if trimmed.len() != bytes.len() { - bytes = trimmed.to_string(); - } - Cow::Owned(bytes) - } - } -} - -/// Removes trailing XML whitespace bytes from text content. -/// -/// Returns `true` if content is empty after that -fn inplace_trim_end(mut s: &mut Cow) -> bool { - let c: Cow = replace(&mut s, Cow::Borrowed("")); - *s = trim_cow(c, str::trim_end); - s.is_empty() -} - impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { fn new(mut reader: R, entity_resolver: E) -> Self { // Lookahead by one event immediately, so we do not need to check in the @@ -2369,6 +2344,16 @@ where T::deserialize(&mut de) } +/// Deserialize from a custom reader. +pub fn from_custom_reader(reader: Reader) -> Result +where + R: BufRead, + T: DeserializeOwned, +{ + let mut de = Deserializer::from_custom_reader(reader); + T::deserialize(&mut de) +} + // TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean, // valid boolean representations are only "true", "false", "1", and "0" fn str2bool<'de, V>(value: &str, visitor: V) -> Result @@ -2875,8 +2860,6 @@ where pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self { let mut reader = Reader::from_str(source); let config = reader.config_mut(); - config.trim_text_start = true; - config.trim_text_end = true; config.expand_empty_elements = true; Self::new( @@ -3129,7 +3112,7 @@ impl StartTrimmer { /// Converts raw reader's event into a payload event. /// Returns `None`, if event should be skipped. #[inline(always)] - fn trim<'a>(&mut self, event: Event<'a>) -> Option> { + fn trim<'a>(&mut self, event: Event<'a>, trim_text_start: bool) -> Option> { let (event, trim_next_event) = match event { Event::DocType(e) => (PayloadEvent::DocType(e), true), Event::Start(e) => (PayloadEvent::Start(e), true), @@ -3140,7 +3123,10 @@ impl StartTrimmer { Event::CData(e) => (PayloadEvent::CData(e), false), Event::Text(mut e) => { // If event is empty after trimming, skip it - if self.trim_start && e.inplace_trim_start() { + // Or if event is all white space, skip it regardless of trimming settings + if (trim_text_start && self.trim_start && e.inplace_trim_start()) + || e.is_all_whitespace() + { return None; } (PayloadEvent::Text(e), false) @@ -3233,8 +3219,9 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader { loop { self.buf.clear(); + let trim_text_start = self.reader.config().trim_text_start; let event = self.reader.read_event_into(&mut self.buf)?; - if let Some(event) = self.start_trimmer.trim(event) { + if let Some(event) = self.start_trimmer.trim(event, trim_text_start) { return Ok(event.into_owned()); } } @@ -3303,7 +3290,10 @@ impl<'de> XmlRead<'de> for SliceReader<'de> { fn next(&mut self) -> Result, DeError> { loop { let event = self.reader.read_event()?; - if let Some(event) = self.start_trimmer.trim(event) { + if let Some(event) = self + .start_trimmer + .trim(event, self.config().trim_text_start) + { return Ok(event); } } @@ -4481,7 +4471,7 @@ mod tests { fn start() { let mut de = make_de(" text "); // Text is trimmed from both sides - assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); + assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); diff --git a/src/events/mod.rs b/src/events/mod.rs index c9acb5b7..26f2b55f 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -53,7 +53,7 @@ use crate::escape::{ use crate::name::{LocalName, QName}; #[cfg(feature = "serialize")] use crate::utils::CowRef; -use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string}; +use crate::utils::{is_whitespace, name_len, trim_xml_end, trim_xml_start, write_cow_string}; use attributes::{Attribute, Attributes}; /// Opening tag data (`Event::Start`), with optional attributes: ``. @@ -622,6 +622,11 @@ impl<'a> BytesText<'a> { self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end); self.content.is_empty() } + + /// Returns `true` if all characters are whitespace characters. + pub fn is_all_whitespace(&mut self) -> bool { + self.content.iter().all(|&x| is_whitespace(x)) + } } impl<'a> Debug for BytesText<'a> { diff --git a/tests/reader.rs b/tests/reader.rs index 2bc27e57..28791679 100644 --- a/tests/reader.rs +++ b/tests/reader.rs @@ -15,6 +15,13 @@ small_buffers_tests!( read_event_into: std::io::BufReader<_> ); +#[test] +fn test_text() { + let mut r = Reader::from_str(" text "); + + assert_eq!(r.read_event().unwrap(), Text(BytesText::new(" text "))); +} + #[test] fn test_start_end() { let mut r = Reader::from_str("");