Skip to content

Commit

Permalink
Make trimming more consistent with regular reader api
Browse files Browse the repository at this point in the history
  • Loading branch information
elrnv committed Jul 21, 2024
1 parent 4820820 commit d66f823
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 33 deletions.
54 changes: 22 additions & 32 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2168,31 +2168,6 @@ struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolve
entity_resolver: E,
}

/// Applies `trim` to the string held by `value`, keeping the kind of `Cow`.
///
/// - A borrowed value stays borrowed: the result is whatever slice `trim`
///   returns for the borrowed string.
/// - An owned value stays owned: the original `String` is reused when `trim`
///   returns a string of the same length, otherwise the trimmed text is
///   copied into a new `String`.
fn trim_cow<'a, F>(value: Cow<'a, str>, trim: F) -> Cow<'a, str>
where
    F: FnOnce(&str) -> &str,
{
    // The borrowed case needs no allocation at all, so handle it up front.
    let owned = match value {
        Cow::Borrowed(text) => return Cow::Borrowed(trim(text)),
        Cow::Owned(owned) => owned,
    };

    let shortened = trim(&owned);
    if shortened.len() == owned.len() {
        // Same length: keep the existing buffer instead of copying it.
        Cow::Owned(owned)
    } else {
        Cow::Owned(shortened.to_string())
    }
}

/// Strips trailing whitespace from `s` in place, using [`str::trim_end`].
///
/// Returns `true` if nothing is left in `s` once the trailing whitespace has
/// been removed.
fn inplace_trim_end(s: &mut Cow<str>) -> bool {
    // Move the content out, leaving an empty borrowed placeholder behind,
    // so that it can be handed to `trim_cow` by value.
    let taken = replace(s, Cow::Borrowed(""));
    let trimmed = trim_cow(taken, str::trim_end);
    let now_empty = trimmed.is_empty();
    *s = trimmed;
    now_empty
}

impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
fn new(mut reader: R, entity_resolver: E) -> Self {
// Lookahead by one event immediately, so we do not need to check in the
Expand Down Expand Up @@ -2369,6 +2344,16 @@ where
T::deserialize(&mut de)
}

/// Deserializes a value of type `T` from an already constructed [`Reader`].
///
/// The `reader` is wrapped with `Deserializer::from_custom_reader` and `T` is
/// then deserialized from that deserializer.
///
/// # Errors
///
/// Returns the [`DeError`] produced by `T::deserialize` if deserialization
/// fails.
pub fn from_custom_reader<R, T>(reader: Reader<R>) -> Result<T, DeError>
where
    R: BufRead,
    T: DeserializeOwned,
{
    let mut deserializer = Deserializer::from_custom_reader(reader);
    let value = T::deserialize(&mut deserializer);
    value
}

// TODO: According to https://www.w3.org/TR/xmlschema11-2/#boolean,
// the only valid boolean representations are "true", "false", "1", and "0"
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
Expand Down Expand Up @@ -2875,8 +2860,6 @@ where
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
let mut reader = Reader::from_str(source);
let config = reader.config_mut();
config.trim_text_start = true;
config.trim_text_end = true;
config.expand_empty_elements = true;

Self::new(
Expand Down Expand Up @@ -3129,7 +3112,7 @@ impl StartTrimmer {
/// Converts raw reader's event into a payload event.
/// Returns `None`, if event should be skipped.
#[inline(always)]
fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
fn trim<'a>(&mut self, event: Event<'a>, trim_text_start: bool) -> Option<PayloadEvent<'a>> {
let (event, trim_next_event) = match event {
Event::DocType(e) => (PayloadEvent::DocType(e), true),
Event::Start(e) => (PayloadEvent::Start(e), true),
Expand All @@ -3140,7 +3123,10 @@ impl StartTrimmer {
Event::CData(e) => (PayloadEvent::CData(e), false),
Event::Text(mut e) => {
// If event is empty after trimming, skip it
if self.trim_start && e.inplace_trim_start() {
// Or if event is all white space, skip it regardless of trimming settings
if (trim_text_start && self.trim_start && e.inplace_trim_start())
|| e.is_all_whitespace()
{
return None;
}
(PayloadEvent::Text(e), false)
Expand Down Expand Up @@ -3233,8 +3219,9 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
loop {
self.buf.clear();

let trim_text_start = self.reader.config().trim_text_start;
let event = self.reader.read_event_into(&mut self.buf)?;
if let Some(event) = self.start_trimmer.trim(event) {
if let Some(event) = self.start_trimmer.trim(event, trim_text_start) {
return Ok(event.into_owned());
}
}
Expand Down Expand Up @@ -3303,7 +3290,10 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
loop {
let event = self.reader.read_event()?;
if let Some(event) = self.start_trimmer.trim(event) {
if let Some(event) = self
.start_trimmer
.trim(event, self.config().trim_text_start)
{
return Ok(event);
}
}
Expand Down Expand Up @@ -4481,7 +4471,7 @@ mod tests {
fn start() {
let mut de = make_de(" text <tag1><tag2>");
// Text was trimmed from both sides before this change; surrounding whitespace is now preserved
assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
assert_eq!(de.next().unwrap(), DeEvent::Eof);
Expand Down
7 changes: 6 additions & 1 deletion src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ use crate::escape::{
use crate::name::{LocalName, QName};
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string};
use crate::utils::{is_whitespace, name_len, trim_xml_end, trim_xml_start, write_cow_string};
use attributes::{Attribute, Attributes};

/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
Expand Down Expand Up @@ -622,6 +622,11 @@ impl<'a> BytesText<'a> {
self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
self.content.is_empty()
}

/// Returns `true` if every byte of the content is a whitespace byte, as
/// judged by `is_whitespace`.
///
/// Content with no bytes at all also yields `true`, because `all` holds
/// vacuously for an empty iterator.
///
/// The method only reads the content, so it borrows `self` immutably.
pub fn is_all_whitespace(&self) -> bool {
    self.content.iter().all(|&byte| is_whitespace(byte))
}
}

impl<'a> Debug for BytesText<'a> {
Expand Down
7 changes: 7 additions & 0 deletions tests/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ small_buffers_tests!(
read_event_into: std::io::BufReader<_>
);

/// With a reader built by `Reader::from_str`, the first event of a text-only
/// input keeps its leading and trailing whitespace.
#[test]
fn test_text() {
    let mut reader = Reader::from_str(" text ");

    let event = reader.read_event().unwrap();
    assert_eq!(event, Text(BytesText::new(" text ")));
}

#[test]
fn test_start_end() {
let mut r = Reader::from_str("<a></a>");
Expand Down

0 comments on commit d66f823

Please sign in to comment.