Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert some unit tests to integration tests #767

Merged
merged 6 commits into from
Jun 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 20 additions & 34 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2165,8 +2165,9 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
replace(&mut self.lookahead, self.reader.next())
}

/// Returns `true` when next event is not a text event in any form.
#[inline(always)]
const fn need_trim_end(&self) -> bool {
const fn current_event_is_last_text(&self) -> bool {
// If next event is a text or CDATA, we should not trim trailing spaces
!matches!(
self.lookahead,
Expand All @@ -2182,43 +2183,27 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
/// [`CData`]: PayloadEvent::CData
fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
loop {
match self.lookahead {
Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
let text = self.next_text()?;

let mut s = result.into_owned();
s += &text;
result = Cow::Owned(s);
}
_ => break,
if self.current_event_is_last_text() {
break;
}
}
Ok(DeEvent::Text(Text { text: result }))
}

/// Read one text event, panics if current event is not a text event
///
/// |Event |XML |Handling
/// |-----------------------|---------------------------|----------------------------------------
/// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
/// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
/// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
/// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
/// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
#[inline(always)]
fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
match self.next_impl()? {
PayloadEvent::Text(mut e) => {
if self.need_trim_end() {
e.inplace_trim_end();
match self.next_impl()? {
PayloadEvent::Text(mut e) => {
if self.current_event_is_last_text() {
// FIXME: Actually, we should trim after decoding text, but now we trim before
e.inplace_trim_end();
}
result
.to_mut()
.push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
}
Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
}
PayloadEvent::CData(e) => Ok(e.decode()?),
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),

// SAFETY: this method is called only when we peeked Text or CData
_ => unreachable!("Only `Text` and `CData` events can come here"),
// SAFETY: current_event_is_last_text checks that event is Text or CData
_ => unreachable!("Only `Text` and `CData` events can come here"),
}
}
Ok(DeEvent::Text(Text { text: result }))
}

/// Return an input-borrowing event.
Expand All @@ -2228,7 +2213,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
PayloadEvent::End(e) => Ok(DeEvent::End(e)),
PayloadEvent::Text(mut e) => {
if self.need_trim_end() && e.inplace_trim_end() {
if self.current_event_is_last_text() && e.inplace_trim_end() {
// FIXME: Actually, we should trim after decoding text, but now we trim before
continue;
}
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
Expand Down
8 changes: 1 addition & 7 deletions src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
#[cfg(test)]
mod test {
use super::TokioAdapter;
use crate::reader::test::{check, small_buffers};
use crate::reader::test::check;

check!(
#[tokio::test]
Expand All @@ -370,12 +370,6 @@ mod test {
async, await
);

small_buffers!(
#[tokio::test]
read_event_into_async: tokio::io::BufReader<_>,
async, await
);

#[test]
fn test_future_is_send() {
// This test should just compile, no actual runtime checks are performed here.
Expand Down
57 changes: 1 addition & 56 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ impl Reader<BufReader<File>> {

#[cfg(test)]
mod test {
use crate::reader::test::{check, small_buffers};
use crate::reader::test::check;
use crate::reader::XmlSource;

/// Default buffer constructor just passes the byte array from the test
Expand All @@ -460,59 +460,4 @@ mod test {
identity,
&mut Vec::new()
);

small_buffers!(
#[test]
read_event_into: std::io::BufReader<_>
);

#[cfg(feature = "encoding")]
mod encoding {
    use crate::events::Event;
    use crate::reader::Reader;
    use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251};
    use pretty_assertions::assert_eq;

    /// Input starts with a UTF-16LE BOM, but its XML declaration names
    /// windows-1251. The decoder reports UTF-8 before anything is read and
    /// windows-1251 once the declaration event has been returned.
    #[test]
    fn bom_detected() {
        let input: &[u8] = b"\xFF\xFE<?xml encoding='windows-1251'?>";
        let mut xml = Reader::from_reader(input);
        let mut scratch = Vec::new();

        // Nothing has been read yet
        assert_eq!(xml.decoder().encoding(), UTF_8);

        assert!(matches!(
            xml.read_event_into(&mut scratch).unwrap(),
            Event::Decl(_)
        ));
        assert_eq!(xml.decoder().encoding(), WINDOWS_1251);

        assert_eq!(xml.read_event_into(&mut scratch).unwrap(), Event::Eof);
    }

    /// Input contains two XML declarations naming different encodings.
    /// Only the first one is applied: the decoder reports UTF-16LE after the
    /// first declaration and still reports UTF-16LE after the second.
    #[test]
    fn xml_declaration() {
        let input: &[u8] = b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>";
        let mut xml = Reader::from_reader(input);
        let mut scratch = Vec::new();

        // Nothing has been read yet
        assert_eq!(xml.decoder().encoding(), UTF_8);

        // Both declarations are reported as events, but the encoding observed
        // after each of them is the one named by the first declaration
        for _ in 0..2 {
            assert!(matches!(
                xml.read_event_into(&mut scratch).unwrap(),
                Event::Decl(_)
            ));
            assert_eq!(xml.decoder().encoding(), UTF_16LE);
        }

        assert_eq!(xml.read_event_into(&mut scratch).unwrap(), Event::Eof);
    }
}
}
149 changes: 0 additions & 149 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1826,157 +1826,8 @@ mod test {
};
}

/// Tests for https://github.com/tafia/quick-xml/issues/469
///
/// Generates a `small_buffers` module with one test per markup kind
/// (declaration, processing instruction, empty tag, CDATA, comment).
/// Each test wraps the input bytes in `$BufReader::with_capacity(size, ..)`,
/// where `size` is computed so that only the first one or two bytes of the
/// closing delimiter (`?>`, `/>`, `]]>`, `-->`) are within `size`, and then
/// asserts that the complete event is returned, followed by `Event::Eof`.
///
/// Parameters:
/// - `#[$test]`: attribute placed on every generated test function
/// - `$read_event`: name of the `Reader` method used to read events;
///   it is called with `&mut buf`
/// - `$BufReader`: buffered reader type; must provide `with_capacity(size, bytes)`
/// - `$async`, `$await` (optional): when given, the generated functions are
///   prefixed with `$async` and each read is followed by `.$await`
macro_rules! small_buffers {
(
#[$test:meta]
$read_event:ident: $BufReader:ty
$(, $async:ident, $await:ident)?
) => {
mod small_buffers {
use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;

// `size` is 7 (`<?xml ?`): the closing `?>` is cut after its first byte
#[$test]
$($async)? fn decl() {
let xml = "<?xml ?>";
// ^^^^^^^ data that fit into buffer
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

// `size` is 5 (`<?pi?`): the closing `?>` is cut after its first byte
#[$test]
$($async)? fn pi() {
let xml = "<?pi?>";
// ^^^^^ data that fit into buffer
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::PI(BytesPI::new("pi"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

// `size` is 7 (`<empty/`): the closing `/>` is cut after its first byte
#[$test]
$($async)? fn empty() {
let xml = "<empty/>";
// ^^^^^^^ data that fit into buffer
let size = xml.match_indices("/>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Empty(BytesStart::new("empty"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

// `size` is 15 (`<![CDATA[cdata]`): the closing `]]>` is cut after its first byte
#[$test]
$($async)? fn cdata1() {
let xml = "<![CDATA[cdata]]>";
// ^^^^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("]]>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

// `size` is 16 (`<![CDATA[cdata]]`): the closing `]]>` is cut after its second byte
#[$test]
$($async)? fn cdata2() {
let xml = "<![CDATA[cdata]]>";
// ^^^^^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("]]>").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

// `size` is 12 (`<!--comment-`): the closing `-->` is cut after its first byte
#[$test]
$($async)? fn comment1() {
let xml = "<!--comment-->";
// ^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("-->").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

// `size` is 13 (`<!--comment--`): the closing `-->` is cut after its second byte
#[$test]
$($async)? fn comment2() {
let xml = "<!--comment-->";
// ^^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("-->").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
}
};
}

// Export macros for the child modules:
// - buffered_reader
// - slice_reader
pub(super) use check;
pub(super) use small_buffers;
}
21 changes: 0 additions & 21 deletions src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,25 +376,4 @@ mod test {
identity,
()
);

#[cfg(feature = "encoding")]
mod encoding {
    use crate::events::Event;
    use crate::reader::Reader;
    use encoding_rs::UTF_8;
    use pretty_assertions::assert_eq;

    /// A `Reader` created with `from_str` reports UTF-8 both before and after
    /// reading an XML declaration that names another encoding (`UTF-16`).
    #[test]
    fn str_always_has_utf8() {
        let document = "<?xml encoding='UTF-16'?>";
        let mut xml = Reader::from_str(document);

        // Encoding before the declaration is read
        assert_eq!(xml.decoder().encoding(), UTF_8);

        // Consume the declaration; only the success of the read matters here
        xml.read_event().unwrap();

        // Encoding after the declaration is read
        assert_eq!(xml.decoder().encoding(), UTF_8);

        assert_eq!(xml.read_event().unwrap(), Event::Eof);
    }
}
}
23 changes: 21 additions & 2 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,20 @@ impl ReaderState {
BytesText::wrap(content, self.decoder())
}

/// reads `BytesElement` starting with a `!`,
/// return `Comment`, `CData` or `DocType` event
/// Returns `Comment`, `CData` or `DocType` event.
///
/// `buf` contains data between `<` and `>`:
/// - CDATA: `![CDATA[...]]`
/// - Comment: `!--...--`
/// - Doctype (uppercase): `!D...`
/// - Doctype (lowercase): `!d...`
pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result<Event<'b>> {
debug_assert_eq!(
buf.first(),
Some(&b'!'),
"CDATA, comment or DOCTYPE should start from '!'"
);

let uncased_starts_with = |string: &[u8], prefix: &[u8]| {
string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix)
};
Expand Down Expand Up @@ -153,7 +164,15 @@ impl ReaderState {

/// Wraps content of `buf` into the [`Event::End`] event. Also checks that the
/// end name matches the last opened start name if `self.config.check_end_names` is set.
///
/// `buf` contains data between `<` and `>`, for example `/tag`.
pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
debug_assert_eq!(
buf.first(),
Some(&b'/'),
"closing tag should start from '/'"
);

// Strip the `/` character. `content` contains data between `</` and `>`
let content = &buf[1..];
// XML standard permits whitespaces after the markup name in closing tags.
Expand Down
Loading