From 908ac15e168d7c36edd1aa4f9ccf7060797f87b9 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 23 Jun 2024 02:15:33 +0500 Subject: [PATCH] Add new example which demonstrates new Reader API --- examples/high-level-entities.rs | 192 ++++++++++++++++++ ...stom_entities.rs => low-level-entities.rs} | 2 +- 2 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 examples/high-level-entities.rs rename examples/{custom_entities.rs => low-level-entities.rs} (99%) diff --git a/examples/high-level-entities.rs b/examples/high-level-entities.rs new file mode 100644 index 00000000..cff5840b --- /dev/null +++ b/examples/high-level-entities.rs @@ -0,0 +1,192 @@ +//! This example demonstrates how custom entities can be extracted from the DOCTYPE +//! and the usage of the high-level `Reader` API. +//! +//! NB: this example is deliberately kept simple: +//! * the regex in this example is simple but brittle; +//! * it does not support the use of entities in entity declaration. + +use std::borrow::Cow; +use std::collections::HashMap; +use std::convert::Infallible; +use std::io::{BufRead, Cursor}; + +use quick_xml::events::{BytesEnd, BytesStart, BytesText}; +use quick_xml::reader::{Event, Entity, EntityResolver, Reader, RawReader}; +use regex::bytes::Regex; + +use pretty_assertions::assert_eq; + +const XML1: &str = r#" + +" > +&element1;" > +]> +&element2; +&external; +"#; + +/// Additional document which in reality would be referenced by +/// `` +const XML2: &str = r#" + +text +"#; + +struct MyResolver<'i> { + /// Map of captured internal _parsed general entities_. _Parsed_ means that + /// the value of the entity is parsed by the XML reader. + entities: HashMap, Cow<'i, [u8]>>, + /// In this example we use a simple regular expression to capture entities from the DTD. + /// In a real application you should use a DTD parser. 
+ entity_re: Regex, +} +impl<'i> MyResolver<'i> { + fn new() -> Result { + Ok(Self { + entities: Default::default(), + // Capture "name" and "content" from such string: + // + entity_re: Regex::new(r#""#)?, + }) + } + fn capture_borrowed(&mut self, doctype: &'i [u8]) { + for cap in self.entity_re.captures_iter(doctype) { + self.entities.insert( + cap.get(1).unwrap().as_bytes().into(), + cap.get(2).unwrap().as_bytes().into(), + ); + } + } + fn capture_owned(&mut self, doctype: Vec) { + for cap in self.entity_re.captures_iter(&doctype) { + self.entities.insert( + cap.get(1).unwrap().as_bytes().to_owned().into(), + cap.get(2).unwrap().as_bytes().to_owned().into(), + ); + } + } +} +impl<'i> EntityResolver<'i> for MyResolver<'i> { + type Error = Infallible; + + fn capture(&mut self, doctype: BytesText<'i>) -> Result<(), Self::Error> { + match doctype.into_inner() { + Cow::Borrowed(doctype) => self.capture_borrowed(doctype), + Cow::Owned(doctype) => self.capture_owned(doctype), + } + Ok(()) + } + + fn resolve(&self, entity: &str) -> Option> { + if entity == "external" { + return Some(Entity::External(Box::new(Cursor::new(XML2.as_bytes())))); + } + match self.entities.get(entity.as_bytes()) { + Some(Cow::Borrowed(replacement)) => Some(Entity::Internal(replacement)), + Some(Cow::Owned(replacement)) => { + Some(Entity::External(Box::new(Cursor::new(replacement.clone())))) + } + None => None, + } + } +} + +/// In this example the events will borrow from the first document +fn borrowed() -> Result<(), Box> { + let mut reader = RawReader::from_str(XML1); + reader.config_mut().trim_text(true); + + let mut r = Reader::borrowed(reader, MyResolver::new()?); + + assert_eq!( + r.read_event()?, + Event::Start(BytesStart::from_content( + r#"test label="Message: &text;""#, + 4 + )) + ); + + //-------------------------------------------------------------------------- + // This part was inserted into original document from entity defined in DTD + assert_eq!(r.read_event()?, 
Event::Start(BytesStart::new("a"))); + assert_eq!( + r.read_event()?, + Event::Empty(BytesStart::from_content( + r#"dtd attr = 'Message: &text;'"#, + 3 + )) + ); + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a"))); + //-------------------------------------------------------------------------- + + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test"))); + + //-------------------------------------------------------------------------- + // Start of external document + assert_eq!( + r.read_event()?, + Event::Start(BytesStart::new("external")) + ); + assert_eq!(r.read_event()?, Event::Text(BytesText::new("text"))); + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external"))); + //-------------------------------------------------------------------------- + + assert_eq!(r.read_event()?, Event::Eof); + + Ok(()) +} + +/// In this example the events will always copy data +fn buffered() -> Result<(), Box> { + let boxed: Box = Box::new(Cursor::new(XML1.as_bytes())); + let mut reader = RawReader::from_reader(boxed); + reader.config_mut().trim_text(true); + + let mut r = Reader::buffered(reader, MyResolver::new()?); + + assert_eq!( + r.read_event()?, + Event::Start(BytesStart::from_content( + r#"test label="Message: &text;""#, + 4 + )) + ); + + //-------------------------------------------------------------------------- + // This part was inserted into original document from entity defined in DTD + assert_eq!(r.read_event()?, Event::Start(BytesStart::new("a"))); + assert_eq!( + r.read_event()?, + Event::Empty(BytesStart::from_content( + r#"dtd attr = 'Message: &text;'"#, + 3 + )) + ); + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a"))); + //-------------------------------------------------------------------------- + + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test"))); + + //-------------------------------------------------------------------------- + // Start of external document + assert_eq!( + r.read_event()?, + 
+ Event::Start(BytesStart::new("external")) + ); + assert_eq!(r.read_event()?, Event::Text(BytesText::new("text"))); + assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external"))); + //-------------------------------------------------------------------------- + + assert_eq!(r.read_event()?, Event::Eof); + + Ok(()) +} + +fn main() -> Result<(), Box> { + // In this example the events will borrow from the first document + borrowed()?; + // In this example the events will always copy data + buffered()?; + Ok(()) +} diff --git a/examples/custom_entities.rs b/examples/low-level-entities.rs similarity index 99% rename from examples/custom_entities.rs rename to examples/low-level-entities.rs index 1c6e3bb4..a21ca4d4 100644 --- a/examples/custom_entities.rs +++ b/examples/low-level-entities.rs @@ -1,5 +1,5 @@ //! This example demonstrate how custom entities can be extracted from the DOCTYPE, -//! and later use to: +//! using the low-level `RawReader` API, and later used to: //! - insert new pieces of document (particular case - insert only textual content) //! - decode attribute values //!