Skip to content

Commit

Permalink
Add new example which demonstrates new Reader API
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Jun 23, 2024
1 parent 0d8ac7b commit 908ac15
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 1 deletion.
192 changes: 192 additions & 0 deletions examples/high-level-entities.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
//! This example demonstrates how custom entities can be extracted from the DOCTYPE
//! and how to use the high-level `Reader` API.
//!
//! NB: this example is deliberately kept simple:
//! * the regex in this example is simple but brittle;
//! * it does not support the use of entities in entity declaration.

use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::Infallible;
use std::io::{BufRead, Cursor};

use quick_xml::events::{BytesEnd, BytesStart, BytesText};
use quick_xml::reader::{Event, Entity, EntityResolver, Reader, RawReader};
use regex::bytes::Regex;

use pretty_assertions::assert_eq;

/// Main document parsed by this example.
///
/// Its DOCTYPE declares three internal entities (`text`, `element1` and
/// `element2`). The document itself references `&text;` inside an attribute value,
/// `&element2;` as element content, and `&external;`, which is not declared in the
/// DTD at all: `MyResolver::resolve` special-cases that name.
const XML1: &str = r#"
<!DOCTYPE test [
<!ENTITY text "hello world" >
<!ENTITY element1 "<dtd attr = 'Message: &text;'/>" >
<!ENTITY element2 "<a>&element1;</a>" >
]>
<test label="Message: &text;">&element2;</test>
&external;
"#;

/// Additional document that `MyResolver::resolve` returns for the entity named
/// `external`. In a real document it would be referenced by a declaration such as
/// `<!ENTITY external SYSTEM "URI to the document, for example, relative file path" >`
const XML2: &str = r#"
<?xml version='1.0'?>
<external>text</external>
"#;

/// Entity resolver used by this example: it remembers entities declared in the
/// DOCTYPE and hands their replacement text back to the reader on request.
///
/// The `'i` lifetime is the lifetime of the input that captured names and values
/// may borrow from.
struct MyResolver<'i> {
/// Map of captured internal _parsed general entities_. _Parsed_ means that
/// value of the entity is parsed by XML reader.
///
/// Keys are entity names, values are the replacement text. Both are `Cow`s so
/// that they can either borrow from the input or own a copy of the bytes.
entities: HashMap<Cow<'i, [u8]>, Cow<'i, [u8]>>,
/// In this example we use simple regular expression to capture entities from DTD.
/// In real application you should use DTD parser.
entity_re: Regex,
}
impl<'i> MyResolver<'i> {
fn new() -> Result<Self, regex::Error> {
Ok(Self {
entities: Default::default(),
// Capture "name" and "content" from such string:
// <!ENTITY name "content" >
entity_re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?,
})
}
fn capture_borrowed(&mut self, doctype: &'i [u8]) {
for cap in self.entity_re.captures_iter(doctype) {
self.entities.insert(
cap.get(1).unwrap().as_bytes().into(),
cap.get(2).unwrap().as_bytes().into(),
);
}
}
fn capture_owned(&mut self, doctype: Vec<u8>) {
for cap in self.entity_re.captures_iter(&doctype) {
self.entities.insert(
cap.get(1).unwrap().as_bytes().to_owned().into(),
cap.get(2).unwrap().as_bytes().to_owned().into(),
);
}
}
}
impl<'i> EntityResolver<'i> for MyResolver<'i> {
    type Error = Infallible;

    /// Extracts entity declarations from the content of the DOCTYPE.
    ///
    /// Borrowed content is captured without copying; owned content is captured
    /// by copying the matched names and values. This never fails.
    fn capture(&mut self, doctype: BytesText<'i>) -> Result<(), Self::Error> {
        let content = doctype.into_inner();
        match content {
            Cow::Owned(bytes) => self.capture_owned(bytes),
            Cow::Borrowed(bytes) => self.capture_borrowed(bytes),
        }
        Ok(())
    }

    /// Looks up the replacement for the entity named `entity`.
    ///
    /// The name `external` is always served from `XML2`. Any other name is looked
    /// up among the captured entities; `None` is returned for unknown names.
    fn resolve(&self, entity: &str) -> Option<Entity<'i>> {
        if entity == "external" {
            let document = Cursor::new(XML2.as_bytes());
            return Some(Entity::External(Box::new(document)));
        }

        let known = self.entities.get(entity.as_bytes())?;
        let resolved = match known {
            Cow::Borrowed(replacement) => Entity::Internal(replacement),
            // An owned replacement is handed out as a reader over a copy of its bytes
            Cow::Owned(replacement) => {
                Entity::External(Box::new(Cursor::new(replacement.clone())))
            }
        };
        Some(resolved)
    }
}

/// Parses `XML1` from a string slice, so the events will borrow from the first
/// document, and checks the exact sequence of produced events.
///
/// # Errors
///
/// Returns an error if the resolver cannot be created or if reading an event fails.
fn borrowed() -> Result<(), Box<dyn std::error::Error>> {
    let mut raw = RawReader::from_str(XML1);
    raw.config_mut().trim_text(true);

    let resolver = MyResolver::new()?;
    let mut r = Reader::borrowed(raw, resolver);

    // Root element of the main document
    let root_start = BytesStart::from_content(r#"test label="Message: &text;""#, 4);
    assert_eq!(r.read_event()?, Event::Start(root_start));

    //--------------------------------------------------------------------------
    // Events below come from the replacement text of entities defined in DTD
    let a_start = BytesStart::new("a");
    assert_eq!(r.read_event()?, Event::Start(a_start));

    let dtd_empty = BytesStart::from_content(r#"dtd attr = 'Message: &text;'"#, 3);
    assert_eq!(r.read_event()?, Event::Empty(dtd_empty));

    let a_end = BytesEnd::new("a");
    assert_eq!(r.read_event()?, Event::End(a_end));
    //--------------------------------------------------------------------------

    let root_end = BytesEnd::new("test");
    assert_eq!(r.read_event()?, Event::End(root_end));

    //--------------------------------------------------------------------------
    // Events below come from the external document
    let external_start = BytesStart::new("external");
    assert_eq!(r.read_event()?, Event::Start(external_start));

    let external_text = BytesText::new("text");
    assert_eq!(r.read_event()?, Event::Text(external_text));

    let external_end = BytesEnd::new("external");
    assert_eq!(r.read_event()?, Event::End(external_end));
    //--------------------------------------------------------------------------

    assert_eq!(r.read_event()?, Event::Eof);

    Ok(())
}

/// Parses `XML1` through a boxed `BufRead`, so the events will always copy data,
/// and checks the exact sequence of produced events.
///
/// # Errors
///
/// Returns an error if the resolver cannot be created or if reading an event fails.
fn buffered() -> Result<(), Box<dyn std::error::Error>> {
    let source = Cursor::new(XML1.as_bytes());
    let boxed: Box<dyn BufRead> = Box::new(source);
    let mut raw = RawReader::from_reader(boxed);
    raw.config_mut().trim_text(true);

    let resolver = MyResolver::new()?;
    let mut r = Reader::buffered(raw, resolver);

    // Root element of the main document
    let root_start = BytesStart::from_content(r#"test label="Message: &text;""#, 4);
    assert_eq!(r.read_event()?, Event::Start(root_start));

    //--------------------------------------------------------------------------
    // Events below come from the replacement text of entities defined in DTD
    let a_start = BytesStart::new("a");
    assert_eq!(r.read_event()?, Event::Start(a_start));

    let dtd_empty = BytesStart::from_content(r#"dtd attr = 'Message: &text;'"#, 3);
    assert_eq!(r.read_event()?, Event::Empty(dtd_empty));

    let a_end = BytesEnd::new("a");
    assert_eq!(r.read_event()?, Event::End(a_end));
    //--------------------------------------------------------------------------

    let root_end = BytesEnd::new("test");
    assert_eq!(r.read_event()?, Event::End(root_end));

    //--------------------------------------------------------------------------
    // Events below come from the external document
    let external_start = BytesStart::new("external");
    assert_eq!(r.read_event()?, Event::Start(external_start));

    let external_text = BytesText::new("text");
    assert_eq!(r.read_event()?, Event::Text(external_text));

    let external_end = BytesEnd::new("external");
    assert_eq!(r.read_event()?, Event::End(external_end));
    //--------------------------------------------------------------------------

    assert_eq!(r.read_event()?, Event::Eof);

    Ok(())
}

/// Runs both variants of the example, stopping at the first failure.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // First the variant whose events borrow from the first document...
    borrowed()?;
    // ...then the variant whose events always copy data
    buffered()
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! This example demonstrate how custom entities can be extracted from the DOCTYPE,
//! and later use to:
//! usage of the low-level `RawReader` API, and later used to:
//! - insert new pieces of document (particular case - insert only textual content)
//! - decode attribute values
//!
Expand Down

0 comments on commit 908ac15

Please sign in to comment.