-
Notifications
You must be signed in to change notification settings - Fork 236
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new example which demonstrates new Reader API
- Loading branch information
Showing
2 changed files
with
193 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
//! This example demonstrates how custom entities can be extracted from the DOCTYPE
//! and how to use the high-level `Reader` API.
//!
//! NB: this example is deliberately kept simple:
//! * the regex in this example is simple but brittle;
//! * it does not support the use of entities in entity declarations.
|
||
use std::borrow::Cow; | ||
use std::collections::HashMap; | ||
use std::convert::Infallible; | ||
use std::io::{BufRead, Cursor}; | ||
|
||
use quick_xml::events::{BytesEnd, BytesStart, BytesText}; | ||
use quick_xml::reader::{Event, Entity, EntityResolver, Reader, RawReader}; | ||
use regex::bytes::Regex; | ||
|
||
use pretty_assertions::assert_eq; | ||
|
||
/// Main document of the example. Its internal DTD subset declares the `text`,
/// `element1` and `element2` entities. The `test` element references `&text;`
/// in its `label` attribute and `&element2;` in its content, and `&external;`
/// (not declared in the DTD) is referenced after the root element.
const XML1: &str = r#" | ||
<!DOCTYPE test [ | ||
<!ENTITY text "hello world" > | ||
<!ENTITY element1 "<dtd attr = 'Message: &text;'/>" > | ||
<!ENTITY element2 "<a>&element1;</a>" > | ||
]> | ||
<test label="Message: &text;">&element2;</test> | ||
&external; | ||
"#; | ||
|
||
/// Additional document which in reality would be referenced by
/// `<!ENTITY external SYSTEM "URI to the document, for example, relative file path" >`.
///
/// In this example it is what `MyResolver::resolve` hands out for the entity
/// named `external`.
const XML2: &str = r#" | ||
<?xml version='1.0'?> | ||
<external>text</external> | ||
"#; | ||
|
||
struct MyResolver<'i> { | ||
/// Map of captured internal _parsed general entities_. _Parsed_ means that | ||
/// value of the entity is parsed by XML reader. | ||
entities: HashMap<Cow<'i, [u8]>, Cow<'i, [u8]>>, | ||
/// In this example we use simple regular expression to capture entities from DTD. | ||
/// In real application you should use DTD parser. | ||
entity_re: Regex, | ||
} | ||
impl<'i> MyResolver<'i> { | ||
fn new() -> Result<Self, regex::Error> { | ||
Ok(Self { | ||
entities: Default::default(), | ||
// Capture "name" and "content" from such string: | ||
// <!ENTITY name "content" > | ||
entity_re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?, | ||
}) | ||
} | ||
fn capture_borrowed(&mut self, doctype: &'i [u8]) { | ||
for cap in self.entity_re.captures_iter(doctype) { | ||
self.entities.insert( | ||
cap.get(1).unwrap().as_bytes().into(), | ||
cap.get(2).unwrap().as_bytes().into(), | ||
); | ||
} | ||
} | ||
fn capture_owned(&mut self, doctype: Vec<u8>) { | ||
for cap in self.entity_re.captures_iter(&doctype) { | ||
self.entities.insert( | ||
cap.get(1).unwrap().as_bytes().to_owned().into(), | ||
cap.get(2).unwrap().as_bytes().to_owned().into(), | ||
); | ||
} | ||
} | ||
} | ||
impl<'i> EntityResolver<'i> for MyResolver<'i> { | ||
type Error = Infallible; | ||
|
||
fn capture(&mut self, doctype: BytesText<'i>) -> Result<(), Self::Error> { | ||
match doctype.into_inner() { | ||
Cow::Borrowed(doctype) => self.capture_borrowed(doctype), | ||
Cow::Owned(doctype) => self.capture_owned(doctype), | ||
} | ||
Ok(()) | ||
} | ||
|
||
fn resolve(&self, entity: &str) -> Option<Entity<'i>> { | ||
if entity == "external" { | ||
return Some(Entity::External(Box::new(Cursor::new(XML2.as_bytes())))); | ||
} | ||
match self.entities.get(entity.as_bytes()) { | ||
Some(Cow::Borrowed(replacement)) => Some(Entity::Internal(replacement)), | ||
Some(Cow::Owned(replacement)) => { | ||
Some(Entity::External(Box::new(Cursor::new(replacement.clone())))) | ||
} | ||
None => None, | ||
} | ||
} | ||
} | ||
|
||
/// In this example the events will borrow from the first document | ||
fn borrowed() -> Result<(), Box<dyn std::error::Error>> { | ||
let mut reader = RawReader::from_str(XML1); | ||
reader.config_mut().trim_text(true); | ||
|
||
let mut r = Reader::borrowed(reader, MyResolver::new()?); | ||
|
||
assert_eq!( | ||
r.read_event()?, | ||
Event::Start(BytesStart::from_content( | ||
r#"test label="Message: &text;""#, | ||
4 | ||
)) | ||
); | ||
|
||
//-------------------------------------------------------------------------- | ||
// This part was inserted into original document from entity defined in DTD | ||
assert_eq!(r.read_event()?, Event::Start(BytesStart::new("a"))); | ||
assert_eq!( | ||
r.read_event()?, | ||
Event::Empty(BytesStart::from_content( | ||
r#"dtd attr = 'Message: &text;'"#, | ||
3 | ||
)) | ||
); | ||
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a"))); | ||
//-------------------------------------------------------------------------- | ||
|
||
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test"))); | ||
|
||
//-------------------------------------------------------------------------- | ||
// Start of external document | ||
assert_eq!( | ||
r.read_event()?, | ||
Event::Start(BytesStart::new("external")) | ||
); | ||
assert_eq!(r.read_event()?, Event::Text(BytesText::new("text"))); | ||
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external"))); | ||
//-------------------------------------------------------------------------- | ||
|
||
assert_eq!(r.read_event()?, Event::Eof); | ||
|
||
Ok(()) | ||
} | ||
|
||
/// In this example the events will always copy data | ||
fn buffered() -> Result<(), Box<dyn std::error::Error>> { | ||
let boxed: Box<dyn BufRead> = Box::new(Cursor::new(XML1.as_bytes())); | ||
let mut reader = RawReader::from_reader(boxed); | ||
reader.config_mut().trim_text(true); | ||
|
||
let mut r = Reader::buffered(reader, MyResolver::new()?); | ||
|
||
assert_eq!( | ||
r.read_event()?, | ||
Event::Start(BytesStart::from_content( | ||
r#"test label="Message: &text;""#, | ||
4 | ||
)) | ||
); | ||
|
||
//-------------------------------------------------------------------------- | ||
// This part was inserted into original document from entity defined in DTD | ||
assert_eq!(r.read_event()?, Event::Start(BytesStart::new("a"))); | ||
assert_eq!( | ||
r.read_event()?, | ||
Event::Empty(BytesStart::from_content( | ||
r#"dtd attr = 'Message: &text;'"#, | ||
3 | ||
)) | ||
); | ||
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("a"))); | ||
//-------------------------------------------------------------------------- | ||
|
||
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("test"))); | ||
|
||
//-------------------------------------------------------------------------- | ||
// Start of external document | ||
assert_eq!( | ||
r.read_event()?, | ||
Event::Start(BytesStart::new("external")) | ||
); | ||
assert_eq!(r.read_event()?, Event::Text(BytesText::new("text"))); | ||
assert_eq!(r.read_event()?, Event::End(BytesEnd::new("external"))); | ||
//-------------------------------------------------------------------------- | ||
|
||
assert_eq!(r.read_event()?, Event::Eof); | ||
|
||
Ok(()) | ||
} | ||
|
||
fn main() -> Result<(), Box<dyn std::error::Error>> { | ||
// In this example the events will borrow from the first document | ||
borrowed()?; | ||
// In this example the events will always copy data | ||
buffered()?; | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters