Skip to content

Commit

Permalink
add tests for Document::fragment, minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
niklak committed Oct 9, 2024
1 parent c1a559d commit 10f042c
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "dom_query"
version = "0.4.2"
version = "0.5.0"
description = "HTML querying and manipulations with CSS selectors"
license = "MIT"
repository = "https://github.com/niklak/dom_query"
Expand Down
47 changes: 45 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,52 @@ It is a fork of [nipper](https://crates.io/crates/nipper), with some updates. Al

```rust
use dom_query::Document;
let html = r#"<!DOCTYPE html>
use tendril::StrTendril;
// Document may consume &str, String, StrTendril
let contents_str = r#"<!DOCTYPE html>
<html><head><title>Test Page</title></head><body></body></html>"#;
let document = Document::from(html);
let doc = Document::from(contents_str);

let contents_string = contents_str.to_string();
let doc = Document::from(contents_string);

let contents_tendril = StrTendril::from(contents_str);
let doc = Document::from(contents_tendril);

// The root element for the `Document` is a Document
assert!(doc.root().is_document());

// and the first child node of it is a Doctype
assert!(doc.root().first_child().unwrap().is_doctype());

// Neither of them is an element.
```
</details>


<details>
<summary><b>Parsing a fragment</b></summary>

```rust
use dom_query::Document;
use tendril::StrTendril;
// fragment can be created with Document::fragment(), which accepts &str, String, StrTendril
let contents_str = r#"<!DOCTYPE html>
<html><head><title>Test Page</title></head><body></body></html>"#;
let fragment = Document::fragment(contents_str);

let contents_string = contents_str.to_string();
let fragment = Document::fragment(contents_string);

let contents_tendril = StrTendril::from(contents_str);
let fragment = Document::fragment(contents_tendril);

// The root element for the fragment is not a Document but a Fragment
assert!(!fragment.root().is_document());
assert!(fragment.root().is_fragment());

// and when it parses a fragment, it drops Doctype
assert!(!fragment.root().first_child().unwrap().is_doctype());
```
</details>

Expand Down
12 changes: 12 additions & 0 deletions src/dom_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,10 @@ impl InnerNode<NodeData> {
/// Returns `true` when this node's data is the `NodeData::Fragment` variant.
pub fn is_fragment(&self) -> bool {
matches!(self.data, NodeData::Fragment)
}

/// Returns `true` when this node's data is a `NodeData::Doctype` variant,
/// whatever values its fields carry.
pub fn is_doctype(&self) -> bool {
    matches!(self.data, NodeData::Doctype { .. })
}
}

impl<T: Clone> Clone for InnerNode<T> {
Expand Down Expand Up @@ -832,6 +836,10 @@ impl<'a> Node<'a> {
self.query(|node| node.is_document()).unwrap_or(false)
}

/// Reports whether the node behind this handle is a fragment.
///
/// Delegates to the inner node's `is_fragment` through `self.query` and
/// falls back to `false` when `query` yields no value.
// NOTE(review): presumably `query` yields nothing when the node cannot be
// resolved in the tree; confirm against `query`.
pub fn is_fragment(&self) -> bool {
self.query(|node| node.is_fragment()).unwrap_or(false)
}

/// Reports whether the node behind this handle is an element.
///
/// Delegates to the inner node's `is_element` through `self.query` and
/// falls back to `false` when `query` yields no value.
pub fn is_element(&self) -> bool {
self.query(|node| node.is_element()).unwrap_or(false)
}
Expand All @@ -842,6 +850,10 @@ impl<'a> Node<'a> {
/// Reports whether the node behind this handle is a comment.
///
/// Delegates to the inner node's `is_comment` through `self.query` and
/// falls back to `false` when `query` yields no value.
pub fn is_comment(&self) -> bool {
self.query(|node| node.is_comment()).unwrap_or(false)
}

/// Reports whether the node behind this handle is a doctype.
///
/// Delegates to the inner node's `is_doctype` through `self.query` and
/// falls back to `false` when `query` yields no value.
pub fn is_doctype(&self) -> bool {
self.query(|node| node.is_doctype()).unwrap_or(false)
}
}

impl<'a> Node<'a> {
Expand Down
8 changes: 2 additions & 6 deletions src/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,11 @@ impl Document {
ParseOpts {
tokenizer: Default::default(),
tree_builder: tree_builder::TreeBuilderOpts {
exact_errors: false,
scripting_enabled: true,
iframe_srcdoc: false,
drop_doctype: true,
ignore_missing_rules: false,
quirks_mode: tree_builder::NoQuirks,
..Default::default()
},
},
QualName::new(None, ns!(html), local_name!("")),
QualName::new(None, ns!(html), local_name!("body")),
Vec::new(),
)
.one(html)
Expand Down
45 changes: 42 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,49 @@
//! ## Parsing a document
//! ```
//! use dom_query::Document;
//! let html = r#"<!DOCTYPE html>
//! use tendril::StrTendril;
//! // Document may consume `&str`, `String`, `StrTendril`
//! let contents_str = r#"<!DOCTYPE html>
//! <html><head><title>Test Page</title></head><body></body></html>"#;
//! let document = Document::from(html);

//! let doc = Document::from(contents_str);
//!
//! let contents_string = contents_str.to_string();
//! let doc = Document::from(contents_string);
//!
//! let contents_tendril = StrTendril::from(contents_str);
//! let doc = Document::from(contents_tendril);
//!
//! // The root element for the `Document` is a Document
//! assert!(doc.root().is_document());
//!
//! // and the first child node of it is a Doctype
//! assert!(doc.root().first_child().unwrap().is_doctype());
//!
//! // Neither of them is an element.
//! ```
//!
//! ## Parsing a fragment
//! ```
//! use dom_query::Document;
//! use tendril::StrTendril;
//! // fragment can be created with `Document::fragment()`, which accepts `&str`, `String`, `StrTendril`
//! let contents_str = r#"<!DOCTYPE html>
//! <html><head><title>Test Page</title></head><body></body></html>"#;
//! let fragment = Document::fragment(contents_str);
//!
//! let contents_string = contents_str.to_string();
//! let fragment = Document::fragment(contents_string);
//!
//! let contents_tendril = StrTendril::from(contents_str);
//! let fragment = Document::fragment(contents_tendril);
//!
//! // The root element for the fragment is not a Document but a Fragment
//! assert!(!fragment.root().is_document());
//! assert!(fragment.root().is_fragment());
//!
//! // and when it parses a fragment, it drops Doctype
//! assert!(!fragment.root().first_child().unwrap().is_doctype());
//!
//! ```
//!
//! ## Selecting elements
Expand Down
63 changes: 63 additions & 0 deletions tests/parsing.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
use dom_query::Document;
use tendril::StrTendril;


/// Well-formed sample page shared by every parsing test in this file.
const HTML_CONTENTS: &str = r#"<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<div class="content">
<h1>Test Page</h1>
</div>
<div class="content">
<p>This is a test page contents.</p>
</div>
</body>
</html>"#;

/// Parsing a `&str` as a full document keeps the doctype as the root's first child.
#[test]
fn parse_doc_str() {
    let doc = Document::from(HTML_CONTENTS);
    let root = doc.root();
    assert!(root.is_document());
    // the `<!DOCTYPE>` declaration is the first node under the document root
    let leading_node = root.first_child().unwrap();
    assert!(leading_node.is_doctype());
}

/// Parsing a `&str` as a fragment yields a Fragment root whose first child is `<html>`.
#[test]
fn parse_fragment_str() {
    let fragment = Document::fragment(HTML_CONTENTS);
    let root = fragment.root();
    assert!(root.is_fragment());
    // fragments drop the `<!DOCTYPE>` declaration
    let leading_node = root.first_child().unwrap();
    assert!(!leading_node.is_doctype());
    let element_name = leading_node.node_name().unwrap();
    assert_eq!(element_name, "html".into());
}

/// An owned `String` can be consumed to build a full document.
#[test]
fn parse_doc_string() {
    let owned: String = HTML_CONTENTS.to_owned();
    let doc = Document::from(owned);
    let root = doc.root();
    assert!(root.is_document());
}

/// An owned `String` can be consumed to build a fragment, which carries no doctype.
#[test]
fn parse_fragment_string() {
    let owned: String = HTML_CONTENTS.to_owned();
    let fragment = Document::fragment(owned);
    let leading_node = fragment.root().first_child().unwrap();
    assert!(!leading_node.is_doctype());
}

/// A `StrTendril` can be consumed to build a full document.
#[test]
fn parse_doc_str_tendril() {
    let tendril: StrTendril = HTML_CONTENTS.into();
    let doc = Document::from(tendril);
    let root = doc.root();
    assert!(root.is_document());
}

/// A `StrTendril` can be consumed to build a fragment, which carries no doctype.
#[test]
fn parse_fragment_str_tendril() {
    let tendril: StrTendril = HTML_CONTENTS.into();
    let fragment = Document::fragment(tendril);
    let leading_node = fragment.root().first_child().unwrap();
    assert!(!leading_node.is_doctype());
}

0 comments on commit 10f042c

Please sign in to comment.