Skip to content

Commit

Permalink
Merge pull request #21 from niklak/feature/tree-merge-tree
Browse files Browse the repository at this point in the history
Replace logic `Tree::append_prev_siblings_from_another_tree` and `Tree::append_children_from_another_tree` with `Tree::merge`
  • Loading branch information
niklak authored Nov 3, 2024
2 parents 6176408 + 4813bc3 commit 7c0f8fb
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 220 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ All notable changes to the `dom_query` crate will be documented in this file.
- Simplified `Node::has_text`.
- Replaced generic types with the concrete type `NodeData`, simplifying code and improving readability without affecting the public API.
- Replaced implementations for `Node` with implementations for `NodeRef`. `Node` is just an alias for `NodeRef`.
- Simplified internal logic of `Selection::replace_with_html`, `Selection::set_html`,
`Selection::append_html`, `Node::set_html`, `Node::append_html`, and `Node::replace_with_html` by using `Tree::merge`.

### Added
- Added `Selection::filter` , `Selection::filter_matcher` and `Selection::try_filter` methods that filter a current selection.
Expand All @@ -18,12 +20,17 @@ All notable changes to the `dom_query` crate will be documented in this file.
Previously these functions required `NodeId` as a parameter.
- Added a new pseudo-class `:only-text` that allows selecting a node with no child elements except a single **text** child node.
- Added the `NodeRef::set_text` method, which sets the text content of a node, replacing any existing content.
- Added the `NodeRef::append_prev_siblings` method, which inserts other nodes and their siblings before the selected node.

### Fixed
- Fixed `Tree::append_prev_siblings_from_another_tree` method. It didn't assign `TreeNode.prev_sibling` properly.
- Fixed `<NodeRef<'a> as selectors::Element>::is_empty` to correctly handle line breaks, whitespace, and ensure only elements pass the check.


### Removed
- Removed `Tree::append_children_from_another_tree` method.
- Removed `Tree::append_prev_siblings_from_another_tree` method.
- Removed `Node::append_children_from_another_tree` method.
- Removed `Node::append_prev_siblings_from_another_tree` method.

## [0.7.0] - 2024-10-27

Expand Down
12 changes: 4 additions & 8 deletions examples/pseudo_classes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ fn main() {
let html = include_str!("../test-pages/rustwiki_2024.html");
let doc = Document::from(html);

// searching list items inside a `tr` element which has a `a` element
// searching list items inside a `tr` element which has a `a` element
// with title="Programming paradigm"
let paradigm_selection =
doc.select(
r#"table tr:has(a[title="Programming paradigm"]) td.infobox-data ul > li"#
);
doc.select(r#"table tr:has(a[title="Programming paradigm"]) td.infobox-data ul > li"#);

println!("Rust programming paradigms:");
for item in paradigm_selection.iter() {
Expand All @@ -31,9 +29,7 @@ fn main() {
// Since `foreign function interface` is located in its own tag,
// we have to use `:contains` pseudo class
let links_selection =
doc.select(
r#"p:contains("Rust has a foreign function interface") a[href^="/"]"#
);
doc.select(r#"p:contains("Rust has a foreign function interface") a[href^="/"]"#);

println!("Links in the FFI block:");
for item in links_selection.iter() {
Expand All @@ -44,7 +40,7 @@ fn main() {
// :only-text selects an element that contains only a single text node,
// with no child elements.
// It can be combined with other pseudo-classes to achieve more specific selections.
// For example, to select a <div> inside an <a>
// For example, to select a <div> inside an <a>
// that has no siblings and no child elements other than text.
println!("Single <div> inside an <a> with text only:");
for el in doc.select("a div:only-text:only-child").iter() {
Expand Down
208 changes: 36 additions & 172 deletions src/dom_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@ use tendril::StrTendril;
use crate::node::{ancestor_nodes, child_nodes, AncestorNodes, ChildNodes};
use crate::node::{Element, NodeData, NodeId, NodeRef, TreeNode};

/// Shifts an optional node id forward by `offset`.
///
/// `None` stays `None`; `Some(id)` becomes a new `NodeId` whose value is
/// `id.value + offset`.
fn fix_id(id: Option<NodeId>, offset: usize) -> Option<NodeId> {
    let shifted = id?.value + offset;
    Some(NodeId::new(shifted))
}

/// fixes node ids
fn fix_node(n: &mut TreeNode, offset: usize) {
n.id = n.id.map(|id| NodeId::new(id.value + offset));
n.parent = n.parent.map(|id| NodeId::new(id.value + offset));
n.prev_sibling = n.prev_sibling.map(|id| NodeId::new(id.value + offset));
n.next_sibling = n.next_sibling.map(|id| NodeId::new(id.value + offset));
n.first_child = n.first_child.map(|id| NodeId::new(id.value + offset));
Expand Down Expand Up @@ -55,7 +52,7 @@ impl Tree {
/// Creates a new text node with the given text, without a parent.
///
/// `text` is converted into a `StrTendril` and stored as `NodeData::Text`.
/// The returned `NodeRef` points at the freshly created node in this tree.
pub fn new_text<T: Into<StrTendril>>(&self, text: T) -> NodeRef {
    let contents = text.into();
    // Create the node exactly once: every `create_node` call allocates a
    // node, so a second call would leave an orphaned text node behind.
    let id = self.create_node(NodeData::Text { contents });
    NodeRef { id, tree: self }
}

Expand Down Expand Up @@ -279,173 +276,6 @@ impl Tree {
}
}

/// Appends children nodes from another tree. Another tree is a tree from document fragment.
///
/// The top-level nodes of the fragment (the children of the fragment's
/// node with id `2`) become the last children of the node `id` in this tree.
/// Every node of `tree` is moved into this tree's storage with its ids
/// shifted by the current number of nodes.
///
/// Does nothing if the fragment has no node with id `2`, if that node has no
/// children, or if `id` does not exist in this tree.
///
/// # Panics
///
/// Panics if either tree contains no nodes.
pub fn append_children_from_another_tree(&self, id: &NodeId, tree: Tree) {
    let mut nodes = self.nodes.borrow_mut();
    let mut new_nodes = tree.nodes.into_inner();
    assert!(
        !new_nodes.is_empty(),
        "Another tree should have at least one root node"
    );
    assert!(
        !nodes.is_empty(),
        "The tree should have at least one root node"
    );

    // Ids of the incoming nodes are shifted by the current node count.
    let offset = nodes.len();

    // `parse_fragment` returns a document that looks like:
    // <:root>                     id -> 0
    //  <body>                     id -> 1
    //      <html>                 id -> 2
    //          things we need.
    //      </html>
    //  </body>
    // <:root>
    const TRUE_ROOT_ID: usize = 2;
    let root = match new_nodes.get(TRUE_ROOT_ID) {
        Some(node) => node,
        None => return,
    };

    let first_child_id = fix_id(root.first_child, offset);
    let last_child_id = fix_id(root.last_child, offset);

    // Nothing to append: bail out before touching the parent, otherwise its
    // `last_child` would be overwritten with `None`.
    if first_child_id.is_none() {
        return;
    }

    // Update new parent's first and last child id.
    let parent = match nodes.get_mut(id.value) {
        Some(node) => node,
        None => return,
    };

    if parent.first_child.is_none() {
        parent.first_child = first_child_id;
    }

    let parent_last_child_id = parent.last_child;
    parent.last_child = last_child_id;

    // Link the former last child to the first appended node.
    if let Some(last_child_id) = parent_last_child_id {
        if let Some(last_child) = nodes.get_mut(last_child_id.value) {
            last_child.next_sibling = first_child_id;
        }
    }

    let mut first_valid_child = false;

    // Re-base every incoming node onto this tree.
    for node in new_nodes.iter_mut() {
        // Work out the new parent from the *old* parent id first.
        let new_parent = node.parent.and_then(|parent_id| match parent_id.value {
            i if i < TRUE_ROOT_ID => None,
            i if i == TRUE_ROOT_ID => Some(*id),
            i => fix_id(Some(NodeId::new(i)), offset),
        });

        // Shift the fragment-local ids, then install the parent computed
        // above so it is correct regardless of what `fix_node` did to it.
        fix_node(node, offset);
        node.parent = new_parent;

        // The first appended child points back to the former last child.
        // This must happen after `fix_node`: `parent_last_child_id` is
        // already an id of this tree and must not be shifted by `offset`.
        if !first_valid_child && node.parent == Some(*id) {
            first_valid_child = true;
            node.prev_sibling = parent_last_child_id;
        }
    }

    // Move all fragment nodes (wrapper nodes included) into this tree so
    // that `old id + offset` stays a valid index for each of them.
    nodes.extend(new_nodes);
}

/// Inserts the top-level nodes of another tree (a parsed document fragment)
/// immediately before the node `id`, as its previous siblings.
///
/// Every node of `tree` is moved into this tree's storage with its ids
/// shifted by the current number of nodes.
///
/// Returns without changes if the fragment has no node with id `2` or if
/// `id` does not exist in this tree.
///
/// # Panics
///
/// Panics if either tree contains no nodes.
pub fn append_prev_siblings_from_another_tree(&self, id: &NodeId, tree: Tree) {
    let mut nodes = self.nodes.borrow_mut();
    let mut new_nodes = tree.nodes.into_inner();
    assert!(
        !new_nodes.is_empty(),
        "Another tree should have at least one root node"
    );
    assert!(
        !nodes.is_empty(),
        "The tree should have at least one root node"
    );

    // Ids of the incoming nodes are shifted by the current node count.
    let offset = nodes.len();

    // `parse_fragment` returns a document that looks like:
    // <:root> id -> 0
    // <body> id -> 1
    // <html> id -> 2
    // things we need.
    // </html>
    // </body>
    // <:root>
    const TRUE_ROOT_ID: usize = 2;
    let node_root_id = NodeId::new(TRUE_ROOT_ID);
    let root = match new_nodes.get(node_root_id.value) {
        Some(node) => node,
        None => return,
    };

    // First and last top-level fragment nodes, expressed as ids of this tree.
    let first_child_id = fix_id(root.first_child, offset);
    let last_child_id = fix_id(root.last_child, offset);

    let node = match nodes.get_mut(id.value) {
        Some(node) => node,
        None => return,
    };

    let prev_sibling_id = node.prev_sibling;
    let parent_id = node.parent;

    // Update node's previous sibling.
    // NOTE(review): if the fragment has no top-level nodes, `last_child_id`
    // is `None` and this clears the node's existing previous sibling — confirm.
    node.prev_sibling = last_child_id;

    // Update prev sibling's next sibling
    if let Some(prev_sibling_id) = prev_sibling_id {
        if let Some(prev_sibling) = nodes.get_mut(prev_sibling_id.value) {
            prev_sibling.next_sibling = first_child_id;
        }

    // Update parent's first child.
    } else if let Some(parent_id) = parent_id {
        if let Some(parent) = nodes.get_mut(parent_id.value) {
            parent.first_child = first_child_id;
        }
    }

    // Index (within `new_nodes`) of the last node re-parented to `parent_id`.
    let mut last_valid_child = 0;
    let mut first_valid_child = false;

    // Fix nodes's ref id.
    for (i, node) in new_nodes.iter_mut().enumerate() {
        // Wrapper nodes lose their parent, top-level fragment nodes adopt the
        // target node's parent, deeper nodes keep their (shifted) parent.
        node.parent = node
            .parent
            .and_then(|old_parent_id| match old_parent_id.value {
                i if i < TRUE_ROOT_ID => None,
                i if i == TRUE_ROOT_ID => parent_id,
                i => fix_id(Some(NodeId::new(i)), offset),
            });

        // Shift ids before assigning `prev_sibling` below, so that the id
        // taken from this tree is not shifted by `offset`.
        // NOTE(review): the `node.parent == parent_id` checks below only match
        // if `fix_node` leaves `parent` untouched — verify against `fix_node`.
        fix_node(node, offset);

        // Update first child's prev_sibling
        if !first_valid_child && node.parent == parent_id {
            first_valid_child = true;
            node.prev_sibling = prev_sibling_id;
        }

        // NOTE(review): when `parent_id` is `None`, this also matches the
        // wrapper nodes whose parent was reset to `None` above — confirm.
        if node.parent == parent_id {
            last_valid_child = i;
        }
    }

    // Update last child's next_sibling.
    // NOTE(review): if no node matched, `last_valid_child` is still 0 and the
    // fragment's first node is linked to `id` instead — confirm this is harmless.
    new_nodes[last_valid_child].next_sibling = Some(*id);

    // Move all fragment nodes (wrapper nodes included) into this tree.
    nodes.extend(new_nodes);
}

/// Removes a node from its parent by id. The node remains in the tree.
/// It is possible to assign it to another node in the tree after this operation.
pub fn remove_from_parent(&self, id: &NodeId) {
Expand Down Expand Up @@ -600,3 +430,37 @@ impl Tree {
Some(f(node_a, node_b))
}
}

impl Tree {
    /// Moves the content nodes of `other` into this tree's node storage.
    ///
    /// The first three nodes of `other` are discarded and the remaining ones
    /// are re-based with `fix_node` so that the first kept node receives the
    /// id equal to this tree's node count before the merge. The merged nodes
    /// are only stored here; this method does not attach them to any node of
    /// this tree.
    pub(crate) fn merge(&self, other: Tree) {
        // `parse_fragment` returns a document that looks like:
        // <:root>                     id -> 0
        //  <body>                     id -> 1
        //      <html>                 id -> 2
        //          things we need.
        //      </html>
        //  </body>
        // <:root>
        // Number of leading wrapper nodes (ids 0..=2) that are not carried over.
        const WRAPPER_NODES: usize = 3;

        let mut own_nodes = self.nodes.borrow_mut();
        let mut incoming = other.nodes.into_inner();

        // Old id `WRAPPER_NODES` must map to `own_nodes.len()`.
        // NOTE(review): this subtraction underflows (a panic in debug builds)
        // if this tree holds fewer than three nodes — confirm callers rule that out.
        let shift = own_nodes.len() - WRAPPER_NODES;

        incoming
            .iter_mut()
            .skip(WRAPPER_NODES)
            .for_each(|node| fix_node(node, shift));

        own_nodes.extend(incoming.into_iter().skip(WRAPPER_NODES));
    }

    /// Returns the id the next node added to this tree would occupy.
    ///
    /// Nothing is allocated: the id is simply the current number of nodes,
    /// so it does not refer to an existing node yet.
    pub(crate) fn get_new_id(&self) -> NodeId {
        let next_index = self.nodes.borrow().len();
        NodeId::new(next_index)
    }
}
52 changes: 38 additions & 14 deletions src/node/node_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,32 @@ impl<'a> NodeRef<'a> {
self.tree.append_child_of(&self.id, id_provider.node_id())
}

/// Appends another tree to the selected node from another tree.
/// Appends another node and it's siblings to the selected node.
#[inline]
pub fn append_children_from_another_tree(&self, tree: Tree) {
self.tree.append_children_from_another_tree(&self.id, tree)
pub fn append_children<P: NodeIdProver>(&self, id_provider: P) {
let mut next_node = self.tree.get(id_provider.node_id());

while let Some(ref node) = next_node {
self.tree.append_child_of(&self.id, &node.id);
next_node = node.next_sibling();
}
}

/// Appends another node and it's siblings to the parent node
/// of the selected node, shifting itself.
#[inline]
pub fn append_prev_siblings_from_another_tree(&self, tree: Tree) {
self.tree
.append_prev_siblings_from_another_tree(&self.id, tree)
pub fn append_prev_siblings<P: NodeIdProver>(&self, id_provider: P) {
let mut next_node = self.tree.get(id_provider.node_id());

let mut siblings = vec![];
while let Some(node) = next_node {
next_node = node.next_sibling();
siblings.push(node);
}

for node in siblings {
self.tree.append_prev_sibling_of(&self.id, &node.id);
}
}

/// Replaces the current node with other node by id. It is actually a shortcut of two operations:
Expand All @@ -171,26 +187,33 @@ impl<'a> NodeRef<'a> {
self.remove_from_parent();
}


}

impl<'a> NodeRef<'a> {

/// Replaces the current node with the nodes created from the given fragment html.
/// Behaves similarly to [`crate::Selection::replace_with_html`] but only for one node.
///
/// The fragment is parsed once, its nodes are merged into this node's tree,
/// inserted before this node, and then this node is removed from its parent.
pub fn replace_with_html<T>(&self, html: T)
where
    T: Into<StrTendril>,
{
    let fragment = Document::fragment(html);
    // Taken before the merge: this is the id the first merged node receives.
    let new_node_id = self.tree.get_new_id();
    self.tree.merge(fragment.tree);
    self.append_prev_siblings(&new_node_id);
    self.remove_from_parent();
}
}

impl<'a> NodeRef<'a> {
/// Parses the given fragment html and appends its contents to the selected node.
///
/// The fragment's nodes are merged into this node's tree and then appended,
/// in order, as the last children of this node.
pub fn append_html<T>(&self, html: T)
where
    T: Into<StrTendril>,
{
    let fragment = Document::fragment(html);
    // Taken before the merge: this is the id the first merged node receives.
    let new_node_id = self.tree.get_new_id();
    self.tree.merge(fragment.tree);
    self.append_children(&new_node_id);
}

/// Parses given fragment html and sets its contents to the selected node.
Expand Down Expand Up @@ -528,8 +551,9 @@ impl<'a> NodeRef<'a> {
/// Determines if the node is an element, has no child elements, and any text nodes
/// it contains consist only of whitespace.
pub fn is_empty_element(&self) -> bool {
self.is_element() && !self
.children_it()
.any(|child| child.is_element() || (child.is_text() && !child.text().trim().is_empty()))
self.is_element()
&& !self.children_it().any(|child| {
child.is_element() || (child.is_text() && !child.text().trim().is_empty())
})
}
}
Loading

0 comments on commit 7c0f8fb

Please sign in to comment.