Skip to content

Commit

Permalink
update version to 0.3.4
Browse files Browse the repository at this point in the history
  • Loading branch information
niklak committed Feb 17, 2024
1 parent 1dc1fc6 commit 8f5c4b4
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 36 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ jobs:
- name: Build
run: cargo build --verbose --all
- name: Run tests
run: cargo test --verbose --all
run: cargo test --verbose --all-targets
- name: Run tests with hashbrown
run: cargo test --verbose --all --features "hashbrown"
run: cargo test --verbose --all-targets --features "hashbrown"
- name: Run clippy
run: cargo clippy --verbose --all -- -D warnings
run: cargo clippy --verbose --all-targets -- -D warnings
- name: Run audit
run: cargo audit
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## [0.3.4] - 2024-02-17

### Added
- Add support for `:has-text` and `:contains` pseudo-classes, which allow searching for elements by their text content. Some examples are [here](./tests/pseudo-class.rs).

## [0.3.3] - 2024-02-10

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "dom_query"
version = "0.3.3"
version = "0.3.4"
description = "HTML manipulation with CSS selectors"
license = "MIT"
repository = "https://github.com/niklak/dom_query"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

DOM_QUERY is based on the HTML crate html5ever and the CSS selector crate selectors. You can use the jQuery-like syntax to query and manipulate an HTML document quickly. **It can not only query, but also modify**.

It is a fork of [nipper](https://crates.io/crates/nipper), with some updates. Also this fork supports ":has" pseudo-class, and some others.
It is a fork of [nipper](https://crates.io/crates/nipper), with some updates. This fork also supports the `:has`, `:has-text`, and `:contains` pseudo-classes, among others.

## Example

Expand Down
2 changes: 1 addition & 1 deletion examples/demo.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use dom_query::Document;
use std::error::Error;
use std::time::Instant;
use ureq;

fn main() -> Result<(), Box<dyn Error>> {
let html: String = ureq::get("https://news.ycombinator.com/news")
.call()?
Expand Down
53 changes: 25 additions & 28 deletions examples/readability.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::all)]

use dom_query::Document;
use dom_query::Selection;
use lazy_static::lazy_static;
Expand Down Expand Up @@ -29,7 +31,7 @@ lazy_static! {
static ref RE_SPACES: Regex = Regex::new(r#"(?is)\s{2,}|\n+"#).unwrap();
}

const DATA_TABLE_ATTR: &'static str = "XXX-DATA-TABLE";
const DATA_TABLE_ATTR: &str = "XXX-DATA-TABLE";

macro_rules! is_valid_by_line {
($text: expr) => {
Expand Down Expand Up @@ -121,8 +123,6 @@ struct MetaData {
cover: Option<String>,
description: Option<String>,
author: Option<String>,
min_read_time: Option<usize>,
max_read_time: Option<usize>,
}

impl Default for MetaData {
Expand All @@ -132,8 +132,6 @@ impl Default for MetaData {
cover: None,
description: None,
author: None,
min_read_time: None,
max_read_time: None,
}
}
}
Expand All @@ -159,7 +157,7 @@ fn remove_attrs(s: &Selection) {
.get(0)
.unwrap()
.node_name()
.unwrap_or(tendril::StrTendril::new());
.unwrap_or_default();
if tag_name.to_lowercase() == "svg" {
return;
}
Expand Down Expand Up @@ -542,7 +540,7 @@ fn grab_article<'a>(doc: &'a Document, title: &str) -> (String, Option<String>)
.children()
.iter()
.for_each(|sibling| {
let append_sibling = if sibling.is_selection(&top_selection) {
let append_sibling = if sibling.is_selection(top_selection) {
true
} else {
// let sibling_class = sibling.attr_or("class", "");
Expand Down Expand Up @@ -601,15 +599,15 @@ fn clean_html(doc: &Document) -> String {
}

fn pre_article(content: &Selection, title: &str) {
mark_data_tables(&content);
remove_attrs(&content);
remove_conditionally(&content, "form");
remove_conditionally(&content, "fieldset");
remove_tag(&content, "h1");
remove_tag(&content, "object");
remove_tag(&content, "embed");
remove_tag(&content, "footer");
remove_tag(&content, "link");
mark_data_tables(content);
remove_attrs(content);
remove_conditionally(content, "form");
remove_conditionally(content, "fieldset");
remove_tag(content, "h1");
remove_tag(content, "object");
remove_tag(content, "embed");
remove_tag(content, "footer");
remove_tag(content, "link");

content.select("*").iter().for_each(|mut s| {
let id = s.attr_or("id", "");
Expand Down Expand Up @@ -641,15 +639,15 @@ fn pre_article(content: &Selection, title: &str) {
}
}

remove_tag(&content, "iframe");
remove_tag(&content, "input");
remove_tag(&content, "textarea");
remove_tag(&content, "select");
remove_tag(&content, "button");
remove_headers(&content);
remove_tag(content, "iframe");
remove_tag(content, "input");
remove_tag(content, "textarea");
remove_tag(content, "select");
remove_tag(content, "button");
remove_headers(content);

remove_conditionally(&content, "table");
remove_conditionally(&content, "ul");
remove_conditionally(content, "table");
remove_conditionally(content, "ul");
// remove_conditionally(&content, "div");

content.select("p").iter().for_each(|mut p| {
Expand Down Expand Up @@ -715,7 +713,6 @@ fn mark_data_tables(s: &Selection) {

if rows * colums > 10 {
table.set_attr(DATA_TABLE_ATTR, "1");
return;
}
})
}
Expand Down Expand Up @@ -746,9 +743,9 @@ fn get_table_row_and_column_count(table: &Selection) -> (usize, usize) {

fn main() {
let start = Instant::now();
let path = env::args().skip(1).next().unwrap();
let path = env::args().nth(1).unwrap();
let mut html = String::new();
let mut html_file = File::open(&path).expect("correct HTML file path");
let mut html_file = File::open(path).expect("correct HTML file path");
html_file
.read_to_string(&mut html)
.expect("read HTML page file");
Expand All @@ -761,7 +758,7 @@ fn main() {

let metadata = get_article_metadata(&document);
let title = &metadata.title.as_ref().unwrap();
let (article_html, author) = grab_article(&document, &title);
let (article_html, author) = grab_article(&document, title);

// println!("{}", document.html());
println!("{:?}", metadata);
Expand Down
4 changes: 2 additions & 2 deletions examples/readability2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ use std::io::Cursor;

fn main() -> Result<(), Box<dyn Error>> {
let start = Instant::now();
let url = env::args().skip(1).next().unwrap();
let url = env::args().nth(1).unwrap();

let html: String = ureq::get(&url).call()?.into_string()?;
let url = &url.parse()?;
let mut c = Cursor::new(html.as_bytes());

let article = extract(&mut c, &url)?;
let article = extract(&mut c, url)?;

println!("title ====> {}", article.title);
println!("article ====> {}", article.content);
Expand Down

0 comments on commit 8f5c4b4

Please sign in to comment.