diff --git a/regex-filtered/examples/bench.rs b/regex-filtered/examples/bench.rs index 69bd556..17646df 100644 --- a/regex-filtered/examples/bench.rs +++ b/regex-filtered/examples/bench.rs @@ -1,6 +1,6 @@ use clap::Parser; -use std::path::PathBuf; use std::io::{BufRead, BufReader}; +use std::path::PathBuf; #[derive(Parser)] struct Args { @@ -15,23 +15,34 @@ struct Args { } fn main() -> Result<(), Box> { - let Args {regexes, user_agents, repetitions, quiet} = Args::parse(); + let Args { + regexes, + user_agents, + repetitions, + quiet, + } = Args::parse(); let start = std::time::Instant::now(); let regexes = BufReader::new(std::fs::File::open(regexes)?) .lines() .collect::, _>>()?; - let f = regex_filtered::Builder::new() - .push_all(®exes)? - .build()?; - eprintln!("{} regexes in {}s", regexes.len(), start.elapsed().as_secs_f32()); + let f = regex_filtered::Builder::new().push_all(®exes)?.build()?; + eprintln!( + "{} regexes in {}s", + regexes.len(), + start.elapsed().as_secs_f32() + ); let start = std::time::Instant::now(); let user_agents = BufReader::new(std::fs::File::open(user_agents)?) .lines() .collect::, _>>()?; - eprintln!("{} user agents in {}s", user_agents.len(), start.elapsed().as_secs_f32()); + eprintln!( + "{} user agents in {}s", + user_agents.len(), + start.elapsed().as_secs_f32() + ); for _ in 0..repetitions { for ua in user_agents.iter() { diff --git a/regex-filtered/src/int_set.rs b/regex-filtered/src/int_set.rs index 7765b07..565eaf1 100644 --- a/regex-filtered/src/int_set.rs +++ b/regex-filtered/src/int_set.rs @@ -6,7 +6,7 @@ pub struct IntSet { impl IntSet { pub fn new(capacity: usize) -> Self { Self { - sparse: vec![usize::MAX;capacity], + sparse: vec![usize::MAX; capacity], dense: Vec::with_capacity(capacity), } } @@ -47,7 +47,7 @@ impl std::iter::Extend for IntSet { } } -impl <'a> std::iter::Extend<&'a usize> for IntSet { +impl<'a> std::iter::Extend<&'a usize> for IntSet { fn extend>(&mut self, iter: T) { for val in iter { self.insert(*val); diff --git a/regex-filtered/src/lib.rs b/regex-filtered/src/lib.rs index e79b16f..7f56316 100644 --- a/regex-filtered/src/lib.rs +++ b/regex-filtered/src/lib.rs @@ -4,9 +4,9 @@ use aho_corasick::AhoCorasick; +mod int_set; mod mapper; mod model; -mod int_set; pub use model::Error as ModelError; /// Builder for the regexes set diff --git a/regex-filtered/src/mapper.rs b/regex-filtered/src/mapper.rs index b8a506f..7950703 100644 --- a/regex-filtered/src/mapper.rs +++ b/regex-filtered/src/mapper.rs @@ -1,8 +1,8 @@ use std::fmt::Display; use std::fmt::Formatter; -use crate::int_set::IntSet; use super::model::Model; +use crate::int_set::IntSet; pub struct Builder { min_atom_len: usize, @@ -257,7 +257,7 @@ impl Mapper { } fn propagate_match(&self, work: &mut IntSet) -> IntSet { - let mut count = vec![0;self.entries.len()]; + let mut count = vec![0; self.entries.len()]; let mut regexps = IntSet::new(self.regexp_count); diff --git a/ua-parser/src/lib.rs b/ua-parser/src/lib.rs index 7b413ad..b28a880 100644 --- a/ua-parser/src/lib.rs +++ b/ua-parser/src/lib.rs @@ -623,12 +623,12 @@ fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> { match c { '\\' if !escape => { escape = true; - continue + continue; } '{' if !escape && inclass == 0 => { if idx == 0 { // we're repeating nothing, this regex is broken, bail - return re.into() + return re.into(); } // we don't need to loop, we only want to replace {0, ...} and {1, ...} let Some((_, start)) = it.next() else { @@ -649,45 +649,47 @@ fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> { // here idx is the index of the start of // the range and ri is the end of range out.push_str(&re[from..idx]); - from = ri+1; + from = ri + 1; out.push_str(if start == '0' { "*" } else { "+" }); break; } c if c.is_ascii_digit() => { digits += 1; } - _ => { - continue 'main - } + _ => continue 'main, } } } - '[' if !escape => { inclass += 1; } - ']' if !escape => { inclass += 1; } + '[' if !escape => { + inclass += 1; + } + ']' if !escape => { + inclass += 1; + } // no need for special cases because regex allows nesting // character classes, whereas js or python don't \o/ 'd' if escape => { // idx is d so idx-1 is \\, and we want to exclude it - out.push_str(&re[from..idx-1]); - from = idx+1; + out.push_str(&re[from..idx - 1]); + from = idx + 1; out.push_str("[0-9]"); } 'D' if escape => { - out.push_str(&re[from..idx-1]); - from = idx+1; + out.push_str(&re[from..idx - 1]); + from = idx + 1; out.push_str("[^0-9]"); } 'w' if escape => { - out.push_str(&re[from..idx-1]); - from = idx+1; + out.push_str(&re[from..idx - 1]); + from = idx + 1; out.push_str("[A-Za-z0-9_]"); } 'W' if escape => { - out.push_str(&re[from..idx-1]); - from = idx+1; + out.push_str(&re[from..idx - 1]); + from = idx + 1; out.push_str("[^A-Za-z0-9_]"); } - _ => () + _ => (), } escape = false; } @@ -724,7 +726,8 @@ mod test_rewrite_regex { assert_eq!( rewrite(r"\{1,2}"), r"\{1,2}", - "if the opening brace is escaped it's not a repetition"); + "if the opening brace is escaped it's not a repetition" + ); assert_eq!( rewrite("[.{1,100}]"), "[.{1,100}]",