Skip to content

Commit

Permalink
cargo fmt
Browse files (browse the repository at this point in the history)
  • Loading branch information
masklinn committed Jul 14, 2024
1 parent 29b9195 commit 40e44d1
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 30 deletions.
25 changes: 18 additions & 7 deletions regex-filtered/examples/bench.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use clap::Parser;
use std::path::PathBuf;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;

#[derive(Parser)]
struct Args {
Expand All @@ -15,23 +15,34 @@ struct Args {
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
let Args {regexes, user_agents, repetitions, quiet} = Args::parse();
let Args {
regexes,
user_agents,
repetitions,
quiet,
} = Args::parse();

let start = std::time::Instant::now();
let regexes = BufReader::new(std::fs::File::open(regexes)?)
.lines()
.collect::<Result<Vec<String>, _>>()?;

let f = regex_filtered::Builder::new()
.push_all(&regexes)?
.build()?;
eprintln!("{} regexes in {}s", regexes.len(), start.elapsed().as_secs_f32());
let f = regex_filtered::Builder::new().push_all(&regexes)?.build()?;
eprintln!(
"{} regexes in {}s",
regexes.len(),
start.elapsed().as_secs_f32()
);

let start = std::time::Instant::now();
let user_agents = BufReader::new(std::fs::File::open(user_agents)?)
.lines()
.collect::<Result<Vec<String>, _>>()?;
eprintln!("{} user agents in {}s", user_agents.len(), start.elapsed().as_secs_f32());
eprintln!(
"{} user agents in {}s",
user_agents.len(),
start.elapsed().as_secs_f32()
);

for _ in 0..repetitions {
for ua in user_agents.iter() {
Expand Down
4 changes: 2 additions & 2 deletions regex-filtered/src/int_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pub struct IntSet {
impl IntSet {
pub fn new(capacity: usize) -> Self {
Self {
sparse: vec![usize::MAX;capacity],
sparse: vec![usize::MAX; capacity],
dense: Vec::with_capacity(capacity),
}
}
Expand Down Expand Up @@ -47,7 +47,7 @@ impl std::iter::Extend<usize> for IntSet {
}
}

impl <'a> std::iter::Extend<&'a usize> for IntSet {
impl<'a> std::iter::Extend<&'a usize> for IntSet {
fn extend<T: IntoIterator<Item = &'a usize>>(&mut self, iter: T) {
for val in iter {
self.insert(*val);
Expand Down
2 changes: 1 addition & 1 deletion regex-filtered/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

use aho_corasick::AhoCorasick;

mod int_set;
mod mapper;
mod model;
mod int_set;
pub use model::Error as ModelError;

/// Builder for the regexes set
Expand Down
4 changes: 2 additions & 2 deletions regex-filtered/src/mapper.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::fmt::Display;
use std::fmt::Formatter;

use crate::int_set::IntSet;
use super::model::Model;
use crate::int_set::IntSet;

pub struct Builder {
min_atom_len: usize,
Expand Down Expand Up @@ -257,7 +257,7 @@ impl Mapper {
}

fn propagate_match(&self, work: &mut IntSet) -> IntSet {
let mut count = vec![0;self.entries.len()];
let mut count = vec![0; self.entries.len()];

let mut regexps = IntSet::new(self.regexp_count);

Expand Down
39 changes: 21 additions & 18 deletions ua-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -623,12 +623,12 @@ fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> {
match c {
'\\' if !escape => {
escape = true;
continue
continue;
}
'{' if !escape && inclass == 0 => {
if idx == 0 {
// we're repeating nothing, this regex is broken, bail
return re.into()
return re.into();
}
// we don't need to loop, we only want to replace {0, ...} and {1, ...}
let Some((_, start)) = it.next() else {
Expand All @@ -649,45 +649,47 @@ fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> {
// here idx is the index of the start of
// the range and ri is the end of range
out.push_str(&re[from..idx]);
from = ri+1;
from = ri + 1;
out.push_str(if start == '0' { "*" } else { "+" });
break;
}
c if c.is_ascii_digit() => {
digits += 1;
}
_ => {
continue 'main
}
_ => continue 'main,
}
}
}
'[' if !escape => { inclass += 1; }
']' if !escape => { inclass += 1; }
'[' if !escape => {
inclass += 1;
}
']' if !escape => {
inclass += 1;
}
// no need for special cases because regex allows nesting
// character classes, whereas js or python don't \o/
'd' if escape => {
// idx is d so idx-1 is \\, and we want to exclude it
out.push_str(&re[from..idx-1]);
from = idx+1;
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[0-9]");
}
'D' if escape => {
out.push_str(&re[from..idx-1]);
from = idx+1;
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[^0-9]");
}
'w' if escape => {
out.push_str(&re[from..idx-1]);
from = idx+1;
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[A-Za-z0-9_]");
}
'W' if escape => {
out.push_str(&re[from..idx-1]);
from = idx+1;
out.push_str(&re[from..idx - 1]);
from = idx + 1;
out.push_str("[^A-Za-z0-9_]");
}
_ => ()
_ => (),
}
escape = false;
}
Expand Down Expand Up @@ -724,7 +726,8 @@ mod test_rewrite_regex {
assert_eq!(
rewrite(r"\{1,2}"),
r"\{1,2}",
"if the opening brace is escaped it's not a repetition");
"if the opening brace is escaped it's not a repetition"
);
assert_eq!(
rewrite("[.{1,100}]"),
"[.{1,100}]",
Expand Down

0 comments on commit 40e44d1

Please sign in to comment.