Skip to content

Commit

Permalink
Implement support for "name matching processors".
Browse files Browse the repository at this point in the history
This is an unwieldy name for a complicated-sounding feature that's
actually rather simple. In essence, this generalises the previous
support we had for "distinct name matching", allowing the user to
decide which sets of name matching pairs should be considered a
successful match.

For example, distinct name matching is just:

```
.name_matching_processor(|names| {
    names.values().collect::<HashSet<_>>().len() == names.len()
})
```

Of course, there are other uses this can be put towards!

Because I'm a nice person, this commit still supports the
`distinct_name_matching` function, though it is deprecated. There is one
-- very unlikely -- sequence of calls where this falls short: if you turn
distinct name matching on and then off again, it won't actually turn
off, because the processor added by the first call is never removed.
There's only so much I can do.
  • Loading branch information
ltratt committed Jul 23, 2024
1 parent 5da2577 commit 15957fc
Showing 1 changed file with 105 additions and 14 deletions.
119 changes: 105 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
#![doc = include_str!("../README.md")]
#![allow(clippy::upper_case_acronyms)]
#![allow(clippy::type_complexity)]

use std::{
collections::hash_map::{Entry, HashMap},
collections::{
hash_map::{Entry, HashMap},
HashSet,
},
default::Default,
error::Error,
fmt,
panic::UnwindSafe,
};

use regex::Regex;
Expand All @@ -16,11 +21,10 @@ const GROUP_ANCHOR_WILDCARD: &str = "..~";
const INTRALINE_WILDCARD: &str = "...";
const ERROR_MARKER: &str = ">>";

#[derive(Debug)]
struct FMOptions {
output_formatter: OutputFormatter,
name_matchers: Vec<(Regex, Regex, bool)>,
distinct_name_matching: bool,
name_matching_processors: Vec<Box<dyn Fn(&HashMap<&str, &str>) -> bool + UnwindSafe>>,
trim_whitespace: bool,
}

Expand All @@ -29,12 +33,18 @@ impl Default for FMOptions {
FMOptions {
output_formatter: OutputFormatter::InputThenSummary,
name_matchers: Vec::new(),
distinct_name_matching: false,
name_matching_processors: Vec::new(),
trim_whitespace: true,
}
}
}

/// `Debug` is implemented by hand because the boxed closures stored in
/// `name_matching_processors` do not implement [fmt::Debug], which rules out
/// `#[derive(Debug)]`.
impl fmt::Debug for FMOptions {
    /// Format every option in the usual struct style. The name matching processors are opaque
    /// closures, so only how many of them are registered is reported.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let num_processors = self.name_matching_processors.len();
        f.debug_struct("FMOptions")
            .field("output_formatter", &self.output_formatter)
            .field("name_matchers", &self.name_matchers)
            .field(
                "name_matching_processors",
                // Closures cannot be printed: show a count instead of their contents.
                &format_args!("<{} closure(s)>", num_processors),
            )
            .field("trim_whitespace", &self.trim_whitespace)
            .finish()
    }
}

/// How should an [FMatchError] format itself? Where:
///
/// * `Input` means the literal text passed to fmt.
Expand Down Expand Up @@ -175,8 +185,58 @@ impl<'a> FMBuilder<'a> {
/// to `a` then `$2` will refuse to match against `a` (though `$1` will continue to match
/// against only `a`). Note that ignorable name matches (see [Self::name_matcher_ignore]) are
/// never subject to distinct name matching. Defaults to `false`.
pub fn distinct_name_matching(mut self, yes: bool) -> Self {
self.options.distinct_name_matching = yes;
#[deprecated(since = "0.3.1", note = "Please use name_matching_processor instead")]
pub fn distinct_name_matching(self, yes: bool) -> Self {
if yes {
self.name_matching_processor(|names| {
names.values().collect::<HashSet<_>>().len() == names.len()
})
} else {
self
}
}

/// Register a name matching processor: a callback which is handed a [HashMap] of `(key, value)`
/// pairs and which must return `true` if that set of pairs is acceptable or `false` otherwise.
/// Name matching processors let you customise which names count as valid matches. For example,
/// to force distinct names to match distinct values, add a name matching processor which
/// collects the values into a [HashSet] and rejects the pairs if that set ends up with fewer
/// entries than the input hashmap:
///
/// ```rust
/// use {fm::FMBuilder, regex::Regex};
/// use std::collections::HashSet;
///
/// let ptn_re = Regex::new(r"\$[0-9]+?\b").unwrap();
/// let text_re = Regex::new(r"[a-b]+?\b").unwrap();
/// let matcher = FMBuilder::new("$1 $2")
///     .unwrap()
///     .name_matcher(ptn_re, text_re)
///     .name_matching_processor(|names| {
///         names.values().collect::<HashSet<_>>().len() == names.len()
///     })
///     .build()
///     .unwrap();
/// assert!(matcher.matches("a b").is_ok());
/// assert!(matcher.matches("a a").is_err());
/// ```
///
/// Here `$1` matches `a`, so the name matching processor returns `false` when `$2` also tries
/// to match `a`.
///
/// A name matching processor must not confuse "doesn't match" with "is an error": fm calls
/// name matching processors to find out whether a match is possible, and text failing to match
/// at a given point does not, in itself, mean there is an error.
///
/// Name matching processors are called frequently, so their performance can matter if you have
/// large inputs. You may need to benchmark carefully.
///
/// More than one name matching processor may be added: they are called in the order in which
/// they were added to `FMBuilder`.
pub fn name_matching_processor<F>(mut self, f: F) -> Self
where
    F: Fn(&HashMap<&str, &str>) -> bool + UnwindSafe + 'static,
{
    // Processors are stored type-erased so that differently typed closures can share one list.
    let processors = &mut self.options.name_matching_processors;
    processors.push(Box::new(f));
    self
}

Expand Down Expand Up @@ -406,13 +466,6 @@ impl<'a> FMatcher<'a> {
panic!("Text pattern matched the empty string.");
}
if !ignore {
if self.options.distinct_name_matching {
for (x, y) in names.iter().chain(new_names.iter()) {
if *x != key && *y == val {
return false;
}
}
}
match names.entry(key) {
Entry::Occupied(e) => {
if *e.get() != val {
Expand All @@ -430,6 +483,16 @@ impl<'a> FMatcher<'a> {
}
},
}

if !self.options.name_matching_processors.is_empty() {
let mut all_names = names.clone();
all_names.extend(&new_names);
for nmp in &self.options.name_matching_processors {
if !nmp(&all_names) {
return false;
}
}
}
}
ptn_i += ptnm.len();
text_i += textm.len();
Expand Down Expand Up @@ -648,6 +711,7 @@ fn line_trimmer<'a>(trim: bool, s: &'a str) -> (Vec<&'a str>, usize) {
mod tests {
use super::*;
use proptest::proptest;
use std::collections::HashSet;

#[test]
fn line_trimming() {
Expand Down Expand Up @@ -940,7 +1004,10 @@ mod tests {
let helper = |ptn: &str, text: &str| -> bool {
FMBuilder::new(ptn)
.unwrap()
.distinct_name_matching(true)
.name_matching_processor(|names| {
let vals = names.values().collect::<HashSet<_>>();
vals.len() == names.len()
})
.name_matcher_ignore(nameptn_ignore_re.clone(), name_re.clone())
.name_matcher(nameptn_normal_re.clone(), name_re.clone())
.build()
Expand Down Expand Up @@ -1080,6 +1147,30 @@ mod tests {
let nameptn_re = Regex::new(r"\$.+?\b").unwrap();
let name_re = Regex::new(r".+?\b").unwrap();
let helper = |ptn: &str, text: &str| -> bool {
FMBuilder::new(ptn)
.unwrap()
.name_matcher(nameptn_re.clone(), name_re.clone())
.name_matching_processor(|names| {
names.values().collect::<HashSet<_>>().len() == names.len()
})
.build()
.unwrap()
.matches(text)
.is_ok()
};

assert!(helper("$1 $1", "a a"));
assert!(!helper("$1 $1", "a b"));
assert!(!helper("$1 $2", "a a"));
}

/// This test can be removed when [FMBuilder::distinct_name_matching] is removed.
#[test]
fn distinct_names_deprecated() {
let nameptn_re = Regex::new(r"\$.+?\b").unwrap();
let name_re = Regex::new(r".+?\b").unwrap();
let helper = |ptn: &str, text: &str| -> bool {
#[allow(deprecated)]
FMBuilder::new(ptn)
.unwrap()
.name_matcher(nameptn_re.clone(), name_re.clone())
Expand Down

0 comments on commit 15957fc

Please sign in to comment.