Skip to content

Commit

Permalink
Implement support for "name matching validators".
Browse files Browse the repository at this point in the history
This is an unwieldy name for a complicated-sounding feature that's
actually rather simple. In essence, this generalises the previous
support we had for "distinct name matching", allowing the user to
determine what set of name matching pairs should be considered a
successful match or not.

For example, distinct name matching is just:

```
.name_matching_validator(|names| {
    names.values().collect::<HashSet<_>>().len() == names.len()
})
```

Of course, there are other uses this can be put towards!

Because I'm a nice person, this commit still supports the
`distinct_name_matching` function, though it is deprecated. There is a
-- very unlikely -- sequence you could call which would mean that if you
turn distinct name matching on and then off, it won't actually turn off:
there's only so much I can do.
  • Loading branch information
ltratt committed Jul 24, 2024
1 parent 5da2577 commit c4ec81c
Showing 1 changed file with 126 additions and 16 deletions.
142 changes: 126 additions & 16 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
#![doc = include_str!("../README.md")]
#![allow(clippy::upper_case_acronyms)]
#![allow(clippy::type_complexity)]

use std::{
collections::hash_map::{Entry, HashMap},
collections::{
hash_map::{Entry, HashMap},
HashSet,
},
default::Default,
error::Error,
fmt,
panic::UnwindSafe,
};

use regex::Regex;
Expand All @@ -16,11 +21,10 @@ const GROUP_ANCHOR_WILDCARD: &str = "..~";
const INTRALINE_WILDCARD: &str = "...";
const ERROR_MARKER: &str = ">>";

/// The settings an `FMBuilder` accumulates in its `options` field and that the matcher later
/// consults while matching.
#[derive(Debug)]
struct FMOptions {
/// Which `OutputFormatter` variant to use; the default is `InputThenSummary`.
output_formatter: OutputFormatter,
/// `(pattern regex, text regex, ignore)` triples. `name_matcher_ignore` pushes entries with the
/// final flag set to `true`.
name_matchers: Vec<(Regex, Regex, bool)>,
/// If `true`, a value already bound to one name may not be bound to a different name. Defaults
/// to `false`.
distinct_name_matching: bool,
/// Callbacks run over the accumulated name -> value map after a non-ignored name matches; the
/// match is rejected as soon as one of them returns `false`. Empty by default.
name_matching_validators: Vec<Box<dyn Fn(&HashMap<&str, &str>) -> bool + UnwindSafe>>,
/// Passed to `line_trimmer` when the matcher is built. Defaults to `true`.
trim_whitespace: bool,
}

Expand All @@ -29,12 +33,18 @@ impl Default for FMOptions {
FMOptions {
output_formatter: OutputFormatter::InputThenSummary,
name_matchers: Vec::new(),
distinct_name_matching: false,
name_matching_validators: Vec::new(),
trim_whitespace: true,
}
}
}

impl fmt::Debug for FMOptions {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "FMOptions { .. }")
}
}

/// How should an [FMatchError] format itself? Where:
///
/// * `Input` means the literal text passed to fmt.
Expand Down Expand Up @@ -164,8 +174,7 @@ impl<'a> FMBuilder<'a> {
/// ```
///
/// As this shows, once `$1` has matched "a", all further instances of `$1` must also match
/// "a", but `_` can match different values at different points. This is true even if distinct
/// name matching (see [Self::distinct_name_matching]) is enabled.
/// "a", but `_` can match different values at different points.
pub fn name_matcher_ignore(mut self, ptn_re: Regex, text_re: Regex) -> Self {
self.options.name_matchers.push((ptn_re, text_re, true));
self
Expand All @@ -175,8 +184,62 @@ impl<'a> FMBuilder<'a> {
/// to `a` then `$2` will refuse to match against `a` (though `$1` will continue to match
/// against only `a`). Note that ignorable name matches (see [Self::name_matcher_ignore]) are
/// never subject to distinct name matching. Defaults to `false`.
pub fn distinct_name_matching(mut self, yes: bool) -> Self {
self.options.distinct_name_matching = yes;
///
/// # Warning
///
/// If you call this function with `true` then later with `false`, the latter will not take
/// effect.
#[deprecated(since = "0.3.1", note = "Please use name_matching_validator instead")]
pub fn distinct_name_matching(self, yes: bool) -> Self {
    // Passing `false` is a no-op: a validator installed by an earlier `true` call stays in place.
    if !yes {
        return self;
    }
    // Distinctness holds exactly when no two names share a value, i.e. when de-duplicating the
    // values does not shrink the collection.
    self.name_matching_validator(|names| {
        let unique_vals = names.values().collect::<HashSet<_>>();
        unique_vals.len() == names.len()
    })
}

/// Add a name matching validator: a callback that decides whether the set of name matches
/// found so far is acceptable.
///
/// The validator is handed a [HashMap] of `(key, value)` pairs and must return `true` if that
/// set of pairs is valid or `false` otherwise. This lets you customise which names count as
/// valid matches. For example, to require distinct names to match distinct values, add a
/// validator which collects the values into a [HashSet] and fails if the set ends up with
/// fewer entries than the input hashmap:
///
/// ```rust
/// use {fm::FMBuilder, regex::Regex};
/// use std::collections::HashSet;
///
/// let ptn_re = Regex::new(r"\$[0-9]+?\b").unwrap();
/// let text_re = Regex::new(r"[a-b]+?\b").unwrap();
/// let matcher = FMBuilder::new("$1 $2")
/// .unwrap()
/// .name_matcher(ptn_re, text_re)
/// .name_matching_validator(|names| {
/// names.values().collect::<HashSet<_>>().len() == names.len()
/// })
/// .build()
/// .unwrap();
/// assert!(matcher.matches("a b").is_ok());
/// assert!(matcher.matches("a a").is_err());
/// ```
///
/// Here `$1` matches `a`, so the validator returns `false` when `$2` also matches `a`.
///
/// A validator must not confuse "doesn't match" with "is an error": text failing to match at
/// a given point does not by itself mean there is an error.
///
/// Validators are called frequently, so their performance can be an issue if you have large
/// inputs. You may need to benchmark carefully.
///
/// Several validators may be added: they are called in the order they were added to
/// `FMBuilder`.
pub fn name_matching_validator<F>(mut self, f: F) -> Self
where
    F: Fn(&HashMap<&str, &str>) -> bool + UnwindSafe + 'static,
{
    // Type-erase the closure so validators of different concrete types share one list.
    let validator: Box<dyn Fn(&HashMap<&str, &str>) -> bool + UnwindSafe> = Box::new(f);
    self.options.name_matching_validators.push(validator);
    self
}

Expand All @@ -191,6 +254,10 @@ impl<'a> FMBuilder<'a> {

/// Turn this `FMBuilder` into a `FMatcher`.
pub fn build(self) -> Result<FMatcher<'a>, Box<dyn Error>> {
if !self.options.name_matching_validators.is_empty() && self.options.name_matchers.is_empty()
{
return Err("If a name matching validator(s) is/are specified, one or more name matchers must also be specified".into());
}
self.validate()?;
let (ptn_lines, ptn_lines_off) = line_trimmer(self.options.trim_whitespace, self.ptn);
Ok(FMatcher {
Expand Down Expand Up @@ -406,13 +473,6 @@ impl<'a> FMatcher<'a> {
panic!("Text pattern matched the empty string.");
}
if !ignore {
if self.options.distinct_name_matching {
for (x, y) in names.iter().chain(new_names.iter()) {
if *x != key && *y == val {
return false;
}
}
}
match names.entry(key) {
Entry::Occupied(e) => {
if *e.get() != val {
Expand All @@ -430,6 +490,16 @@ impl<'a> FMatcher<'a> {
}
},
}

if !self.options.name_matching_validators.is_empty() {
let mut all_names = names.clone();
all_names.extend(&new_names);
for nmv in &self.options.name_matching_validators {
if !nmv(&all_names) {
return false;
}
}
}
}
ptn_i += ptnm.len();
text_i += textm.len();
Expand Down Expand Up @@ -648,6 +718,7 @@ fn line_trimmer<'a>(trim: bool, s: &'a str) -> (Vec<&'a str>, usize) {
mod tests {
use super::*;
use proptest::proptest;
use std::collections::HashSet;

#[test]
fn line_trimming() {
Expand Down Expand Up @@ -940,7 +1011,10 @@ mod tests {
let helper = |ptn: &str, text: &str| -> bool {
FMBuilder::new(ptn)
.unwrap()
.distinct_name_matching(true)
.name_matching_validator(|names| {
let vals = names.values().collect::<HashSet<_>>();
vals.len() == names.len()
})
.name_matcher_ignore(nameptn_ignore_re.clone(), name_re.clone())
.name_matcher(nameptn_normal_re.clone(), name_re.clone())
.build()
Expand Down Expand Up @@ -1029,6 +1103,18 @@ mod tests {
.unwrap();
}

/// `build` must reject a builder that has a name matching validator but no name matcher.
#[test]
#[should_panic(
    expected = "If a name matching validator(s) is/are specified, one or more name matchers must also be specified"
)]
fn name_matching_validator_requires_name_matcher() {
    // Only a validator is registered; no `name_matcher` call is made.
    let builder = FMBuilder::new("$1")
        .unwrap()
        .name_matching_validator(|_| true);
    // Unwrapping the `Err` from `build` produces the panic message checked above.
    builder.build().unwrap();
}

#[test]
fn consecutive_wildcards_disallowed() {
match FMatcher::new("...\n...") {
Expand Down Expand Up @@ -1080,6 +1166,30 @@ mod tests {
let nameptn_re = Regex::new(r"\$.+?\b").unwrap();
let name_re = Regex::new(r".+?\b").unwrap();
let helper = |ptn: &str, text: &str| -> bool {
FMBuilder::new(ptn)
.unwrap()
.name_matcher(nameptn_re.clone(), name_re.clone())
.name_matching_validator(|names| {
names.values().collect::<HashSet<_>>().len() == names.len()
})
.build()
.unwrap()
.matches(text)
.is_ok()
};

assert!(helper("$1 $1", "a a"));
assert!(!helper("$1 $1", "a b"));
assert!(!helper("$1 $2", "a a"));
}

/// This test can be removed when [FMBuilder::distinct_name_matching] is removed.
#[test]
fn distinct_names_deprecated() {
let nameptn_re = Regex::new(r"\$.+?\b").unwrap();
let name_re = Regex::new(r".+?\b").unwrap();
let helper = |ptn: &str, text: &str| -> bool {
#[allow(deprecated)]
FMBuilder::new(ptn)
.unwrap()
.name_matcher(nameptn_re.clone(), name_re.clone())
Expand Down

0 comments on commit c4ec81c

Please sign in to comment.