From fb6250e8775af5cd3bb5961d3effbec8d1813872 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Tue, 23 Jul 2024 09:41:36 +0100 Subject: [PATCH] Implement support for "name matching validators". This is an unwieldy name for a complicated-sounding feature that's actually rather simple. In essence, this generalises the previous support we had for "distinct name matching", allowing the user to determine what set of name matching pairs should be considered a successful match or not. For example, distinct name matching is just: ``` .name_matching_validator(|names| { names.values().collect::<HashSet<_>>().len() == names.len() }) ``` Of course, there are other uses this can be put towards! Because I'm a nice person, this commit still supports the `distinct_name_matching` function, though it is deprecated. There is a -- very unlikely -- sequence you could call which would mean that if you turn distinct name matching on and then off, it won't actually turn off: there's only so much I can do. --- src/lib.rs | 143 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 127 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7869fb3..3480db5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,16 @@ #![doc = include_str!("../README.md")] #![allow(clippy::upper_case_acronyms)] +#![allow(clippy::type_complexity)] use std::{ - collections::hash_map::{Entry, HashMap}, + collections::{ + hash_map::{Entry, HashMap}, + HashSet, + }, default::Default, error::Error, fmt, + panic::UnwindSafe, }; use regex::Regex; @@ -16,11 +21,10 @@ const GROUP_ANCHOR_WILDCARD: &str = "..~"; const INTRALINE_WILDCARD: &str = "..."; const ERROR_MARKER: &str = ">>"; -#[derive(Debug)] struct FMOptions { output_formatter: OutputFormatter, name_matchers: Vec<(Regex, Regex, bool)>, - distinct_name_matching: bool, + name_matching_validators: Vec<Box<dyn Fn(&HashMap<&str, &str>) -> bool + UnwindSafe>>, trim_whitespace: bool, } @@ -29,12 +33,18 @@ impl Default for FMOptions { FMOptions { output_formatter: 
OutputFormatter::InputThenSummary, name_matchers: Vec::new(), - distinct_name_matching: false, + name_matching_validators: Vec::new(), trim_whitespace: true, } } } +impl fmt::Debug for FMOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "FMOptions {{ .. }}") + } +} + /// How should an [FMatchError] format itself? Where: /// /// * `Input` means the literal text passed to fmt. @@ -164,8 +174,7 @@ impl<'a> FMBuilder<'a> { /// ``` /// /// As this shows, once `$1` has matched "a", all further instances of `$1` must also match - /// "a", but `_` can match different values at different points. This is true even if distinct - /// name matching (see [Self::distinct_name_matching] is enabled. + /// "a", but `_` can match different values at different points. pub fn name_matcher_ignore(mut self, ptn_re: Regex, text_re: Regex) -> Self { self.options.name_matchers.push((ptn_re, text_re, true)); self @@ -175,8 +184,62 @@ impl<'a> FMBuilder<'a> { /// to `a` then `$2` will refuse to match against `a` (though `$1` will continue to match /// against only `a`). Note that ignorable name matches (see [Self::name_matcher_ignore]) are /// never subject to distinct name matching. Defaults to `false`. - pub fn distinct_name_matching(mut self, yes: bool) -> Self { - self.options.distinct_name_matching = yes; + /// + /// # Warning + /// + /// If you call this function with `true` then later with `false`, the latter will not take + /// effect. + #[deprecated(since = "0.3.1", note = "Please use name_matching_validator instead")] + pub fn distinct_name_matching(self, yes: bool) -> Self { + if yes { + self.name_matching_validator(|names| { + names.values().collect::<HashSet<_>>().len() == names.len() + }) + } else { + self + } + } + + /// Add a name matching validator: this takes a [HashMap] of `(key, value)` pairs and must + /// return `true` if this is a valid set of pairs or false otherwise. 
Name matching validators + /// allow you to customise what names are valid matches. For example, if you want distinct + /// names to match distinct values you can add a name matching validator which converts values + /// to a [HashSet] and fails if the resulting set has fewer entries than the input hashmap: + /// + /// ```rust + /// use {fm::FMBuilder, regex::Regex}; + /// use std::collections::HashSet; + /// + /// let ptn_re = Regex::new(r"\$[0-9]+?\b").unwrap(); + /// let text_re = Regex::new(r"[a-b]+?\b").unwrap(); + /// let matcher = FMBuilder::new("$1 $2") + /// .unwrap() + /// .name_matcher(ptn_re, text_re) + /// .name_matching_validator(|names| { + /// names.values().collect::<HashSet<_>>().len() == names.len() + /// }) + /// .build() + /// .unwrap(); + /// assert!(matcher.matches("a b").is_ok()); + /// assert!(matcher.matches("a a").is_err()); + /// ``` + /// + /// As this shows, since `$1` matches `a`, the name matching validator returns false if `$2` + /// also matches `a`. + /// + /// Note that name matching validators must not confuse "doesn't match" with "is an error": + /// just because text doesn't match at a given point does not mean there is an error. + /// + /// Name matching validators are called frequently, so their performance can be an issue if you + /// have large inputs. You may need to benchmark carefully. + /// + /// Multiple name matching validators are allowed: they are matched in the order they were + /// added to `FMBuilder`. + pub fn name_matching_validator<F>(mut self, f: F) -> Self + where + F: Fn(&HashMap<&str, &str>) -> bool + UnwindSafe + 'static, + { + self.options.name_matching_validators.push(Box::new(f)); self } @@ -191,6 +254,11 @@ impl<'a> FMBuilder<'a> { /// Turn this `FMBuilder` into a `FMatcher`. 
pub fn build(self) -> Result<FMatcher<'a>, Box<dyn Error>> { + if !self.options.name_matching_validators.is_empty() + && self.options.name_matchers.is_empty() + { + return Err("If a name matching validator(s) is/are specified, one or more name matchers must also be specified".into()); + } self.validate()?; let (ptn_lines, ptn_lines_off) = line_trimmer(self.options.trim_whitespace, self.ptn); Ok(FMatcher { @@ -406,13 +474,6 @@ impl<'a> FMatcher<'a> { panic!("Text pattern matched the empty string."); } if !ignore { - if self.options.distinct_name_matching { - for (x, y) in names.iter().chain(new_names.iter()) { - if *x != key && *y == val { - return false; - } - } - } match names.entry(key) { Entry::Occupied(e) => { if *e.get() != val { @@ -430,6 +491,16 @@ impl<'a> FMatcher<'a> { } }, } + + if !self.options.name_matching_validators.is_empty() { + let mut all_names = names.clone(); + all_names.extend(&new_names); + for nmv in &self.options.name_matching_validators { + if !nmv(&all_names) { + return false; + } + } + } } ptn_i += ptnm.len(); text_i += textm.len(); @@ -648,6 +719,7 @@ fn line_trimmer<'a>(trim: bool, s: &'a str) -> (Vec<&'a str>, usize) { mod tests { use super::*; use proptest::proptest; + use std::collections::HashSet; #[test] fn line_trimming() { @@ -940,7 +1012,10 @@ mod tests { let helper = |ptn: &str, text: &str| -> bool { FMBuilder::new(ptn) .unwrap() - .distinct_name_matching(true) + .name_matching_validator(|names| { + let vals = names.values().collect::<HashSet<_>>(); + vals.len() == names.len() + }) .name_matcher_ignore(nameptn_ignore_re.clone(), name_re.clone()) .name_matcher(nameptn_normal_re.clone(), name_re.clone()) .build() @@ -1029,6 +1104,18 @@ mod tests { .unwrap(); } + #[test] + #[should_panic( + expected = "If a name matching validator(s) is/are specified, one or more name matchers must also be specified" + )] + fn name_matching_validator_requires_name_matcher() { + FMBuilder::new("$1") + .unwrap() + .name_matching_validator(|_| true) + .build() + .unwrap(); + } + 
#[test] fn consecutive_wildcards_disallowed() { match FMatcher::new("...\n...") { @@ -1080,6 +1167,30 @@ mod tests { let nameptn_re = Regex::new(r"\$.+?\b").unwrap(); let name_re = Regex::new(r".+?\b").unwrap(); let helper = |ptn: &str, text: &str| -> bool { + FMBuilder::new(ptn) + .unwrap() + .name_matcher(nameptn_re.clone(), name_re.clone()) + .name_matching_validator(|names| { + names.values().collect::<HashSet<_>>().len() == names.len() + }) + .build() + .unwrap() + .matches(text) + .is_ok() + }; + + assert!(helper("$1 $1", "a a")); + assert!(!helper("$1 $1", "a b")); + assert!(!helper("$1 $2", "a a")); + } + + /// This test can be removed when [FMBuilder::distinct_name_matching] is removed. + #[test] + fn distinct_names_deprecated() { + let nameptn_re = Regex::new(r"\$.+?\b").unwrap(); + let name_re = Regex::new(r".+?\b").unwrap(); + let helper = |ptn: &str, text: &str| -> bool { + #[allow(deprecated)] FMBuilder::new(ptn) .unwrap() .name_matcher(nameptn_re.clone(), name_re.clone())