From 15957fc82381b4d4c653ffa7013877da43f0b1d1 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Tue, 23 Jul 2024 09:41:36 +0100 Subject: [PATCH] Implement support for "name matching processors". This is an unwieldy name for a complicated-sounding feature that's actually rather simple. In essence, this generalises the previous support we had for "distinct name matching", allowing the user to determine what set of name matching pairs should be considered a successful match or not. For example, distinct name matching is just: ``` .name_matching_processor(|names| { names.values().collect::<HashSet<_>>().len() == names.len() }) ``` Of course, there are other uses this can be put towards! Because I'm a nice person, this commit still supports the `distinct_name_matching` function, though it is deprecated. There is a -- very unlikely -- sequence you could call which would mean that if you turn distinct name matching on and then off, it won't actually turn off: there's only so much I can do. --- src/lib.rs | 119 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 105 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7869fb3..09a8ecf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,16 @@ #![doc = include_str!("../README.md")] #![allow(clippy::upper_case_acronyms)] +#![allow(clippy::type_complexity)] use std::{ - collections::hash_map::{Entry, HashMap}, + collections::{ + hash_map::{Entry, HashMap}, + HashSet, + }, default::Default, error::Error, fmt, + panic::UnwindSafe, }; use regex::Regex; @@ -16,11 +21,10 @@ const GROUP_ANCHOR_WILDCARD: &str = "..~"; const INTRALINE_WILDCARD: &str = "..."; const ERROR_MARKER: &str = ">>"; -#[derive(Debug)] struct FMOptions { output_formatter: OutputFormatter, name_matchers: Vec<(Regex, Regex, bool)>, - distinct_name_matching: bool, + name_matching_processors: Vec<Box<dyn Fn(&HashMap<&str, &str>) -> bool + UnwindSafe>>, trim_whitespace: bool, } @@ -29,12 +33,18 @@ impl Default for FMOptions { FMOptions { output_formatter: 
OutputFormatter::InputThenSummary, name_matchers: Vec::new(), - distinct_name_matching: false, + name_matching_processors: Vec::new(), trim_whitespace: true, } } } +impl fmt::Debug for FMOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "blah") + } +} + /// How should an [FMatchError] format itself? Where: /// /// * `Input` means the literal text passed to fmt. @@ -175,8 +185,58 @@ impl<'a> FMBuilder<'a> { /// to `a` then `$2` will refuse to match against `a` (though `$1` will continue to match /// against only `a`). Note that ignorable name matches (see [Self::name_matcher_ignore]) are /// never subject to distinct name matching. Defaults to `false`. - pub fn distinct_name_matching(mut self, yes: bool) -> Self { - self.options.distinct_name_matching = yes; + #[deprecated(since = "0.3.1", note = "Please use name_matching_processor instead")] + pub fn distinct_name_matching(self, yes: bool) -> Self { + if yes { + self.name_matching_processor(|names| { + names.values().collect::<HashSet<_>>().len() == names.len() + }) + } else { + self + } + } + + /// Add a name matching processor: this takes a [HashMap] of `(key, value)` pairs and must + /// return `true` if this is a valid set of pairs or false otherwise. Name matching processors + /// allow you to customise what names are valid matches. 
For example, if you want distinct + /// names to match distinct values you can add a name matching processor which converts values + /// to a [HashSet] and fails if the resulting set has fewer entries than the input hashmap: + /// + /// ```rust + /// use {fm::FMBuilder, regex::Regex}; + /// use std::collections::HashSet; + /// + /// let ptn_re = Regex::new(r"\$[0-9]+?\b").unwrap(); + /// let text_re = Regex::new(r"[a-b]+?\b").unwrap(); + /// let matcher = FMBuilder::new("$1 $2") + /// .unwrap() + /// .name_matcher(ptn_re, text_re) + /// .name_matching_processor(|names| { + /// names.values().collect::<HashSet<_>>().len() == names.len() + /// }) + /// .build() + /// .unwrap(); + /// assert!(matcher.matches("a b").is_ok()); + /// assert!(matcher.matches("a a").is_err()); + /// ``` + /// + /// As this shows, since `$1` matches `a`, the name matching processor returns false if `$2` + /// also matches `a`. + /// + /// Note that name matching processors must not confuse "doesn't match" with "is an error": fm + /// calls name matching processors to see if a match is possible. Just because text doesn't + /// match at a given point does not mean there is an error. + /// + /// Name matching processors are called frequently, so their performance can be an issue if you + /// have large inputs. You may need to benchmark carefully. + /// + /// Multiple name matching processors are allowed: they are matched in the order they were + /// added to `FMBuilder`. 
+ pub fn name_matching_processor<F>(mut self, f: F) -> Self + where + F: Fn(&HashMap<&str, &str>) -> bool + UnwindSafe + 'static, + { + self.options.name_matching_processors.push(Box::new(f)); self } @@ -406,13 +466,6 @@ impl<'a> FMatcher<'a> { panic!("Text pattern matched the empty string."); } if !ignore { - if self.options.distinct_name_matching { - for (x, y) in names.iter().chain(new_names.iter()) { - if *x != key && *y == val { - return false; - } - } - } match names.entry(key) { Entry::Occupied(e) => { if *e.get() != val { @@ -430,6 +483,16 @@ impl<'a> FMatcher<'a> { } }, } + + if !self.options.name_matching_processors.is_empty() { + let mut all_names = names.clone(); + all_names.extend(&new_names); + for nmp in &self.options.name_matching_processors { + if !nmp(&all_names) { + return false; + } + } + } } ptn_i += ptnm.len(); text_i += textm.len(); @@ -648,6 +711,7 @@ fn line_trimmer<'a>(trim: bool, s: &'a str) -> (Vec<&'a str>, usize) { mod tests { use super::*; use proptest::proptest; + use std::collections::HashSet; #[test] fn line_trimming() { @@ -940,7 +1004,10 @@ mod tests { let helper = |ptn: &str, text: &str| -> bool { FMBuilder::new(ptn) .unwrap() - .distinct_name_matching(true) + .name_matching_processor(|names| { + let vals = names.values().collect::<HashSet<_>>(); + vals.len() == names.len() + }) .name_matcher_ignore(nameptn_ignore_re.clone(), name_re.clone()) .name_matcher(nameptn_normal_re.clone(), name_re.clone()) .build() @@ -1080,6 +1147,30 @@ mod tests { let nameptn_re = Regex::new(r"\$.+?\b").unwrap(); let name_re = Regex::new(r".+?\b").unwrap(); let helper = |ptn: &str, text: &str| -> bool { + FMBuilder::new(ptn) + .unwrap() + .name_matcher(nameptn_re.clone(), name_re.clone()) + .name_matching_processor(|names| { + names.values().collect::<HashSet<_>>().len() == names.len() + }) + .build() + .unwrap() + .matches(text) + .is_ok() + }; + + assert!(helper("$1 $1", "a a")); + assert!(!helper("$1 $1", "a b")); + assert!(!helper("$1 $2", "a a")); + } + + /// This test 
can be removed when [FMBuilder::distinct_name_matching] is removed. + #[test] + fn distinct_names_deprecated() { + let nameptn_re = Regex::new(r"\$.+?\b").unwrap(); + let name_re = Regex::new(r".+?\b").unwrap(); + let helper = |ptn: &str, text: &str| -> bool { + #[allow(deprecated)] FMBuilder::new(ptn) .unwrap() .name_matcher(nameptn_re.clone(), name_re.clone())