Skip to content

Commit

Permalink
Add support for "ignorable" name matches.
Browse files Browse the repository at this point in the history
One irritation with fm is that if you need to match (potentially or
definitely) different literal text in a pattern, you need to generate
fresh pattern names for each match point, even though you're not
comparing the contents of these names. For example, if you want to match
against the text "a b" you might use a pattern `$1 $2` (note: `$1 $1`
would not match).

This commit adds support for "ignorable" name matches, which match
against literal text, but ignore what the contents are. For example, if
you add `$_` as the ignorable pattern matching literal text then you can
match against `a b` with `$_ $_`.

I'm not sure if `name_matcher_ignore` is the best name for the relevant
configuration function, but at the very least it shows what this
functionality can do, and that it's relatively easy to add in.
  • Loading branch information
ltratt committed May 20, 2024
1 parent 52fc129 commit 393dfb4
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 25 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ can set options as follows:
```rust
use {fm::FMBuilder, regex::Regex};

let ptn_re = Regex::new(r"\$.+?\b").unwrap();
let text_re = Regex::new(r".+?\b").unwrap();
let ptn_re = Regex::new(r"\$[0-9]+?\b").unwrap();
let text_re = Regex::new(r"[a-z]+?\b").unwrap();
let matcher = FMBuilder::new("$1 $1")
.unwrap()
.name_matcher(ptn_re, text_re)
Expand Down
126 changes: 103 additions & 23 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const ERROR_MARKER: &str = ">>";
#[derive(Debug)]
struct FMOptions {
output_formatter: OutputFormatter,
name_matchers: Vec<(Regex, Regex)>,
name_matchers: Vec<(Regex, Regex, bool)>,
distinct_name_matching: bool,
ignore_leading_whitespace: bool,
ignore_trailing_whitespace: bool,
Expand Down Expand Up @@ -123,8 +123,8 @@ impl<'a> FMBuilder<'a> {
/// ```rust
/// use {fm::FMBuilder, regex::Regex};
///
/// let ptn_re = Regex::new(r"\$.+?\b").unwrap();
/// let text_re = Regex::new(r".+?\b").unwrap();
/// let ptn_re = Regex::new(r"\$[1]+?\b").unwrap();
/// let text_re = Regex::new(r"[a-b]+?\b").unwrap();
/// let matcher = FMBuilder::new("$1 b $1")
/// .unwrap()
/// .name_matcher(ptn_re, text_re)
Expand All @@ -142,13 +142,44 @@ impl<'a> FMBuilder<'a> {
/// Multiple name matchers are allowed: they are matched in the order they were added to
/// `FMBuilder`.
pub fn name_matcher(mut self, ptn_re: Regex, text_re: Regex) -> Self {
self.options.name_matchers.push((ptn_re, text_re));
self.options.name_matchers.push((ptn_re, text_re, false));
self
}

/// Add an "ignorable" name matcher. It is registered in the same way as a matcher added with
/// [Self::name_matcher], *but* the text it matches is never recorded or compared. Use this when
/// text must follow a certain "shape" and you care about neither a) the concrete value nor
/// b) generating a fresh name for every such occurrence. Ignorable and "normal" name matchers
/// can be mixed freely, as in the following example:
///
/// ```rust
/// use {fm::FMBuilder, regex::Regex};
///
/// let ptn_re = Regex::new(r"\$[1]+?\b").unwrap();
/// let ptn_ignore_re = Regex::new(r"\$_\b").unwrap();
/// let text_re = Regex::new(r"[a-b]+?\b").unwrap();
/// let matcher = FMBuilder::new("$1 $_ $1 $_")
///     .unwrap()
///     .name_matcher(ptn_re, text_re.clone())
///     .name_matcher_ignore(ptn_ignore_re, text_re)
///     .build()
///     .unwrap();
/// assert!(matcher.matches("a b a a").is_ok());
/// assert!(matcher.matches("a b b a").is_err());
/// ```
///
/// As this shows, once `$1` has matched "a", every later `$1` must also match "a", whereas
/// `$_` may match a different value at each point where it appears. This holds even if
/// distinct name matching (see [Self::distinct_name_matching]) is enabled.
pub fn name_matcher_ignore(mut self, ptn_re: Regex, text_re: Regex) -> Self {
    // The third tuple element marks this matcher as ignorable.
    let ignorable = true;
    self.options
        .name_matchers
        .push((ptn_re, text_re, ignorable));
    self
}

/// If `yes`, then different names cannot match the same text value. For example if `$1` binds
/// to `a` then `$2` will refuse to match against `a` (though `$1` will continue to match
/// against only `a`). Defaults to `false`.
/// against only `a`). Note that ignorable name matches (see [Self::name_matcher_ignore]) are
/// never subject to distinct name matching. Defaults to `false`.
pub fn distinct_name_matching(mut self, yes: bool) -> Self {
self.options.distinct_name_matching = yes;
self
Expand Down Expand Up @@ -200,7 +231,7 @@ impl<'a> FMBuilder<'a> {
}
}

for (ref ptn_re, _) in &self.options.name_matchers {
for (ref ptn_re, _, _) in &self.options.name_matchers {
for (i, l) in lines.iter().enumerate() {
if l.starts_with(INTRALINE_WILDCARD) && ptn_re.is_match(l) {
return Err(Box::<dyn Error>::from(format!(
Expand Down Expand Up @@ -466,7 +497,7 @@ impl<'a> FMatcher<'a> {
&& text_i < text.len()
&& &ptn[ptn_i..] != INTRALINE_WILDCARD
{
for (ref ptn_re, ref text_re) in &self.options.name_matchers {
for (ref ptn_re, ref text_re, ignore) in &self.options.name_matchers {
if let Some(ptnm) = ptn_re.find(&ptn[ptn_i..]) {
if ptnm.start() != 0 {
continue;
Expand All @@ -478,29 +509,31 @@ impl<'a> FMatcher<'a> {
if val.is_empty() {
panic!("Text pattern matched the empty string.");
}
if self.options.distinct_name_matching {
for (x, y) in names.iter().chain(new_names.iter()) {
if *x != key && *y == val {
return false;
}
}
}
match names.entry(key) {
Entry::Occupied(e) => {
if *e.get() != val {
return false;
if !ignore {
if self.options.distinct_name_matching {
for (x, y) in names.iter().chain(new_names.iter()) {
if *x != key && *y == val {
return false;
}
}
}
Entry::Vacant(_) => match new_names.entry(key) {
match names.entry(key) {
Entry::Occupied(e) => {
if *e.get() != val {
return false;
}
}
Entry::Vacant(e) => {
e.insert(val);
}
},
Entry::Vacant(_) => match new_names.entry(key) {
Entry::Occupied(e) => {
if *e.get() != val {
return false;
}
}
Entry::Vacant(e) => {
e.insert(val);
}
},
}
}
ptn_i += ptnm.len();
text_i += textm.len();
Expand Down Expand Up @@ -894,6 +927,53 @@ mod tests {
assert!(!helper("..~\n$1\n$1\n..~", "a\nb\na\nb"));
}

#[test]
fn name_matcher_ignore() {
    // `$_` is registered as the ignorable name; every other `$name` is a normal name. Both
    // kinds of name match runs of lowercase ASCII letters in the text.
    let ignore_ptn = Regex::new(r"\$_\b").unwrap();
    let normal_ptn = Regex::new(r"\$[^_]+?\b").unwrap();
    let word = Regex::new(r"[a-z]+?\b").unwrap();
    let is_match = |ptn: &str, text: &str| -> bool {
        FMBuilder::new(ptn)
            .unwrap()
            .name_matcher_ignore(ignore_ptn.clone(), word.clone())
            .name_matcher(normal_ptn.clone(), word.clone())
            .build()
            .unwrap()
            .matches(text)
            .is_ok()
    };

    // (pattern, text, whether the pattern is expected to match the text)
    let cases = [
        ("$1, $1", "a, a", true),
        ("$1, $1", "a, b", false),
        ("$_, $_", "a, b", true),
        ("$_, $_", "1, 2", false),
        ("$1, $_, $1", "a, b, a", true),
        ("$1, $_, $1", "a, a, a", true),
    ];
    for &(ptn, text, expected) in cases.iter() {
        assert_eq!(
            is_match(ptn, text),
            expected,
            "pattern {:?} against text {:?}",
            ptn,
            text
        );
    }
}

#[test]
fn name_matcher_ignore_distinct_matching() {
    // Same matchers as in `name_matcher_ignore`, but with distinct name matching switched on:
    // the cases below check that `$_` is exempt from it while `$1`/`$2` are not.
    let ignore_ptn = Regex::new(r"\$_\b").unwrap();
    let normal_ptn = Regex::new(r"\$[^_]+?\b").unwrap();
    let word = Regex::new(r"[a-z]+?\b").unwrap();
    let is_match = |ptn: &str, text: &str| -> bool {
        FMBuilder::new(ptn)
            .unwrap()
            .distinct_name_matching(true)
            .name_matcher_ignore(ignore_ptn.clone(), word.clone())
            .name_matcher(normal_ptn.clone(), word.clone())
            .build()
            .unwrap()
            .matches(text)
            .is_ok()
    };

    // (pattern, text, whether the pattern is expected to match the text)
    let cases = [
        ("$1 $1 $2 $2", "a a b b", true),
        ("$1 $1 $2 $2", "a a a a", false),
        ("$1 $1 $_ $_", "a a b b", true),
        ("$1 $1 $_ $_", "a a a a", true),
    ];
    for &(ptn, text, expected) in cases.iter() {
        assert_eq!(
            is_match(ptn, text),
            expected,
            "pattern {:?} against text {:?}",
            ptn,
            text
        );
    }
}

#[test]
fn error_lines() {
let ptn_re = Regex::new("\\$.+?\\b").unwrap();
Expand Down

0 comments on commit 393dfb4

Please sign in to comment.