Skip to content

Commit

Permalink
Merge pull request #218 from vedang/fix/simplify-precompiled-regex
Browse files Browse the repository at this point in the history
Simplify pre-compiled regular expressions for sigils
  • Loading branch information
stepchowfun authored Mar 14, 2024
2 parents c4e961d + 4f65fe6 commit 63663a8
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 20 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.10.0] - 2024-03-14

### Changed
- Tagref now supports whitespace in tag names and in paths in file and directory references.

## [1.9.1] - 2024-02-21

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "tagref"
version = "1.9.1"
version = "1.10.0"
authors = ["Stephan Boyer <stephan@stephanboyer.com>"]
edition = "2021"
description = "Tagref helps you maintain cross-references in your code."
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ A directory reference guarantees that the given directory exists. For example:

## Tag names

The name of a tag may consist of any UTF-8 text except whitespace and the right square bracket `]`. For example, `[tag:foo_bar]` and `[tag:ほげ〜ふが]` are valid, but `[tag:foo bar]` is not. Tag names are case-sensitive, so `[tag:foo]` and `[tag:Foo]` are different tags.
The name of a tag may consist of any UTF-8 text except the right square bracket `]`. Internal whitespace (as in `[tag:foo bar]`) is allowed, and surrounding whitespace (as in `[tag: baz ]`) is ignored. More examples of valid tags: `[tag:foo_bar]` and `[tag:ほげ〜ふが]`. Tag names are case-sensitive, so `[tag:foo]` and `[tag:Foo]` are different tags.

You can use any naming convention you like. The Tagref authors prefer to use lowercase words separated by underscores `_`, like `[tag:important_note]`.

Expand Down
24 changes: 12 additions & 12 deletions src/directive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ mod tests {
std::path::Path,
};

const TAG_REGEX: &str = "(?i)\\[\\s*tag\\s*:\\s*([^\\]\\s]*)\\s*\\]";
const REF_REGEX: &str = "(?i)\\[\\s*ref\\s*:\\s*([^\\]\\s]*)\\s*\\]";
const FILE_REGEX: &str = "(?i)\\[\\s*file\\s*:\\s*([^\\]\\s]*)\\s*\\]";
const DIR_REGEX: &str = "(?i)\\[\\s*dir\\s*:\\s*([^\\]\\s]*)\\s*\\]";
const TAG_REGEX: &str = "(?i)\\[\\s*tag\\s*:\\s*([^\\]]*?)\\s*\\]"; // [ref:directive_regex]
const REF_REGEX: &str = "(?i)\\[\\s*ref\\s*:\\s*([^\\]]*?)\\s*\\]"; // [ref:directive_regex]
const FILE_REGEX: &str = "(?i)\\[\\s*file\\s*:\\s*([^\\]]*?)\\s*\\]"; // [ref:directive_regex]
const DIR_REGEX: &str = "(?i)\\[\\s*dir\\s*:\\s*([^\\]]*?)\\s*\\]"; // [ref:directive_regex]

#[test]
fn parse_empty() {
Expand Down Expand Up @@ -409,10 +409,10 @@ mod tests {
fn parse_whitespace() {
let path = Path::new("file.rs").to_owned();
let contents = r"
[ ?tag : label ]
[ ?ref : label ]
[ ?file : foo/bar/baz.txt ]
[ ?dir : foo/bar/baz ]
[ ?tag : foo bar ]
[ ?ref : foo bar ]
[ ?file : foo bar/baz qux.txt ]
[ ?dir : foo bar/baz qux ]
"
.trim()
.replace('?', "")
Expand All @@ -435,25 +435,25 @@ mod tests {

assert_eq!(directives.tags.len(), 1);
assert_eq!(directives.tags[0].r#type, Type::Tag);
assert_eq!(directives.tags[0].label, "label");
assert_eq!(directives.tags[0].label, "foo bar");
assert_eq!(directives.tags[0].path, path);
assert_eq!(directives.tags[0].line_number, 1);

assert_eq!(directives.refs.len(), 1);
assert_eq!(directives.refs[0].r#type, Type::Ref);
assert_eq!(directives.refs[0].label, "label");
assert_eq!(directives.refs[0].label, "foo bar");
assert_eq!(directives.refs[0].path, path);
assert_eq!(directives.refs[0].line_number, 2);

assert_eq!(directives.files.len(), 1);
assert_eq!(directives.files[0].r#type, Type::File);
assert_eq!(directives.files[0].label, "foo/bar/baz.txt");
assert_eq!(directives.files[0].label, "foo bar/baz qux.txt");
assert_eq!(directives.files[0].path, path);
assert_eq!(directives.files[0].line_number, 3);

assert_eq!(directives.dirs.len(), 1);
assert_eq!(directives.dirs[0].r#type, Type::Dir);
assert_eq!(directives.dirs[0].label, "foo/bar/baz");
assert_eq!(directives.dirs[0].label, "foo bar/baz qux");
assert_eq!(directives.dirs[0].path, path);
assert_eq!(directives.dirs[0].line_number, 4);
}
Expand Down
15 changes: 10 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,24 +207,29 @@ fn entry() -> Result<(), String> {
// Parse the command-line options.
let settings = settings();

// Compile the regular expressions in advance.
// [tag:directive_regex] Compile the regular expressions in
// advance. The string literal used here in the format macro is
// also used in other places. If you change the literal, make sure
// to change all references to the tag as well. See:
// https://github.com/rust-lang/rust/issues/69133 for why format
// needs a string literal.
let tag_regex: Regex = Regex::new(&format!(
"(?i)\\[\\s*{}\\s*:\\s*([^\\]\\s]*)\\s*\\]",
"(?i)\\[\\s*{}\\s*:\\s*([^\\]]*?)\\s*\\]", // [ref:directive_regex]
escape(&settings.tag_sigil),
))
.unwrap(); // Safe by manual inspection
let ref_regex: Regex = Regex::new(&format!(
"(?i)\\[\\s*{}\\s*:\\s*([^\\]\\s]*)\\s*\\]",
"(?i)\\[\\s*{}\\s*:\\s*([^\\]]*?)\\s*\\]", // [ref:directive_regex]
escape(&settings.ref_sigil),
))
.unwrap(); // Safe by manual inspection
let file_regex: Regex = Regex::new(&format!(
"(?i)\\[\\s*{}\\s*:\\s*([^\\]\\s]*)\\s*\\]",
"(?i)\\[\\s*{}\\s*:\\s*([^\\]]*?)\\s*\\]", // [ref:directive_regex]
escape(&settings.file_sigil),
))
.unwrap(); // Safe by manual inspection
let dir_regex: Regex = Regex::new(&format!(
"(?i)\\[\\s*{}\\s*:\\s*([^\\]\\s]*)\\s*\\]",
"(?i)\\[\\s*{}\\s*:\\s*([^\\]]*?)\\s*\\]", // [ref:directive_regex]
escape(&settings.dir_sigil),
))
.unwrap(); // Safe by manual inspection
Expand Down

0 comments on commit 63663a8

Please sign in to comment.