diff --git a/src/lib.rs b/src/lib.rs index c4fcfd6..ce95968 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,18 +18,17 @@ //! ``` mod render; +mod slug; use std::borrow::Borrow; -use std::collections::HashMap; use std::fmt::Write; use std::slice::Iter; -use once_cell::sync::Lazy; pub use pulldown_cmark::HeadingLevel; use pulldown_cmark::{Event, Options as CmarkOptions, Parser, Tag}; -use regex::Regex; pub use render::{ItemSymbol, Options}; +pub use slug::{GitHubSlugifier, Slugify}; ///////////////////////////////////////////////////////////////////////// // Definitions @@ -77,15 +76,6 @@ impl Heading<'_> { } buf } - - /// Generate an anchor link for this heading. - /// - /// This is calculated in the same way that GitHub calculates it. - pub fn anchor(&self) -> String { - static RE: Lazy = Lazy::new(|| Regex::new(r"[^\w\- ]").unwrap()); - RE.replace_all(&self.text().to_ascii_lowercase().replace(' ', "-"), "") - .into_owned() - } } impl<'a> TableOfContents<'a> { @@ -212,26 +202,15 @@ impl<'a> TableOfContents<'a> { item_symbol, levels, indent, + slugifier: mut slugger, } = options; - // this is to record duplicates - let mut counts = HashMap::new(); - let mut buf = String::new(); for heading in self.headings().filter(|h| levels.contains(&h.level())) { let title = crate::render::to_cmark(heading.events()); - let anchor = heading.anchor(); let indent = indent * (heading.level() as usize - *levels.start() as usize); // make sure the anchor is unique - let i = counts - .entry(anchor.clone()) - .and_modify(|i| *i += 1) - .or_insert(0); - let anchor = match *i { - 0 => anchor, - i => format!("{}-{}", anchor, i), - }; writeln!( buf, @@ -239,7 +218,7 @@ impl<'a> TableOfContents<'a> { "", item_symbol, title, - anchor, + slugger.slugify(&heading.text()), indent = indent, ) .unwrap(); @@ -278,25 +257,6 @@ mod tests { assert_eq!(heading.text(), "Here TOML"); } - #[test] - fn heading_anchor_with_code() { - let heading = Heading { - events: vec![Code(Borrowed("Another")), Text(Borrowed(" heading"))], - level: HeadingLevel::H1, - }; - assert_eq!(heading.anchor(), "another-heading"); - } - - #[test] - fn heading_anchor_with_links() { - let events = Parser::new("Here [TOML](https://toml.io)").collect(); - let heading = Heading { - events, - level: HeadingLevel::H1, - }; - assert_eq!(heading.anchor(), "here-toml"); - } - #[test] fn toc_new() { let toc = TableOfContents::new("# Heading\n\n## `Another` heading\n"); diff --git a/src/render.rs b/src/render.rs index d1b0346..4a0d689 100644 --- a/src/render.rs +++ b/src/render.rs @@ -7,6 +7,8 @@ use std::ops::RangeInclusive; use pulldown_cmark::{Event, HeadingLevel, Tag}; +use crate::slug::{GitHubSlugifier, Slugify}; + /// Which symbol to use when rendering Markdown list items. pub enum ItemSymbol { /// `-` @@ -32,6 +34,7 @@ pub struct Options { pub(crate) item_symbol: ItemSymbol, pub(crate) levels: RangeInclusive, pub(crate) indent: usize, + pub(crate) slugifier: Box, } pub(crate) fn to_cmark<'a, I, E>(events: I) -> String @@ -73,6 +76,7 @@ impl Default for Options { item_symbol: ItemSymbol::Hyphen, levels: (HeadingLevel::H1..=HeadingLevel::H6), indent: 2, + slugifier: Box::new(GitHubSlugifier::default()), } } } @@ -98,4 +102,11 @@ impl Options { self.indent = indent; self } + + /// The slugifier to use for the heading anchors. + #[must_use] + pub fn slugifier(mut self, slugifier: Box) -> Self { + self.slugifier = slugifier; + self + } } diff --git a/src/slug.rs b/src/slug.rs new file mode 100644 index 0000000..d5acbb2 --- /dev/null +++ b/src/slug.rs @@ -0,0 +1,77 @@ +use std::{borrow::Cow, collections::HashMap}; + +use once_cell::sync::Lazy; +use regex::Regex; + +/// A trait to specify the anchor calculation. +pub trait Slugify { + fn slugify<'a>(&mut self, str: &'a str) -> Cow<'a, str>; +} + +/// A slugifier that attempts to mimic GitHub's behavior. +/// +/// Unfortunately GitHub's behavior is not documented anywhere by GitHub. +/// This should really be part of the [GitHub Flavored Markdown Spec][gfm] +/// but alas it's not. And there also does not appear to be a public issue +/// tracker for the spec where that issue could be raised. +/// +/// [gfm]: https://github.github.com/gfm/ +#[derive(Default)] +pub struct GitHubSlugifier { + counts: HashMap, +} + +impl Slugify for GitHubSlugifier { + fn slugify<'a>(&mut self, str: &'a str) -> Cow<'a, str> { + static RE: Lazy = Lazy::new(|| Regex::new(r"[^\w\- ]").unwrap()); + let anchor = RE + .replace_all(&str.to_ascii_lowercase().replace(' ', "-"), "") + .into_owned(); + + let i = self + .counts + .entry(anchor.clone()) + .and_modify(|i| *i += 1) + .or_insert(0); + + match *i { + 0 => anchor, + i => format!("{}-{}", anchor, i), + } + .into() + } +} + +#[cfg(test)] +mod tests { + use crate::slug::{GitHubSlugifier, Slugify}; + use crate::Heading; + use pulldown_cmark::CowStr::Borrowed; + use pulldown_cmark::Event::{Code, Text}; + use pulldown_cmark::{HeadingLevel, Parser}; + + #[test] + fn heading_anchor_with_code() { + let heading = Heading { + events: vec![Code(Borrowed("Another")), Text(Borrowed(" heading"))], + level: HeadingLevel::H1, + }; + assert_eq!( + GitHubSlugifier::default().slugify(&heading.text()), + "another-heading" + ); + } + + #[test] + fn heading_anchor_with_links() { + let events = Parser::new("Here [TOML](https://toml.io)").collect(); + let heading = Heading { + events, + level: HeadingLevel::H1, + }; + assert_eq!( + GitHubSlugifier::default().slugify(&heading.text()), + "here-toml" + ); + } +}