Skip to content

Commit

Permalink
feat: add support for unicode characters in anchor links
Browse files Browse the repository at this point in the history
  • Loading branch information
dklimpel committed Jun 1, 2024
1 parent 71ccb41 commit b2beaa0
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 6 deletions.
27 changes: 24 additions & 3 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,30 @@ function extractSections(markdown) {

const sectionTitles = markdown.match(/^#+ .*$/gm) || [];

const sections = sectionTitles.map(section =>
section.replace(/^\W+/, '').replace(/\W+$/, '').replace(/[^\w\s-]+/g, '').replace(/\s+/g, '-').toLowerCase()
);
// Convert every heading line into the anchor string that hash links are
// compared against (plain text comparison, see the final encoding step).
const sections = sectionTitles.map(section => {
    const anchor = section
        // Unwrap markdown links and keep only the link text ($1). A link target
        // may start with "./", "/", "http://", "https://" or "#".
        // The lazy quantifier plus the "g" flag unwrap each link separately, so
        // a heading containing several links no longer collapses into a single
        // match that leaks the target text into the anchor.
        .replace(/\[(.+?)\]\((?:\.?\/|https?:\/\/|#)[\w./?=#-]+\)/g, "$1")
        // make everything (Unicode-aware) lower case
        .toLowerCase()
        // remove the leading "#" markers and the white space that follows them
        .replace(/^#+\s*/, '')
        // remove everything that is NOT a (Unicode) letter, (Unicode) decimal
        // number, (Unicode) letter number, white space, underscore or hyphen;
        // this also drops "*" and backticks, so no separate clean-up is needed
        .replace(/[^\p{L}\p{Nd}\p{Nl}\s_\-]/gu, "")
        // replace every remaining white space character with "-"
        .replace(/\s/gu, "-");

    // The links are compared with the headings (simple text comparison).
    // However, the links are url-encoded beforehand, so the headings
    // have to be encoded as well so that they can be matched.
    return encodeURIComponent(anchor);
});

var uniq = {};
for (var section of sections) {
Expand Down
32 changes: 30 additions & 2 deletions test/hash-links.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,34 @@ There is no section named [Potato](#potato).

There is an anchor named [Tomato](#tomato).

## Header with special char ✨
## Header with special char at end

Test [header with image](#header-with-special-char-)
Test [header with image](#header-with-special-char-at-end-)

## Header with multiple special chars at end ✨✨

Test [header with multiple images](#header-with-multiple-special-chars-at-end-)

## Header with special ✨ char

Test [header with image](#header-with-special--char)

## Header with multiple special ✨✨ chars

Test [header with multiple images](#header-with-multiple-special--chars)

## Header with German umlaut Ö

Link to [German umlaut Ö](#header-with-german-umlaut-ö)

## Header with German umlaut ö manual encoded link

Link to [German umlaut ö manual encoded in link](#header-with-german-umlaut-%C3%B6-manual-encoded-link)

### [Heading with a link](https://github.com/tcort/markdown-link-check)

An [anchor link](#heading-with-a-link) to a heading.

### [Heading with an anchor link](#foo)

An [anchor link](#heading-with-an-anchor-link) to a heading.
10 changes: 9 additions & 1 deletion test/markdown-link-check.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,15 @@ describe('markdown-link-check', function () {
{ link: '#bar', statusCode: 200, err: null, status: 'alive' },
{ link: '#potato', statusCode: 404, err: null, status: 'dead' },
{ link: '#tomato', statusCode: 404, err: null, status: 'dead' },
{ link: '#header-with-special-char-', statusCode: 404, err: null, status: 'dead' },
{ link: '#header-with-special-char-at-end-', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-multiple-special-chars-at-end-', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-special--char', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-multiple-special--chars', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-german-umlaut-%C3%B6', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-german-umlaut-%C3%B6-manual-encoded-link', statusCode: 200, err: null, status: 'alive' },
{ link: 'https://github.com/tcort/markdown-link-check', statusCode: 200, err: null, status: 'alive' },
{ link: '#heading-with-a-link', statusCode: 200, err: null, status: 'alive' },
{ link: '#heading-with-an-anchor-link', statusCode: 200, err: null, status: 'alive' },
]);
done();
});
Expand Down

0 comments on commit b2beaa0

Please sign in to comment.