Merge branch 'master' into html_anchors

tcort · Nov 5, 2024 · 51fc856 · 51fc856
2 parents 181d704 + 014ff95
commit 51fc856
Show file tree

Hide file tree

Showing 13 changed files with 1,196 additions and 1,082 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 node_modules
 .tern-port
+junit.xml
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
@@ -4,4 +4,4 @@
   entry: markdown-link-check
   language: node
   types: [markdown]
-  stages: [commit, push, manual]
+  stages: [pre-commit, pre-push, manual]
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changes
 
+## Version 3.12.2
+
+- fix status badge in README by @dklimpel in https://github.com/tcort/markdown-link-check/pull/303
+- enable skipped tests for hash links by @dklimpel in https://github.com/tcort/markdown-link-check/pull/306
+- chore: Upgrade to ESLint 9 by @nschonni in https://github.com/tcort/markdown-link-check/pull/318
+- Check GitHub markdown section links by @rkitover in https://github.com/tcort/markdown-link-check/pull/312
+- docs: add example for GitLab pipeline by @dklimpel in https://github.com/tcort/markdown-link-check/pull/309
+- ci: Use matrix for cross-OS testing by @nschonni in https://github.com/tcort/markdown-link-check/pull/307
+
 ## Version 3.12.1
 
 - fix: fix crash #297 @CanadaHonk

diff --git a/README.md b/README.md
@@ -59,7 +59,7 @@ linkchecker:
     name: ghcr.io/tcort/markdown-link-check:3.11.2
     entrypoint: ["/bin/sh", "-c"]
   script:
-    - find . -name \*.md -print0 | xargs -0 -n1 markdown-link-check
+    - markdown-link-check ./docs
   rules:
     - changes:
       - "**/*.md"
@@ -169,19 +169,22 @@ markdown-link-check ./README.md
 
 #### Check links from a local markdown folder (recursive)
 
-Avoid using `find -exec` because it will swallow the error from each consecutive run.
-Instead, use `xargs`:
+This checks all files with the `.md` file extension in the folder `./docs` and all of its subfolders:
+
 ```shell
-find . -name \*.md -print0 | xargs -0 -n1 markdown-link-check
+markdown-link-check ./docs
 ```
 
-There is an [open issue](https://github.com/tcort/markdown-link-check/issues/78) for allowing the tool to specify
-multiple files on the command line.
+The files can also be searched for and filtered manually:
+
+```shell
+find . -name \*.md -print0 | xargs -0 -n1 markdown-link-check
+```
 
 #### Usage
 
 ```shell
-Usage: markdown-link-check [options] [filenameOrUrl]
+Usage: markdown-link-check [options] [filenameOrDirectorynameOrUrl]
 
 Options:
   -p, --progress              show progress bar
@@ -200,7 +203,7 @@ Options:
 `config.json`:
 
 * `ignorePatterns`: An array of objects holding regular expressions which a link is checked against and skipped for checking in case of a match.
-* `replacementPatterns`: An array of objects holding regular expressions which are replaced in a link with their corresponding replacement string. This behavior allows (for example) to adapt to certain platform conventions hosting the Markdown. The special replacement `{{BASEURL}}` can be used to dynamically link to the current working directory (for example that `/` points to the root of your current working directory).
+* `replacementPatterns`: An array of objects holding regular expressions which are replaced in a link with their corresponding replacement string. This makes it possible, for example, to adapt links to the conventions of the platform hosting the Markdown. The special replacement `{{BASEURL}}` can be used to dynamically link to the current working directory (for example, so that `/` points to the root of your current working directory). Replacement strings support named capture groups (for example `$<filename>`) in the same way as the [`String.prototype.replace` method](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_string_as_the_replacement) in Node.js.
 * `httpHeaders`: The headers are only applied to links where the link **starts with** one of the supplied URLs in the `urls` section.
 * `timeout` timeout in [zeit/ms](https://www.npmjs.com/package/ms) format. (e.g. `"2000ms"`, `20s`, `1m`). Default `10s`.
 * `retryOn429` if this is `true` then retry request when response is an HTTP code 429 after the duration indicated by `retry-after` header.
@@ -232,6 +235,10 @@ Options:
       "pattern": "%20",
       "replacement": "-",
       "global": true
+    },
+    {
+      "pattern": "images/(?<filename>.*)",
+      "replacement": "assets/$<filename>"
     }
   ],
   "httpHeaders": [

diff --git a/index.js b/index.js
@@ -1,6 +1,5 @@
 'use strict';
 
-const _ = require('lodash');
 const async = require('async');
 const linkCheck = require('link-check');
 const LinkCheckResult = require('link-check').LinkCheckResult;
@@ -73,7 +72,29 @@ function extractSections(markdown) {
     const sectionTitles = markdown.match(/^#+ .*$/gm) || [];
 
     const sections = sectionTitles.map(section =>
-        section.replace(/^\W+/, '').replace(/\W+$/, '').replace(/[^\w\s-]+/g, '').replace(/\s+/g, '-').toLowerCase()
+        // The links are compared with the headings (simple text comparison).
+        // However, the links are url-encoded beforehand, so the headings
+        // have to also be encoded so that they can also be matched.
+        encodeURIComponent(
+            section
+                // replace links, the links can start with "./", "/", "http://", "https://" or "#"
+                // and keep the value of the text ($1)
+                .replace(/\[(.+)\]\(((?:\.?\/|https?:\/\/|#)[\w\d./?=#-]+)\)/, "$1")
+                // make everything (Unicode-aware) lower case
+                .toLowerCase()
+                // remove white spaces and "#" at the beginning
+                .replace(/^#+\s*/, '')
+                // remove everything that is NOT a (Unicode) Letter, (Unicode) Number decimal,
+                // (Unicode) Number letter, white space, underscore or hyphen
+                // https://ruby-doc.org/3.3.2/Regexp.html#class-Regexp-label-Unicode+Character+Categories
+                .replace(/[^\p{L}\p{Nd}\p{Nl}\s_\-`]/gu, "")
+                // remove sequences of *
+                .replace(/\*(?=.*)/gu, "")
+                // remove leftover backticks
+                .replace(/`/gu, "")
+                // Now replace remaining blanks with '-'
+                .replace(/\s/gu, "-")
+        )
     );
 
     var uniq = {};
@@ -109,7 +130,7 @@ module.exports = function markdownLinkCheck(markdown, opts, callback) {
 
     const links = markdownLinkExtractor(markdown);
     const sections = extractSections(markdown).concat(extractHtmlSections(markdown));
-    const linksCollection = _.uniq(links);
+    const linksCollection = [...new Set(links)]
     const bar = (opts.showProgressBar) ?
         new ProgressBar('Checking... [:bar] :percent', {
             complete: '=',

diff --git a/markdown-link-check b/markdown-link-check
@@ -4,13 +4,13 @@
 
 let chalk;
 const fs = require('fs');
-const markdownLinkCheck = require('./');
+const { promisify } = require('util');
+const markdownLinkCheck = promisify(require('./'));
 const needle = require('needle');
 const path = require('path');
 const pkg = require('./package.json');
 const { Command } = require('commander');
 const program = new Command();
-const url = require('url');
 const { ProxyAgent } = require('proxy-agent');
 
 class Input {
@@ -31,6 +31,26 @@ function commaSeparatedCodesList(value, dummyPrevious) {
     });
 }
 
+/**
+ * Recursively collect the paths of all files ending with .md in rootFolder and its subfolders
+ */
+function loadAllMarkdownFiles(rootFolder = '.') {
+    const files = [];
+    fs.readdirSync(rootFolder).forEach(file => {
+        const fullPath = path.join(rootFolder, file);
+        if (fs.lstatSync(fullPath).isDirectory()) {
+            files.push(...loadAllMarkdownFiles(fullPath));
+        } else if (fullPath.endsWith('.md')) {
+            files.push(fullPath);
+        }
+    });
+    return files;
+}
+
+function commaSeparatedReportersList(value) {
+    return value.split(',').map((reporter) => require(path.resolve('reporters', reporter)));
+}
+
 function getInputs() {
     const inputs = [];
 
@@ -40,11 +60,12 @@ function getInputs() {
         .option('-c, --config [config]', 'apply a config file (JSON), holding e.g. url specific header configuration')
         .option('-q, --quiet', 'displays errors only')
         .option('-v, --verbose', 'displays detailed error information')
-        .option('-i --ignore <paths>', 'ignore input paths including an ignore path', commaSeparatedPathsList)
+        .option('-i, --ignore <paths>', 'ignore input paths including an ignore path', commaSeparatedPathsList)
         .option('-a, --alive <code>', 'comma separated list of HTTP codes to be considered as alive', commaSeparatedCodesList)
         .option('-r, --retry', 'retry after the duration indicated in \'retry-after\' header when HTTP code is 429')
+        .option('--reporters <names>', 'specify reporters to use', commaSeparatedReportersList)
         .option('--projectBaseUrl <url>', 'the URL to use for {{BASEURL}} replacement')
-        .arguments('[filenamesOrUrls...]')
+        .arguments('[filenamesOrDirectorynamesOrUrls...]')
         .action(function (filenamesOrUrls) {
             let filenameForOutput;
             let stream;
@@ -70,6 +91,7 @@ function getInputs() {
             for (const filenameOrUrl of filenamesOrUrls) {
                 filenameForOutput = filenameOrUrl;
                 let baseUrl = '';
+                // remote file
                 if (/https?:/.test(filenameOrUrl)) {
                     stream = needle.get(
                         filenameOrUrl, { agent: new ProxyAgent(), use_proxy_from_env_var: false }
@@ -81,37 +103,44 @@ function getInputs() {
                         parsed.search = '';
                         parsed.hash = '';
                         if (parsed.pathname.lastIndexOf('/') !== -1) {
-                            parsed.pathname = parsed.pathname.substr(0, parsed.pathname.lastIndexOf('/') + 1);
+                            parsed.pathname = parsed.pathname.substring(0, parsed.pathname.lastIndexOf('/') + 1);
                         }
                         baseUrl = parsed.toString();
-                    } catch (err) { /* ignore error */
-                        }
+                        inputs.push(new Input(filenameForOutput, stream, {baseUrl: baseUrl}));
+                    } catch (err) {
+                        /* ignore error */
+                    }
                 } else {
-                    const stats = fs.statSync(filenameOrUrl);
-                    if (stats.isDirectory()){
-                        console.error(chalk.red('\nERROR: ' + filenameOrUrl + ' is a directory! Please provide a valid filename as an argument.'));
-                        process.exit(1);
+                    // local file or directory
+                    let files = [];
+
+                    if (fs.statSync(filenameOrUrl).isDirectory()){
+                        files = loadAllMarkdownFiles(filenameOrUrl)
+                    } else {
+                        files = [filenameOrUrl]
                     }
 
-                    const resolved = path.resolve(filenameOrUrl);
+                    for (let file of files) {
+                        filenameForOutput = file;
+                        const resolved = path.resolve(filenameForOutput);
 
-                    // skip paths given if it includes a path to ignore.
-                    // todo: allow ignore paths to be glob or regex instead of just includes?
-                    if (ignore && ignore.some((ignorePath) => resolved.includes(ignorePath))) {
-                        continue;
-                    }
+                        // skip paths given if it includes a path to ignore.
+                        // todo: allow ignore paths to be glob or regex instead of just includes?
+                        if (ignore && ignore.some((ignorePath) => resolved.includes(ignorePath))) {
+                            continue;
+                        }
 
-                    if (process.platform === 'win32') {
-                        baseUrl = 'file://' + path.dirname(resolved).replace(/\\/g, '/');
-                    }
-                    else {
-                        baseUrl = 'file://' + path.dirname(resolved);
-                    }
+                        if (process.platform === 'win32') {
+                            baseUrl = 'file://' + path.dirname(resolved).replace(/\\/g, '/');
+                        }
+                        else {
+                            baseUrl = 'file://' + path.dirname(resolved);
+                        }
 
-                    stream = fs.createReadStream(filenameOrUrl);
+                        stream = fs.createReadStream(filenameForOutput);
+                        inputs.push(new Input(filenameForOutput, stream, {baseUrl: baseUrl}));
+                    }
                 }
-
-                inputs.push(new Input(filenameForOutput, stream, {baseUrl: baseUrl}));
             }
         }
     ).parse(process.argv);
@@ -122,6 +151,7 @@ function getInputs() {
         input.opts.verbose = (program.opts().verbose === true);
         input.opts.retryOn429 = (program.opts().retry === true);
         input.opts.aliveStatusCodes = program.opts().alive;
+        input.opts.reporters = program.opts().reporters ?? [require(path.resolve('reporters', 'default.js'))];
         const config = program.opts().config;
         if (config) {
             input.opts.config = config.trim();
@@ -196,68 +226,23 @@ async function processInput(filenameForOutput, stream, opts) {
         opts.retryCount = config.retryCount;
         opts.fallbackRetryDelay = config.fallbackRetryDelay;
         opts.aliveStatusCodes = config.aliveStatusCodes;
+        opts.reporters = config.reporters;
     }
 
     await runMarkdownLinkCheck(filenameForOutput, markdown, opts);
 }
 
 async function runMarkdownLinkCheck(filenameForOutput, markdown, opts) {
-    const statusLabels = {
-        alive: chalk.green('✓'),
-        dead: chalk.red('✖'),
-        ignored: chalk.gray('/'),
-        error: chalk.yellow('⚠'),
-    };
+    const [err, results] = await markdownLinkCheck(markdown, opts)
+        .then(res => [null, res]).catch(err => [err]);
 
-    return new Promise((resolve, reject) => {
-        markdownLinkCheck(markdown, opts, function (err, results) {
-            if (err) {
-                console.error(chalk.red('\n  ERROR: something went wrong!'));
-                console.error(err.stack);
-                reject();
-            }
-
-            if (results.length === 0 && !opts.quiet) {
-                console.log(chalk.yellow('  No hyperlinks found!'));
-            }
-            results.forEach(function (result) {
-                // Skip messages for non-deadlinks in quiet mode.
-                if (opts.quiet && result.status !== 'dead') {
-                    return;
-                }
-
-                if (opts.verbose) {
-                    if (result.err) {
-                        console.log('  [%s] %s → Status: %s %s', statusLabels[result.status], result.link, result.statusCode, result.err);
-                    } else {
-                        console.log('  [%s] %s → Status: %s', statusLabels[result.status], result.link, result.statusCode);
-                    }
-                }
-                else if(!opts.quiet) {
-                    console.log('  [%s] %s', statusLabels[result.status], result.link);
-                }
-            });
+    await Promise.allSettled(
+        opts.reporters.map(reporter => reporter(err, results, opts, filenameForOutput)
+    ));
 
-            if(!opts.quiet){
-                console.log('\n  %s links checked.', results.length);
-            }
-
-            if (results.some((result) => result.status === 'dead')) {
-                let deadLinks = results.filter(result => { return result.status === 'dead'; });
-                if(!opts.quiet){
-                    console.error(chalk.red('\n  ERROR: %s dead links found!'), deadLinks.length);
-                } else {
-                    console.error(chalk.red('\n  ERROR: %s dead links found in %s !'), deadLinks.length, filenameForOutput);
-                }
-                deadLinks.forEach(function (result) {
-                    console.log('  [%s] %s → Status: %s', statusLabels[result.status], result.link, result.statusCode);
-                });
-                reject();
-            }
-
-            resolve();
-        });
-    });
+    if (err) throw null;
+    else if (results.some((result) => result.status === 'dead')) return;
+    else return;
 }
 
 async function main() {