Skip to content

Commit

Permalink
Merge branch 'master' into html_anchors
Browse files Browse the repository at this point in the history
  • Loading branch information
dklimpel committed Nov 5, 2024
2 parents 181d704 + 014ff95 commit 51fc856
Show file tree
Hide file tree
Showing 13 changed files with 1,196 additions and 1,082 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
node_modules
.tern-port
junit.xml
2 changes: 1 addition & 1 deletion .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
entry: markdown-link-check
language: node
types: [markdown]
stages: [commit, push, manual]
stages: [pre-commit, pre-push, manual]
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changes

## Version 3.12.2

- fix status badge in README by @dklimpel in https://github.com/tcort/markdown-link-check/pull/303
- enable skipped tests for hash links by @dklimpel in https://github.com/tcort/markdown-link-check/pull/306
- chore: Upgrade to ESLint 9 by @nschonni in https://github.com/tcort/markdown-link-check/pull/318
- Check GitHub markdown section links by @rkitover in https://github.com/tcort/markdown-link-check/pull/312
- docs: add example for GitLab pipeline by @dklimpel in https://github.com/tcort/markdown-link-check/pull/309
- ci: Use matrix for cross-OS testing by @nschonni in https://github.com/tcort/markdown-link-check/pull/307

## Version 3.12.1

- fix: fix crash #297 @CanadaHonk
Expand Down
23 changes: 15 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ linkchecker:
name: ghcr.io/tcort/markdown-link-check:3.11.2
entrypoint: ["/bin/sh", "-c"]
script:
- find . -name \*.md -print0 | xargs -0 -n1 markdown-link-check
- markdown-link-check ./docs
rules:
- changes:
- "**/*.md"
Expand Down Expand Up @@ -169,19 +169,22 @@ markdown-link-check ./README.md

#### Check links from a local markdown folder (recursive)

Avoid using `find -exec` because it will swallow the error from each consecutive run.
Instead, use `xargs`:
This recursively checks all files with the `.md` extension in the `./docs` folder:

```shell
find . -name \*.md -print0 | xargs -0 -n1 markdown-link-check
markdown-link-check ./docs
```

There is an [open issue](https://github.com/tcort/markdown-link-check/issues/78) for allowing the tool to specify
multiple files on the command line.
The files can also be searched for and filtered manually:

```shell
find . -name \*.md -print0 | xargs -0 -n1 markdown-link-check
```

#### Usage

```shell
Usage: markdown-link-check [options] [filenameOrUrl]
Usage: markdown-link-check [options] [filenameOrDirectorynameOrUrl]
Options:
-p, --progress show progress bar
Expand All @@ -200,7 +203,7 @@ Options:
`config.json`:

* `ignorePatterns`: An array of objects holding regular expressions which a link is checked against and skipped for checking in case of a match.
* `replacementPatterns`: An array of objects holding regular expressions which are replaced in a link with their corresponding replacement string. This behavior allows (for example) to adapt to certain platform conventions hosting the Markdown. The special replacement `{{BASEURL}}` can be used to dynamically link to the current working directory (for example that `/` points to the root of your current working directory).
* `replacementPatterns`: An array of objects holding regular expressions which are replaced in a link with their corresponding replacement string. This makes it possible (for example) to adapt to the conventions of the platform hosting the Markdown. The special replacement `{{BASEURL}}` can be used to dynamically link to the current working directory (for example, so that `/` points to the root of your current working directory). This parameter supports named regex groups in the same way as the [`String.prototype.replace` method](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_string_as_the_replacement) in Node.js.
* `httpHeaders`: The headers are only applied to links where the link **starts with** one of the supplied URLs in the `urls` section.
* `timeout` timeout in [zeit/ms](https://www.npmjs.com/package/ms) format. (e.g. `"2000ms"`, `20s`, `1m`). Default `10s`.
* `retryOn429` if this is `true` then retry request when response is an HTTP code 429 after the duration indicated by `retry-after` header.
Expand Down Expand Up @@ -232,6 +235,10 @@ Options:
"pattern": "%20",
"replacement": "-",
"global": true
},
{
"pattern": "images/(?<filename>.*)",
"replacement": "assets/$<filename>"
}
],
"httpHeaders": [
Expand Down
27 changes: 24 additions & 3 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
'use strict';

const _ = require('lodash');
const async = require('async');
const linkCheck = require('link-check');
const LinkCheckResult = require('link-check').LinkCheckResult;
Expand Down Expand Up @@ -73,7 +72,29 @@ function extractSections(markdown) {
const sectionTitles = markdown.match(/^#+ .*$/gm) || [];

const sections = sectionTitles.map(section =>
section.replace(/^\W+/, '').replace(/\W+$/, '').replace(/[^\w\s-]+/g, '').replace(/\s+/g, '-').toLowerCase()
// The links are compared with the headings (simple text comparison).
// However, the links are url-encoded beforehand, so the headings
// have to also be encoded so that they can also be matched.
encodeURIComponent(
section
// replace links, the links can start with "./", "/", "http://", "https://" or "#"
// and keep the value of the text ($1)
.replace(/\[(.+)\]\(((?:\.?\/|https?:\/\/|#)[\w\d./?=#-]+)\)/, "$1")
// make everything (Unicode-aware) lower case
.toLowerCase()
// remove white spaces and "#" at the beginning
.replace(/^#+\s*/, '')
// remove everything that is NOT a (Unicode) Letter, (Unicode) Number decimal,
// (Unicode) Number letter, white space, underscore or hyphen
// https://ruby-doc.org/3.3.2/Regexp.html#class-Regexp-label-Unicode+Character+Categories
.replace(/[^\p{L}\p{Nd}\p{Nl}\s_\-`]/gu, "")
// remove sequences of *
.replace(/\*(?=.*)/gu, "")
// remove leftover backticks
.replace(/`/gu, "")
// Now replace remaining blanks with '-'
.replace(/\s/gu, "-")
)
);

var uniq = {};
Expand Down Expand Up @@ -109,7 +130,7 @@ module.exports = function markdownLinkCheck(markdown, opts, callback) {

const links = markdownLinkExtractor(markdown);
const sections = extractSections(markdown).concat(extractHtmlSections(markdown));
const linksCollection = _.uniq(links);
const linksCollection = [...new Set(links)]
const bar = (opts.showProgressBar) ?
new ProgressBar('Checking... [:bar] :percent', {
complete: '=',
Expand Down
145 changes: 65 additions & 80 deletions markdown-link-check
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

let chalk;
const fs = require('fs');
const markdownLinkCheck = require('./');
const { promisify } = require('util');
const markdownLinkCheck = promisify(require('./'));
const needle = require('needle');
const path = require('path');
const pkg = require('./package.json');
const { Command } = require('commander');
const program = new Command();
const url = require('url');
const { ProxyAgent } = require('proxy-agent');

class Input {
Expand All @@ -31,6 +31,26 @@ function commaSeparatedCodesList(value, dummyPrevious) {
});
}

/**
 * Recursively collect the paths of all entries below `rootFolder` whose
 * path ends with ".md".
 *
 * Directories are detected with `lstatSync`, so a symbolic link is never
 * descended into; it is only reported if its own path ends with ".md".
 * The suffix comparison is case-sensitive.
 *
 * @param {string} [rootFolder='.'] folder to start the search in
 * @returns {string[]} matching paths, each prefixed with `rootFolder`
 */
function loadAllMarkdownFiles(rootFolder = '.') {
    return fs.readdirSync(rootFolder).flatMap((entryName) => {
        const entryPath = path.join(rootFolder, entryName);

        // Descend first: a directory is always traversed, whatever its name.
        if (fs.lstatSync(entryPath).isDirectory()) {
            return loadAllMarkdownFiles(entryPath);
        }

        return entryPath.endsWith('.md') ? [entryPath] : [];
    });
}

/**
 * Parse the value of the `--reporters` option into a list of loaded
 * reporter modules.
 *
 * Each comma separated name is looked up in a `reporters` folder, first
 * relative to the current working directory (the historical behavior) and
 * then relative to the directory of this script. Without the second lookup
 * the reporters that live next to this script could only be loaded when the
 * tool was started from that very directory, because `path.resolve` with
 * relative segments resolves against `process.cwd()`.
 *
 * @param {string} value comma separated reporter module names
 * @returns {Array<*>} the exports of each reporter module, in input order
 * @throws {Error} with code `MODULE_NOT_FOUND` if a reporter exists in
 *   neither location; errors raised while loading a module are rethrown
 */
function commaSeparatedReportersList(value) {
    return value.split(',').map((reporter) => {
        let modulePath;
        try {
            // Only resolve here (do not load), so that an error thrown by
            // the reporter module itself is never mistaken for "not found".
            modulePath = require.resolve(path.resolve('reporters', reporter));
        } catch (err) {
            if (err.code !== 'MODULE_NOT_FOUND') {
                throw err;
            }
            // Fall back to the reporters folder next to this script.
            modulePath = path.resolve(__dirname, 'reporters', reporter);
        }
        return require(modulePath);
    });
}

function getInputs() {
const inputs = [];

Expand All @@ -40,11 +60,12 @@ function getInputs() {
.option('-c, --config [config]', 'apply a config file (JSON), holding e.g. url specific header configuration')
.option('-q, --quiet', 'displays errors only')
.option('-v, --verbose', 'displays detailed error information')
.option('-i --ignore <paths>', 'ignore input paths including an ignore path', commaSeparatedPathsList)
.option('-i, --ignore <paths>', 'ignore input paths including an ignore path', commaSeparatedPathsList)
.option('-a, --alive <code>', 'comma separated list of HTTP codes to be considered as alive', commaSeparatedCodesList)
.option('-r, --retry', 'retry after the duration indicated in \'retry-after\' header when HTTP code is 429')
.option('--reporters <names>', 'specify reporters to use', commaSeparatedReportersList)
.option('--projectBaseUrl <url>', 'the URL to use for {{BASEURL}} replacement')
.arguments('[filenamesOrUrls...]')
.arguments('[filenamesOrDirectorynamesOrUrls...]')
.action(function (filenamesOrUrls) {
let filenameForOutput;
let stream;
Expand All @@ -70,6 +91,7 @@ function getInputs() {
for (const filenameOrUrl of filenamesOrUrls) {
filenameForOutput = filenameOrUrl;
let baseUrl = '';
// remote file
if (/https?:/.test(filenameOrUrl)) {
stream = needle.get(
filenameOrUrl, { agent: new ProxyAgent(), use_proxy_from_env_var: false }
Expand All @@ -81,37 +103,44 @@ function getInputs() {
parsed.search = '';
parsed.hash = '';
if (parsed.pathname.lastIndexOf('/') !== -1) {
parsed.pathname = parsed.pathname.substr(0, parsed.pathname.lastIndexOf('/') + 1);
parsed.pathname = parsed.pathname.substring(0, parsed.pathname.lastIndexOf('/') + 1);
}
baseUrl = parsed.toString();
} catch (err) { /* ignore error */
}
inputs.push(new Input(filenameForOutput, stream, {baseUrl: baseUrl}));
} catch (err) {
/* ignore error */
}
} else {
const stats = fs.statSync(filenameOrUrl);
if (stats.isDirectory()){
console.error(chalk.red('\nERROR: ' + filenameOrUrl + ' is a directory! Please provide a valid filename as an argument.'));
process.exit(1);
// local file or directory
let files = [];

if (fs.statSync(filenameOrUrl).isDirectory()){
files = loadAllMarkdownFiles(filenameOrUrl)
} else {
files = [filenameOrUrl]
}

const resolved = path.resolve(filenameOrUrl);
for (let file of files) {
filenameForOutput = file;
const resolved = path.resolve(filenameForOutput);

// skip paths given if it includes a path to ignore.
// todo: allow ignore paths to be glob or regex instead of just includes?
if (ignore && ignore.some((ignorePath) => resolved.includes(ignorePath))) {
continue;
}
// skip paths given if it includes a path to ignore.
// todo: allow ignore paths to be glob or regex instead of just includes?
if (ignore && ignore.some((ignorePath) => resolved.includes(ignorePath))) {
continue;
}

if (process.platform === 'win32') {
baseUrl = 'file://' + path.dirname(resolved).replace(/\\/g, '/');
}
else {
baseUrl = 'file://' + path.dirname(resolved);
}
if (process.platform === 'win32') {
baseUrl = 'file://' + path.dirname(resolved).replace(/\\/g, '/');
}
else {
baseUrl = 'file://' + path.dirname(resolved);
}

stream = fs.createReadStream(filenameOrUrl);
stream = fs.createReadStream(filenameForOutput);
inputs.push(new Input(filenameForOutput, stream, {baseUrl: baseUrl}));
}
}

inputs.push(new Input(filenameForOutput, stream, {baseUrl: baseUrl}));
}
}
).parse(process.argv);
Expand All @@ -122,6 +151,7 @@ function getInputs() {
input.opts.verbose = (program.opts().verbose === true);
input.opts.retryOn429 = (program.opts().retry === true);
input.opts.aliveStatusCodes = program.opts().alive;
input.opts.reporters = program.opts().reporters ?? [require(path.resolve('reporters', 'default.js'))];
const config = program.opts().config;
if (config) {
input.opts.config = config.trim();
Expand Down Expand Up @@ -196,68 +226,23 @@ async function processInput(filenameForOutput, stream, opts) {
opts.retryCount = config.retryCount;
opts.fallbackRetryDelay = config.fallbackRetryDelay;
opts.aliveStatusCodes = config.aliveStatusCodes;
opts.reporters = config.reporters;
}

await runMarkdownLinkCheck(filenameForOutput, markdown, opts);
}

async function runMarkdownLinkCheck(filenameForOutput, markdown, opts) {
const statusLabels = {
alive: chalk.green('✓'),
dead: chalk.red('✖'),
ignored: chalk.gray('/'),
error: chalk.yellow('⚠'),
};
const [err, results] = await markdownLinkCheck(markdown, opts)
.then(res => [null, res]).catch(err => [err]);

return new Promise((resolve, reject) => {
markdownLinkCheck(markdown, opts, function (err, results) {
if (err) {
console.error(chalk.red('\n ERROR: something went wrong!'));
console.error(err.stack);
reject();
}

if (results.length === 0 && !opts.quiet) {
console.log(chalk.yellow(' No hyperlinks found!'));
}
results.forEach(function (result) {
// Skip messages for non-deadlinks in quiet mode.
if (opts.quiet && result.status !== 'dead') {
return;
}

if (opts.verbose) {
if (result.err) {
console.log(' [%s] %s → Status: %s %s', statusLabels[result.status], result.link, result.statusCode, result.err);
} else {
console.log(' [%s] %s → Status: %s', statusLabels[result.status], result.link, result.statusCode);
}
}
else if(!opts.quiet) {
console.log(' [%s] %s', statusLabels[result.status], result.link);
}
});
await Promise.allSettled(
opts.reporters.map(reporter => reporter(err, results, opts, filenameForOutput)
));

if(!opts.quiet){
console.log('\n %s links checked.', results.length);
}

if (results.some((result) => result.status === 'dead')) {
let deadLinks = results.filter(result => { return result.status === 'dead'; });
if(!opts.quiet){
console.error(chalk.red('\n ERROR: %s dead links found!'), deadLinks.length);
} else {
console.error(chalk.red('\n ERROR: %s dead links found in %s !'), deadLinks.length, filenameForOutput);
}
deadLinks.forEach(function (result) {
console.log(' [%s] %s → Status: %s', statusLabels[result.status], result.link, result.statusCode);
});
reject();
}

resolve();
});
});
if (err) throw null;
else if (results.some((result) => result.status === 'dead')) return;
else return;
}

async function main() {
Expand Down
Loading

0 comments on commit 51fc856

Please sign in to comment.