Skip to content

Commit

Permalink
fix: remove invalid character for XML feed; fix saberland#537
Browse files Browse the repository at this point in the history
  • Loading branch information
geekplux committed Oct 30, 2019
1 parent ed1e9be commit 9842704
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
4 changes: 2 additions & 2 deletions packages/saber-plugin-feed/lib/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const path = require('path')
const { Feed } = require('feed')
const { getFeedPath, resolveURL } = require('./utils')
const { getFeedPath, resolveURL, removeXMLInvalidChars } = require('./utils')

const ID = 'generate-feed'

Expand Down Expand Up @@ -69,7 +69,7 @@ exports.apply = (api, options = {}) => {
// Strip HTML tags in excerpt and use it as description (a.k.a. summary)
description:
page.excerpt && page.excerpt.replace(/<(?:.|\n)*?>/gm, ''),
content,
content: removeXMLInvalidChars(content),
date: page.updatedAt,
published: page.createdAt
})
Expand Down
30 changes: 30 additions & 0 deletions packages/saber-plugin-feed/lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,33 @@ exports.getFeedPath = (feedPath, defaultPath) => {
exports.resolveURL = (base, pathname) => {
return new URL(pathname, base).href
}

/**
* Removes XML-invalid characters from a string.
* @param {string} string - a string potentially containing XML-invalid characters, such as non-UTF8 characters, STX, EOX and so on.
* @param {boolean} removeDiscouragedChars - a string potentially containing XML-invalid characters, such as non-UTF8 characters, STX, EOX and so on.
* @return : a sanitized string without all the XML-invalid characters.
*/
exports.removeXMLInvalidChars = (string, removeDiscouragedChars = true) => {
// remove everything forbidden by XML 1.0 specifications, plus the unicode replacement character U+FFFD
var regex = /((?:[\0-\x08\x0B\f\x0E-\x1F\uFFFD\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]))/g
string = string.replace(regex, '')

if (removeDiscouragedChars) {
// remove everything not suggested by XML 1.0 specifications
regex = new RegExp(
'([\\x7F-\\x84]|[\\x86-\\x9F]|[\\uFDD0-\\uFDEF]|(?:\\uD83F[\\uDFFE\\uDFFF])|(?:\\uD87F[\\uDF' +
'FE\\uDFFF])|(?:\\uD8BF[\\uDFFE\\uDFFF])|(?:\\uD8FF[\\uDFFE\\uDFFF])|(?:\\uD93F[\\uDFFE\\uD' +
'FFF])|(?:\\uD97F[\\uDFFE\\uDFFF])|(?:\\uD9BF[\\uDFFE\\uDFFF])|(?:\\uD9FF[\\uDFFE\\uDFFF])' +
'|(?:\\uDA3F[\\uDFFE\\uDFFF])|(?:\\uDA7F[\\uDFFE\\uDFFF])|(?:\\uDABF[\\uDFFE\\uDFFF])|(?:\\' +
'uDAFF[\\uDFFE\\uDFFF])|(?:\\uDB3F[\\uDFFE\\uDFFF])|(?:\\uDB7F[\\uDFFE\\uDFFF])|(?:\\uDBBF' +
'[\\uDFFE\\uDFFF])|(?:\\uDBFF[\\uDFFE\\uDFFF])(?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\' +
'uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|' +
'(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]))',
'g'
)
string = string.replace(regex, '')
}

return string
}

0 comments on commit 9842704

Please sign in to comment.