-
-
Notifications
You must be signed in to change notification settings - Fork 118
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(util): add normalizeIdentifier function (#606)
Signed-off-by: Matt Roberts <code@rbrts.uk>
- Loading branch information
Showing
5 changed files
with
203 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
'use strict'; | ||
|
||
// Conforms to Concerto Spec for identifiers:
//  - first character: a letter (Lu, Ll, Lt, Lm, Lo), letter-number (Nl), `$`, `_`
//    or a literal `\uXXXX` escape sequence
//  - following characters: any of the above, plus combining marks (Mn, Mc),
//    decimal digits (Nd), connector punctuation (Pc), ZWNJ and ZWJ
const ID_REGEX = /^(\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\\u[0-9A-Fa-f]{4})(?:\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\\u[0-9A-Fa-f]{4}|\p{Mn}|\p{Mc}|\p{Nd}|\p{Pc}|\u200C|\u200D)*$/u;

/**
 * Attempts to normalize an arbitrary string into a valid Concerto identifier.
 *
 * The steps are applied in this order:
 *  1. If the first character is only valid *after* the first position
 *     (a decimal digit, a combining mark, or connector punctuation other
 *     than `_`), a leading underscore is added.
 *  2. Whitespace, zero-width joiners and a fixed set of separator
 *     characters are replaced with `_`.
 *  3. Every remaining character that is not allowed in an identifier is
 *     replaced with `_` followed by its code point in lowercase hex, and
 *     every UTF-16 surrogate code unit is then escaped the same way.
 *  4. The result is optionally truncated.
 *
 * @param {string|null|undefined} identifier - the input value; `null` and
 * `undefined` are converted to the strings 'null' and 'undefined'
 * @param {number} [truncateLength] - Length at which to truncate the identifier;
 * values less than or equal to zero disable truncation
 * @returns {string} - An identifier that meets the Concerto specification
 * @throws {Error} if the input is neither a string, null nor undefined, or if
 * the normalized value still does not match ID_REGEX (for example, an empty string)
 */
function normalizeIdentifier(identifier, truncateLength = -1) {
    // Replacement callback: escapes each code point of the captured text
    // as `_<lowercase hex code point>`.
    const escapeCaptured = (_match, captured) => {
        let escaped = '';
        // Iterating a string yields whole code points, so a captured
        // surrogate pair produces a single escape.
        for (const codePoint of captured) {
            escaped += `_${codePoint.codePointAt(0).toString(16)}`;
        }
        return escaped;
    };

    // Stringify null & undefined values
    let result = identifier ?? String(identifier);

    if (typeof result !== 'string'){
        throw new Error(`Unsupported identifier type, '${typeof result}'.`);
    }

    result = result
        // 1. Add a leading underscore when the first character may not start
        //    an identifier. Besides decimal digits this covers combining marks
        //    and connector punctuation, which previously fell through to the
        //    final validity check and raised an error. `_` is excluded because
        //    it is a valid start character, and characters outside the BMP are
        //    excluded because step 3b escapes them into a form that already
        //    begins with `_`.
        .replace(/^(?:\p{Nd}|(?![_\u{10000}-\u{10FFFF}])[\p{Mn}\p{Mc}\p{Pc}])/u, '_$&')

        // 2. Substitute Whitespace, and joiners
        .replace(/[-‐−@#:;><|/\\\u200c\u200d]/g, '_')
        .replace(/\s/g, '_')

        // 3a. Replace Invalid Characters
        .replace(/(?!\p{Lu}|\p{Ll}|\p{Lt}|\p{Lm}|\p{Lo}|\p{Nl}|\$|_|\p{Mn}|\p{Mc}|\p{Nd}|\p{Pc}|\u200C|\u200D|\\u[0-9A-Fa-f]{4})(.)/gu, escapeCaptured)

        // 3b. Escape Surrogate Pairs (no `u` flag, so each surrogate code unit
        //     is matched and escaped individually)
        .replace(/([\uD800-\uDFFF])/g, escapeCaptured);

    // 4. Optionally truncate the identifier
    if (truncateLength > 0){
        result = result.substring(0,truncateLength);
    }

    // Check validity
    if (!ID_REGEX.test(result)){
        throw new Error(`Unexpected error. Not able to escape identifier '${result}'.`);
    }
    return result;
}
|
||
module.exports = { | ||
normalizeIdentifier, | ||
ID_REGEX | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
'use strict'; | ||
|
||
const { normalizeIdentifier } = require('../lib/identifiers'); | ||
|
||
require('chai').should(); | ||
|
||
describe('Identifiers', function () {

    describe('normalizeIdentifier', function() {
        // Each entry is [input, expectedOutput]. When expectedOutput is omitted
        // the input is expected to come back unchanged.
        // Invisible characters are written as escapes so they stay visible
        // and cannot be silently stripped from this file.
        const ids = [
            // No-op Values
            ['a'], // Letter, lowercase
            ['ՠ'], // Letter, lowercase. Unicode 11.0
            ['A'], // Letter, uppercase
            ['ĦĔĽĻŎ'], // Letter, uppercase
            ['Dž'], // Letter, titlecase
            ['ᾩ'], // Letter, titlecase
            ['〱〱〱〱'], // Letter, modifier
            ['जावास्क्रिप्ट'], // Letter, other
            ['Ⅶ'], // Number, letter
            ['$class'], // leading $
            ['_class'], // leading _
            ['\u03C9'], // Escaped Unicode Code Point, ω
            ['abc'], // Letter, lowercase
            ['a123'], // Number, digit
            ['foo$bar'], // $ separator
            ['foo_bar'], // _ separator
            ['αβγδεζηθ'], // Letter, lowercase
            ['foo\u03C9bar'], // Escaped Unicode Code Point, fooωbar
            ['foo\u03c9bar'], // Escaped Unicode Code Point lowercase, fooωbar
            ['foo‿bar'], // Punctuation, connector
            ['पः'], // Mark, combining character
            ['CharlesⅢ'], // Number, letter
            ['true'], // reserved words
            ['false'],
            ['null'],
            ['while'],
            ['for'],
            ['nully'], // leading reserved word
            ['こんにちは世界'], // Japanese
            ['foo\u200Cbar', 'foo_bar'], // zero-width non-joiner (U+200C)
            ['foo\u200Dbar', 'foo_bar'], // zero-width joiner (U+200D)

            // Bad Identifiers
            ['123', '_123'],
            ['1st', '_1st'],
            ['foo bar', 'foo_bar'],
            ['foo\u0020bar', 'foo_bar'], // Escaped Unicode, space
            ['foo\x3Dbar', 'foo_3dbar'], // Escaped Hex Sequence, foo=bar
            ['\u200Dfoo', '_foo'], // leading zero-width joiner (U+200D)
            ['foo-bar', 'foo_bar'],
            ['foo‐bar', 'foo_bar'], // U+2010 HYPHEN
            ['foo−bar', 'foo_bar'], // U+2212 MINUS
            ['foo|bar', 'foo_bar'],
            ['foo@bar', 'foo_bar'],
            ['foo#bar', 'foo_bar'],
            ['foo/bar', 'foo_bar'],
            ['foo>bar', 'foo_bar'],
            ['\x3D', '_3d'], // Escaped Hex Sequence, =
            ['😄', '_1f604'], // Surrogate pair, Emoji
            ['\u{1F604}', '_1f604'], // Escaped surrogate pair, Emoji
            // Expected value is cut off by the 30 character truncation used below
            ['𐴓𐴠𐴑𐴤𐴝', '_d803_dd13_d803_dd20_d803_dd11'], // Surrogate pairs, Hanifi Rohingya RTL
            [null, 'null'],
            [undefined, 'undefined'],
        ];
        ids.forEach(([id, expectedValue]) => {
            it(`'${id}' should equal '${expectedValue ?? id}'`, function() {
                normalizeIdentifier(id, 30).should.equal(expectedValue ?? id);
            });
        });

        it('should throw for empty string', () => {
            (() => normalizeIdentifier('')).should.throw(/Unexpected error/);
        });

        it('should not normalize non string identifiers', () => {
            (() => normalizeIdentifier({ a: 1 })).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(Symbol.for('a'))).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(false)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(true)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(1)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(1.112345678987654)).should.throw(/Unsupported identifier type/);
            (() => normalizeIdentifier(3.1e2)).should.throw(/Unsupported identifier type/);
        });

        it('should truncate identifiers', () => {
            normalizeIdentifier('a', 2).should.equal('a');
            normalizeIdentifier('aaa', 2).should.equal('aa');
            // Non-positive lengths disable truncation
            normalizeIdentifier('aaa', 0).should.equal('aaa');
            normalizeIdentifier('aaa', -1).should.equal('aaa');
            normalizeIdentifier('$a', 1).should.equal('$');
            normalizeIdentifier('😄', 2).should.equal('_1');
            normalizeIdentifier('𐴓', 2).should.equal('_d'); // surrogate pair character
        });
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
/**
 * Function that attempts to normalize arbitrary strings
 * into valid Concerto identifiers.
 *
 * `null` and `undefined` are accepted and are converted to the strings
 * 'null' and 'undefined' before normalization.
 *
 * @param identifier - the input value
 * @param truncateLength - Length at which to truncate the identifier;
 * omitted or non-positive values disable truncation
 * @returns An identifier that meets the Concerto specification
 * @throws Error if the input is of any other type, or if the normalized
 * value is still not a valid identifier (for example, an empty string)
 */
export function normalizeIdentifier(identifier: string | null | undefined, truncateLength?: number): string;
/**
 * Pattern that a complete string must match to be a valid Concerto identifier.
 */
export const ID_REGEX: RegExp;