-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature: support emojis and symbols (#9)
- Loading branch information
1 parent
ede1a9a
commit f6f9984
Showing
3 changed files
with
175 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,32 @@ | ||
import toChars from '../../lib/utils/toChars' | ||
|
||
// Default behaviour: no separator, so the string is split character by character.
describe('utils.toChars(value)', () => {
  it(`Splits a string into an array of characters`, () => {
    expect(toChars('foo')).toEqual(['f', 'o', 'o'])
    expect(toChars('foo bar')).toEqual(['f', 'o', 'o', ' ', 'b', 'a', 'r'])
    // No argument at all yields an empty array.
    expect(toChars()).toEqual([])
  })

  it(`Splits a string containing emojis`, () => {
    // Skin-tone modifiers and variation selectors must stay attached to their base symbol.
    expect(toChars('👋🏽😀✂️')).toEqual(['👋🏽', '😀', '✂️'])
    expect(toChars('foo😀')).toEqual(['f', 'o', 'o', '😀'])
  })

  it(`Splits a string containing non-english characters`, () => {
    expect(toChars('ふりが')).toEqual(['ふ', 'り', 'が'])
  })
})
|
||
// Explicit separator: the string is split on the separator instead of per character.
describe('utils.toChars(value, separator)', () => {
  it(`Splits a string using a custom string separator`, () => {
    expect(toChars('f-o-o', '-')).toEqual(['f', 'o', 'o'])
    expect(toChars('👋🏽-😀-✂️', '-')).toEqual(['👋🏽', '😀', '✂️'])
  })

  it(`Splits a string using a RegExp separator pattern`, () => {
    expect(toChars('f-o_o', /-|_/)).toEqual(['f', 'o', 'o'])
    expect(toChars('👋🏽-😀_✂️', /-|_/)).toEqual(['👋🏽', '😀', '✂️'])
  })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,156 @@ | ||
/** | ||
* Splits a string into an array of characters | ||
* Based on lodash#split <https://lodash.com/license> | ||
* Copyright jQuery Foundation and other contributors <https://jquery.org/> | ||
* Copyright Jeremy Ashkenas, DocumentCloud and Investigative Reporters & | ||
* Editors | ||
*/ | ||
|
||
/* eslint-disable prefer-template */ | ||
/* eslint-disable no-misleading-character-class */ | ||
import isString from './isString' | ||
|
||
/**
 * Character-class ranges, written as regex source fragments (hence the
 * doubled backslashes): they are spliced into `[...]` classes below.
 */
// UTF-16 surrogate code units, i.e. the halves of astral-plane code points.
const rsAstralRange = '\\ud800-\\udfff'
// Combining diacritical marks and combining half marks.
const rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23'
// Combining marks for symbols.
const rsComboSymbolsRange = '\\u20d0-\\u20f0'
// Variation selectors 15 and 16 (text vs. emoji presentation).
const rsVarRange = '\\ufe0e\\ufe0f'

/** Used to compose unicode capture groups. */
const rsAstral = `[${rsAstralRange}]`
const rsCombo = `[${rsComboMarksRange}${rsComboSymbolsRange}]`
// Emoji skin-tone (Fitzpatrick) modifiers U+1F3FB..U+1F3FF as surrogate pairs.
const rsFitz = '\\ud83c[\\udffb-\\udfff]'
const rsModifier = `(?:${rsCombo}|${rsFitz})`
const rsNonAstral = `[^${rsAstralRange}]`
// Two regional-indicator symbols, which together render as one flag.
const rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}'
const rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]'
const rsZWJ = '\\u200d'

/** Used to compose unicode regexes. */
const reOptMod = `${rsModifier}?`
const rsOptVar = `[${rsVarRange}]?`
// Zero or more "ZWJ + symbol" continuations, each with its own optional
// variation selector and modifier (e.g. family or profession emojis).
const rsOptJoin =
  '(?:' +
  rsZWJ +
  '(?:' +
  [rsNonAstral, rsRegional, rsSurrPair].join('|') +
  ')' +
  rsOptVar +
  reOptMod +
  ')*'
const rsSeq = rsOptVar + reOptMod + rsOptJoin
// One base symbol. Alternatives are ordered from most to least specific; the
// final `rsAstral` is the fallback that keeps lone surrogates as their own
// element. The closing parenthesis must stay on the same line as the
// interpolation: a line break inside the template literal becomes a literal
// newline in the pattern and breaks that fallback.
const rsSymbol = `(?:${[
  `${rsNonAstral}${rsCombo}?`,
  rsCombo,
  rsRegional,
  rsSurrPair,
  rsAstral,
].join('|')})`

/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */
const reUnicode = RegExp(`${rsFitz}(?=${rsFitz})|${rsSymbol}${rsSeq}`, 'g')

/** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */
const unicodeRange = [
  rsZWJ,
  rsAstralRange,
  rsComboMarksRange,
  rsComboSymbolsRange,
  rsVarRange,
]
// No `g` flag on purpose: `.test()` on this regex stays stateless.
const reHasUnicode = RegExp(`[${unicodeRange.join('')}]`)
|
||
/**
 * Converts an ASCII `string` to an array.
 *
 * Splits on UTF-16 code units via `split('')`, so it is only suitable for
 * strings without surrogate pairs, combining marks or joiners.
 *
 * @private
 * @param {string} string The string to convert.
 * @returns {Array} Returns the converted array.
 */
function asciiToArray(string) {
  return string.split('')
}
|
||
/**
 * Checks if `string` contains Unicode symbols, i.e. any character matched by
 * `reHasUnicode`.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {boolean} Returns `true` if a symbol is found, else `false`.
 */
function hasUnicode(string) {
  return reHasUnicode.test(string)
}
|
||
/**
 * Converts a Unicode `string` to an array.
 *
 * Each element is one match of `reUnicode`. An empty array is returned when
 * nothing matches (for example for the empty string).
 *
 * @private
 * @param {string} string The string to convert.
 * @returns {Array} Returns the converted array.
 */
function unicodeToArray(string) {
  // `match` returns `null` when there is no match; normalise that to `[]`.
  return string.match(reUnicode) || []
}
|
||
/**
 * Converts `string` to an array.
 *
 * Uses the Unicode-aware splitter when `hasUnicode(string)` is true and the
 * plain `split('')` based splitter otherwise.
 *
 * @param {string} string The string to convert.
 * @returns {Array} Returns the converted array.
 */
export function stringToArray(string) {
  return hasUnicode(string) ? unicodeToArray(string) : asciiToArray(string)
}
|
||
/**
 * Converts `value` to a string. An empty string is returned for `null`
 * and `undefined` values.
 *
 * @param {*} value The value to process.
 * @returns {string} Returns the string.
 * @example
 *
 * toString(null)
 * // => ''
 *
 * toString([1, 2, 3])
 * // => '1,2,3'
 */
function toString(value) {
  // `== null` is deliberate: it matches both `null` and `undefined`.
  return value == null ? '' : String(value)
}
|
||
/**
 * Splits `string` into an array of characters. If `separator` is omitted,
 * it behaves like `string.split('')`.
 *
 * Unlike native `string.split('')`, it can split strings that contain unicode
 * characters like emojis and symbols. When a `separator` is given, the string
 * is split with native `String.prototype.split` on that separator.
 *
 * @param {string} [string=''] The string to split. `null` and `undefined`
 *  are treated as the empty string; other values are converted with `String`.
 * @param {RegExp|string} [separator=''] The separator pattern to split by.
 * @returns {string[]} Returns the string segments.
 * @example
 * toChars('foo')
 * // => ['f', 'o', 'o']
 *
 * toChars('foo bar')
 * // => ["f", "o", "o", " ", "b", "a", "r"]
 *
 * toChars('f😀o')
 * // => ['f', '😀', 'o']
 *
 * toChars('f-😀-o', /-/)
 * // => ['f', '😀', 'o']
 *
 * toChars()
 * // => []
 */
export default function toChars(string, separator = '') {
  // Coerce once into a local instead of reassigning the parameter.
  const text = toString(string)
  // Only the separator-less case needs the Unicode-aware splitter; with an
  // explicit separator, native `split` already keeps each segment intact.
  if (text && isString(text) && !separator && hasUnicode(text)) {
    return stringToArray(text)
  }
  return text.split(separator)
}