Skip to content

Commit

Permalink
feature: support emojis and symbols (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
lukePeavey authored Nov 22, 2020
1 parent ede1a9a commit f6f9984
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 18 deletions.
12 changes: 9 additions & 3 deletions __tests__/split.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ describe('split(element, options)', () => {
expect(result.lines[0].textContent).toEqual(textContent)
})

test(`Splits text into lines and words`, () => {
it(`Splits text into lines and words`, () => {
const settings = { ...defaults, types: 'lines, words' }
const elem = createElement('div', { textContent })
const result = split(elem, settings)
Expand All @@ -126,7 +126,7 @@ describe('split(element, options)', () => {
expect(result.lines[0].textContent).toEqual(textContent)
})

test(`Splits text into lines and characters`, () => {
it(`Splits text into lines and characters`, () => {
const settings = { ...defaults, types: 'lines, chars' }
const elem = createElement('div', { textContent })
const result = split(elem, settings)
Expand All @@ -148,7 +148,7 @@ describe('split(element, options)', () => {
expect(result.lines[0].textContent).toEqual(textContent)
})

test(`Splits text into lines, words and characters`, () => {
it(`Splits text into lines, words and characters`, () => {
const settings = defaults
const elem = createElement('div', { textContent })
const result = split(elem, settings)
Expand Down Expand Up @@ -176,4 +176,10 @@ describe('split(element, options)', () => {
expect(result.lines[0].classList.contains(settings.lineClass)).toBe(true)
expect(result.lines[0].textContent).toEqual(textContent)
})

it(`Splits text containing unicode characters`, () => {
const elem = createElement('div', { textContent: '📌foo' })
const result = split(elem)
expect(result.chars[0].textContent).toEqual('📌')
})
})
25 changes: 23 additions & 2 deletions __tests__/utils/toChars.test.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,32 @@
import toChars from '../../lib/utils/toChars'

describe('utils.toChars(value)', () => {
it(`Splits a string into characters`, () => {
it(`Splits a string into an array of characters`, () => {
expect(toChars('foo')).toEqual(['f', 'o', 'o'])
expect(toChars('foo bar')).toEqual(['f', 'o', 'o', ' ', 'b', 'a', 'r'])
expect(toChars('f-o-o', /-/)).toEqual(['f', 'o', 'o'])
expect(toChars('f-o-o', '-')).toEqual(['f', 'o', 'o'])
expect(toChars()).toEqual([])
})
})

it(`Splits a string containing emojis`, () => {
expect(toChars('👋🏽😀✂️')).toEqual(['👋🏽', '😀', '✂️'])
expect(toChars('foo😀')).toEqual(['f', 'o', 'o', '😀'])
})

it(`Splits a string containing non-english characters`, () => {
expect(toChars('ふりが')).toEqual(['ふ', 'り', 'が'])
})
})

describe('utils.toChars(value, separator)', () => {
it(`Splits a string using a custom string separator`, () => {
expect(toChars('f-o-o', '-')).toEqual(['f', 'o', 'o'])
expect(toChars('👋🏽-😀-✂️', '-')).toEqual(['👋🏽', '😀', '✂️'])
})

it(`Splits a string using a RegExp separator pattern`, () => {
expect(toChars('f-o_o', /-|_/)).toEqual(['f', 'o', 'o'])
expect(toChars('👋🏽-😀_✂️', /-|_/)).toEqual(['👋🏽', '😀', '✂️'])
})
})
156 changes: 143 additions & 13 deletions lib/utils/toChars.js
Original file line number Diff line number Diff line change
@@ -1,26 +1,156 @@
/**
* Splits a string into an array of characters
* Based on lodash#split <https://lodash.com/license>
* Copyright jQuery Foundation and other contributors <https://jquery.org/>
* Copyright Jeremy Ashkenas, DocumentCloud and Investigative Reporters &
* Editors
*/

/* eslint-disable prefer-template */
/* eslint-disable no-misleading-character-class */
import isString from './isString'

/** Character-class ranges (regex source text) used to build the patterns below. */
const rsAstralRange = '\\ud800-\\udfff'
const rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23'
const rsComboSymbolsRange = '\\u20d0-\\u20f0'
const rsVarRange = '\\ufe0e\\ufe0f'

/** Used to compose unicode capture groups. */
const rsAstral = `[${rsAstralRange}]`
const rsCombo = `[${rsComboMarksRange}${rsComboSymbolsRange}]`
const rsFitz = '\\ud83c[\\udffb-\\udfff]'
const rsModifier = `(?:${rsCombo}|${rsFitz})`
const rsNonAstral = `[^${rsAstralRange}]`
const rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}'
const rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]'
const rsZWJ = '\\u200d'

/** Used to compose unicode regexes. */
const reOptMod = `${rsModifier}?`
const rsOptVar = `[${rsVarRange}]?`
// Zero or more "ZWJ + symbol" continuations, each with its own optional
// variation selector and modifier.
const rsOptJoin =
  '(?:' +
  rsZWJ +
  '(?:' +
  [rsNonAstral, rsRegional, rsSurrPair].join('|') +
  ')' +
  rsOptVar +
  reOptMod +
  ')*'
const rsSeq = rsOptVar + reOptMod + rsOptJoin
// NOTE: the closing parenthesis must directly follow the joined alternatives.
// A line break inside the template literal would become a literal `\n` in the
// pattern and make the last alternative (a lone surrogate) match only when it
// is followed by a newline.
const rsSymbol = `(?:${[
  `${rsNonAstral}${rsCombo}?`,
  rsCombo,
  rsRegional,
  rsSurrPair,
  rsAstral,
].join('|')})`

/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */
const reUnicode = RegExp(`${rsFitz}(?=${rsFitz})|${rsSymbol}${rsSeq}`, 'g')

/** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */
const unicodeRange = [
  rsZWJ,
  rsAstralRange,
  rsComboMarksRange,
  rsComboSymbolsRange,
  rsVarRange,
]
const reHasUnicode = RegExp(`[${unicodeRange.join('')}]`)

/**
 * Breaks a plain `string` into its individual UTF-16 code units using the
 * native `String.prototype.split`.
 *
 * @private
 * @param {string} string The string to convert.
 * @returns {Array} One array entry per code unit (empty array for '').
 */
function asciiToArray(string) {
  const emptySeparator = ''
  return string.split(emptySeparator)
}

/**
 * Reports whether `string` contains at least one character matched by
 * `reHasUnicode` (zero-width joiner, surrogate, combining mark or variation
 * selector).
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {boolean} `true` when such a character is present, else `false`.
 */
function hasUnicode(string) {
  const firstMatch = reHasUnicode.exec(string)
  return firstMatch !== null
}

/**
 * Splits a Unicode `string` into an array of symbols by collecting every
 * match of `reUnicode`.
 *
 * @private
 * @param {string} string The string to convert.
 * @returns {Array} The matched symbols, or an empty array when nothing matches.
 */
function unicodeToArray(string) {
  const symbols = string.match(reUnicode)
  if (symbols === null) {
    return []
  }
  return symbols
}

/**
 * Converts `string` to an array, choosing the Unicode-aware splitter when
 * `hasUnicode` reports special characters and the plain splitter otherwise.
 *
 * @param {string} string The string to convert.
 * @returns {Array} Returns the converted array.
 */
export function stringToArray(string) {
  if (hasUnicode(string)) {
    return unicodeToArray(string)
  }
  return asciiToArray(string)
}

/**
 * Coerces `value` to a string. `null` and `undefined` become the empty
 * string; everything else goes through `String(value)`.
 *
 * @param {*} value The value to process.
 * @returns {string} Returns the string.
 * @example
 * toString(null);
 * // => ''
 *
 * toString([1, 2, 3]);
 * // => '1,2,3'
 */
function toString(value) {
  // Loose equality on purpose: matches both `null` and `undefined`.
  if (value == null) {
    return ''
  }
  return String(value)
}

/**
 * Splits `string` into an array of characters. If `separator` is omitted,
 * it behaves like `string.split('')`.
 *
 * Unlike native `string.split('')`, it can split strings that contain unicode
 * characters like emojis and symbols: when no separator is given and the
 * string contains such characters, it is split into whole symbols instead of
 * UTF-16 code units. In every other case the native `split(separator)` is
 * used.
 *
 * @param {string} [string=''] The string to split. `null`/`undefined` are
 *   treated as the empty string; other values are converted with `String()`.
 * @param {RegExp|string} [separator=''] The separator pattern to split by.
 * @returns {Array} Returns the string segments.
 * @example
 * toChars('foo');
 * // => ['f', 'o', 'o']
 *
 * toChars('foo bar');
 * // => ["f", "o", "o", " ", "b", "a", "r"]
 *
 * toChars('f😀o');
 * // => ['f', '😀', 'o']
 *
 * toChars('f-😀-o', /-/);
 * // => ['f', '😀', 'o']
 *
 * toChars()
 * // => []
 */
export default function toChars(string, separator = '') {
  const text = toString(string)
  // Symbol-aware splitting only applies to a non-empty string with no
  // (truthy) separator that actually contains special unicode characters.
  const splitBySymbol =
    text !== '' && isString(text) && !separator && hasUnicode(text)
  return splitBySymbol ? stringToArray(text) : text.split(separator)
}

0 comments on commit f6f9984

Please sign in to comment.