-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(stripEndings): add stripEndings function to remove line endings
- Loading branch information
1 parent
afdb20b
commit 9158e56
Showing
8 changed files
with
205 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
const toUnicode = require( './toUnicode' ) | ||
const toAscii = require( './toAscii' ) | ||
const { getRegexClass, getRegexGroup } = require( './regex-utils' ) | ||
|
||
// Characters that close a line, covering Unicode, ASCII, and English text
const endingCharacters = [ '।', '॥', ']', '[', '|' ]
const endingClass = getRegexClass( endingCharacters )

// Characters that sometimes open a translation's line ending, ahead of the numbers
const optionalEndingCharacters = [ '(' ]
const optionalEndingClass = getRegexClass( optionalEndingCharacters )

// Leftover fragments of broken endings, which are removed
const brokenEndings = [ '()' ]
const brokenEndingClass = getRegexGroup( brokenEndings )
|
||
// The digits 0-9 as strings, followed by their toUnicode counterparts
const numbers = Array.from( { length: 10 }, ( _, digit ) => String( digit ) )
const unicodeNumbers = numbers.map( toUnicode )
const numberClass = getRegexClass( numbers.concat( unicodeNumbers ) )
|
||
// The Rahao marker as written in Unicode, ASCII, and English
const rahao = 'ਰਹਾਉ'
const pauseGroup = getRegexGroup( [ rahao, toAscii( rahao ), 'Pause' ] )
|
||
// An ending (or optional opening character) directly before a number: match the rest of the line
const numberedEndingPattern = ` ?(${endingClass}|${optionalEndingClass}?)${numberClass}.*`
// Endings written in the "|| Rahao ||" style
const pauseEndingPattern = ` ?${endingClass} ?${pauseGroup} ?${endingClass}`

// Every pattern is compiled as a global expression, keeping the order listed here
const matchers = [
  numberedEndingPattern,
  pauseEndingPattern,
  // Sweep up any ending characters that are still lingering
  brokenEndingClass,
  endingClass,
].map( pattern => new RegExp( pattern, 'g' ) )
|
||
|
||
/**
 * Strips line endings from any Gurmukhi or translation string.
 * Accepts both Unicode and ASCII input.
 * Each matcher is applied in turn, removing everything it matches, and any
 * trailing whitespace left behind is trimmed from the result.
 * Useful for generating accurate first letters or modifying non-Gurbani for better display.
 * *Not* designed for headings or Sirlekhs.
 * @param {String} text The text to strip endings from.
 * @return {String} An ending-less version of the text.
 * @example <caption>Line ending phrases</caption>
 * stripEndings('ਸੋ ਘਰੁ ਰਾਖੁ; ਵਡਾਈ ਤੋਇ ॥੧॥ ਰਹਾਉ ॥') // => ਸੋ ਘਰੁ ਰਾਖੁ; ਵਡਾਈ ਤੋਇ
 * stripEndings('ਹੁਕਮੁ ਪਛਾਣਿ; ਤਾ ਖਸਮੈ ਮਿਲਣਾ ॥੧॥ ਰਹਾਉ ਦੂਜਾ ॥') // => ਹੁਕਮੁ ਪਛਾਣਿ; ਤਾ ਖਸਮੈ ਮਿਲਣਾ
 * stripEndings('ਜਨ ਨਾਨਕ. ਗੁਰਮੁਖਿ ਜਾਤਾ ਰਾਮ ॥੪॥੬॥ ਛਕਾ ੧ ॥') // => ਜਨ ਨਾਨਕ. ਗੁਰਮੁਖਿ ਜਾਤਾ ਰਾਮ
 * @example <caption>English Translations</caption>
 * stripEndings('O Nanak, Forever And Ever True. ||1||') // => O Nanak, Forever And Ever True.
 * stripEndings('lush greenery. ||1||Pause||') // => lush greenery.
 * stripEndings('always I live within the Khalsa. 519') // => always I live within the Khalsa.
 * stripEndings('without your reminiscence.(1) (3)') // => without your reminiscence.
 * @example <caption>Spanish Translations</caption>
 * stripEndings('ofrece su ser en sacrificio a Ti. (4-2-9)') // => ofrece su ser en sacrificio a Ti.
 */
const stripEndings = text => {
  let stripped = text

  // Apply the matchers one after another, deleting whatever each one matches
  for ( const matcher of matchers ) {
    stripped = stripped.replace( matcher, '' )
  }

  // Removing an ending can leave whitespace dangling at the end of the line
  return stripped.trimRight()
}

module.exports = stripEndings
Oops, something went wrong.