From 435116ae8a3b6f212d903158b1038d0f68c5f61d Mon Sep 17 00:00:00 2001 From: Stalone Date: Mon, 10 Jun 2024 21:34:21 +0200 Subject: [PATCH] [remark-sub-super] Start migrating to micromark --- package-lock.json | 16 ++ package.json | 2 +- .../micromark-extension-sub-super/.npmignore | 3 + .../__tests__/spec.test.js | 46 +++ .../micromark-extension-sub-super/lib/html.js | 26 ++ .../lib/index.js | 84 ++++++ .../package.json | 44 +++ .../specs/extension.md | 263 ++++++++++++++++++ 8 files changed, 483 insertions(+), 1 deletion(-) create mode 100644 packages/micromark-extension-sub-super/.npmignore create mode 100644 packages/micromark-extension-sub-super/__tests__/spec.test.js create mode 100644 packages/micromark-extension-sub-super/lib/html.js create mode 100644 packages/micromark-extension-sub-super/lib/index.js create mode 100644 packages/micromark-extension-sub-super/package.json create mode 100644 packages/micromark-extension-sub-super/specs/extension.md diff --git a/package-lock.json b/package-lock.json index 6e3709cd..850091ad 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16906,6 +16906,10 @@ "micromark-util-types": "^2.0.0" } }, + "node_modules/micromark-sub-super": { + "resolved": "packages/micromark-extension-sub-super", + "link": true + }, "node_modules/micromark-util-character": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.0.tgz", @@ -25396,6 +25400,18 @@ "micromark-util-symbol": "^2.0.0" } }, + "packages/micromark-extension-sub-super": { + "name": "micromark-sub-super", + "version": "0.0.0", + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.1.0", + "micromark-util-symbol": "^2.0.0" + }, + "devDependencies": { + "micromark": "^4.0.0" + } + }, "packages/rebber": { "version": "5.5.0", "license": "MIT", diff --git a/package.json b/package.json index 9ab1b340..6b8ddddb 100644 --- a/package.json +++ b/package.json @@ -57,7 +57,7 @@ }, "scripts": { 
"pretest": "lerna run pretest --scope zmarkdown", - "test": "cross-env NODE_OPTIONS=--experimental-vm-modules DEST=/tmp jest packages/remark-kbd packages/remark-iframes packages/remark-ping packages/micromark-extension-kbd packages/micromark-extension-iframes packages/micromark-extension-ping", + "test": "cross-env NODE_OPTIONS=--experimental-vm-modules DEST=/tmp jest packages/remark-kbd packages/remark-iframes packages/remark-ping packages/micromark-extension-kbd packages/micromark-extension-iframes packages/micromark-extension-ping packages/micromark-extension-sub-super", "lint": "eslint .", "posttest": "lerna run posttest --scope zmarkdown", "build": "lerna run build", diff --git a/packages/micromark-extension-sub-super/.npmignore b/packages/micromark-extension-sub-super/.npmignore new file mode 100644 index 00000000..858c80b4 --- /dev/null +++ b/packages/micromark-extension-sub-super/.npmignore @@ -0,0 +1,3 @@ +__tests__/ +specs/ +.npmignore diff --git a/packages/micromark-extension-sub-super/__tests__/spec.test.js b/packages/micromark-extension-sub-super/__tests__/spec.test.js new file mode 100644 index 00000000..f57c2260 --- /dev/null +++ b/packages/micromark-extension-sub-super/__tests__/spec.test.js @@ -0,0 +1,46 @@ +import { micromark } from 'micromark' +import micromarkSubSuper from '../lib/index' +import micromarkSubSuperHtml from '../lib/html' + +const specificationTests = { + 'works - sub': ['CO~2~', '

CO2

'], + 'works - super': ['a^2^ + b^2^ = c^2^', '

a2 + b2 = c2

'], + 'inside words': ['Literally s^e^lfies tbh lo-fi.', '

Literally selfies tbh lo-fi.

'], + 'needs content - sub': ['a~~', '

a~~

'], + 'needs content - super': ['b^^', '

b^^

'], + 'space isn\'t content - sub': ['a~ ~', '

a~ ~

'], + 'space isn\'t content - super': ['b^ ^', '

b^ ^

'], + 'double entry': ['^^foo^^', '

^foo^

'], + 'more than one char': ['a^1+1^ + b^1+1^ = c^1+1^', '

a1+1 + b1+1 = c1+1

'], + 'does not start with space - sub': ['a~ ~ + b~ ~', '

a~ ~ + b~ ~

'], + 'does not start with space - super': ['a^ ^ + b^ ^', '

a^ ^ + b^ ^

'], + 'cannot contain block': ['a~b\n\nc~', '

a~b

\n

c~

'], + 'escaped - sub': ['a\\~no\\~', '

a~no~

'], + 'escaped - super': ['a\\^no\\^', '

a^no^

'], + 'escaped inside': ['a^\\^^', '

a^

'], + 'lone tilde': ['a ~ b', '

a ~ b

'], + 'can contain inline - super': ['my ^*important*^ superscript', '

my important superscript

'], + 'can contain inline - sub': ['my ~*important*~ subscript', '

my important subscript

'], + 'can be contained': ['my *im~por~tant* subscript', '

my important subscript

'], + 'can be self-contained': ['2^2^2^^ = 16', '

222 = 16

'], + 'can be cross-contained': ['remark-~sub-^super^~', '

remark-sub-super

'] +} + +const renderString = (fixture) => + micromark(fixture, { + extensions: [micromarkSubSuper()], + htmlExtensions: [micromarkSubSuperHtml] + }) + +describe('conforms to the specification', () => { + for (const test in specificationTests) { + const jestFunction = (!specificationTests[test][2]) ? it : it.skip + + jestFunction(test, () => { + const [input, expectedOutput] = specificationTests[test] + const output = renderString(input) + + expect(output).toEqual(expectedOutput) + }) + } +}) diff --git a/packages/micromark-extension-sub-super/lib/html.js b/packages/micromark-extension-sub-super/lib/html.js new file mode 100644 index 00000000..435c6cd2 --- /dev/null +++ b/packages/micromark-extension-sub-super/lib/html.js @@ -0,0 +1,26 @@ +export default { + enter: { + subString: enterSubData, + superString: enterSuperData + }, + exit: { + subString: exitSubData, + superString: exitSuperData + } +} + +function enterSubData () { + this.tag('') +} + +function enterSuperData () { + this.tag('') +} + +function exitSubData () { + this.tag('') +} + +function exitSuperData () { + this.tag('') +} diff --git a/packages/micromark-extension-sub-super/lib/index.js b/packages/micromark-extension-sub-super/lib/index.js new file mode 100644 index 00000000..3f55e56c --- /dev/null +++ b/packages/micromark-extension-sub-super/lib/index.js @@ -0,0 +1,84 @@ +import { markdownLineEnding } from 'micromark-util-character' +import { codes } from 'micromark-util-symbol' + +export default function micromarkSubSuper (options = {}) { + // By default, use characters U+005E (`^`, code 94) and U+007E (`~`, code 126) + const unicodeSubChar = options.subCharCode || 126 + const unicodeSuperChar = options.superCharCode || 94 + + const call = { + name: 'subSuper', + tokenize: tokenizeFactory(unicodeSubChar, unicodeSuperChar) + } + + // Inject a hook called on the given characters + return { + text: { + [unicodeSubChar]: call, + [unicodeSuperChar]: call + } + } +} + +function tokenizeFactory (subCharCode, superCharCode) { 
+ return tokenizeSubSuper + + function tokenizeSubSuper (effects, ok, nok) { + return start + + function start (code) { + // We should not have entered here at all + if (code !== subCharCode && code !== superCharCode) return nok(code) + + effects.enter('subSuperCall') + effects.enter('subSuperSequence') + effects.consume(code) + effects.exit('subSuperSequence') + + if (code === subCharCode) effects.enter('subString') + else if (code === superCharCode) effects.enter('superString') + effects.enter('data') + + return afterStart + } + + function afterStart (code) { + if (code === subCharCode || + code === superCharCode || + code === codes.space) return nok(code) + + return content(code) + } + + function content (code) { + if (code === subCharCode) return subEnd(code) + else if (code === superCharCode) return superEnd(code) + + if (code === codes.eof || markdownLineEnding(code)) return nok(code) + + effects.consume(code) + return content + } + + function subEnd (code) { + effects.exit('data') + effects.exit('subString') + return end(code) + } + + function superEnd (code) { + effects.exit('data') + effects.exit('superString') + return end(code) + } + + function end(code) { + effects.enter('subSuperSequence') + effects.consume(code) + effects.exit('subSuperSequence') + effects.exit('subSuperCall') + + return ok + } + } +} diff --git a/packages/micromark-extension-sub-super/package.json b/packages/micromark-extension-sub-super/package.json new file mode 100644 index 00000000..ea814dbc --- /dev/null +++ b/packages/micromark-extension-sub-super/package.json @@ -0,0 +1,44 @@ +{ + "name": "micromark-sub-super", + "version": "0.0.0", + "description": "Add Markdown syntax to handle subscript and superscript", + "type": "module", + "keywords": [ + "micromark", + "subscript", + "superscript", + "plugin", + "extension" + ], + "author": "Stalone ", + "homepage": "https://github.com/zestedesavoir/zmarkdown/tree/master/packages/micromark-extension-sub-super", + "license": "MIT", + 
"main": "lib/index.js", + "module": "lib/index.js", + "directories": { + "lib": "lib", + "test": "__tests__" + }, + "files": [ + "lib" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/zestedesavoir/zmarkdown.git#master" + }, + "scripts": { + "pretest": "eslint .", + "test": "cross-env NODE_OPTIONS=--experimental-vm-modules jest", + "coverage": "cross-env NODE_OPTIONS=--experimental-vm-modules jest --coverage" + }, + "bugs": { + "url": "https://github.com/zestedesavoir/zmarkdown/issues" + }, + "dependencies": { + "micromark-util-character": "^2.1.0", + "micromark-util-symbol": "^2.0.0" + }, + "devDependencies": { + "micromark": "^4.0.0" + } +} diff --git a/packages/micromark-extension-sub-super/specs/extension.md b/packages/micromark-extension-sub-super/specs/extension.md new file mode 100644 index 00000000..edd165dc --- /dev/null +++ b/packages/micromark-extension-sub-super/specs/extension.md @@ -0,0 +1,263 @@ +## Subscript and superscript + +Both subscript and superscript constructions extend inline GFM structures. As such, they are parsed sequentially from beginning to end of stream. + +### 1. Definitions + +Two characters are used for subscript and superscript, respectively the tilde (character `~`, U+007E) and circumflex accent (character `^`, U+005E) characters. We also define the backslash as character `\` (U+005C), also called the escape character, and the space character as character ` ` (U+0020). + +A subscript entry begins with a tilde character followed by any number of Unicode characters, except the tilde character itself, and ends with another tilde character. +Similarly, a superscript entry begins with a circumflex accent character followed by any number of Unicode characters, except the circumflex accent character itself, and ends with another circumflex accent character. + +The content of a subscript or superscript entry consists of all the characters between the two above-defined symbols. 
The following shows a simple subscript entry: + +Example 1.1: + +```markdown +CO~2~ +``` + +```html +

<p>CO<sub>2</sub></p>

+``` + +And the following example shows a simple superscript entry: + +Example 1.2: + +```markdown +a^2^ + b^2^ = c^2^ +``` + +```html +

<p>a<sup>2</sup> + b<sup>2</sup> = c<sup>2</sup></p>

+``` + +Tilde and circumflex accent characters may be used to define a subscript or superscript entry even inside of words: + +Example 1.3: + +```markdown +Literally s^e^lfies tbh lo-fi. +``` + +```html +

<p>Literally s<sup>e</sup>lfies tbh lo-fi.</p>

+``` + +### 2. Subscript and superscript entries + +Subscript and superscript entries must always contain content, otherwise they shall not be parsed: + +Example 2.1: + +```markdown +a~~ +``` + +```html +

<p>a~~</p>

+``` + +Example 2.2: + +```markdown +b^^ +``` + +```html +

<p>b^^</p>

+``` + +Whitespace alone is not considered content: + +Example 2.3: + +```markdown +a~ ~ +``` + +```html +

<p>a~ ~</p>

+``` + +Example 2.4: + +```markdown +b^ ^ +``` + +```html +

<p>b^ ^</p>

+``` + +Similarly, a subscript or superscript which contains only another subscript or superscript entry is not considered to have content, and should be treated as if the opening and closing characters were escaped: + +Example 2.5: + +```markdown +^^foo^^ +``` + +```html +

<p>^<sup>foo</sup>^</p>

+``` + +Subscript and superscript entries may contain more than one character: + +Example 2.6: + +```markdown +a^1+1^ + b^1+1^ = c^1+1^ +``` + +```html +

<p>a<sup>1+1</sup> + b<sup>1+1</sup> = c<sup>1+1</sup></p>

+``` + +Subscript and superscript entries may not start with a space character: + +Example 2.7: + + +```markdown +a~ ~ + b~ ~ +``` + +```html +

<p>a~ ~ + b~ ~</p>

+``` + +Example 2.8: + +```markdown +a^ ^ + b^ ^ +``` + +```html +

<p>a^ ^ + b^ ^</p>

+``` + +Since subscript and superscript are inline elements, they cannot contain any block element, such as line breaks: + +Example 2.9: + +```markdown +a~b + +c~ +``` + +```html +

<p>a~b</p> + +<p>c~</p>

+``` + +### 3. Escaping + +The tilde and circumflex accent characters can both be escaped by preceding them with a backslash character, in which case they shall not be treated as opening or closing a subscript or superscript entry. + +Example 3.1: + +```markdown +a\~no\~ +``` + +```html +

<p>a~no~</p>

+``` + +Example 3.2: + +```markdown +a\^no\^ +``` + +```html +

<p>a^no^</p>

+``` +An escaped tilde or circumflex accent character may be included inside a subscript or superscript entry, in which case it shall be included as part of the entry: + +Example 3.3: + +```markdown +a^\^^ +``` + +```html +

<p>a<sup>^</sup></p>

+``` + +Lone tilde and circumflex accent characters, which do not have a matching character, do not need to be escaped: + +Example 3.4: + +```markdown +a ~ b +``` + +```html +

<p>a ~ b</p>

+``` + +### 4. Precedence rules + +Subscript and superscript entries have the same precedence as any other inline construct, unless otherwise noted. As such, they can contain other inline constructs, such as emphasis: + +Example 4.1: + +```markdown +my ^*important*^ superscript +``` + +```html +

<p>my <sup><em>important</em></sup> superscript</p>

+``` + +Example 4.2: + +```markdown +my ~*important*~ subscript +``` + +```html +

<p>my <sub><em>important</em></sub> subscript</p>

+``` + +Subscript and superscript entries may also be contained by other inline constructs: + +Example 4.3: + +```markdown +my *im~por~tant* subscript +``` + +```html +

<p>my <em>im<sub>por</sub>tant</em> subscript</p>

+``` + +Subscript and superscript entries can also be self-contained: + +Example 4.4: + +```markdown +2^2^2^^ = 16 +``` + +```html +

<p>2<sup>2<sup>2</sup></sup> = 16</p>

+``` + +They can also be cross-contained: + +Example 4.5: + +```markdown +remark-~sub-^super^~ +``` + +```html +

<p>remark-<sub>sub-<sup>super</sup></sub></p>

+``` +