From e635f4f3e8d81e14c6ac1fc7208503522bae0808 Mon Sep 17 00:00:00 2001 From: Paul Robert Lloyd Date: Sun, 12 Nov 2023 16:59:28 +0000 Subject: [PATCH 1/5] docs: fix typo in parse.ts comment --- src/microformats/parse.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/microformats/parse.ts b/src/microformats/parse.ts index d8c6fe0a..117c36e0 100644 --- a/src/microformats/parse.ts +++ b/src/microformats/parse.ts @@ -86,7 +86,7 @@ export const parseMicroformat = ( } /** - * There is some ambigutity on how this should be handled. + * There is some ambiguity on how this should be handled. * At the moment, we're following other parsers and keeping `value` a string * and adding `html` as an undocumented property. */ From bc9d8d331f53e2d8cc2bc4c5996414b855c0c152 Mon Sep 17 00:00:00 2001 From: Paul Robert Lloyd Date: Sun, 12 Nov 2023 17:02:29 +0000 Subject: [PATCH 2/5] feat(experimental): add support for authorship (option flag) --- src/index.ts | 1 + src/types.ts | 1 + src/validator.ts | 6 +++++- test/scenarios.spec.ts | 7 ++++++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 58e9632b..691525d0 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,6 +8,7 @@ export interface Options { lang?: boolean; textContent?: boolean; metaformats?: boolean; + authorship?: boolean; }; } diff --git a/src/types.ts b/src/types.ts index 785b6372..9a3c4a36 100644 --- a/src/types.ts +++ b/src/types.ts @@ -14,6 +14,7 @@ export interface ParserOptions { lang?: boolean; textContent?: boolean; metaformats?: boolean; + authorship?: boolean; }; } diff --git a/src/validator.ts b/src/validator.ts index 4d962418..bc751e44 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -81,7 +81,7 @@ export const validator = ( if ("experimental" in options) { const experimental = assertIsObject( options.experimental, - ["lang", "textContent", "metaformats"], + ["lang", "textContent", "metaformats", "authorship"], "experimental", ); @@ -96,6 +96,10 @@ export const validator = ( if ("metaformats" in experimental) { assertIsBoolean(experimental.metaformats, "experimental.metaformats"); } + + if ("authorship" in experimental) { + assertIsBoolean(experimental.authorship, "experimental.authorship"); + } } }; diff --git a/test/scenarios.spec.ts b/test/scenarios.spec.ts index 672e3c6b..8bbca945 100644 --- a/test/scenarios.spec.ts +++ b/test/scenarios.spec.ts @@ -67,7 +67,12 @@ describe("mf2() // experimental scenarios", () => { it(`should correctly parse ${name}`, () => { const result = mf2(input, { ...options, - experimental: { lang: true, textContent: true, metaformats: true }, + experimental: { + lang: true, + textContent: true, + metaformats: true, + authorship: true, + }, }); expect(result).to.deep.equal(expected); }); From d87355ca70aafe93e2fd447436d7cf237d849742 Mon Sep 17 00:00:00 2001 From: Paul Robert Lloyd Date: Sun, 12 Nov 2023 17:02:46 +0000 Subject: [PATCH 3/5] feat(experimental): add support for authorship (demo option) --- demo/demo.js | 3 ++- demo/index.tpl.html | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/demo/demo.js b/demo/demo.js index 9ecd6e7e..04399b37 100644 --- a/demo/demo.js +++ b/demo/demo.js @@ -34,9 +34,10 @@ window.parseHtml = () => { const lang = document.getElementById("lang").checked; const textContent = document.getElementById("textContent").checked; const metaformats = document.getElementById("metaformats").checked; + const authorship = document.getElementById("authorship").checked; return parse(html, { baseUrl, - experimental: { lang, textContent, metaformats }, + experimental: { lang, textContent, metaformats, authorship }, }); }; diff --git a/demo/index.tpl.html b/demo/index.tpl.html index 1bfd10c8..ad8d32aa 100644 --- a/demo/index.tpl.html +++ b/demo/index.tpl.html @@ -83,6 +83,16 @@

Experimental options

/> Metaformats parsing +

From c01370de6acf79cfbc4f1d9c49336c5424470676 Mon Sep 17 00:00:00 2001 From: Paul Robert Lloyd Date: Sun, 12 Nov 2023 17:12:25 +0000 Subject: [PATCH 4/5] feat(experimental): add support for authorship (test suite) --- ...ship-h-card-with-rel-author-to-rel-me.html | 16 ++++ ...ship-h-card-with-rel-author-to-rel-me.json | 38 ++++++++++ ...rship-h-card-with-rel-author-to-u-url.html | 16 ++++ ...rship-h-card-with-rel-author-to-u-url.json | 38 ++++++++++ .../authorship-h-card-with-rel-author.html | 21 +++++ .../authorship-h-card-with-rel-author.json | 37 +++++++++ .../experimental/authorship-h-card.html | 19 +++++ .../experimental/authorship-h-card.json | 31 ++++++++ .../experimental/authorship-h-feed.html | 30 ++++++++ .../experimental/authorship-h-feed.json | 76 +++++++++++++++++++ .../experimental/authorship-p-author.html | 16 ++++ .../experimental/authorship-p-author.json | 20 +++++ 12 files changed, 358 insertions(+) create mode 100644 test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.html create mode 100644 test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.json create mode 100644 test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.html create mode 100644 test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.json create mode 100644 test/suites/experimental/authorship-h-card-with-rel-author.html create mode 100644 test/suites/experimental/authorship-h-card-with-rel-author.json create mode 100644 test/suites/experimental/authorship-h-card.html create mode 100644 test/suites/experimental/authorship-h-card.json create mode 100644 test/suites/experimental/authorship-h-feed.html create mode 100644 test/suites/experimental/authorship-h-feed.json create mode 100644 test/suites/experimental/authorship-p-author.html create mode 100644 test/suites/experimental/authorship-p-author.json diff --git a/test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.html b/test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.html new file mode 100644 index 00000000..f7a39387 --- /dev/null +++ b/test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.html @@ -0,0 +1,16 @@ + + + + + + Entry with rel=author (links to h-card with rel=me) + + +
+

A woman must have money and a room of her own if she is to write fiction.

+
+ + + diff --git a/test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.json b/test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.json new file mode 100644 index 00000000..7dc6d92d --- /dev/null +++ b/test/suites/experimental/authorship-h-card-with-rel-author-to-rel-me.json @@ -0,0 +1,38 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "author": [ + { + "type": ["h-card"], + "lang": "en", + "properties": { + "name": ["Virginia Woolf"], + "photo": ["https://virginia.example/photo.jpg"], + "url": ["https://virginia.example"] + }, + "value": "Virginia Woolf" + } + ], + "content": [ + { + "value": "A woman must have money and a room of her own if she is to write fiction.", + "lang": "en", + "html": "

A woman must have money and a room of her own if she is to write fiction.

" + } + ] + } + } + ], + "rel-urls": { + "https://virginia.woolf": { + "rels": ["author"], + "text": "About Virginia Woolf" + } + }, + "rels": { + "author": ["https://virginia.woolf"] + } +} diff --git a/test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.html b/test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.html new file mode 100644 index 00000000..8ddc1ee1 --- /dev/null +++ b/test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.html @@ -0,0 +1,16 @@ + + + + + + Entry with rel=author (links to h-card with u-url and u-uid) + + +
+

古池や
蛙飛び込む
水の音

+
+ + + diff --git a/test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.json b/test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.json new file mode 100644 index 00000000..c12dde7e --- /dev/null +++ b/test/suites/experimental/authorship-h-card-with-rel-author-to-u-url.json @@ -0,0 +1,38 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "author": [ + { + "type": ["h-card"], + "lang": "en", + "properties": { + "name": ["Basho"], + "photo": ["https://basho.example/photo.jpg"], + "url": ["https://basho.example"] + }, + "value": "Basho" + } + ], + "content": [ + { + "value": "古池や\n蛙飛び込む\n水の音", + "lang": "jp", + "html": "

古池や
蛙飛び込む
水の音

" + } + ] + } + } + ], + "rel-urls": { + "https://basho.example": { + "rels": ["author"], + "text": "About Basho" + } + }, + "rels": { + "author": ["https://basho.example"] + } +} diff --git a/test/suites/experimental/authorship-h-card-with-rel-author.html b/test/suites/experimental/authorship-h-card-with-rel-author.html new file mode 100644 index 00000000..72190360 --- /dev/null +++ b/test/suites/experimental/authorship-h-card-with-rel-author.html @@ -0,0 +1,21 @@ + + + + + + Entry with separate h-card and rel=author + + +
+

For one who sees the distinction, there is no further confusing of the mind with the self.

+
+

+ + Patañjali + +

+ + + diff --git a/test/suites/experimental/authorship-h-card-with-rel-author.json b/test/suites/experimental/authorship-h-card-with-rel-author.json new file mode 100644 index 00000000..d416ddf7 --- /dev/null +++ b/test/suites/experimental/authorship-h-card-with-rel-author.json @@ -0,0 +1,37 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "author": [ + { + "type": ["h-card"], + "lang": "en", + "properties": { + "name": ["Patañjali"], + "url": ["https://patanjali.example"] + }, + "value": "Patañjali" + } + ], + "content": [ + { + "value": "For one who sees the distinction, there is no further confusing of the mind with the self.", + "lang": "en", + "html": "

For one who sees the distinction, there is no further confusing of the mind with the self.

" + } + ] + } + } + ], + "rel-urls": { + "https://patanjali.example": { + "rels": ["author"], + "text": "About Patañjali" + } + }, + "rels": { + "author": ["https://patanjali.example"] + } +} diff --git a/test/suites/experimental/authorship-h-card.html b/test/suites/experimental/authorship-h-card.html new file mode 100644 index 00000000..1675d431 --- /dev/null +++ b/test/suites/experimental/authorship-h-card.html @@ -0,0 +1,19 @@ + + + + + + Entry with h-card + + +
+

Even in the house of Hades there is left something, a soul and an image, but there is no real heart of life in it.

+
+ + + diff --git a/test/suites/experimental/authorship-h-card.json b/test/suites/experimental/authorship-h-card.json new file mode 100644 index 00000000..673d02e3 --- /dev/null +++ b/test/suites/experimental/authorship-h-card.json @@ -0,0 +1,31 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "author": [ + { + "type": ["h-card"], + "lang": "en", + "properties": { + "name": ["Homer"], + "photo": ["https://homer.example/photo.jpg"], + "url": ["https://homer.example"] + }, + "value": "Homer" + } + ], + "content": [ + { + "value": "Even in the house of Hades there is left something, a soul and an image, but there is no real heart of life in it.", + "lang": "en", + "html": "

Even in the house of Hades there is left something, a soul and an image, but there is no real heart of life in it.

" + } + ] + } + } + ], + "rel-urls": {}, + "rels": {} +} diff --git a/test/suites/experimental/authorship-h-feed.html b/test/suites/experimental/authorship-h-feed.html new file mode 100644 index 00000000..97ecd685 --- /dev/null +++ b/test/suites/experimental/authorship-h-feed.html @@ -0,0 +1,30 @@ + + + + + + Feed with h-card + + +
+
+

Quality is not an act, it is a habit.

+
+
+
+
+

Whosoever is delighted in solitude is either a wild beast or a god.

+
+
+ + + diff --git a/test/suites/experimental/authorship-h-feed.json b/test/suites/experimental/authorship-h-feed.json new file mode 100644 index 00000000..607f8ab3 --- /dev/null +++ b/test/suites/experimental/authorship-h-feed.json @@ -0,0 +1,76 @@ +{ + "items": [ + { + "type": ["h-feed"], + "lang": "en", + "properties": { + "author": [ + { + "type": ["h-card"], + "lang": "en", + "properties": { + "name": ["Aristotle"], + "photo": ["https://aristotle.example/photo.jpg"], + "url": ["https://aristotle.example"] + }, + "value": "Aristotle" + } + ] + }, + "children": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "author": [ + { + "type": ["h-card"], + "lang": "en", + "properties": { + "name": ["Aristotle"], + "photo": ["https://aristotle.example/photo.jpg"], + "url": ["https://aristotle.example"] + }, + "value": "Aristotle" + } + ], + "content": [ + { + "html": "

Quality is not an act, it is a habit.

", + "lang": "en", + "value": "Quality is not an act, it is a habit." + } + ] + } + }, + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "author": [ + { + "type": ["h-card"], + "lang": "en", + "properties": { + "name": ["Aristotle"], + "photo": ["https://aristotle.example/photo.jpg"], + "url": ["https://aristotle.example"] + }, + "value": "Aristotle" + } + ], + "content": [ + { + "html": "

Whosoever is delighted in solitude is either a wild beast or a god.

", + "lang": "en", + "value": "Whosoever is delighted in solitude is either a wild beast or a god." + } + ] + } + } + ] + } + ], + "rel-urls": {}, + "rels": {} +} diff --git a/test/suites/experimental/authorship-p-author.html b/test/suites/experimental/authorship-p-author.html new file mode 100644 index 00000000..497682bc --- /dev/null +++ b/test/suites/experimental/authorship-p-author.html @@ -0,0 +1,16 @@ + + + + + + Entry with p-author + + +
+

To be, or not to be: that is the question.

+
+
+

William Shakespeare

+
+ + diff --git a/test/suites/experimental/authorship-p-author.json b/test/suites/experimental/authorship-p-author.json new file mode 100644 index 00000000..35bb5433 --- /dev/null +++ b/test/suites/experimental/authorship-p-author.json @@ -0,0 +1,20 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "author": ["William Shakespeare"], + "content": [ + { + "value": "To be, or not to be: that is the question.", + "lang": "en", + "html": "

To be, or not to be: that is the question.

" + } + ] + } + } + ], + "rel-urls": {}, + "rels": {} +} From d190378cb71b7578e7502bbbe6a1fb4eead270fd Mon Sep 17 00:00:00 2001 From: Paul Robert Lloyd Date: Sun, 12 Nov 2023 21:28:14 +0000 Subject: [PATCH 5/5] feat(experimental): add support for authorship (wip) --- src/helpers/authorship.ts | 108 ++++++++++++++++++++++++++++++++++++++ src/microformats/parse.ts | 10 ++++ src/parser.ts | 1 + src/types.ts | 15 +++++- 4 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 src/helpers/authorship.ts diff --git a/src/helpers/authorship.ts b/src/helpers/authorship.ts new file mode 100644 index 00000000..47c40f7e --- /dev/null +++ b/src/helpers/authorship.ts @@ -0,0 +1,108 @@ +import { + Author, + Image, + MicroformatProperty, + MicroformatRoot, + Rels, +} from "../types"; + +function getPlainText(values: MicroformatProperty[]): string | null { + if (values.length === 0) { + return null; + } + + const value = values[0] as Author; + let plainText: string | null; + if (value.value !== undefined && typeof value.value === "string") { + plainText = value.value; + } else if (typeof value === "string") { + plainText = value; + } else { + plainText = null; + } + + return plainText && plainText.trim(); +} + +const parseAuthor = (hCard: MicroformatRoot) => { + // TODO: Figure out how to stop TypeScript complaining about missing `value` + const result: Author = {}; + + if (hCard.properties !== undefined) { + // Use first (or only) name + const names = hCard.properties.name as string[]; + if (names?.length > 0) { + result.name = names[0]; + } + + // Use first (or only) photo + const photos = hCard.properties.photo as Image[]; + if (photos?.length > 0) { + const photo = getPlainText(photos); + if (photo) { + result.photo = photo; + } + } + + // Use first (or only) URL + const urls = hCard.properties.url as string[]; + if (urls?.length > 0) { + result.url = urls[0]; + } + } else if (hCard) { + if (URL.canParse(String(hCard))) { + result.url = String(hCard); + } else { + result.name = String(hCard); + } + } + + return result as Author; +}; + +const findEntryAuthor = (hEntry: MicroformatRoot) => { + const values = hEntry.properties.author || []; + + if (Object.keys(values).length === 0) { + return; + } + + return parseAuthor(values[0] as MicroformatRoot); +}; + +const findFeedAuthor = () => false; + +export const findAuthor = async (item: MicroformatRoot, rels: Rels) => { + // 1. If no `h-entry` then there’s no post to find authorship for. + const itemIsEntry = item.type && item.type[0] === "h-entry"; + if (!itemIsEntry) { + return false; + } + + // 2. Parse the `h-entry` + const entryAuthor = findEntryAuthor(item); + const feedAuthor = findFeedAuthor(); // TODO + + // 3 & 4. Return author in `h-entry`, else find author in parent `h-feed` + const author = entryAuthor ? entryAuthor : feedAuthor; + + // 5. Return `author` if `h-card` + const authorIsCard = author && author.type[0] === "h-card"; + if (authorIsCard) { + return author; + } + + // 6. Use `h-card` fetched from rel=author + const authorPage = author.properties?.url || rels.author; + if (authorPage) { + // Fetch `authorPage` and parse result using `parseMicroformat` + // This is an async function, which would bubble up to the parent function + } + + // 7. From the parsed `authorPage`, return the first `h-card` that either: + // * Has a value for `u-url` (or `u-uid`) that matches the `authorPage` URL + // * Has a value for `u-url` that matches a `rel=me` on the `authorPage` + + // 8. Else, no deterministic author can be found + return author; +}; diff --git a/src/microformats/parse.ts b/src/microformats/parse.ts index 117c36e0..2fef6dd8 100644 --- a/src/microformats/parse.ts +++ b/src/microformats/parse.ts @@ -5,6 +5,7 @@ import { Element, } from "../types"; import { microformatProperties } from "./properties"; +import { findAuthor } from "../helpers/authorship"; import { textContent } from "../helpers/textContent"; import { getAttributeValue, getClassNames } from "../helpers/attributes"; import { findChildren } from "../helpers/findChildren"; @@ -66,6 +67,15 @@ export const parseMicroformat = ( item.lang = lang; } + if (isEnabled(options, "authorship")) { + const author = findAuthor(item, options.rels); + + if (author) { + console.log("author", author); + // item.properties.author = author; + } + } + if (children.length) { item.children = children.map((child) => parseMicroformat(child, { ...options, inherited }), diff --git a/src/parser.ts b/src/parser.ts index 339be208..ae7fc213 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -23,6 +23,7 @@ export const parser = ( baseUrl, idRefs, inherited: { roots: [], lang }, + rels, }; let items = findChildren(doc, isMicroformatRoot).map((mf) => parseMicroformat(mf, parsingOptions), diff --git a/src/types.ts b/src/types.ts index 9a3c4a36..14f8e7dd 100644 --- a/src/types.ts +++ b/src/types.ts @@ -27,6 +27,7 @@ export interface ParsingOptions extends ParserOptions { roots: BackcompatRoot[]; lang?: string; }; + rels: Rels; } export interface ParsedDocument { @@ -46,6 +47,13 @@ export interface MicroformatRoot { value?: MicroformatProperty; } +export interface Author { + name?: string; + value: string; + photo?: string; + url?: string; +} + export interface Image { alt: string; value?: string; @@ -57,7 +65,12 @@ export interface Html { lang?: string; } -export type MicroformatProperty = MicroformatRoot | Image | Html | string; +export type MicroformatProperty = + | MicroformatRoot + | Author + | Image + | Html + | string; export type Rels = Record;