Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(experimental): authorship #292

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion demo/demo.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ window.parseHtml = () => {
const lang = document.getElementById("lang").checked;
const textContent = document.getElementById("textContent").checked;
const metaformats = document.getElementById("metaformats").checked;
const authorship = document.getElementById("authorship").checked;

return parse(html, {
baseUrl,
experimental: { lang, textContent, metaformats },
experimental: { lang, textContent, metaformats, authorship },
});
};
10 changes: 10 additions & 0 deletions demo/index.tpl.html
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@ <h3>Experimental options</h3>
/>
<span>Metaformats parsing</span>
</label>
<label>
<input
type="checkbox"
name="authorship"
id="authorship"
value="true"
checked
/>
<span>Authorship discovery</span>
</label>
</p>

<div class="submit">
Expand Down
108 changes: 108 additions & 0 deletions src/helpers/authorship.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import {
Author,
Image,
MicroformatProperty,
MicroformatRoot,
Rels,
} from "../types";

/**
 * Extracts a trimmed plain-text string from the first entry of a property list.
 *
 * The first entry may be a bare string, or an object carrying a string
 * `value` field; anything else yields `null`.
 *
 * @param values - Property values; only the first entry is inspected.
 * @returns The trimmed text, or `null` when the list is empty or its first
 * entry holds no string.
 */
function getPlainText(values: MicroformatProperty[]): string | null {
  if (values.length === 0) {
    return null;
  }

  const first = values[0] as Author;

  // Prefer an embedded string `value`; otherwise fall back to the entry itself.
  const candidate: unknown =
    typeof first.value === "string" ? first.value : first;

  if (typeof candidate !== "string") {
    return null;
  }

  // An empty string is passed through unchanged rather than trimmed.
  return candidate && candidate.trim();
}

/**
 * Reports whether `candidate` parses as an absolute URL.
 *
 * Uses the `URL` constructor instead of `URL.canParse`, because the static
 * method is missing on older Node.js releases and older browsers.
 */
const isParsableUrl = (candidate: string): boolean => {
  try {
    new URL(candidate);
    return true;
  } catch {
    return false;
  }
};

/**
 * Flattens an author value into an object with optional `name`, `photo` and
 * `url` fields.
 *
 * @param hCard - Either an item with `properties` (the first `name`, `photo`
 * and `url` values are used), or a plain value that is stringified and
 * treated as a URL when it parses as one, otherwise as a name.
 * @returns The collected fields; empty when nothing could be extracted.
 */
const parseAuthor = (hCard: MicroformatRoot) => {
  // `Partial` because the required `value` field is never populated here.
  const result: Partial<Author> = {};

  if (hCard?.properties !== undefined) {
    // Use first (or only) name
    const names = hCard.properties.name as string[] | undefined;
    if (names !== undefined && names.length > 0) {
      result.name = names[0];
    }

    // Use first (or only) photo; it may be a string or an object with `value`
    const photos = hCard.properties.photo as Image[] | undefined;
    if (photos !== undefined && photos.length > 0) {
      const photo = getPlainText(photos);
      if (photo) {
        result.photo = photo;
      }
    }

    // Use first (or only) URL
    const urls = hCard.properties.url as string[] | undefined;
    if (urls !== undefined && urls.length > 0) {
      result.url = urls[0];
    }
  } else if (hCard) {
    // Author given as a plain value: a parsable URL becomes `url`,
    // anything else is taken to be the author's name.
    const text = String(hCard);
    if (isParsableUrl(text)) {
      result.url = text;
    } else {
      result.name = text;
    }
  }

  return result as Author;
};

/**
 * Reads the first `author` property of an entry and flattens it with
 * `parseAuthor`.
 *
 * @param hEntry - The entry whose `properties.author` is inspected.
 * @returns The parsed author, or `undefined` when the entry has no author
 * values.
 */
const findEntryAuthor = (hEntry: MicroformatRoot) => {
  const authors = hEntry.properties.author || [];
  const hasAuthor = Object.keys(authors).length > 0;

  return hasAuthor ? parseAuthor(authors[0] as MicroformatRoot) : undefined;
};

/**
 * Placeholder for looking up an author on a parent feed.
 *
 * @returns Always `false`; no lookup is performed yet.
 */
const findFeedAuthor = () => {
  return false;
};

/**
 * Attempts to determine the author of an `h-entry`, following the numbered
 * steps noted inline. Steps 6 and 7 (fetching and parsing the author page)
 * are not implemented yet, so the entry's own author is returned as found.
 *
 * @param item - Parsed item to find an author for.
 * @param rels - Document rels; `rels.author` is the fallback author page.
 * @returns `false` when `item` is not an `h-entry` or no author was found,
 * otherwise the author taken from the entry's `author` property.
 */
export const findAuthor = async (item: MicroformatRoot, rels: Rels) => {
  // 1. If no `h-entry` then there’s no post to find authorship for.
  const itemIsEntry = item.type && item.type[0] === "h-entry";
  if (!itemIsEntry) {
    return false;
  }

  // 2. Parse the `h-entry`
  const entryAuthor = findEntryAuthor(item);
  const feedAuthor = findFeedAuthor(); // TODO

  // 3 & 4. Return author in `h-entry`, else find author in parent `h-feed`
  const author = entryAuthor ? entryAuthor : feedAuthor;

  // No author object was found: hand back the falsy result without touching
  // `rels`, which would throw if it were missing.
  if (typeof author !== "object" || author === null) {
    return author;
  }

  // 5. Return `author` if `h-card`
  // The flattened author may carry no `type`, so it is read defensively
  // instead of indexing into `undefined`.
  const authorType = (author as Partial<MicroformatRoot>).type;
  const authorIsCard = Array.isArray(authorType) && authorType[0] === "h-card";
  if (authorIsCard) {
    return author;
  }

  // 6. Use `h-card` fetched from rel=author
  const authorPage = author.url || rels?.author;
  if (authorPage) {
    // Fetch `authorPage` and parse result using `parseMicroformat`
    // This is an async function, which would bubble up to the parent function
  }

  // 7. From the parsed `authorPage`, return the first `h-card` that either:
  // * Has a value for `u-url` (or `u-uid`) that matches the `authorPage` URL
  // * Has a value for `u-url` that matches a `rel=me` on the `authorPage`

  // 8. Else, no deterministic author can be found
  return author;
};
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export interface Options {
lang?: boolean;
textContent?: boolean;
metaformats?: boolean;
authorship?: boolean;
};
}

Expand Down
12 changes: 11 additions & 1 deletion src/microformats/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
Element,
} from "../types";
import { microformatProperties } from "./properties";
import { findAuthor } from "../helpers/authorship";
import { textContent } from "../helpers/textContent";
import { getAttributeValue, getClassNames } from "../helpers/attributes";
import { findChildren } from "../helpers/findChildren";
Expand Down Expand Up @@ -66,6 +67,15 @@ export const parseMicroformat = (
item.lang = lang;
}

if (isEnabled(options, "authorship")) {
const author = findAuthor(item, options.rels);

if (author) {
console.log("author", author);
// item.properties.author = author;
}
}

if (children.length) {
item.children = children.map((child) =>
parseMicroformat(child, { ...options, inherited }),
Expand All @@ -86,7 +96,7 @@ export const parseMicroformat = (
}

/**
* There is some ambigutity on how this should be handled.
* There is some ambiguity on how this should be handled.
* At the moment, we're following other parsers and keeping `value` a string
* and adding `html` as an undocumented property.
*/
Expand Down
1 change: 1 addition & 0 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export const parser = (
baseUrl,
idRefs,
inherited: { roots: [], lang },
rels,
};
let items = findChildren(doc, isMicroformatRoot).map((mf) =>
parseMicroformat(mf, parsingOptions),
Expand Down
16 changes: 15 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export interface ParserOptions {
lang?: boolean;
textContent?: boolean;
metaformats?: boolean;
authorship?: boolean;
};
}

Expand All @@ -26,6 +27,7 @@ export interface ParsingOptions extends ParserOptions {
roots: BackcompatRoot[];
lang?: string;
};
rels: Rels;
}

export interface ParsedDocument {
Expand All @@ -45,6 +47,13 @@ export interface MicroformatRoot {
value?: MicroformatProperty;
}

/**
 * Author details expressed as plain strings.
 *
 * NOTE(review): `value` is the only required field, yet the authorship
 * helper builds objects without it; confirm whether it should be optional.
 */
export interface Author {
/** Author name, when known. */
name?: string;
/** Plain-text value for the author. */
value: string;
/** Author photo as a plain string, when known. */
photo?: string;
/** Author URL, when known. */
url?: string;
}

export interface Image {
alt: string;
value?: string;
Expand All @@ -56,7 +65,12 @@ export interface Html {
lang?: string;
}

export type MicroformatProperty = MicroformatRoot | Image | Html | string;
export type MicroformatProperty =
| MicroformatRoot
| Author
| Image
| Html
| string;

/**
 * Map from a key to a list of strings, e.g.
 * `{ author: ["https://basho.example"] }`: a rel value and the URLs found
 * for it.
 */
export type Rels = Record<string, string[]>;

Expand Down
6 changes: 5 additions & 1 deletion src/validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ export const validator = (
if ("experimental" in options) {
const experimental = assertIsObject(
options.experimental,
["lang", "textContent", "metaformats"],
["lang", "textContent", "metaformats", "authorship"],
"experimental",
);

Expand All @@ -96,6 +96,10 @@ export const validator = (
if ("metaformats" in experimental) {
assertIsBoolean(experimental.metaformats, "experimental.metaformats");
}

if ("authorship" in experimental) {
assertIsBoolean(experimental.authorship, "experimental.authorship");
}
}
};

Expand Down
7 changes: 6 additions & 1 deletion test/scenarios.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,12 @@ describe("mf2() // experimental scenarios", () => {
it(`should correctly parse ${name}`, () => {
const result = mf2(input, {
...options,
experimental: { lang: true, textContent: true, metaformats: true },
experimental: {
lang: true,
textContent: true,
metaformats: true,
authorship: true,
},
});
expect(result).to.deep.equal(expected);
});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!-- This next entry is by Virginia Woolf. Her URL is https://virginia.example, and her photo is at https://virginia.example/photo.jpg. -->

<html lang="en">
<head>
<meta charset="utf-8" />
<title>Entry with rel=author (links to h-card with rel=me)</title>
</head>
<body class="h-entry">
<div class="e-content">
<p>A woman must have money and a room of her own if she is to write fiction.</p>
</div>
<footer>
<a href="https://virginia.woolf" rel="author">About Virginia Woolf</a>
</footer>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"items": [
{
"type": ["h-entry"],
"lang": "en",
"properties": {
"author": [
{
"type": ["h-card"],
"lang": "en",
"properties": {
"name": ["Virginia Woolf"],
"photo": ["https://virginia.example/photo.jpg"],
"url": ["https://virginia.example"]
},
"value": "Virginia Woolf"
}
],
"content": [
{
"value": "A woman must have money and a room of her own if she is to write fiction.",
"lang": "en",
"html": "<p>A woman must have money and a room of her own if she is to write fiction.</p>"
}
]
}
}
],
"rel-urls": {
"https://virginia.woolf": {
"rels": ["author"],
"text": "About Virginia Woolf"
}
},
"rels": {
"author": ["https://virginia.woolf"]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!-- This is a haiku by Basho. His URL is https://basho.example, and his photo is at https://basho.example/photo.jpg. -->

<html lang="en">
<head>
<meta charset="utf-8" />
<title>Entry with rel=author (links to h-card with u-url and u-uid)</title>
</head>
<body class="h-entry">
<div class="e-content" lang="jp">
<p>古池や<br />蛙飛び込む<br />水の音</p>
</div>
<footer>
<a href="https://basho.example" rel="author">About Basho</a>
</footer>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"items": [
{
"type": ["h-entry"],
"lang": "en",
"properties": {
"author": [
{
"type": ["h-card"],
"lang": "en",
"properties": {
"name": ["Basho"],
"photo": ["https://basho.example/photo.jpg"],
"url": ["https://basho.example"]
},
"value": "Basho"
}
],
"content": [
{
"value": "古池や\n蛙飛び込む\n水の音",
"lang": "jp",
"html": "<p>古池や<br>蛙飛び込む<br>水の音</p>"
}
]
}
}
],
"rel-urls": {
"https://basho.example": {
"rels": ["author"],
"text": "About Basho"
}
},
"rels": {
"author": ["https://basho.example"]
}
}
21 changes: 21 additions & 0 deletions test/suites/experimental/authorship-h-card-with-rel-author.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!-- This is an entry by Patañjali. His URL is https://patanjali.example, and he uses no photo. -->

<html lang="en">
<head>
<meta charset="utf-8" />
<title>Entry with separate h-card and rel=author</title>
</head>
<body class="h-entry">
<div class="e-content">
<p>For one who sees the distinction, there is no further confusing of the mind with the self.</p>
</div>
<p class="p-author h-card">
<a href="https://patanjali.example" class="u-url">
<span class="p-name">Patañjali</span>
</a>
</p>
<footer>
<a href="https://patanjali.example" rel="author">About Patañjali</a>
</footer>
</body>
</html>
Loading
Loading