From ca840defda6d4b6e36528dc121e94707d88410cd Mon Sep 17 00:00:00 2001
From: Anthony Ciccarello
Date: Mon, 10 Jul 2023 01:15:13 -0700
Subject: [PATCH] implement metaformats parsing
Closes #224
---
README.md | 4 +
demo/demo.js | 6 +-
demo/index.tpl.html | 10 +
src/helpers/metaformats.ts | 246 ++++++++++++++++++
src/helpers/nodeMatchers.ts | 5 +
src/microformats/property.ts | 3 +
src/parser.ts | 13 +-
src/types.ts | 5 +-
src/validator.ts | 12 +-
test/scenarios.spec.ts | 17 +-
.../metaformats-missing-head.html | 8 +
.../metaformats-missing-head.json | 5 +
.../experimental/metaformats-og-article.html | 25 +-
.../experimental/metaformats-og-article.json | 29 ++-
.../metaformats-og-audio-soundcloud.html | 67 +++++
.../metaformats-og-audio-soundcloud.json | 22 ++
.../metaformats-og-profile-linkedin.html | 207 +++++++++++++++
.../metaformats-og-profile-linkedin.json | 31 +++
.../metaformats-og-video-vimeo.html | 68 +++++
.../metaformats-og-video-vimeo.json | 34 +++
.../experimental/metaformats-prefer-mf.html | 24 ++
.../experimental/metaformats-prefer-mf.json | 15 ++
.../experimental/metaformats-standard.html | 19 ++
.../experimental/metaformats-standard.json | 25 ++
.../metaformats-twitter-article.html | 19 ++
.../metaformats-twitter-article.json | 23 ++
26 files changed, 914 insertions(+), 28 deletions(-)
create mode 100644 src/helpers/metaformats.ts
create mode 100644 test/suites/experimental/metaformats-missing-head.html
create mode 100644 test/suites/experimental/metaformats-missing-head.json
create mode 100644 test/suites/experimental/metaformats-og-audio-soundcloud.html
create mode 100644 test/suites/experimental/metaformats-og-audio-soundcloud.json
create mode 100644 test/suites/experimental/metaformats-og-profile-linkedin.html
create mode 100644 test/suites/experimental/metaformats-og-profile-linkedin.json
create mode 100644 test/suites/experimental/metaformats-og-video-vimeo.html
create mode 100644 test/suites/experimental/metaformats-og-video-vimeo.json
create mode 100644 test/suites/experimental/metaformats-prefer-mf.html
create mode 100644 test/suites/experimental/metaformats-prefer-mf.json
create mode 100644 test/suites/experimental/metaformats-standard.html
create mode 100644 test/suites/experimental/metaformats-standard.json
create mode 100644 test/suites/experimental/metaformats-twitter-article.html
create mode 100644 test/suites/experimental/metaformats-twitter-article.json
diff --git a/README.md b/README.md
index f1b2e03b..5f702264 100644
--- a/README.md
+++ b/README.md
@@ -109,6 +109,10 @@ These are sourced from the element themselves, a parent microformat, the HTML do
When parsing microformats for text content, all the consecutive whitespace is collapsed into a single space. `<br/>` and `<p>` tags are treated as line breaks.
+#### `metaformats`
+
+Enables fallback to [metaformats](https://microformats.org/wiki/metaformats) parsing which looks at `<meta>` tags to infer content.
+
## Contributing
See our [contributing guidelines](./CONTRIBUTING.md) for more information.
diff --git a/demo/demo.js b/demo/demo.js
index bd43e018..da407322 100644
--- a/demo/demo.js
+++ b/demo/demo.js
@@ -32,6 +32,10 @@ window.parseHtml = () => {
const baseUrl = document.getElementById("base-url").value;
const lang = document.getElementById("lang").checked;
const textContent = document.getElementById("textContent").checked;
+ const metaformats = document.getElementById("metaformats").checked;
- return parse(html, { baseUrl, experimental: { lang, textContent } });
+ return parse(html, {
+ baseUrl,
+ experimental: { lang, textContent, metaformats },
+ });
};
diff --git a/demo/index.tpl.html b/demo/index.tpl.html
index e90b34d4..ce59a30f 100644
--- a/demo/index.tpl.html
+++ b/demo/index.tpl.html
@@ -72,6 +72,16 @@
Experimental options
/>
Better text content
+
diff --git a/src/helpers/metaformats.ts b/src/helpers/metaformats.ts
new file mode 100644
index 00000000..a8faf24e
--- /dev/null
+++ b/src/helpers/metaformats.ts
@@ -0,0 +1,246 @@
+import { Document, Element } from "parse5";
+
+import { MicroformatRoot, ParsingOptions } from "../types";
+import {
+ getAttributeIfTag,
+ getAttributeValue,
+ hasRelIntersect,
+} from "./attributes";
+import { isEnabled } from "./experimental";
+import { isElement, isTag } from "./nodeMatchers";
+
+/** Special key for title tag in meta collection */
+const TITLE_TAG_KEY = "<title>";
+/** Special key for the canonical link tag in meta collection */
+const CANONICAL_URL_KEY = "<link rel='canonical'>";
+/** Media kinds whose og:/twitter: sub-tags receive special handling */
+const MEDIA_TYPES = ["image", "video", "audio"];
+
+interface ComplexMediaMeta { // Media entry that carries alt text alongside its stored value
+ value: string; // Content previously stored for the media tag
+ alt: string; // Content of the matching ":alt" meta tag
+}
+type MetaTagContent = string | ComplexMediaMeta; // Plain tag content, or media paired with alt text
+
+/**
+ * Creates a normalized store for meta tags
+ *
+ * @returns The backing `metaContent` record together with the `set` and `get`
+ * accessors that read from and write to it
+ */
+const initializeMetaContentCollection = (): MetaContentCollection => {
+  /**
+   * Collection of all relevant meta tag content
+   * Since tag order isn't guaranteed, need to collect all value before applying defaults
+   *
+   * Created without a prototype: keys come straight from the document, so a
+   * tag named e.g. "constructor" must not resolve to an inherited
+   * Object.prototype member (which is not an array and would break `set`).
+   */
+  const metaContent: Record<string, MetaTagContent[]> = Object.create(null);
+
+  /**
+   * Gets the values of the first property found
+   * @param properties Array of properties to look for, preferred item first
+   * @returns Values stored for the first key that has any, otherwise undefined
+   */
+  const get = (properties: string[]): MetaTagContent[] | undefined => {
+    for (const key of properties) {
+      const values = metaContent[key];
+      if (values) {
+        return values;
+      }
+    }
+    return undefined;
+  };
+
+  /**
+   * Stores meta tag values.
+   *
+   * Includes following normalization rules:
+   * - Duplicates are removed from repeated (array) tags
+   * - url and secure_url media tags are treated same as base (e.g. og:image:url -> og:image)
+   * - Alt text is added as property on last image url
+   *
+   * Any other sub-tag (e.g. og:image:width) is stored under its own full key.
+   *
+   * @param key Tag name taken from the meta tag
+   * @param value Content associated with the tag
+   */
+  const set = (key: string, value: string): void => {
+    // Split tag name to normalize values like "og:video:url"
+    const [domain, type, subtype] = key.split(":");
+
+    // Media tags specific parsing
+    if (
+      (domain === "og" || domain === "twitter") &&
+      MEDIA_TYPES.includes(type)
+    ) {
+      if (subtype === "alt") {
+        const existingMedia = metaContent[`${domain}:${type}`];
+
+        if (existingMedia?.length) {
+          const last = existingMedia.pop();
+
+          if (typeof last === "string") {
+            existingMedia.push({ value: last, alt: value });
+          } else if (last) {
+            // Found duplicate alt text tag so re-inserting existing
+            // last should always be object. if condition added for types
+            existingMedia.push(last);
+          }
+        }
+
+        return; // Stop as alt text is already added
+      } else if (["url", "secure_url"].includes(subtype)) {
+        // Mutate key to normalize different url values
+        // Duplicates will be cleaned up on insertion
+        key = `${domain}:${type}`;
+      }
+    }
+    const existing = metaContent[key];
+
+    if (existing) {
+      // Compare against the raw value whether or not alt text was attached
+      const isDuplicate = existing.some(
+        (stored) =>
+          (typeof stored === "string" ? stored : stored.value) === value
+      );
+
+      if (!isDuplicate) {
+        existing.push(value);
+      } // Else ignore duplicates
+    } else {
+      metaContent[key] = [value];
+    }
+  };
+
+  return {
+    metaContent,
+    set,
+    get,
+  };
+};
+
+/** Store of collected meta tag values together with its accessors */
+interface MetaContentCollection {
+  /** Collected values keyed by tag name */
+  metaContent: Record<string, MetaTagContent[]>;
+  /** Record a value for the given tag name */
+  set: (key: string, value: string) => void;
+  /** Values of the first listed key that has any, preferred key first */
+  get: (properties: string[]) => MetaTagContent[] | undefined;
+}
+
+/**
+ * Walks the direct children of the head element and records the values
+ * metaformats reads: meta tag content, the title text and the canonical link.
+ * @param head The document's head element
+ * @returns Collection populated with every value found
+ */
+const collectMetaTags = (head: Element): MetaContentCollection => {
+  const metaTags = initializeMetaContentCollection();
+
+  // childNodes is an array, so iterate its elements rather than its keys
+  for (const child of head.childNodes) {
+    if (!isElement(child)) {
+      continue;
+    }
+
+    const content = getAttributeIfTag(child, ["meta"], "content");
+    if (content) {
+      // Tags keys usually use the "name" attribute but open graph uses "property"
+      // Consider them separately in case a meta tag uses both
+      // e.g. <meta property="og:title" name="twitter:title" content="Title">
+      const property = getAttributeValue(child, "property");
+      if (property) {
+        metaTags.set(property, content);
+      }
+
+      const name = getAttributeValue(child, "name");
+      if (name && name !== property) {
+        metaTags.set(name, content);
+      }
+    } else if (child.tagName === "title") {
+      // An empty <title></title> has no child nodes, so check the first
+      // child exists before probing it for a text value
+      const [titleText] = child.childNodes;
+      if (titleText && "value" in titleText) {
+        metaTags.set(TITLE_TAG_KEY, titleText.value);
+      }
+    } else if (
+      child.tagName === "link" &&
+      hasRelIntersect(child, ["canonical"])
+    ) {
+      const canonicalUrl = getAttributeValue(child, "href");
+      if (canonicalUrl) {
+        metaTags.set(CANONICAL_URL_KEY, canonicalUrl);
+      }
+    }
+  }
+  return metaTags;
+};
+
+/**
+ * Assemble the collected meta values into a single implied microformat
+ * @param metaTags Previously parsed meta tag collection
+ * @param options Library parsing options
+ * @returns One-item array holding the implied root, or an empty array when
+ * no property could be filled
+ */
+const combineRoot = (
+  metaTags: MetaContentCollection,
+  options: ParsingOptions
+): MicroformatRoot[] => {
+  const item: MicroformatRoot = { properties: {} };
+
+  if (isEnabled(options, "lang") && options.inherited.lang) {
+    item.lang = options.inherited.lang;
+  }
+
+  // og:type picks the implied root class; anything else stays an h-entry
+  const ogType = metaTags.get(["og:type"])?.[0];
+  const ogTypeName = typeof ogType === "string" ? ogType : "";
+  let impliedRootClass: string;
+  if (ogTypeName === "profile") {
+    impliedRootClass = "h-card";
+  } else if (ogTypeName.includes("music") || ogTypeName.includes("video")) {
+    impliedRootClass = "h-cite";
+  } else {
+    impliedRootClass = "h-entry";
+  }
+  item.type = [impliedRootClass];
+
+  // Microformat property -> meta keys to read, preferred key first
+  const sources: [string, string[]][] = [
+    ["name", ["og:title", "twitter:title", TITLE_TAG_KEY]],
+    ["summary", ["og:description", "twitter:description", "description"]],
+    ["featured", ["og:image", "twitter:image"]],
+    ["video", ["og:video", "twitter:video"]],
+    ["audio", ["og:audio", "twitter:audio"]],
+    ["published", ["article:published_time", "date"]],
+    ["updated", ["article:modified_time"]],
+    ["author", ["article:author", "author"]],
+    ["url", ["og:url", CANONICAL_URL_KEY]],
+    // Publication properties useful for h-cite
+    ["publication", ["og:site_name", "publisher"]],
+  ];
+
+  if (impliedRootClass === "h-card") {
+    sources.push(
+      ["given-name", ["profile:first_name"]],
+      ["family-name", ["profile:last_name"]]
+    );
+  }
+
+  for (const [property, keys] of sources) {
+    // Empty and undefined values are not added
+    const found = (metaTags.get(keys) ?? []).filter(Boolean);
+    if (found.length > 0) {
+      item.properties[property] = found;
+    }
+  }
+
+  return Object.keys(item.properties).length > 0 ? [item] : [];
+};
+
+/**
+ * Parse metaformats from the head of a document
+ * @param doc Parsed document to inspect
+ * @param options Library parsing options
+ * @returns The implied microformat root, or an empty array when the document
+ * has no head element or no usable properties were found
+ */
+export const parseMetaformats = (
+  doc: Document,
+  options: ParsingOptions
+): MicroformatRoot[] => {
+  // Per validation, html element will always be found
+  const html = doc.childNodes.find(isTag("html"));
+  const head = html?.childNodes.find(isTag("head"));
+
+  // Per manual testing, head will always be defined. Guard instead of
+  // asserting so an unexpected tree yields no items rather than a crash
+  if (!head) {
+    return [];
+  }
+
+  return combineRoot(collectMetaTags(head), options);
+};
diff --git a/src/helpers/nodeMatchers.ts b/src/helpers/nodeMatchers.ts
index ff7858d8..6aacaed7 100644
--- a/src/helpers/nodeMatchers.ts
+++ b/src/helpers/nodeMatchers.ts
@@ -20,6 +20,11 @@ const propClassRegex = classRegex("(p|e|u|dt)");
export const isElement = (node: Node): node is Element =>
"tagName" in node && "childNodes" in node;
+/**
+ * Build a type guard matching elements with the given tag name
+ * @param tagName Tag name compared against the node's `tagName`
+ * @returns Predicate that is true only for element nodes whose tag name matches
+ */
+export const isTag = (tagName: string) => {
+  return (node: Node): node is Element => {
+    if (!isElement(node)) {
+      return false;
+    }
+    return node.tagName === tagName;
+  };
+};
+
export const isTextNode = (node: Node): node is TextNode => "value" in node;
export const isMicroformatV2Root = (node: Element): boolean =>
diff --git a/src/microformats/property.ts b/src/microformats/property.ts
index ba4be1e4..5f271868 100644
--- a/src/microformats/property.ts
+++ b/src/microformats/property.ts
@@ -34,6 +34,7 @@ export const parseP = (node: Element, options: ParsingOptions): string =>
getAttributeIfTag(node, ["abbr", "link"], "title") ??
getAttributeIfTag(node, ["data"], "value") ??
getAttributeIfTag(node, ["img", "area"], "alt") ??
+ getAttributeIfTag(node, ["meta"], "content") ??
textContent(node, options);
export const parseU = (
@@ -49,6 +50,7 @@ export const parseU = (
valueClassPattern(node, options) ??
getAttributeIfTag(node, ["abbr"], "title") ??
getAttributeIfTag(node, ["data", "input"], "value") ??
+ getAttributeIfTag(node, ["meta"], "content") ??
textContent(node, options);
if (typeof url === "string" && isLocalLink(url)) {
@@ -63,6 +65,7 @@ const parseDt = (node: Element, options: ParsingOptions): string =>
getAttributeIfTag(node, ["time", "ins", "del"], "datetime") ??
getAttributeIfTag(node, ["abbr"], "title") ??
getAttributeIfTag(node, ["data", "input"], "value") ??
+ getAttributeIfTag(node, ["meta"], "content") ??
textContent(node, options);
export const parseE = (node: Element, options: ParsingOptions): Html => {
diff --git a/src/parser.ts b/src/parser.ts
index 1b639617..e4bbed7b 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -6,6 +6,8 @@ import { isMicroformatRoot } from "./helpers/nodeMatchers";
import { ParsedDocument, ParserOptions, ParsingOptions } from "./types";
import { validateParsedHtml } from "./validator";
import { documentSetup } from "./helpers/documentSetup";
+import { parseMetaformats } from "./helpers/metaformats";
+import { isEnabled } from "./helpers/experimental";
export const parser = (
html: string,
@@ -22,12 +24,17 @@ export const parser = (
idRefs,
inherited: { roots: [], lang },
};
+ let items = findChildren(doc, isMicroformatRoot).map((mf) =>
+ parseMicroformat(mf, parsingOptions)
+ );
+
+ if (items.length === 0 && isEnabled(parsingOptions, "metaformats")) {
+ items = parseMetaformats(doc, parsingOptions);
+ }
return {
rels,
"rel-urls": relUrls,
- items: findChildren(doc, isMicroformatRoot).map((mf) =>
- parseMicroformat(mf, parsingOptions)
- ),
+ items,
};
};
diff --git a/src/types.ts b/src/types.ts
index a6110350..902bb558 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -2,16 +2,17 @@ import { Element } from "parse5";
import { BackcompatRoot } from "./backcompat";
-export type ExperimentalName = "lang" | "textContent";
-
export interface ParserOptions {
baseUrl: string;
experimental?: {
lang?: boolean;
textContent?: boolean;
+ metaformats?: boolean;
};
}
+/** Names of the flags accepted under `ParserOptions.experimental` */
+export type ExperimentalName = keyof NonNullable<ParserOptions["experimental"]>;
+
export interface ParsingOptions extends ParserOptions {
implyProperties?: boolean;
idRefs: IdRefs;
diff --git a/src/validator.ts b/src/validator.ts
index 344b73b0..9ccd74dc 100644
--- a/src/validator.ts
+++ b/src/validator.ts
@@ -1,6 +1,6 @@
-import { Document, Element } from "parse5";
+import { Document } from "parse5";
-import { isElement } from "./helpers/nodeMatchers";
+import { isElement, isTag } from "./helpers/nodeMatchers";
const assertIsString = (str: unknown, name: string): string => {
if (typeof str === "undefined") {
@@ -103,17 +103,13 @@ export const validator = (
export const validateParsedHtml = (doc: Document): void => {
// <html> and <body> are always defined (based on tests)
// Provide error handling in the event they are ever not defined
- const html = doc.childNodes.find(
- (child): child is Element => isElement(child) && child.tagName === "html"
- );
+ const html = doc.childNodes.find(isTag("html"));
if (!html) {
throw new Error("Microformats parser: No element found");
}
- const body = html.childNodes.find(
- (child): child is Element => isElement(child) && child.tagName === "body"
- );
+ const body = html.childNodes.find(isTag("body"));
if (!body) {
throw new Error("Microformats parser: No element found");
diff --git a/test/scenarios.spec.ts b/test/scenarios.spec.ts
index 2884a2be..ae75c1a5 100644
--- a/test/scenarios.spec.ts
+++ b/test/scenarios.spec.ts
@@ -1,4 +1,4 @@
-import { expect } from "chai";
+import { expect, assert } from "chai";
import * as path from "path";
import { mf2 } from "../src";
@@ -72,4 +72,19 @@ describe("mf2() // experimental scenarios", () => {
expect(result).to.deep.equal(expected);
});
});
+
+ it("should respect the experimental flag", () => { // Parses a metaformats fixture using only the shared options
+ const findTestCase = (searchName: string) => // Look up a loaded experimental case by name
+ experimental.find(({ name }) => name === searchName) ??
+ assert.fail(`Test case "${searchName}" not found`); // Fail loudly if the fixture is missing
+ const { input } = findTestCase("metaformats-og-article");
+ const { expected: emptyMfResult } = findTestCase( // Reuse this fixture's expected output as the "nothing parsed" result
+ "metaformats-missing-head"
+ );
+
+ const result = mf2(input, { // NOTE(review): relies on `options` not enabling experimental.metaformats - confirm
+ ...options,
+ });
+ expect(result).to.deep.equal(emptyMfResult);
+ });
});
diff --git a/test/suites/experimental/metaformats-missing-head.html b/test/suites/experimental/metaformats-missing-head.html
new file mode 100644
index 00000000..bf48cbdf
--- /dev/null
+++ b/test/suites/experimental/metaformats-missing-head.html
@@ -0,0 +1,8 @@
+
+
+
+ Missing Head
+ Shouldn't return any items if properties are not found.
+
+
+
diff --git a/test/suites/experimental/metaformats-missing-head.json b/test/suites/experimental/metaformats-missing-head.json
new file mode 100644
index 00000000..63780580
--- /dev/null
+++ b/test/suites/experimental/metaformats-missing-head.json
@@ -0,0 +1,5 @@
+{
+ "items": [],
+ "rels": {},
+ "rel-urls": {}
+}
diff --git a/test/suites/experimental/metaformats-og-article.html b/test/suites/experimental/metaformats-og-article.html
index de2dc257..6e0bb6b4 100644
--- a/test/suites/experimental/metaformats-og-article.html
+++ b/test/suites/experimental/metaformats-og-article.html
@@ -1,16 +1,35 @@
+ OGP | Title
-
-
+
+
+
+
+
+
+
+
+
+
+
+
- Test
+ OGP
+ OGP tags are read if no microformats are found
+
diff --git a/test/suites/experimental/metaformats-og-article.json b/test/suites/experimental/metaformats-og-article.json
index 7e475447..fd2feb84 100644
--- a/test/suites/experimental/metaformats-og-article.json
+++ b/test/suites/experimental/metaformats-og-article.json
@@ -1,15 +1,24 @@
{
- "items": {
- "type": ["h-entry"],
- "properties": {
- "name": ["Test title"],
- "summary": ["Test description"],
- "photo": ["http://example.com/image.png"],
- "published": "2015-05-29",
- "updated": "2016-02-14",
- "author": "Glenn Jones"
+ "items": [
+ {
+ "type": ["h-entry"],
+ "lang": "en",
+ "properties": {
+ "name": ["Open Graph Protocol"],
+ "url": ["https://ogp.me/"],
+ "summary": ["OG is preferred over other meta tags"],
+ "featured": [
+ {
+ "value": "http://example.com/image.png",
+ "alt": "Alt text for image. "
+ }
+ ],
+ "published": ["2015-05-29"],
+ "updated": ["2016-02-14"],
+ "author": ["Glenn Jones", "Ghost Writer"]
+ }
}
- },
+ ],
"rels": {},
"rel-urls": {}
}
diff --git a/test/suites/experimental/metaformats-og-audio-soundcloud.html b/test/suites/experimental/metaformats-og-audio-soundcloud.html
new file mode 100644
index 00000000..5b3f9612
--- /dev/null
+++ b/test/suites/experimental/metaformats-og-audio-soundcloud.html
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+ Stream Over The Moon by Surprise Chef | Listen online for free on
+ SoundCloud
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Over The Moon
+
+
+
diff --git a/test/suites/experimental/metaformats-og-audio-soundcloud.json b/test/suites/experimental/metaformats-og-audio-soundcloud.json
new file mode 100644
index 00000000..596ba4a3
--- /dev/null
+++ b/test/suites/experimental/metaformats-og-audio-soundcloud.json
@@ -0,0 +1,22 @@
+{
+ "items": [
+ {
+ "lang": "en",
+ "properties": {
+ "name": ["Over The Moon"],
+ "featured": [
+ "https://i1.sndcdn.com/artworks-92VWfGsSB6dA-0-t500x500.jpg"
+ ],
+ "publication": ["SoundCloud"],
+ "summary": [
+ "Surprise Chef’s music is based on evoking mood; their vivid arrangements utilize time and space to build soundscapes that invite the listener into their world. The quintet’s distinct sound pulls from "
+ ],
+ "audio": ["https://soundcloud.com/surprisechef/over-the-moon.mp3"],
+ "url": ["https://soundcloud.com/surprisechef/over-the-moon"]
+ },
+ "type": ["h-cite"]
+ }
+ ],
+ "rels": {},
+ "rel-urls": {}
+}
diff --git a/test/suites/experimental/metaformats-og-profile-linkedin.html b/test/suites/experimental/metaformats-og-profile-linkedin.html
new file mode 100644
index 00000000..3d6a0ee2
--- /dev/null
+++ b/test/suites/experimental/metaformats-og-profile-linkedin.html
@@ -0,0 +1,207 @@
+
+
+
+
+
+
+ Tantek Çelik - Web Standards Lead - Mozilla Corporation | LinkedIn
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Test
+
+
diff --git a/test/suites/experimental/metaformats-og-profile-linkedin.json b/test/suites/experimental/metaformats-og-profile-linkedin.json
new file mode 100644
index 00000000..45dfc528
--- /dev/null
+++ b/test/suites/experimental/metaformats-og-profile-linkedin.json
@@ -0,0 +1,31 @@
+{
+ "items": [
+ {
+ "lang": "en",
+ "type": ["h-card"],
+ "properties": {
+ "name": [
+ "Tantek Çelik - Web Standards Lead - Mozilla Corporation | LinkedIn"
+ ],
+ "summary": [
+ "OG: View Tantek Çelik’s profile on LinkedIn, the world’s largest professional community. Tantek has 9 jobs listed on their profile. See the complete profile on LinkedIn and discover Tantek’s connections and jobs at similar companies."
+ ],
+ "featured": [
+ "https://media.licdn.com/dms/image/C4E03AQFCJlpMj8yLiA/profile-displayphoto-shrink_800_800/0/1516166857593?e=2147483647&v=beta&t=JKdFRqEQmtFMizqPGG-yegxmy0vCycdFZMDVS4elPSY"
+ ],
+ "given-name": ["Tantek"],
+ "family-name": ["Çelik"],
+ "url": ["https://www.linkedin.com/in/tantek"]
+ }
+ }
+ ],
+ "rels": {
+ "canonical": ["https://www.linkedin.com/in/tantek"]
+ },
+ "rel-urls": {
+ "https://www.linkedin.com/in/tantek": {
+ "rels": ["canonical"],
+ "text": ""
+ }
+ }
+}
diff --git a/test/suites/experimental/metaformats-og-video-vimeo.html b/test/suites/experimental/metaformats-og-video-vimeo.html
new file mode 100644
index 00000000..cd34b2e8
--- /dev/null
+++ b/test/suites/experimental/metaformats-og-video-vimeo.html
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Ultromedia Please (Interactive) on Vimeo
+
+
+ Test
+
+
diff --git a/test/suites/experimental/metaformats-og-video-vimeo.json b/test/suites/experimental/metaformats-og-video-vimeo.json
new file mode 100644
index 00000000..eeaaa98c
--- /dev/null
+++ b/test/suites/experimental/metaformats-og-video-vimeo.json
@@ -0,0 +1,34 @@
+{
+ "items": [
+ {
+ "lang": "en",
+ "type": ["h-cite"],
+ "properties": {
+ "name": ["Ultromedia Please (Interactive)"],
+ "url": ["https://vimeo.com/789006133"],
+ "summary": [
+ "Ultromedia Please is a winner of the 2022 Best of the Year award. To explore the full list of winners, check out vimeo.com/bestoftheyear What begins as a helpful…"
+ ],
+ "featured": [
+ {
+ "value": "https://i.vimeocdn.com/video/1586931541-9f193de8dc4391b9676499e272f48c10669bc145876d549fb70c917c0cb1a7dd-d",
+ "alt": "Image alt text."
+ }
+ ],
+ "video": [
+ "https://player.vimeo.com/video/789006133?autoplay=1&h=82e9bae2d0"
+ ],
+ "publication": ["Vimeo"]
+ }
+ }
+ ],
+ "rels": {
+ "canonical": ["https://vimeo.com/789006133"]
+ },
+ "rel-urls": {
+ "https://vimeo.com/789006133": {
+ "rels": ["canonical"],
+ "text": ""
+ }
+ }
+}
diff --git a/test/suites/experimental/metaformats-prefer-mf.html b/test/suites/experimental/metaformats-prefer-mf.html
new file mode 100644
index 00000000..b689028d
--- /dev/null
+++ b/test/suites/experimental/metaformats-prefer-mf.html
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Microformats
+ True microformats should prevent metaformats
+
+
+
diff --git a/test/suites/experimental/metaformats-prefer-mf.json b/test/suites/experimental/metaformats-prefer-mf.json
new file mode 100644
index 00000000..c83eee07
--- /dev/null
+++ b/test/suites/experimental/metaformats-prefer-mf.json
@@ -0,0 +1,15 @@
+{
+ "items": [
+ {
+ "type": ["h-entry"],
+ "lang": "en",
+ "properties": {
+ "name": ["Microformats"],
+ "content": ["True microformats should prevent metaformats"],
+ "photo": ["http://example.com/mf-image.png"]
+ }
+ }
+ ],
+ "rels": {},
+ "rel-urls": {}
+}
diff --git a/test/suites/experimental/metaformats-standard.html b/test/suites/experimental/metaformats-standard.html
new file mode 100644
index 00000000..51bfde5f
--- /dev/null
+++ b/test/suites/experimental/metaformats-standard.html
@@ -0,0 +1,19 @@
+
+
+
+
+ Standard Meta Tags
+
+
+
+
+
+
+
+
+ Test
+
+
diff --git a/test/suites/experimental/metaformats-standard.json b/test/suites/experimental/metaformats-standard.json
new file mode 100644
index 00000000..7fc3bdfd
--- /dev/null
+++ b/test/suites/experimental/metaformats-standard.json
@@ -0,0 +1,25 @@
+{
+ "items": [
+ {
+ "type": ["h-entry"],
+ "lang": "en",
+ "properties": {
+ "name": ["Standard Meta Tags"],
+ "summary": ["Standard meta tags are also parsed"],
+ "published": ["2023-08-02"],
+ "url": ["https://microformats.org/wiki/rel-canonical"],
+ "author": ["Ted Lasso"],
+ "publication": ["microformats-parser"]
+ }
+ }
+ ],
+ "rels": {
+ "canonical": ["https://microformats.org/wiki/rel-canonical"]
+ },
+ "rel-urls": {
+ "https://microformats.org/wiki/rel-canonical": {
+ "rels": ["canonical"],
+ "text": ""
+ }
+ }
+}
diff --git a/test/suites/experimental/metaformats-twitter-article.html b/test/suites/experimental/metaformats-twitter-article.html
new file mode 100644
index 00000000..4e347d57
--- /dev/null
+++ b/test/suites/experimental/metaformats-twitter-article.html
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Test
+
+
diff --git a/test/suites/experimental/metaformats-twitter-article.json b/test/suites/experimental/metaformats-twitter-article.json
new file mode 100644
index 00000000..1871bd5e
--- /dev/null
+++ b/test/suites/experimental/metaformats-twitter-article.json
@@ -0,0 +1,23 @@
+{
+ "items": [
+ {
+ "type": ["h-entry"],
+ "lang": "en",
+ "properties": {
+ "name": ["Title 4 Twitter"],
+ "summary": ["Twitter tags are used if no OGP tags are found."],
+ "featured": [
+ {
+ "value": "http://example.com/twitter-image.png",
+ "alt": "This is alt text for an image. "
+ }
+ ],
+ "published": ["2015-05-29"],
+ "updated": ["2016-02-14"],
+ "author": ["Glenn Jones"]
+ }
+ }
+ ],
+ "rels": {},
+ "rel-urls": {}
+}