From ca840defda6d4b6e36528dc121e94707d88410cd Mon Sep 17 00:00:00 2001 From: Anthony Ciccarello Date: Mon, 10 Jul 2023 01:15:13 -0700 Subject: [PATCH] implement metaformats parsing Closes #224 --- README.md | 4 + demo/demo.js | 6 +- demo/index.tpl.html | 10 + src/helpers/metaformats.ts | 246 ++++++++++++++++++ src/helpers/nodeMatchers.ts | 5 + src/microformats/property.ts | 3 + src/parser.ts | 13 +- src/types.ts | 5 +- src/validator.ts | 12 +- test/scenarios.spec.ts | 17 +- .../metaformats-missing-head.html | 8 + .../metaformats-missing-head.json | 5 + .../experimental/metaformats-og-article.html | 25 +- .../experimental/metaformats-og-article.json | 29 ++- .../metaformats-og-audio-soundcloud.html | 67 +++++ .../metaformats-og-audio-soundcloud.json | 22 ++ .../metaformats-og-profile-linkedin.html | 207 +++++++++++++++ .../metaformats-og-profile-linkedin.json | 31 +++ .../metaformats-og-video-vimeo.html | 68 +++++ .../metaformats-og-video-vimeo.json | 34 +++ .../experimental/metaformats-prefer-mf.html | 24 ++ .../experimental/metaformats-prefer-mf.json | 15 ++ .../experimental/metaformats-standard.html | 19 ++ .../experimental/metaformats-standard.json | 25 ++ .../metaformats-twitter-article.html | 19 ++ .../metaformats-twitter-article.json | 23 ++ 26 files changed, 914 insertions(+), 28 deletions(-) create mode 100644 src/helpers/metaformats.ts create mode 100644 test/suites/experimental/metaformats-missing-head.html create mode 100644 test/suites/experimental/metaformats-missing-head.json create mode 100644 test/suites/experimental/metaformats-og-audio-soundcloud.html create mode 100644 test/suites/experimental/metaformats-og-audio-soundcloud.json create mode 100644 test/suites/experimental/metaformats-og-profile-linkedin.html create mode 100644 test/suites/experimental/metaformats-og-profile-linkedin.json create mode 100644 test/suites/experimental/metaformats-og-video-vimeo.html create mode 100644 test/suites/experimental/metaformats-og-video-vimeo.json create 
mode 100644 test/suites/experimental/metaformats-prefer-mf.html create mode 100644 test/suites/experimental/metaformats-prefer-mf.json create mode 100644 test/suites/experimental/metaformats-standard.html create mode 100644 test/suites/experimental/metaformats-standard.json create mode 100644 test/suites/experimental/metaformats-twitter-article.html create mode 100644 test/suites/experimental/metaformats-twitter-article.json diff --git a/README.md b/README.md index f1b2e03b..5f702264 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,10 @@ These are sourced from the element themselves, a parent microformat, the HTML do When parsing microformats for text content, all the consecutive whitespace is collapsed into a single space. `
<br>` and `<p>
` tags are treated as line breaks. +#### `metaformats` + +Enables fallback to [metaformats](https://microformats.org/wiki/metaformats) parsing which looks at `<meta>` tags to infer content. + ## Contributing See our [contributing guidelines](./CONTRIBUTING.md) for more information. diff --git a/demo/demo.js b/demo/demo.js index bd43e018..da407322 100644 --- a/demo/demo.js +++ b/demo/demo.js @@ -32,6 +32,10 @@ window.parseHtml = () => { const baseUrl = document.getElementById("base-url").value; const lang = document.getElementById("lang").checked; const textContent = document.getElementById("textContent").checked; + const metaformats = document.getElementById("metaformats").checked; - return parse(html, { baseUrl, experimental: { lang, textContent } }); + return parse(html, { + baseUrl, + experimental: { lang, textContent, metaformats }, + }); }; diff --git a/demo/index.tpl.html b/demo/index.tpl.html index e90b34d4..ce59a30f 100644 --- a/demo/index.tpl.html +++ b/demo/index.tpl.html @@ -72,6 +72,16 @@

Experimental options

/> Better text content +

diff --git a/src/helpers/metaformats.ts b/src/helpers/metaformats.ts new file mode 100644 index 00000000..a8faf24e --- /dev/null +++ b/src/helpers/metaformats.ts @@ -0,0 +1,246 @@ +import { Document, Element } from "parse5"; + +import { MicroformatRoot, ParsingOptions } from "../types"; +import { + getAttributeIfTag, + getAttributeValue, + hasRelIntersect, +} from "./attributes"; +import { isEnabled } from "./experimental"; +import { isElement, isTag } from "./nodeMatchers"; + +/** Special key for title tag in meta collection */ +const TITLE_TAG_KEY = "<title>"; +const CANONICAL_URL_KEY = "<canonical>"; +const MEDIA_TYPES = ["image", "video", "audio"]; + +interface ComplexMediaMeta { + value: string; + alt: string; +} +type MetaTagContent = string | ComplexMediaMeta; + +/** + * Creates a normalized store for meta tags + */ +const initializeMetaContentCollection = (): MetaContentCollection => { + /** + * Collection of all relevant meta tag content + * Since tag order isn't guaranteed, need to collect all value before applying defaults + */ + const metaContent: Record<string, MetaTagContent[]> = {}; + + /** + * Gets the values of the first property found + * @param properties Array of properties to look for, preferred item first + */ + const get = (properties: string[]) => { + for (const key of properties) { + if (metaContent[key]) { + return metaContent[key]; + } + } + return; + }; + + /** + * Stores meta tag values. + * + * Includes following normalization rules: + * - Duplicates are removed from repeated (array) tags + * - src, url, and secure_url media tags are treated same as base (e.g. 
og:image:url -> og:image) + * - Alt text is added as property on last image url + */ + const set = (key: string, value: string) => { + // Split tag name to normalize values like "og:video:url" + const [domain, type, subtype] = key.split(":"); + + // Media tags specific parsing + if ( + (domain === "og" || domain === "twitter") && + MEDIA_TYPES.includes(type) + ) { + if (subtype === "alt") { + const existingMedia = metaContent[`${domain}:${type}`]; + + if (existingMedia?.length) { + const last = existingMedia.pop(); + + if (typeof last === "string") { + existingMedia.push({ value: last, alt: value }); + } else if (last) { + // Found duplicate alt text tag so re-inserting existing + // last should always be object. if condition added for types + existingMedia.push(last); + } + } + + return; // Stop as alt text is already added + } else if (["url", "secure_url"].includes(subtype)) { + // Mutate key to normalize different url values + // Duplicates will be cleaned up on insertion + key = `${domain}:${type}`; + } + } + const existing = metaContent[key]; + + if (existing) { + const isDuplicate = existing + .map((existingValue) => + typeof existingValue === "string" + ? 
existingValue + : existingValue.value + ) + .some((existingValue) => value === existingValue); + + if (!isDuplicate) { + metaContent[key].push(value); + } // Else ignore duplicates + } else { + metaContent[key] = [value]; + } + }; + + return { + metaContent, + set, + get, + }; +}; + +interface MetaContentCollection { + metaContent: Record<string, MetaTagContent[]>; + set: (key: string, value: string) => void; + get: (properties: string[]) => MetaTagContent[] | undefined; +} + +const collectMetaTags = (head: Element): MetaContentCollection => { + const metaTags = initializeMetaContentCollection(); + + for (const i in head.childNodes) { + const child = head.childNodes[i]; + + if (!isElement(child)) { + continue; + } + + const content = getAttributeIfTag(child, ["meta"], "content"); + if (content) { + // Tags keys usually use the "name" attribute but open graph uses "property" + // Consider them separately in case a meta tag uses both + // e.g. <meta property="og:title" name="author" content="Johnny Complex" > + const property = getAttributeValue(child, "property"); + if (property) { + metaTags.set(property, content); + } + + const name = getAttributeValue(child, "name"); + if (name && name !== property) { + metaTags.set(name, content); + } + } else if (child.tagName === "title" && "value" in child.childNodes[0]) { + metaTags.set(TITLE_TAG_KEY, child.childNodes[0].value); + } else if ( + child.tagName === "link" && + hasRelIntersect(child, ["canonical"]) + ) { + const canonicalUrl = getAttributeValue(child, "href"); + if (canonicalUrl) { + metaTags.set(CANONICAL_URL_KEY, canonicalUrl); + } + } + } + return metaTags; +}; + +/** + * Collect meta content into a microformat object + * @param metaTags Previously parsed meta tag collection + * @param options Library parsing options + */ +const combineRoot = ( + metaTags: MetaContentCollection, + options: ParsingOptions +): MicroformatRoot[] => { + const item: MicroformatRoot = { properties: {} }; + + if (isEnabled(options, 
"lang") && options.inherited.lang) { + item.lang = options.inherited.lang; + } + + /** + * Define property on microformat root if values are found + * @param property Key of microformats property + * @param value Array of values for the property. Empty and undefined values are not added. + */ + const setMicroformatProp = ( + property: string, + value: MetaTagContent[] = [] + ) => { + const filteredValue = value.filter(Boolean); + if (filteredValue.length) { + item.properties[property] = filteredValue; + } + }; + + let impliedRootClass = "h-entry"; + const [ogType] = metaTags.get(["og:type"]) ?? []; + if (ogType && typeof ogType === "string") { + if (ogType === "profile") { + impliedRootClass = "h-card"; + } else if (["music", "video"].some((type) => ogType.includes(type))) { + impliedRootClass = "h-cite"; + } // else h-entry + } + item.type = [impliedRootClass]; + + setMicroformatProp( + "name", + metaTags.get(["og:title", "twitter:title", TITLE_TAG_KEY]) + ); + setMicroformatProp( + "summary", + metaTags.get(["og:description", "twitter:description", "description"]) + ); + setMicroformatProp("featured", metaTags.get(["og:image", "twitter:image"])); + setMicroformatProp("video", metaTags.get(["og:video", "twitter:video"])); + setMicroformatProp("audio", metaTags.get(["og:audio", "twitter:audio"])); + setMicroformatProp( + "published", + metaTags.get(["article:published_time", "date"]) + ); + setMicroformatProp("updated", metaTags.get(["article:modified_time"])); + setMicroformatProp("author", metaTags.get(["article:author", "author"])); + setMicroformatProp("url", metaTags.get(["og:url", CANONICAL_URL_KEY])); + + // Publication properties useful for h-cite + setMicroformatProp( + "publication", + metaTags.get(["og:site_name", "publisher"]) + ); + + if (impliedRootClass === "h-card") { + setMicroformatProp("given-name", metaTags.get(["profile:first_name"])); + setMicroformatProp("family-name", metaTags.get(["profile:last_name"])); + } + + if 
(Object.keys(item.properties).length === 0) { + return []; + } + + return [item]; +}; + +export const parseMetaformats = ( + doc: Document, + options: ParsingOptions +): MicroformatRoot[] => { + // Per validation, html element will always be found + const html = doc.childNodes.find(isTag("html")); + const head = html?.childNodes.find(isTag("head")); + + // Per manual testing, head will always be defined + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + const metaContent = collectMetaTags(head!); + return combineRoot(metaContent, options); +}; diff --git a/src/helpers/nodeMatchers.ts b/src/helpers/nodeMatchers.ts index ff7858d8..6aacaed7 100644 --- a/src/helpers/nodeMatchers.ts +++ b/src/helpers/nodeMatchers.ts @@ -20,6 +20,11 @@ const propClassRegex = classRegex("(p|e|u|dt)"); export const isElement = (node: Node): node is Element => "tagName" in node && "childNodes" in node; +export const isTag = + (tagName: string) => + (node: Node): node is Element => + isElement(node) && node.tagName === tagName; + export const isTextNode = (node: Node): node is TextNode => "value" in node; export const isMicroformatV2Root = (node: Element): boolean => diff --git a/src/microformats/property.ts b/src/microformats/property.ts index ba4be1e4..5f271868 100644 --- a/src/microformats/property.ts +++ b/src/microformats/property.ts @@ -34,6 +34,7 @@ export const parseP = (node: Element, options: ParsingOptions): string => getAttributeIfTag(node, ["abbr", "link"], "title") ?? getAttributeIfTag(node, ["data"], "value") ?? getAttributeIfTag(node, ["img", "area"], "alt") ?? + getAttributeIfTag(node, ["meta"], "content") ?? textContent(node, options); export const parseU = ( @@ -49,6 +50,7 @@ export const parseU = ( valueClassPattern(node, options) ?? getAttributeIfTag(node, ["abbr"], "title") ?? getAttributeIfTag(node, ["data", "input"], "value") ?? + getAttributeIfTag(node, ["meta"], "content") ?? 
textContent(node, options); if (typeof url === "string" && isLocalLink(url)) { @@ -63,6 +65,7 @@ const parseDt = (node: Element, options: ParsingOptions): string => getAttributeIfTag(node, ["time", "ins", "del"], "datetime") ?? getAttributeIfTag(node, ["abbr"], "title") ?? getAttributeIfTag(node, ["data", "input"], "value") ?? + getAttributeIfTag(node, ["meta"], "content") ?? textContent(node, options); export const parseE = (node: Element, options: ParsingOptions): Html => { diff --git a/src/parser.ts b/src/parser.ts index 1b639617..e4bbed7b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -6,6 +6,8 @@ import { isMicroformatRoot } from "./helpers/nodeMatchers"; import { ParsedDocument, ParserOptions, ParsingOptions } from "./types"; import { validateParsedHtml } from "./validator"; import { documentSetup } from "./helpers/documentSetup"; +import { parseMetaformats } from "./helpers/metaformats"; +import { isEnabled } from "./helpers/experimental"; export const parser = ( html: string, @@ -22,12 +24,17 @@ export const parser = ( idRefs, inherited: { roots: [], lang }, }; + let items = findChildren(doc, isMicroformatRoot).map((mf) => + parseMicroformat(mf, parsingOptions) + ); + + if (items.length === 0 && isEnabled(parsingOptions, "metaformats")) { + items = parseMetaformats(doc, parsingOptions); + } return { rels, "rel-urls": relUrls, - items: findChildren(doc, isMicroformatRoot).map((mf) => - parseMicroformat(mf, parsingOptions) - ), + items, }; }; diff --git a/src/types.ts b/src/types.ts index a6110350..902bb558 100644 --- a/src/types.ts +++ b/src/types.ts @@ -2,16 +2,17 @@ import { Element } from "parse5"; import { BackcompatRoot } from "./backcompat"; -export type ExperimentalName = "lang" | "textContent"; - export interface ParserOptions { baseUrl: string; experimental?: { lang?: boolean; textContent?: boolean; + metaformats?: boolean; }; } +export type ExperimentalName = keyof NonNullable<ParserOptions["experimental"]>; + export interface ParsingOptions 
extends ParserOptions { implyProperties?: boolean; idRefs: IdRefs; diff --git a/src/validator.ts b/src/validator.ts index 344b73b0..9ccd74dc 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -1,6 +1,6 @@ -import { Document, Element } from "parse5"; +import { Document } from "parse5"; -import { isElement } from "./helpers/nodeMatchers"; +import { isElement, isTag } from "./helpers/nodeMatchers"; const assertIsString = (str: unknown, name: string): string => { if (typeof str === "undefined") { @@ -103,17 +103,13 @@ export const validator = ( export const validateParsedHtml = (doc: Document): void => { // <html> and <body> are always defined (based on tests) // Provide error handling in the event they are ever not defined - const html = doc.childNodes.find( - (child): child is Element => isElement(child) && child.tagName === "html" - ); + const html = doc.childNodes.find(isTag("html")); if (!html) { throw new Error("Microformats parser: No <html> element found"); } - const body = html.childNodes.find( - (child): child is Element => isElement(child) && child.tagName === "body" - ); + const body = html.childNodes.find(isTag("body")); if (!body) { throw new Error("Microformats parser: No <body> element found"); diff --git a/test/scenarios.spec.ts b/test/scenarios.spec.ts index 2884a2be..ae75c1a5 100644 --- a/test/scenarios.spec.ts +++ b/test/scenarios.spec.ts @@ -1,4 +1,4 @@ -import { expect } from "chai"; +import { expect, assert } from "chai"; import * as path from "path"; import { mf2 } from "../src"; @@ -72,4 +72,19 @@ describe("mf2() // experimental scenarios", () => { expect(result).to.deep.equal(expected); }); }); + + it("should respect the experimental flag", () => { + const findTestCase = (searchName: string) => + experimental.find(({ name }) => name === searchName) ?? 
+ assert.fail(`Test case "${searchName}" not found`); + const { input } = findTestCase("metaformats-og-article"); + const { expected: emptyMfResult } = findTestCase( + "metaformats-missing-head" + ); + + const result = mf2(input, { + ...options, + }); + expect(result).to.deep.equal(emptyMfResult); + }); }); diff --git a/test/suites/experimental/metaformats-missing-head.html b/test/suites/experimental/metaformats-missing-head.html new file mode 100644 index 00000000..bf48cbdf --- /dev/null +++ b/test/suites/experimental/metaformats-missing-head.html @@ -0,0 +1,8 @@ +<!DOCTYPE html> +<html lang="en"> + <body> + <h1>Missing Head</h1> + <p>Shouldn't return any items if properties are not found.</p> + <img src="http://example.com/img-image.png" /> + </body> +</html> diff --git a/test/suites/experimental/metaformats-missing-head.json b/test/suites/experimental/metaformats-missing-head.json new file mode 100644 index 00000000..63780580 --- /dev/null +++ b/test/suites/experimental/metaformats-missing-head.json @@ -0,0 +1,5 @@ +{ + "items": [], + "rels": {}, + "rel-urls": {} +} diff --git a/test/suites/experimental/metaformats-og-article.html b/test/suites/experimental/metaformats-og-article.html index de2dc257..6e0bb6b4 100644 --- a/test/suites/experimental/metaformats-og-article.html +++ b/test/suites/experimental/metaformats-og-article.html @@ -1,16 +1,35 @@ <!DOCTYPE html> <html lang="en"> <head> + <title>OGP | Title - - + + + + + + + + + + + + -

Test

+

OGP

+

OGP tags are read if no microformats are found

+ diff --git a/test/suites/experimental/metaformats-og-article.json b/test/suites/experimental/metaformats-og-article.json index 7e475447..fd2feb84 100644 --- a/test/suites/experimental/metaformats-og-article.json +++ b/test/suites/experimental/metaformats-og-article.json @@ -1,15 +1,24 @@ { - "items": { - "type": ["h-entry"], - "properties": { - "name": ["Test title"], - "summary": ["Test description"], - "photo": ["http://example.com/image.png"], - "published": "2015-05-29", - "updated": "2016-02-14", - "author": "Glenn Jones" + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "name": ["Open Graph Protocol"], + "url": ["https://ogp.me/"], + "summary": ["OG is preferred over other meta tags"], + "featured": [ + { + "value": "http://example.com/image.png", + "alt": "Alt text for image. " + } + ], + "published": ["2015-05-29"], + "updated": ["2016-02-14"], + "author": ["Glenn Jones", "Ghost Writer"] + } } - }, + ], "rels": {}, "rel-urls": {} } diff --git a/test/suites/experimental/metaformats-og-audio-soundcloud.html b/test/suites/experimental/metaformats-og-audio-soundcloud.html new file mode 100644 index 00000000..5b3f9612 --- /dev/null +++ b/test/suites/experimental/metaformats-og-audio-soundcloud.html @@ -0,0 +1,67 @@ + + + + + + + + Stream Over The Moon by Surprise Chef | Listen online for free on + SoundCloud + + + + + + + + + + + + + + + + + + + + + + + + +

Over The Moon

+

+ + diff --git a/test/suites/experimental/metaformats-og-audio-soundcloud.json b/test/suites/experimental/metaformats-og-audio-soundcloud.json new file mode 100644 index 00000000..596ba4a3 --- /dev/null +++ b/test/suites/experimental/metaformats-og-audio-soundcloud.json @@ -0,0 +1,22 @@ +{ + "items": [ + { + "lang": "en", + "properties": { + "name": ["Over The Moon"], + "featured": [ + "https://i1.sndcdn.com/artworks-92VWfGsSB6dA-0-t500x500.jpg" + ], + "publication": ["SoundCloud"], + "summary": [ + "Surprise Chef’s music is based on evoking mood; their vivid arrangements utilize time and space to build soundscapes that invite the listener into their world. The quintet’s distinct sound pulls from " + ], + "audio": ["https://soundcloud.com/surprisechef/over-the-moon.mp3"], + "url": ["https://soundcloud.com/surprisechef/over-the-moon"] + }, + "type": ["h-cite"] + } + ], + "rels": {}, + "rel-urls": {} +} diff --git a/test/suites/experimental/metaformats-og-profile-linkedin.html b/test/suites/experimental/metaformats-og-profile-linkedin.html new file mode 100644 index 00000000..3d6a0ee2 --- /dev/null +++ b/test/suites/experimental/metaformats-og-profile-linkedin.html @@ -0,0 +1,207 @@ + + + + + + + Tantek Çelik - Web Standards Lead - Mozilla Corporation | LinkedIn + + + + + + + + + + + + + + + + + + + + + + + + +

Test

+ + diff --git a/test/suites/experimental/metaformats-og-profile-linkedin.json b/test/suites/experimental/metaformats-og-profile-linkedin.json new file mode 100644 index 00000000..45dfc528 --- /dev/null +++ b/test/suites/experimental/metaformats-og-profile-linkedin.json @@ -0,0 +1,31 @@ +{ + "items": [ + { + "lang": "en", + "type": ["h-card"], + "properties": { + "name": [ + "Tantek Çelik - Web Standards Lead - Mozilla Corporation | LinkedIn" + ], + "summary": [ + "OG: View Tantek Çelik’s profile on LinkedIn, the world’s largest professional community. Tantek has 9 jobs listed on their profile. See the complete profile on LinkedIn and discover Tantek’s connections and jobs at similar companies." + ], + "featured": [ + "https://media.licdn.com/dms/image/C4E03AQFCJlpMj8yLiA/profile-displayphoto-shrink_800_800/0/1516166857593?e=2147483647&v=beta&t=JKdFRqEQmtFMizqPGG-yegxmy0vCycdFZMDVS4elPSY" + ], + "given-name": ["Tantek"], + "family-name": ["Çelik"], + "url": ["https://www.linkedin.com/in/tantek"] + } + } + ], + "rels": { + "canonical": ["https://www.linkedin.com/in/tantek"] + }, + "rel-urls": { + "https://www.linkedin.com/in/tantek": { + "rels": ["canonical"], + "text": "" + } + } +} diff --git a/test/suites/experimental/metaformats-og-video-vimeo.html b/test/suites/experimental/metaformats-og-video-vimeo.html new file mode 100644 index 00000000..cd34b2e8 --- /dev/null +++ b/test/suites/experimental/metaformats-og-video-vimeo.html @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Ultromedia Please (Interactive) on Vimeo + + +

Test

+ + diff --git a/test/suites/experimental/metaformats-og-video-vimeo.json b/test/suites/experimental/metaformats-og-video-vimeo.json new file mode 100644 index 00000000..eeaaa98c --- /dev/null +++ b/test/suites/experimental/metaformats-og-video-vimeo.json @@ -0,0 +1,34 @@ +{ + "items": [ + { + "lang": "en", + "type": ["h-cite"], + "properties": { + "name": ["Ultromedia Please (Interactive)"], + "url": ["https://vimeo.com/789006133"], + "summary": [ + "Ultromedia Please is a winner of the 2022 Best of the Year award. To explore the full list of winners, check out vimeo.com/bestoftheyear What begins as a helpful…" + ], + "featured": [ + { + "value": "https://i.vimeocdn.com/video/1586931541-9f193de8dc4391b9676499e272f48c10669bc145876d549fb70c917c0cb1a7dd-d", + "alt": "Image alt text." + } + ], + "video": [ + "https://player.vimeo.com/video/789006133?autoplay=1&h=82e9bae2d0" + ], + "publication": ["Vimeo"] + } + } + ], + "rels": { + "canonical": ["https://vimeo.com/789006133"] + }, + "rel-urls": { + "https://vimeo.com/789006133": { + "rels": ["canonical"], + "text": "" + } + } +} diff --git a/test/suites/experimental/metaformats-prefer-mf.html b/test/suites/experimental/metaformats-prefer-mf.html new file mode 100644 index 00000000..b689028d --- /dev/null +++ b/test/suites/experimental/metaformats-prefer-mf.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + +

Microformats

+

True microformats should prevent metaformats

+ + + diff --git a/test/suites/experimental/metaformats-prefer-mf.json b/test/suites/experimental/metaformats-prefer-mf.json new file mode 100644 index 00000000..c83eee07 --- /dev/null +++ b/test/suites/experimental/metaformats-prefer-mf.json @@ -0,0 +1,15 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "name": ["Microformats"], + "content": ["True microformats should prevent metaformats"], + "photo": ["http://example.com/mf-image.png"] + } + } + ], + "rels": {}, + "rel-urls": {} +} diff --git a/test/suites/experimental/metaformats-standard.html b/test/suites/experimental/metaformats-standard.html new file mode 100644 index 00000000..51bfde5f --- /dev/null +++ b/test/suites/experimental/metaformats-standard.html @@ -0,0 +1,19 @@ + + + + + Standard Meta Tags + + + + + + + + +

Test

+ + diff --git a/test/suites/experimental/metaformats-standard.json b/test/suites/experimental/metaformats-standard.json new file mode 100644 index 00000000..7fc3bdfd --- /dev/null +++ b/test/suites/experimental/metaformats-standard.json @@ -0,0 +1,25 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "name": ["Standard Meta Tags"], + "summary": ["Standard meta tags are also parsed"], + "published": ["2023-08-02"], + "url": ["https://microformats.org/wiki/rel-canonical"], + "author": ["Ted Lasso"], + "publication": ["microformats-parser"] + } + } + ], + "rels": { + "canonical": ["https://microformats.org/wiki/rel-canonical"] + }, + "rel-urls": { + "https://microformats.org/wiki/rel-canonical": { + "rels": ["canonical"], + "text": "" + } + } +} diff --git a/test/suites/experimental/metaformats-twitter-article.html b/test/suites/experimental/metaformats-twitter-article.html new file mode 100644 index 00000000..4e347d57 --- /dev/null +++ b/test/suites/experimental/metaformats-twitter-article.html @@ -0,0 +1,19 @@ + + + + + + + + + + + + + +

Test

+ + diff --git a/test/suites/experimental/metaformats-twitter-article.json b/test/suites/experimental/metaformats-twitter-article.json new file mode 100644 index 00000000..1871bd5e --- /dev/null +++ b/test/suites/experimental/metaformats-twitter-article.json @@ -0,0 +1,23 @@ +{ + "items": [ + { + "type": ["h-entry"], + "lang": "en", + "properties": { + "name": ["Title 4 Twitter"], + "summary": ["Twitter tags are used if no OGP tags are found."], + "featured": [ + { + "value": "http://example.com/twitter-image.png", + "alt": "This is alt text for an image. " + } + ], + "published": ["2015-05-29"], + "updated": ["2016-02-14"], + "author": ["Glenn Jones"] + } + } + ], + "rels": {}, + "rel-urls": {} +}