diff --git a/src/richtext/htmlAsRichText.ts b/src/richtext/htmlAsRichText.ts index 6d60c608..b3cadf1a 100644 --- a/src/richtext/htmlAsRichText.ts +++ b/src/richtext/htmlAsRichText.ts @@ -21,7 +21,7 @@ export const htmlAsRichText = ( config?: AsRichTextConfig, ): AsRichTextReturnType => { const { result, messages } = unified() - .use(rehypeParse, { emitParseErrors: true }) + .use(rehypeParse, { emitParseErrors: true, missingDoctype: 0 }) .use(rehypeRichText, config) .processSync(html); diff --git a/test/__testutils__/testAsRichTextHelper.ts b/test/__testutils__/testAsRichTextHelper.ts index b0636b54..c68d519f 100644 --- a/test/__testutils__/testAsRichTextHelper.ts +++ b/test/__testutils__/testAsRichTextHelper.ts @@ -9,6 +9,11 @@ type TestAsRichTextHelperArgs = { config?: AsRichTextConfig; + /** + * Messages of all warnings expected in the output, in any order (none if omitted). + */ + expectWarnings?: string[]; + /** * The rich text format is a lossy representation of HTML. Namely it does not * preserves indentation and applies some optimizations to the output such as @@ -55,6 +60,10 @@ const testAsRichTextHelperFactory = ( }, }); + expect( + output.warnings.map((warning) => warning.message).sort(), + ).toStrictEqual([...(args.expectWarnings ?? 
[])].sort()); + if (!args.expectAsHTMLNotToMatchInput) { expect(outputAsHTML).toBe(args.input); } else { diff --git a/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap b/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap index 4bde42f1..af6c5642 100644 --- a/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap +++ b/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap @@ -472,6 +472,8 @@ exports[`transforms HTML to rich text > embed > iframe 1`] = ` ] `; +exports[`transforms HTML to rich text > embed > missing src 1`] = `[]`; + exports[`transforms HTML to rich text > image > empty alt 1`] = ` [ { @@ -688,6 +690,8 @@ exports[`transforms HTML to rich text > image > missing alt 1`] = ` ] `; +exports[`transforms HTML to rich text > image > missing src 1`] = `[]`; + exports[`transforms HTML to rich text > image > non-prismic 1`] = ` [ { @@ -1002,6 +1006,17 @@ exports[`transforms HTML to rich text > spans > hyperlink 1`] = ` ] `; +exports[`transforms HTML to rich text > spans > hyperlink missing href 1`] = ` +[ + { + "direction": "ltr", + "spans": [], + "text": "lorem ipsum dolor sit amet", + "type": "paragraph", + }, +] +`; + exports[`transforms HTML to rich text > spans > label 1`] = ` [ { diff --git a/test/richtext/htmlAsRichText.test.ts b/test/richtext/htmlAsRichText.test.ts index 1951c71c..fa2e5429 100644 --- a/test/richtext/htmlAsRichText.test.ts +++ b/test/richtext/htmlAsRichText.test.ts @@ -33,6 +33,14 @@ describe("transforms HTML to rich text", () => { input: /* html */ `

lorem ipsum dolor sit amet

`, }); + testHTMLAsRichTextHelper("hyperlink missing href", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + expectWarnings: [ + "Element of type `hyperlink` is missing an `href` attribute", + ], + expectAsHTMLNotToMatchInput: true, + }); + testHTMLAsRichTextHelper("nested spans", { input: /* html */ `

lorem ipsum dolor sit amet

`, }); @@ -119,6 +127,12 @@ describe("transforms HTML to rich text", () => { expectAsHTMLNotToMatchInput: true, }); + testHTMLAsRichTextHelper("missing src", { + input: /* html */ ``, + expectWarnings: ["Element of type `img` is missing an `src` attribute"], + expectAsHTMLNotToMatchInput: true, + }); + describe("extracts image in text nodes and resume previous text node", () => { testHTMLAsRichTextHelper("basic", { input: /* html */ `

lorem ipsum bar dolor sit amet

`, @@ -153,6 +167,12 @@ describe("transforms HTML to rich text", () => { input: /* html */ `

Slack bot that uses AI to tl;dr; threads for you, anyone?

— Lucie (@li_hbr) June 20, 2024
`, config: { serializer: { blockquote: "embed" } }, }); + + testHTMLAsRichTextHelper("missing src", { + input: /* html */ ``, + expectWarnings: ["Element of type `embed` is missing an `src` attribute"], + expectAsHTMLNotToMatchInput: true, + }); }); describe("configuration", () => { @@ -224,7 +244,7 @@ describe("transforms HTML to rich text", () => { }); testHTMLAsRichTextHelper("image node", { - input: /* html */ ``, + input: /* html */ ``, config: { serializer: { foo: () => ({ @@ -242,7 +262,7 @@ describe("transforms HTML to rich text", () => { }); testHTMLAsRichTextHelper("embed node", { - input: /* html */ ``, + input: /* html */ ``, config: { serializer: { foo: () => ({ @@ -465,27 +485,3 @@ describe("transforms HTML to rich text", () => { }); }); }); - -type WarnCase = { - name: string; - input: string; -}; - -it.each([ - { - name: "element of type `img` is missing an `src` attribute", - input: /* html */ ``, - }, - { - name: "element of type `embed` is missing an `src` attribute", - input: /* html */ ``, - }, - { - name: "element of type `hyperlink` is missing an `href` attribute", - input: /* html */ `

missing-hyperlink-href

`, - }, -])("warns on unprocessable elements ($name)", ({ name, input }) => { - const output = htmlAsRichText(input); - - expect(output.warnings.toString()).toMatch(new RegExp(name, "i")); -});