diff --git a/.vscode/settings.json b/.vscode/settings.json index 2eae3d01f0..d5fa1ea1bb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -19,6 +19,7 @@ "gptool", "gptools", "gptoolsjs", + "limitrows", "llmify", "llmrequest", "localai", @@ -34,6 +35,7 @@ "promptfoo", "stringifying", "sysr", + "tabletojson", "treesitter", "typecheck", "unfence", diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/docs/genaisrc/genaiscript.d.ts +++ b/docs/genaisrc/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/genaisrc/docs_sample_generator.genai.js b/genaisrc/docs-sample-generator.genai.mts similarity index 98% rename from genaisrc/docs_sample_generator.genai.js rename to genaisrc/docs-sample-generator.genai.mts index e5395d00d2..7b890c751e 100644 --- a/genaisrc/docs_sample_generator.genai.js +++ b/genaisrc/docs-sample-generator.genai.mts @@ -1,4 +1,5 @@ script({ + model: "openai:gpt-4", title: "generating tests from samples", system: ["system"], parameters: { diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/genaisrc/genaiscript.d.ts +++ b/genaisrc/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/cli/src/playwright.ts b/packages/cli/src/playwright.ts index c9c0a2db14..3bf10d9ce1 100644 --- a/packages/cli/src/playwright.ts +++ b/packages/cli/src/playwright.ts @@ -86,7 +86,6 @@ export class BrowserManager { logVerbose(`browsing ${ellipseUri(url)}`) const browser = await this.launchBrowser(options) - logVerbose(`navigating...`) let page: Page if (incognito) { const context = await browser.newContext(rest) @@ -96,7 +95,6 @@ export class BrowserManager { } if (timeout !== undefined) page.setDefaultTimeout(timeout) if (url) await page.goto(url) - logVerbose(`page ready`) return page } } diff --git a/packages/core/package.json b/packages/core/package.json index b49c076023..d7213fb0a8 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -57,6 +57,7 @@ "sanitize-html": "^2.13.0", "semver": "^7.6.3", "serialize-error": "^11.0.3", + "tabletojson": "^4.1.4", "toml": "^3.0.0", "tree-sitter-wasms": "^0.1.11", "ts-dedent": "^2.2.0", @@ -76,4 +77,4 @@ "@types/turndown": "^5.0.5", "turndown": "^7.2.0" } -} +} \ No newline at end of file diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/core/src/genaisrc/genaiscript.d.ts +++ b/packages/core/src/genaisrc/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/core/src/globals.ts b/packages/core/src/globals.ts index 5a94dfa006..bafc716054 100644 --- a/packages/core/src/globals.ts +++ b/packages/core/src/globals.ts @@ -8,7 +8,7 @@ import { updateFrontmatter, } from "./frontmatter" import { JSONLStringify, JSONLTryParse } from "./jsonl" -import { HTMLToMarkdown, HTMLToText } from "./html" +import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html" export function resolveGlobal(): any { if (typeof window !== "undefined") @@ -58,6 +58,7 @@ export function installGlobals() { }, }) glb.HTML = Object.freeze({ + convertTablesToJSON: HTMLTablesToJSON, convertToMarkdown: HTMLToMarkdown, convertToText: HTMLToText, }) diff --git a/packages/core/src/html.test.ts b/packages/core/src/html.test.ts index 0b9f89a44f..aaf33d9552 100644 --- a/packages/core/src/html.test.ts +++ b/packages/core/src/html.test.ts @@ -1,8 +1,26 @@ import test, { describe } from "node:test" -import { HTMLToMarkdown, HTMLToText } from "./html" +import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html" import assert from "node:assert/strict" describe("html", () => { + test("convert HTML table to JSON", () => { + const html = ` + + + + + + + + + +
Header 1Header 2
Value 1Value 2
+ ` + const expected = [{ "Header 1": "Value 1", "Header 2": "Value 2" }] + const result = HTMLTablesToJSON(html)[0] + console.log(JSON.stringify(result, null, 2)) + assert.deepStrictEqual(result, expected) + }) test("converts HTML to text", () => { const html = "

Hello, world!

" const expected = "Hello, world!" diff --git a/packages/core/src/html.ts b/packages/core/src/html.ts index dc304ceb2b..43a74d43d0 100644 --- a/packages/core/src/html.ts +++ b/packages/core/src/html.ts @@ -1,6 +1,12 @@ -import { convert } from "html-to-text" +import { convert as convertToText } from "html-to-text" import { TraceOptions } from "./trace" import Turndown from "turndown" +import { tabletojson } from "tabletojson" + +export function HTMLTablesToJSON(html: string, options?: {}): object[][] { + const res = tabletojson.convert(html, options) + return res +} export function HTMLToText( html: string, @@ -11,7 +17,7 @@ export function HTMLToText( const { trace } = options || {} try { - const text = convert(html, options) + const text = convertToText(html, options) return text } catch (e) { trace?.error("HTML conversion failed", e) diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index 9950d9219d..dc91673296 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -255,7 +255,7 @@ export function createFileOutput(output: FileOutput): FileOutputNode { return { type: "fileOutput", output } } -export function createDefDataNode( +export function createDefData( name: string, data: object | object[], options?: DefDataOptions diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts index f25e45bc56..db4c2482b2 100644 --- a/packages/core/src/promptrunner.ts +++ b/packages/core/src/promptrunner.ts @@ -85,7 +85,7 @@ export async function runTemplate( assert(model !== undefined) try { - trace.itemValue("🧠 model", model ?? "??") + trace.heading(3, `🧠 running ${template.id} with model ${model ?? ""}`) if (cliInfo) traceCliArgs(trace, template, options) const vars = await resolveExpansionVars( diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index eac6d73bcc..8aed175061 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -3,7 +3,7 @@ import { appendChild, createAssistantNode, createChatParticipant, - createDefDataNode, + createDefData, createDefNode, createFileOutput, createFunctionNode, @@ -114,7 +114,7 @@ export function createChatTurnGenerationContext( return name }, defData: (name, data, defOptions) => { - appendChild(node, createDefDataNode(name, data, defOptions)) + appendChild(node, createDefData(name, data, defOptions)) return name }, fence(body, options?: DefOptions) { diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 9d67902559..7bfe8d25f5 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -1156,14 +1156,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/genaisrc/browse-text.genai.mts b/packages/sample/genaisrc/browse-text.genai.mts index b28e10660d..d35735493d 100644 --- a/packages/sample/genaisrc/browse-text.genai.mts +++ b/packages/sample/genaisrc/browse-text.genai.mts @@ -7,8 +7,7 @@ const page = await host.browse( ) const table = page.locator('table[data-testid="csv-table"]') const html = await table.innerHTML() -console.log(`HTML:` + html) -const csv = HTML.convertToText(html) -console.log(`TEXT: ` + csv) -def("DATA", csv) -$`Analyze DATA.` +const csv = HTML.convertTablesToJSON("" + html + "
")[0] +csv.forEach((row) => delete row[Object.keys(row)[0]]) // remove the first column +defData("DATA", csv, { format: "csv" }) +$`Analyze DATA and provide a statistical summary.` diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/genaisrc/genaiscript.d.ts +++ b/packages/sample/genaisrc/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/genaisrc/node/genaiscript.d.ts +++ b/packages/sample/genaisrc/node/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/genaisrc/python/genaiscript.d.ts +++ b/packages/sample/genaisrc/python/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/genaisrc/style/genaiscript.d.ts +++ b/packages/sample/genaisrc/style/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/src/aici/genaiscript.d.ts +++ b/packages/sample/src/aici/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/src/errors/genaiscript.d.ts +++ b/packages/sample/src/errors/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/src/makecode/genaiscript.d.ts +++ b/packages/sample/src/makecode/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/src/tla/genaiscript.d.ts +++ b/packages/sample/src/tla/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/packages/sample/src/vision/genaiscript.d.ts +++ b/packages/sample/src/vision/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts index ed648e260f..442954ef08 100644 --- a/slides/genaisrc/genaiscript.d.ts +++ b/slides/genaisrc/genaiscript.d.ts @@ -1182,14 +1182,40 @@ interface XML { parse(text: string, options?: XMLParseOptions): any } +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + interface HTML { + /** + * Converts all HTML tables to JSON. + * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] /** * Converts HTML markup to plain text * @param html */ convertToText(html: string): string /** - * Converts HMTL markup to markdown + * Converts HTML markup to markdown * @param html */ convertToMarkdown(html: string): string diff --git a/yarn.lock b/yarn.lock index 8bcbb978c3..d9f3560980 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1450,7 +1450,7 @@ cheerio-select@^2.1.0: domhandler "^5.0.3" domutils "^3.0.1" -cheerio@^1.0.0-rc.12, cheerio@^1.0.0-rc.9: +cheerio@^1.0.0, cheerio@^1.0.0-rc.12, cheerio@^1.0.0-rc.9: version "1.0.0" resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0.tgz#1ede4895a82f26e8af71009f961a9b8cb60d6a81" integrity sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww== @@ -4591,7 +4591,16 @@ streamx@^2.15.0, streamx@^2.18.0: optionalDependencies: bare-events "^2.2.0" -"string-width-cjs@npm:string-width@^4.2.0", "string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0": + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -4628,7 +4637,14 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1": + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + +strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -4731,6 +4747,13 @@ svgpath@^2.1.5: resolved "https://registry.yarnpkg.com/svgpath/-/svgpath-2.6.0.tgz#5b160ef3d742b7dfd2d721bf90588d3450d7a90d" integrity sha512-OIWR6bKzXvdXYyO4DK/UWa1VA1JeKq8E+0ug2DG98Y/vOmMpfZNj+TIG988HjfYSqtcy/hFOtZq/n/j5GSESNg== +tabletojson@^4.1.4: + version "4.1.4" + resolved "https://registry.yarnpkg.com/tabletojson/-/tabletojson-4.1.4.tgz#b72d23c22f13b782ae4fa8c1da0b388010abc31d" + integrity sha512-Y2QpSgQk2ORLROr82dNIXbFKGJkqf8gwgGZ7pCFungVy3UCxaWb8xZT9yb3SHJsqVV6f8utLdbBJ4GVhPEPXog== + dependencies: + cheerio "^1.0.0-rc.12" + tar-fs@^2.0.0: version "2.1.1" resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784" @@ -5263,7 +5286,7 @@ word-wrap@^1.2.5: resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.5.tgz#d2c45c6dd4fbce621a66f136cbe328afd0410b34" integrity sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA== -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== @@ -5281,6 +5304,15 @@ wrap-ansi@^6.2.0: string-width "^4.1.0" strip-ansi "^6.0.0" +wrap-ansi@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + wrap-ansi@^8.1.0: version "8.1.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"