diff --git a/.vscode/settings.json b/.vscode/settings.json
index 2eae3d01f0..d5fa1ea1bb 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -19,6 +19,7 @@
"gptool",
"gptools",
"gptoolsjs",
+ "limitrows",
"llmify",
"llmrequest",
"localai",
@@ -34,6 +35,7 @@
"promptfoo",
"stringifying",
"sysr",
+ "tabletojson",
"treesitter",
"typecheck",
"unfence",
diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/docs/genaisrc/genaiscript.d.ts
+++ b/docs/genaisrc/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/genaisrc/docs_sample_generator.genai.js b/genaisrc/docs-sample-generator.genai.mts
similarity index 98%
rename from genaisrc/docs_sample_generator.genai.js
rename to genaisrc/docs-sample-generator.genai.mts
index e5395d00d2..7b890c751e 100644
--- a/genaisrc/docs_sample_generator.genai.js
+++ b/genaisrc/docs-sample-generator.genai.mts
@@ -1,4 +1,5 @@
script({
+ model: "openai:gpt-4",
title: "generating tests from samples",
system: ["system"],
parameters: {
diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/genaisrc/genaiscript.d.ts
+++ b/genaisrc/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/cli/src/playwright.ts b/packages/cli/src/playwright.ts
index c9c0a2db14..3bf10d9ce1 100644
--- a/packages/cli/src/playwright.ts
+++ b/packages/cli/src/playwright.ts
@@ -86,7 +86,6 @@ export class BrowserManager {
logVerbose(`browsing ${ellipseUri(url)}`)
const browser = await this.launchBrowser(options)
- logVerbose(`navigating...`)
let page: Page
if (incognito) {
const context = await browser.newContext(rest)
@@ -96,7 +95,6 @@ export class BrowserManager {
}
if (timeout !== undefined) page.setDefaultTimeout(timeout)
if (url) await page.goto(url)
- logVerbose(`page ready`)
return page
}
}
diff --git a/packages/core/package.json b/packages/core/package.json
index b49c076023..d7213fb0a8 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -57,6 +57,7 @@
"sanitize-html": "^2.13.0",
"semver": "^7.6.3",
"serialize-error": "^11.0.3",
+ "tabletojson": "^4.1.4",
"toml": "^3.0.0",
"tree-sitter-wasms": "^0.1.11",
"ts-dedent": "^2.2.0",
@@ -76,4 +77,4 @@
"@types/turndown": "^5.0.5",
"turndown": "^7.2.0"
}
-}
+}
\ No newline at end of file
diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/core/src/genaisrc/genaiscript.d.ts
+++ b/packages/core/src/genaisrc/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/core/src/globals.ts b/packages/core/src/globals.ts
index 5a94dfa006..bafc716054 100644
--- a/packages/core/src/globals.ts
+++ b/packages/core/src/globals.ts
@@ -8,7 +8,7 @@ import {
updateFrontmatter,
} from "./frontmatter"
import { JSONLStringify, JSONLTryParse } from "./jsonl"
-import { HTMLToMarkdown, HTMLToText } from "./html"
+import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html"
export function resolveGlobal(): any {
if (typeof window !== "undefined")
@@ -58,6 +58,7 @@ export function installGlobals() {
},
})
glb.HTML = Object.freeze({
+ convertTablesToJSON: HTMLTablesToJSON,
convertToMarkdown: HTMLToMarkdown,
convertToText: HTMLToText,
})
diff --git a/packages/core/src/html.test.ts b/packages/core/src/html.test.ts
index 0b9f89a44f..aaf33d9552 100644
--- a/packages/core/src/html.test.ts
+++ b/packages/core/src/html.test.ts
@@ -1,8 +1,26 @@
import test, { describe } from "node:test"
-import { HTMLToMarkdown, HTMLToText } from "./html"
+import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html"
import assert from "node:assert/strict"
describe("html", () => {
+ test("convert HTML table to JSON", () => {
+ const html = `
+ <table>
+ <tr>
+ <th>Header 1</th>
+ <th>Header 2</th>
+ </tr>
+ <tr>
+ <td>Value 1</td>
+ <td>Value 2</td>
+ </tr>
+ </table>
+ `
+ const expected = [{ "Header 1": "Value 1", "Header 2": "Value 2" }] // header row supplies the keys, data row the values
+ const tables = HTMLTablesToJSON(html)
+ assert.strictEqual(tables.length, 1) // exactly one <table> in the fixture
+ assert.deepStrictEqual(tables[0], expected)
+ })
test("converts HTML to text", () => {
const html = "Hello, world!
"
const expected = "Hello, world!"
diff --git a/packages/core/src/html.ts b/packages/core/src/html.ts
index dc304ceb2b..43a74d43d0 100644
--- a/packages/core/src/html.ts
+++ b/packages/core/src/html.ts
@@ -1,6 +1,12 @@
-import { convert } from "html-to-text"
+import { convert as convertToText } from "html-to-text"
import { TraceOptions } from "./trace"
import Turndown from "turndown"
+import { tabletojson } from "tabletojson"
+
+/** Converts every HTML table found in `html` into an array of row objects, one array per table. */
+export function HTMLTablesToJSON(html: string, options?: HTMLTableToJSONOptions): object[][] {
+ return tabletojson.convert(html, options)
+}
export function HTMLToText(
html: string,
@@ -11,7 +17,7 @@ export function HTMLToText(
const { trace } = options || {}
try {
- const text = convert(html, options)
+ const text = convertToText(html, options)
return text
} catch (e) {
trace?.error("HTML conversion failed", e)
diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
index 9950d9219d..dc91673296 100644
--- a/packages/core/src/promptdom.ts
+++ b/packages/core/src/promptdom.ts
@@ -255,7 +255,7 @@ export function createFileOutput(output: FileOutput): FileOutputNode {
return { type: "fileOutput", output }
}
-export function createDefDataNode(
+export function createDefData(
name: string,
data: object | object[],
options?: DefDataOptions
diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts
index f25e45bc56..db4c2482b2 100644
--- a/packages/core/src/promptrunner.ts
+++ b/packages/core/src/promptrunner.ts
@@ -85,7 +85,7 @@ export async function runTemplate(
assert(model !== undefined)
try {
- trace.itemValue("🧠model", model ?? "??")
+ trace.heading(3, `🧠running ${template.id} with model ${model ?? ""}`)
if (cliInfo) traceCliArgs(trace, template, options)
const vars = await resolveExpansionVars(
diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
index eac6d73bcc..8aed175061 100644
--- a/packages/core/src/runpromptcontext.ts
+++ b/packages/core/src/runpromptcontext.ts
@@ -3,7 +3,7 @@ import {
appendChild,
createAssistantNode,
createChatParticipant,
- createDefDataNode,
+ createDefData,
createDefNode,
createFileOutput,
createFunctionNode,
@@ -114,7 +114,7 @@ export function createChatTurnGenerationContext(
return name
},
defData: (name, data, defOptions) => {
- appendChild(node, createDefDataNode(name, data, defOptions))
+ appendChild(node, createDefData(name, data, defOptions))
return name
},
fence(body, options?: DefOptions) {
diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
index 9d67902559..7bfe8d25f5 100644
--- a/packages/core/src/types/prompt_template.d.ts
+++ b/packages/core/src/types/prompt_template.d.ts
@@ -1156,14 +1156,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/genaisrc/browse-text.genai.mts b/packages/sample/genaisrc/browse-text.genai.mts
index b28e10660d..d35735493d 100644
--- a/packages/sample/genaisrc/browse-text.genai.mts
+++ b/packages/sample/genaisrc/browse-text.genai.mts
@@ -7,8 +7,7 @@ const page = await host.browse(
)
const table = page.locator('table[data-testid="csv-table"]')
const html = await table.innerHTML()
-console.log(`HTML:` + html)
-const csv = HTML.convertToText(html)
-console.log(`TEXT: ` + csv)
-def("DATA", csv)
-$`Analyze DATA.`
+const csv = HTML.convertTablesToJSON(`<table>${html}</table>`)[0] // innerHTML() omits the <table> wrapper, so restore it before parsing
+csv.forEach((row) => delete row[Object.keys(row)[0]]) // remove the first column
+defData("DATA", csv, { format: "csv" })
+$`Analyze DATA and provide a statistical summary.`
diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/genaisrc/genaiscript.d.ts
+++ b/packages/sample/genaisrc/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/genaisrc/node/genaiscript.d.ts
+++ b/packages/sample/genaisrc/node/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/genaisrc/python/genaiscript.d.ts
+++ b/packages/sample/genaisrc/python/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/genaisrc/style/genaiscript.d.ts
+++ b/packages/sample/genaisrc/style/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/src/aici/genaiscript.d.ts
+++ b/packages/sample/src/aici/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/src/errors/genaiscript.d.ts
+++ b/packages/sample/src/errors/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/src/makecode/genaiscript.d.ts
+++ b/packages/sample/src/makecode/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/src/tla/genaiscript.d.ts
+++ b/packages/sample/src/tla/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/packages/sample/src/vision/genaiscript.d.ts
+++ b/packages/sample/src/vision/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts
index ed648e260f..442954ef08 100644
--- a/slides/genaisrc/genaiscript.d.ts
+++ b/slides/genaisrc/genaiscript.d.ts
@@ -1182,14 +1182,40 @@ interface XML {
parse(text: string, options?: XMLParseOptions): any
}
+interface HTMLTableToJSONOptions {
+ useFirstRowForHeadings?: boolean
+ headers?: { from?: number; to: number; concatWith: string } // inlined: `HeaderRows` is a tabletojson type with no declaration visible in this ambient file
+ stripHtmlFromHeadings?: boolean
+ stripHtmlFromCells?: boolean
+ stripHtml?: boolean | null
+ forceIndexAsNumber?: boolean
+ countDuplicateHeadings?: boolean
+ ignoreColumns?: number[] | null
+ onlyColumns?: number[] | null
+ ignoreHiddenRows?: boolean
+ id?: string[] | null
+ headings?: string[] | null
+ containsClasses?: string[] | null
+ limitrows?: number | null
+}
+
interface HTML {
+ /**
+ * Converts all HTML tables to JSON.
+ * @param html
+ * @param options
+ */
+ convertTablesToJSON(
+ html: string,
+ options?: HTMLTableToJSONOptions
+ ): object[][]
/**
* Converts HTML markup to plain text
* @param html
*/
convertToText(html: string): string
/**
- * Converts HMTL markup to markdown
+ * Converts HTML markup to markdown
* @param html
*/
convertToMarkdown(html: string): string
diff --git a/yarn.lock b/yarn.lock
index 8bcbb978c3..d9f3560980 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1450,7 +1450,7 @@ cheerio-select@^2.1.0:
domhandler "^5.0.3"
domutils "^3.0.1"
-cheerio@^1.0.0-rc.12, cheerio@^1.0.0-rc.9:
+cheerio@^1.0.0, cheerio@^1.0.0-rc.12, cheerio@^1.0.0-rc.9:
version "1.0.0"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0.tgz#1ede4895a82f26e8af71009f961a9b8cb60d6a81"
integrity sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww==
@@ -4591,7 +4591,16 @@ streamx@^2.15.0, streamx@^2.18.0:
optionalDependencies:
bare-events "^2.2.0"
-"string-width-cjs@npm:string-width@^4.2.0", "string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
+"string-width-cjs@npm:string-width@^4.2.0":
+ version "4.2.3"
+ resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
+ integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
+ dependencies:
+ emoji-regex "^8.0.0"
+ is-fullwidth-code-point "^3.0.0"
+ strip-ansi "^6.0.1"
+
+"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
@@ -4628,7 +4637,14 @@ string_decoder@~1.1.1:
dependencies:
safe-buffer "~5.1.0"
-"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1:
+"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
+ version "6.0.1"
+ resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
+ integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
+ dependencies:
+ ansi-regex "^5.0.1"
+
+strip-ansi@^6.0.0, strip-ansi@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
@@ -4731,6 +4747,13 @@ svgpath@^2.1.5:
resolved "https://registry.yarnpkg.com/svgpath/-/svgpath-2.6.0.tgz#5b160ef3d742b7dfd2d721bf90588d3450d7a90d"
integrity sha512-OIWR6bKzXvdXYyO4DK/UWa1VA1JeKq8E+0ug2DG98Y/vOmMpfZNj+TIG988HjfYSqtcy/hFOtZq/n/j5GSESNg==
+tabletojson@^4.1.4:
+ version "4.1.4"
+ resolved "https://registry.yarnpkg.com/tabletojson/-/tabletojson-4.1.4.tgz#b72d23c22f13b782ae4fa8c1da0b388010abc31d"
+ integrity sha512-Y2QpSgQk2ORLROr82dNIXbFKGJkqf8gwgGZ7pCFungVy3UCxaWb8xZT9yb3SHJsqVV6f8utLdbBJ4GVhPEPXog==
+ dependencies:
+ cheerio "^1.0.0-rc.12"
+
tar-fs@^2.0.0:
version "2.1.1"
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784"
@@ -5263,7 +5286,7 @@ word-wrap@^1.2.5:
resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.5.tgz#d2c45c6dd4fbce621a66f136cbe328afd0410b34"
integrity sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==
-"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
+"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
@@ -5281,6 +5304,15 @@ wrap-ansi@^6.2.0:
string-width "^4.1.0"
strip-ansi "^6.0.0"
+wrap-ansi@^7.0.0:
+ version "7.0.0"
+ resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
+ integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
+ dependencies:
+ ansi-styles "^4.0.0"
+ string-width "^4.1.0"
+ strip-ansi "^6.0.0"
+
wrap-ansi@^8.1.0:
version "8.1.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"