From 3077128ca8caab432800b628ea2f45642bd38772 Mon Sep 17 00:00:00 2001 From: telesoho Date: Tue, 17 Sep 2024 23:26:11 +0800 Subject: [PATCH] feat: use AI to parse content --- package.json | 14 ++++++------ src/Logger.ts | 2 +- src/ToolsManager.ts | 53 ++++++++++++++++++++----------------------- src/ai_paster.ts | 6 ++++- src/paster.ts | 43 ++++++++++++----------------------- src/toMarkdown.ts | 7 +++++- src/tool_functions.ts | 18 ++++++++++++--- 7 files changed, 73 insertions(+), 70 deletions(-) diff --git a/package.json b/package.json index b9123c4..9ecb927 100644 --- a/package.json +++ b/package.json @@ -82,23 +82,22 @@ }, "default": [ { - "model": "llama-3.1-8b-instant", + "model": "llama-3.1-70b-versatile", "messages": [ { "role": "system", "content": [ - "You are responsible for converting text content into Markdown format. ", - "If the original content is HTML, ignore any color or font settings and comments, but retain tables.", - "Only output convered content" + "You are a helpful assistant." ] }, { "role": "user", - "content": "{{clipboard_text}}" + "content": [ + "Generate consolation for the following text:", + "{{clipboard_text}}" + ] } ], - "tools": [], - "tool_choice": "auto", "max_tokens": 4096 } ], @@ -364,6 +363,7 @@ "arch": "^2.2.0", "axios": "^1.7.7", "moment": "^2.22.1", + "node-html-parser": "^6.1.13", "openai": "^4.61.0", "shelljs": "^0.8.5", "turndown": "^7.1.2", diff --git a/src/Logger.ts b/src/Logger.ts index fcfc2e8..25c66c9 100644 --- a/src/Logger.ts +++ b/src/Logger.ts @@ -8,7 +8,7 @@ export default class Logger { if (this.channel) { const time = moment().format("MM-DD HH:mm:ss"); for (const m of message) { - const logmsg = `[${time}] ${m}`; + const logmsg = `[${time}] ${m.substring(0, 256)}`; this.channel.appendLine(logmsg); } } diff --git a/src/ToolsManager.ts b/src/ToolsManager.ts index f2acb7f..e93f311 100644 --- a/src/ToolsManager.ts +++ b/src/ToolsManager.ts @@ -1,13 +1,16 @@ import { ChatCompletionTool } from "openai/resources/chat/completions"; +import { fetchWeb, htmlToMarkdown } from "./tool_functions"; import Logger from "./Logger"; -import { fetchWeb } from "./tool_functions"; +type FunctionParameters = { + [x: string]: unknown; +}; type ToolFunction = (...args: any[]) => any; interface ToolInfo { func: ToolFunction; description: string; - parameters: Record; + parameters: FunctionParameters; } export class ToolsManager { @@ -18,38 +21,32 @@ export class ToolsManager { } public registerDefaultTools() { - this.registerTool( - "get_current_weather", - async ({ city }: { city: string }) => { - return JSON.stringify({ - city: city, - temperature: "25°C", - weather: "sunny", - }); - }, - "Get the current weather for a specified city", - { - type: "object", - properties: { - city: { type: "string", description: "The name of the city" }, - }, - required: ["city"], - } - ); - this.registerTool("fetchWeb", fetchWeb, "fetch a web page content", { - type: "object", - properties: { - url: { type: "string", description: "The url of the web page" }, - }, - required: ["url"], - }); + // this.registerTool("fetchWeb", fetchWeb, "fetch a web page content", { + // type: "object", + // properties: { + // url: { type: "string", description: "The url of the web page" }, + // }, + // required: ["url"], + // }); + // this.registerTool( + // "htmlToMarkdown", + // htmlToMarkdown, + // "Conver html to markdown", + // { + // type: "object", + // properties: { + // html: { type: "string", description: "html" }, + // }, + // required: ["html"], + // } + // ); } public registerTool( name: string, func: ToolFunction, description: string, - parameters: Record + parameters: FunctionParameters ) { this.tools.set(name, { func, description, parameters }); } diff --git a/src/ai_paster.ts b/src/ai_paster.ts index 48e777d..305f732 100644 --- a/src/ai_paster.ts +++ b/src/ai_paster.ts @@ -38,12 +38,16 @@ export class AIPaster { }` ); }); + completion.tools.forEach((tool) => { + Logger.log("tool:", JSON.stringify(tool)); + }); const chatCompletion = await this.client.chat.completions.create( completion ); const responseMessages = chatCompletion.choices[0].message; const toolCalls = chatCompletion.choices[0].message.tool_calls; if (toolCalls) { + completion.messages.push(responseMessages); for (const toolCall of toolCalls) { const functionName = toolCall.function.name; const functionResponse = await this.toolsManager.executeTool( @@ -78,7 +82,7 @@ export class AIPaster { } return responseMessages.content; } catch (error) { - Logger.log("Error", error); + Logger.log("Error:", JSON.stringify(error)); throw error; } } diff --git a/src/paster.ts b/src/paster.ts index afaf492..3b67e6a 100644 --- a/src/paster.ts +++ b/src/paster.ts @@ -36,16 +36,15 @@ class Paster { } } - static async parseByAI(content: string): Promise { - if (Paster.getConfig().enableAI) { + static async parseByAI(content: string) { + if (Paster.config.enableAI) { const p = new AIPaster(); const result = await p.callAI(content); if (result.status == "success") { - Paster.writeToEditor(result.message); - return true; + await Paster.writeToEditor(result.message); } } - return false; + Paster.writeToEditor(content); } static async selectClipboardType( @@ -54,12 +53,6 @@ class Paster { if (!(type instanceof Set)) { return type; } - if (this.config.autoSelectClipboardType == "always") { - const priorityOrdering = this.config.autoSelectClipboardTypePriority; - for (const theType of priorityOrdering) - if (type.has(theType)) return theType; - return xclip.ClipboardType.Unknown; - } if ( this.config.autoSelectClipboardType == "never" || (this.config.autoSelectClipboardType == "html&text" && @@ -77,6 +70,10 @@ class Paster { } return xclip.ClipboardType.Unknown; } + const priorityOrdering = this.config.autoSelectClipboardTypePriority; + for (const theType of priorityOrdering) + if (type.has(theType)) return theType; + return xclip.ClipboardType.Unknown; } /** @@ -96,36 +93,24 @@ class Paster { case xclip.ClipboardType.Html: if (enableHtmlConverter) { const html = await cb.getTextHtml(); - if (await Paster.parseByAI(html)) { - return; - } - Logger.log(html); - const markdown = toMarkdown(html, turndownOptions); + let markdown = toMarkdown(html, turndownOptions); if (enableRulesForHtml) { - let newMarkdown = Paster.parse(markdown); - Paster.writeToEditor(newMarkdown); - } else { - Paster.writeToEditor(markdown); + markdown = Paster.parse(markdown); } + await Paster.parseByAI(markdown); } else { const text = await cb.getTextPlain(); - if (Paster.parseByAI(text)) { - return; - } if (text) { let newContent = Paster.parse(text); - Paster.writeToEditor(newContent); + await Paster.parseByAI(newContent); } } break; case xclip.ClipboardType.Text: const text = await cb.getTextPlain(); - if (await Paster.parseByAI(text)) { - return; - } if (text) { let newContent = Paster.parse(text); - Paster.writeToEditor(newContent); + await Paster.parseByAI(newContent); } break; case xclip.ClipboardType.Image: @@ -450,7 +435,7 @@ class Paster { } } - private static parse(content) { + static parse(content) { let editor = vscode.window.activeTextEditor; let fileUri = editor.document.uri; diff --git a/src/toMarkdown.ts b/src/toMarkdown.ts index 8b84f92..f7d2be3 100644 --- a/src/toMarkdown.ts +++ b/src/toMarkdown.ts @@ -46,7 +46,12 @@ function toMarkdown(content, options) { // return "\n\n" + content + "\n" + underline + "\n\n"; // }, // }, - + { + filter: ["style", "script", "head", "meta"], + replacement: function (content) { + return ""; + }, + }, { filter: "sup", replacement: function (content) { diff --git a/src/tool_functions.ts b/src/tool_functions.ts index ec564fd..33777ad 100644 --- a/src/tool_functions.ts +++ b/src/tool_functions.ts @@ -1,14 +1,26 @@ import axios from "axios"; +import { toMarkdown } from "./toMarkdown"; +import { Paster } from "./paster"; +import Logger from "./Logger"; +import * as HTMLParser from "node-html-parser"; export async function fetchWeb({ url }: { url: string }): Promise { - let content = ""; try { const response = await axios.get(url); - content = response.data; - return { content }; + let html = HTMLParser.parse(response.data); + const body = html.querySelector("body").toString(); + const title = html.querySelector("title").toString(); + return { url, title, body }; } catch (e) { return { error: e, }; } } + +export async function htmlToMarkdown({ html }: { html: string }): Promise { + Logger.log("htmlToMarkdown:", JSON.stringify(html)); + let turndownOptions = Paster.config.turndownOptions; + let content = toMarkdown(html, turndownOptions); + return { content }; +}