diff --git a/apps/browser/src/pages/Dojo.tsx b/apps/browser/src/pages/Dojo.tsx index e2b4281d..9776a19b 100644 --- a/apps/browser/src/pages/Dojo.tsx +++ b/apps/browser/src/pages/Dojo.tsx @@ -172,9 +172,10 @@ function Dojo() { const userWorkspace = new EvoCore.InMemoryWorkspace(); setUserWorkspace(userWorkspace); + const contextWindow = new EvoCore.ContextWindow(llm); const chat = new EvoCore.Chat( - llm, cl100k_base, + contextWindow, logger ); diff --git a/apps/cli/src/app.ts b/apps/cli/src/app.ts index 420e278a..1d209375 100644 --- a/apps/cli/src/app.ts +++ b/apps/cli/src/app.ts @@ -11,6 +11,7 @@ import { Timeout, Workspace, LlmApi, + ContextWindow, } from "@evo-ninja/agent-utils"; import dotenv from "dotenv"; import readline from "readline"; @@ -90,7 +91,8 @@ export function createApp(config?: AppConfig): App { config?.userWorkspace ?? new FileSystemWorkspace(workspacePath); // Chat - const chat = new Chat(llm, cl100k_base, logger); + const contextWindow = new ContextWindow(llm); + const chat = new Chat(cl100k_base, contextWindow, logger); // Debug Logging let debugLog: DebugLog | undefined; diff --git a/apps/cli/src/diagnostic/DebugLlmApi.ts b/apps/cli/src/diagnostic/DebugLlmApi.ts index b8105435..54c8099b 100644 --- a/apps/cli/src/diagnostic/DebugLlmApi.ts +++ b/apps/cli/src/diagnostic/DebugLlmApi.ts @@ -1,7 +1,7 @@ import { DebugLog } from "./DebugLog"; import { Timer } from "./Timer"; -import { Chat, LlmApi, LlmOptions, ChatMessage } from "@evo-ninja/agent-utils"; +import { LlmApi, LlmOptions, ChatLogs, ChatMessage } from "@evo-ninja/agent-utils"; export class DebugLlmApi implements LlmApi { constructor( @@ -18,7 +18,7 @@ export class DebugLlmApi implements LlmApi { } async getResponse( - chat: Chat, + chatLogs: ChatLogs, functionDefinitions: any[], options?: LlmOptions | undefined ): Promise { @@ -28,7 +28,7 @@ export class DebugLlmApi implements LlmApi { time.start(); const resp = await this.llm.getResponse( - chat, + chatLogs, functionDefinitions, options ); @@ -36,7 +36,7 @@ export class DebugLlmApi implements LlmApi { time.end(); this.debugLog.stepLlmReq( time, - chat.export(), + chatLogs.clone(), resp ); diff --git a/apps/cli/src/diagnostic/DebugLlmReq.ts b/apps/cli/src/diagnostic/DebugLlmReq.ts index 6f79d34d..ca50a6c8 100644 --- a/apps/cli/src/diagnostic/DebugLlmReq.ts +++ b/apps/cli/src/diagnostic/DebugLlmReq.ts @@ -1,28 +1,32 @@ import { Timer } from "./Timer"; -import { ChatMessageLog, ChatMessage } from "@evo-ninja/agent-utils"; +import { ChatLogs, ChatMessage } from "@evo-ninja/agent-utils"; export class DebugLlmReq { constructor( public time: Timer, - public chat: ChatMessageLog, + public chatLogs: ChatLogs, public response?: ChatMessage ) { } - get tokens() { - return this.chat["persistent"].tokens + - this.chat["temporary"].tokens; + get tokens(): number { + return this.chatLogs.tokens; } toString(): string { return JSON.stringify(this.toJSON(), null, 2); } - toJSON() { + toJSON(): { + time: Timer; + tokens: number; + chat: ChatLogs; + response?: ChatMessage; + } { return { time: this.time, tokens: this.tokens, - chat: this.chat, + chat: this.chatLogs, response: this.response }; } diff --git a/apps/cli/src/diagnostic/DebugLog.ts b/apps/cli/src/diagnostic/DebugLog.ts index 3b9dab11..135b85d6 100644 --- a/apps/cli/src/diagnostic/DebugLog.ts +++ b/apps/cli/src/diagnostic/DebugLog.ts @@ -1,7 +1,7 @@ import { Timer } from "./Timer"; import { DebugLlmReq } from "./DebugLlmReq"; -import { ChatMessageLog, ChatMessage, Workspace } from "@evo-ninja/agent-utils"; +import { 
ChatLogs, ChatMessage, Workspace } from "@evo-ninja/agent-utils"; interface DebugGoal { prompt: string; @@ -77,8 +77,8 @@ export class DebugLog { this.save(); } - stepLlmReq(time: Timer, chat: ChatMessageLog, response?: ChatMessage): void { - const req = new DebugLlmReq(time, chat, response); + stepLlmReq(time: Timer, chatLogs: ChatLogs, response?: ChatMessage): void { + const req = new DebugLlmReq(time, chatLogs, response); this.goal.llmReqs += 1; this.goal.tokens += req.tokens; this._latestStep.llmReqs.push(req); @@ -89,7 +89,10 @@ export class DebugLog { return JSON.stringify(this.toJSON(), null, 2); } - toJSON() { + toJSON(): { + goal: DebugGoal; + steps: DebugStep[]; + } { return { goal: this.goal, steps: this.steps, diff --git a/apps/cli/src/diagnostic/Timer.ts b/apps/cli/src/diagnostic/Timer.ts index 50aa9b6d..74e89644 100644 --- a/apps/cli/src/diagnostic/Timer.ts +++ b/apps/cli/src/diagnostic/Timer.ts @@ -26,11 +26,11 @@ export class Timer { return `${this._pad(hours)}:${this._pad(minutes)}:${this._pad(seconds)}`; } - toString() { + toString(): string { return this.getHHMMSS(); } - toJSON() { + toJSON(): string { return this.toString(); } diff --git a/packages/agent-utils/src/agent/basicFunctionCallLoop.ts b/packages/agent-utils/src/agent/basicFunctionCallLoop.ts index 17ea6db2..2111fd44 100644 --- a/packages/agent-utils/src/agent/basicFunctionCallLoop.ts +++ b/packages/agent-utils/src/agent/basicFunctionCallLoop.ts @@ -27,7 +27,7 @@ export async function* basicFunctionCallLoop f.definition); - const response = await llm.getResponse(chat, functionDefinitions); + const response = await llm.getResponse(chat.chatLogs, functionDefinitions); if (!response) { return ResultErr("No response from LLM."); diff --git a/packages/agent-utils/src/llm/Chat.ts b/packages/agent-utils/src/llm/Chat.ts index 2a985a53..50162793 100644 --- a/packages/agent-utils/src/llm/Chat.ts +++ b/packages/agent-utils/src/llm/Chat.ts @@ -1,64 +1,31 @@ -import { LlmApi, Tokenizer } from "."; -import { Logger } from "../sys"; - import { - ChatCompletionRequestMessageRoleEnum, - ChatCompletionRequestMessage as ChatMessage -} from "openai"; - -export { ChatMessage }; - -export type MessageType = - | "persistent" - | "temporary"; + Tokenizer, + ChatLogs, + ChatMessage, + ChatLogType, + ContextWindow +} from "."; +import { Logger } from "../sys"; -interface MessageLog { - tokens: number; - msgs: ChatMessage[]; -} +import { ChatCompletionRequestMessageRoleEnum } from "openai"; export type ChatRole = ChatCompletionRequestMessageRoleEnum; -export type ChatMessageLog = Record; - export class Chat { - private _msgLogs: ChatMessageLog = { - "persistent": { - tokens: 0, - msgs: [] - }, - "temporary": { - tokens: 0, - msgs: [] - } - }; - private _maxContextTokens: number; - private _summaryTokens: number; - private _chunkTokens: number; + private _chatLogs: ChatLogs; constructor( - private _llm: LlmApi, private _tokenizer: Tokenizer, + private _contextWindow?: ContextWindow, private _logger?: Logger, - ) { - this._maxContextTokens = this._llm.getMaxContextTokens(); + ) { } - // Summary size should be ~10% of total tokens - const summaryPerc = 0.10; - this._summaryTokens = Math.floor( - this._maxContextTokens * summaryPerc - ); - - // Chunk size should be ~70% of total tokens - const chunkPerc = 0.7; - this._chunkTokens = Math.floor( - this._maxContextTokens * chunkPerc - ); + get chatLogs(): ChatLogs { + return this._chatLogs; } get tokens(): number { - return this._msgLogs["persistent"].tokens + - 
this._msgLogs["temporary"].tokens; + return this._chatLogs.tokens; } get tokenizer(): Tokenizer { @@ -66,30 +33,30 @@ export class Chat { } get messages(): ChatMessage[] { - return [ - ...this._msgLogs["persistent"].msgs, - ...this._msgLogs["temporary"].msgs - ]; + return this._chatLogs.messages; } public add( - type: MessageType, + type: ChatLogType, msg: ChatMessage | ChatMessage[] ) { - const msgLog = this._msgLogs[type]; let msgs = Array.isArray(msg) ? msg : [msg]; for (const msg of msgs) { const tokens = this._tokenizer.encode(msg.content || "").length; // If the message is larger than the context window - if (tokens > this._chunkTokens) { - const chunked = this._chunk(msg); - msgLog.tokens += chunked.tokens; - msgLog.msgs.push(...chunked.msgs); + if (this._contextWindow?.shouldChunk(tokens)) { + const chunked = this._contextWindow.chunk( + msg, + this._tokenizer + ); + this._chatLogs.add(type, chunked); } else { - msgLog.tokens += tokens; - msgLog.msgs.push(msg); + this._chatLogs.add(type, { + tokens, + msgs: [msg] + }); } } } @@ -125,134 +92,36 @@ export class Chat { } } - public async fitToContextWindow(): Promise { - if (this.tokens < this._maxContextTokens) { - return; - } - - this._logger?.error(`! Max Tokens Exceeded (${this.tokens} / ${this._maxContextTokens})`); - - // Start with "temporary" messages - await this._summarize("temporary"); - - if (this.tokens < this._maxContextTokens) { - return; - } - - // Move onto "persistent" messages - await this._summarize("persistent"); - } - - public export(): ChatMessageLog { - return JSON.parse(JSON.stringify(this._msgLogs)); + public cloneChatLogs(): ChatLogs { + return this._chatLogs.clone(); } - public toString() { + public toString(): string { return JSON.stringify(this, null, 2); } - public toJSON() { - return this._msgLogs; - } - - private _chunk(msg: ChatMessage): MessageLog { - const chunks: MessageLog = { - tokens: 0, - msgs: [] - }; - let content = msg.content || ""; - - while (content.length > 0) { - // Slice a chunk - const contentChunk = content.slice(0, this._chunkTokens); - - // Append the chunk - chunks.tokens += this._tokenizer.encode(contentChunk).length; - chunks.msgs.push({ - ...msg, - content: contentChunk - }); - - // Remove the chunk - content = content.slice(this._chunkTokens); - } - - return chunks; + public toJSON(): ChatLogs { + return this._chatLogs; } - private async _summarize( - msgType: MessageType - ): Promise { - const msgLog = this._msgLogs[msgType]; - - const message = await this._summarizeMessages(msgLog.msgs); - - if (!message) { - return; + public async fitToContextWindow(): Promise { + if (!this._contextWindow) { + return Promise.resolve(); } - const tokens = this._tokenizer.encode(message.content || "").length; - - this._msgLogs[msgType] = { - tokens, - msgs: [message] - }; - } - - private async _summarizeMessages( - msgs: ChatMessage[] - ): Promise { - let result: ChatMessage | undefined; - let queue = msgs; - - // While we still have more than 1 message to summarize - while (queue.length > 1) { - // Aggregate as many messages as possible, - // based on max size of the context window - const toSummarize: ChatMessage[] = []; - let tokenCounter = 0; - let index = 0; - - while (index < queue.length) { - const msg = queue[index]; - const content = msg.content || ""; - const contentTokens = this._tokenizer.encode(content).length; - - if ((tokenCounter + contentTokens) > (this._maxContextTokens - this._summaryTokens)) { - break; - } - - toSummarize.push(msg); - tokenCounter += 
this._tokenizer.encode(content).length; - index++; - } - - // Summarize - const message = await this._llm.getResponse( - this, - [], - { - temperature: 0, - max_tokens: this._summaryTokens - } - ); - - // Remove messages from the queue - queue = queue.splice(index); - - // Add the new message to the queue - if (message) { - queue = [ - message, - ...queue - ]; - } + if (!this._contextWindow.shouldSummarize(this.tokens)) { + return Promise.resolve(); } - if (queue.length > 0) { - result = queue[0]; - } + this._logger?.error( + `! Max Tokens Exceeded (${ + this.tokens} / ${this._contextWindow.maxContextTokens + })` + ); - return result; + this._chatLogs = await this._contextWindow.summarizeChat( + this._chatLogs, + this._tokenizer + ); } } diff --git a/packages/agent-utils/src/llm/ChatLogs.ts b/packages/agent-utils/src/llm/ChatLogs.ts new file mode 100644 index 00000000..2d9df56e --- /dev/null +++ b/packages/agent-utils/src/llm/ChatLogs.ts @@ -0,0 +1,66 @@ +import { ChatCompletionRequestMessage as ChatMessage } from "openai"; + +export { ChatMessage }; + +export type ChatLogType = + | "persistent" + | "temporary"; + +export interface ChatLog { + tokens: number; + msgs: ChatMessage[]; +} + +export class ChatLogs { + private _logs: Record = { + "persistent": { + tokens: 0, + msgs: [], + }, + "temporary": { + tokens: 0, + msgs: [], + }, + }; + + constructor(logs?: Record) { + if (logs) { + this._logs = logs; + } + } + + get tokens(): number { + return this._logs["persistent"].tokens + + this._logs["temporary"].tokens; + } + + get messages(): ChatMessage[] { + return [ + ...this._logs["persistent"].msgs, + ...this._logs["temporary"].msgs + ]; + } + + public get(type: ChatLogType): ChatLog { + return this._logs[type]; + } + + public add(type: ChatLogType, log: ChatLog) { + this._logs[type].tokens += log.tokens; + this._logs[type].msgs.push(...log.msgs); + } + + public clone(): ChatLogs { + return new ChatLogs( + JSON.parse(JSON.stringify(this._logs)) + ); + } + + public toString(): string { + return JSON.stringify(this, null, 2); + } + + public toJSON(): Record { + return this._logs; + } +} diff --git a/packages/agent-utils/src/llm/ContextWindow.ts b/packages/agent-utils/src/llm/ContextWindow.ts new file mode 100644 index 00000000..93880ae7 --- /dev/null +++ b/packages/agent-utils/src/llm/ContextWindow.ts @@ -0,0 +1,176 @@ +import { + ChatLog, + ChatLogs, + ChatMessage, + LlmApi, + Tokenizer +} from "."; + +export class ContextWindow { + private _maxContextTokens: number; + private _summaryTokens: number; + private _chunkTokens: number; + + constructor( + private _llm: LlmApi + ) { + this._maxContextTokens = this._llm.getMaxContextTokens(); + + // Summary size should be ~10% of total tokens + const summaryPerc = 0.10; + this._summaryTokens = Math.floor( + this._maxContextTokens * summaryPerc + ); + + // Chunk size should be ~70% of total tokens + const chunkPerc = 0.7; + this._chunkTokens = Math.floor( + this._maxContextTokens * chunkPerc + ); + } + + public get maxContextTokens(): number { + return this._maxContextTokens; + } + + public shouldChunk(tokens: number): boolean { + return tokens > this._chunkTokens; + } + + public chunk(msg: ChatMessage, tokenizer: Tokenizer): ChatLog { + const chunks: ChatLog = { + tokens: 0, + msgs: [] + }; + let content = msg.content || ""; + + while (content.length > 0) { + // Slice a chunk + const contentChunk = content.slice(0, this._chunkTokens); + + // Append the chunk + chunks.tokens += tokenizer.encode(contentChunk).length; + chunks.msgs.push({ + ...msg, + 
content: contentChunk
+      });
+
+      // Remove the chunk
+      content = content.slice(this._chunkTokens);
+    }
+
+    return chunks;
+  }
+
+  public shouldSummarize(tokens: number): boolean {
+    return tokens >= this._maxContextTokens;
+  }
+
+  public async summarizeChat(
+    chatLogs: ChatLogs,
+    tokenizer: Tokenizer
+  ): Promise<ChatLogs> {
+    // Start with "temporary" messages
+    const sumTemporary = await this._summarize(
+      chatLogs.get("temporary"),
+      tokenizer
+    );
+
+    let newChatLogs = new ChatLogs({
+      "persistent": chatLogs.get("persistent"),
+      "temporary": sumTemporary
+    });
+
+    if (newChatLogs.tokens < this._maxContextTokens) {
+      return newChatLogs;
+    }
+
+    // Move onto "persistent" messages
+    const sumPersistent = await this._summarize(
+      chatLogs.get("persistent"),
+      tokenizer
+    );
+
+    return new ChatLogs({
+      "persistent": sumPersistent,
+      "temporary": sumTemporary
+    });
+  }
+
+  private async _summarize(
+    chatLog: ChatLog,
+    tokenizer: Tokenizer
+  ): Promise<ChatLog> {
+    const message = await this._summarizeMessages(chatLog, tokenizer);
+
+    const tokens = tokenizer.encode(message?.content || "").length;
+
+    return {
+      tokens,
+      msgs: message ? [message] : []
+    };
+  }
+
+  private async _summarizeMessages(
+    chatLog: ChatLog,
+    tokenizer: Tokenizer
+  ): Promise<ChatMessage | undefined> {
+    let result: ChatMessage | undefined;
+    let queue = chatLog.msgs;
+
+    // While we still have more than 1 message to summarize
+    while (queue.length > 1) {
+      // Aggregate as many messages as possible,
+      // based on max size of the context window
+      const toSummarize: ChatMessage[] = [];
+      let tokenCounter = 0;
+      let index = 0;
+
+      while (index < queue.length) {
+        const msg = queue[index];
+        const content = msg.content || "";
+        const contentTokens = tokenizer.encode(content).length;
+
+        if ((tokenCounter + contentTokens) > (this._maxContextTokens - this._summaryTokens)) {
+          break;
+        }
+
+        toSummarize.push(msg);
+        tokenCounter += contentTokens;
+        index++;
+      }
+
+      // Summarize
+      const toSummarizeLogs = new ChatLogs();
+      toSummarizeLogs.add("persistent", {
+        msgs: toSummarize,
+        tokens: tokenCounter
+      });
+      const message = await this._llm.getResponse(
+        toSummarizeLogs,
+        [],
+        {
+          temperature: 0,
+          max_tokens: this._summaryTokens
+        }
+      );
+
+      // Remove messages from the queue
+      queue = queue.splice(index);
+
+      // Add the new message to the queue
+      if (message) {
+        queue = [
+          message,
+          ...queue
+        ];
+      }
+    }
+
+    if (queue.length > 0) {
+      result = queue[0];
+    }
+
+    return result;
+  }
+}
diff --git a/packages/agent-utils/src/llm/llm.ts b/packages/agent-utils/src/llm/LlmApi.ts
similarity index 86%
rename from packages/agent-utils/src/llm/llm.ts
rename to packages/agent-utils/src/llm/LlmApi.ts
index 81d930ec..4bd502fd 100644
--- a/packages/agent-utils/src/llm/llm.ts
+++ b/packages/agent-utils/src/llm/LlmApi.ts
@@ -1,4 +1,4 @@
-import { Chat, ChatMessage, OpenAIFunctions } from ".";
+import { ChatLogs, ChatMessage, OpenAIFunctions } from ".";
 
 export declare const LlmRoles: {
   readonly System: "system";
@@ -17,7 +17,7 @@ export interface LlmApi {
   getMaxContextTokens(): number;
   getModel(): string;
   getResponse(
-    chat: Chat,
+    chatLog: ChatLogs,
     functionDefinitions: OpenAIFunctions,
     options?: LlmOptions
   ): Promise<ChatMessage | undefined>;
diff --git a/packages/agent-utils/src/llm/OpenAI.ts b/packages/agent-utils/src/llm/OpenAI.ts
index 5fbc3741..f3351f9b 100644
--- a/packages/agent-utils/src/llm/OpenAI.ts
+++ b/packages/agent-utils/src/llm/OpenAI.ts
@@ -1,4 +1,4 @@
-import { LlmApi, LlmOptions, Chat, ChatMessage } from ".";
+import { LlmApi, LlmOptions, ChatLogs, ChatMessage } from ".";
 import
{ Logger } from "../"; import { @@ -50,14 +50,14 @@ export class OpenAI implements LlmApi { } async getResponse( - chat: Chat, + chatLog: ChatLogs, functionDefinitions: OpenAIFunctions, options?: LlmOptions, tries?: number ): Promise { try { const completion = await this._createChatCompletion({ - messages: chat.messages, + messages: chatLog.messages, functions: functionDefinitions, temperature: options ? options.temperature : 0, max_tokens: options ? options.max_tokens : this._defaultMaxResponseTokens @@ -92,7 +92,7 @@ export class OpenAI implements LlmApi { if (!tries || tries < this._maxRateLimitRetries) { return this.getResponse( - chat, + chatLog, functionDefinitions, options, tries === undefined ? 0 : ++tries diff --git a/packages/agent-utils/src/llm/index.ts b/packages/agent-utils/src/llm/index.ts index 6fb938c8..449a5618 100644 --- a/packages/agent-utils/src/llm/index.ts +++ b/packages/agent-utils/src/llm/index.ts @@ -1,4 +1,6 @@ export * from "./Chat"; -export * from "./llm"; +export * from "./ChatLogs"; +export * from "./ContextWindow"; +export * from "./LlmApi"; export * from "./OpenAI"; export * from "./Tokenizer"; diff --git a/packages/evo/src/Evo.ts b/packages/evo/src/Evo.ts index da60af4c..d344ff4c 100644 --- a/packages/evo/src/Evo.ts +++ b/packages/evo/src/Evo.ts @@ -19,7 +19,8 @@ import { Timeout, InMemoryWorkspace, executeAgentFunction, - basicFunctionCallLoop + basicFunctionCallLoop, + ContextWindow } from "@evo-ninja/agent-utils"; import { ScriptWriter } from "@evo-ninja/js-script-writer-agent"; import { ResultErr } from "@polywrap/result"; @@ -54,7 +55,8 @@ export class Evo implements Agent { const { chat } = this.context; const createScriptWriter = (): ScriptWriter => { const workspace = new InMemoryWorkspace(); - const chat = new Chat(this.llm, this.chat.tokenizer, this.logger); + const contextWindow = new ContextWindow(this.llm); + const chat = new Chat(this.chat.tokenizer, contextWindow, this.logger); return new ScriptWriter(this.llm, chat, this.logger, workspace); }; diff --git a/packages/evo/src/__tests__/llm.spec.ts b/packages/evo/src/__tests__/llm.spec.ts index 0cbba0c1..05f088ce 100644 --- a/packages/evo/src/__tests__/llm.spec.ts +++ b/packages/evo/src/__tests__/llm.spec.ts @@ -5,7 +5,8 @@ import { Logger, Env, ChatRole, - ChatMessage + ChatMessage, + ContextWindow } from "@evo-ninja/agent-utils"; import dotenv from "dotenv"; import cl100k_base from "gpt-tokenizer/cjs/encoding/cl100k_base"; @@ -64,8 +65,8 @@ describe('LLM Test Suite', () => { env.MAX_RESPONSE_TOKENS, logger ); - - const chat = new Chat(llm, cl100k_base, logger); + const contextWindow = new ContextWindow(llm); + const chat = new Chat(cl100k_base, contextWindow, logger); for (const msg of msgs.persistent.msgs) { chat.persistent(msg.role as ChatRole, msg.content); @@ -77,8 +78,8 @@ describe('LLM Test Suite', () => { const currentFunctions = agentFunctions(() => ({}) as any); - const response = await llm.getResponse(chat, currentFunctions.map(f => f.definition)); - + const response = await llm.getResponse(chat.chatLogs, currentFunctions.map(f => f.definition)); + expect(response).toEqual({ role: "assistant", function_call: {
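Reviewer sketch (not part of the diff): a minimal example of how the refactored pieces compose after this change, based only on the signatures visible above. The fakeLlm stub, the "fake-model" name, the 8,192-token figure, and main() are hypothetical stand-ins for illustration; in the apps this role is filled by the existing OpenAI implementation and the configured model's real context size.

import {
  Chat,
  ChatLogs,
  ChatMessage,
  ContextWindow,
  LlmApi,
} from "@evo-ninja/agent-utils";
import cl100k_base from "gpt-tokenizer/cjs/encoding/cl100k_base";

// Hypothetical LlmApi stub (stand-in for the real OpenAI class) so the
// sketch has no network dependency. It "summarizes" by echoing a count.
const fakeLlm: LlmApi = {
  getMaxContextTokens: () => 8192,
  getModel: () => "fake-model",
  getResponse: async (chatLogs: ChatLogs): Promise<ChatMessage | undefined> => ({
    role: "assistant",
    content: `summary of ${chatLogs.messages.length} message(s)`,
  }),
};

async function main(): Promise<void> {
  // ContextWindow owns the token budgeting that used to live inside Chat:
  // with an 8,192-token model, the summary budget is floor(8192 * 0.10) = 819
  // tokens and the chunk threshold is floor(8192 * 0.7) = 5734 tokens.
  const contextWindow = new ContextWindow(fakeLlm);

  // Chat now takes (tokenizer, contextWindow?, logger?) and stores its
  // messages in a ChatLogs instance rather than an internal record.
  const chat = new Chat(cl100k_base, contextWindow);

  chat.persistent("system", "You are a helpful assistant.");
  chat.add("temporary", { role: "user", content: "Hello!" });

  // LlmApi implementations now receive plain ChatLogs instead of a Chat.
  const response = await fakeLlm.getResponse(chat.chatLogs, []);
  console.log(response?.content);

  // Summarization is delegated to ContextWindow when the logs overflow.
  await chat.fitToContextWindow();
  console.log(chat.tokens, chat.chatLogs.messages.length);
}

main().catch(console.error);

The net effect of the refactor is that token budgeting and summarization live in ContextWindow, message storage lives in ChatLogs, and Chat is reduced to a thin coordinator, which is why LlmApi.getResponse can now accept plain ChatLogs instead of a full Chat.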