chore: added 3 dev tests

agentcoinorg · Oct 3, 2023 · 6eb711a · 6eb711a
1 parent 99a48c6
commit 6eb711a
Show file tree

Hide file tree

Showing 18 changed files with 181 additions and 88 deletions.
diff --git a/apps/cli/package.json b/apps/cli/package.json
@@ -16,6 +16,7 @@
   },
   "dependencies": {
     "@evo-ninja/evo-agent": "~0.1.0",
+    "@evo-ninja/agent-debug": "~0.1.0",
     "@evo-ninja/agent-utils": "~0.1.0",
     "@evo-ninja/agent-utils-fs": "~0.1.0",
     "forked-agent-protocol": "0.0.5",

diff --git a/apps/cli/src/app.ts b/apps/cli/src/app.ts
@@ -1,5 +1,3 @@
-import { DebugLog, DebugLlmApi } from "./diagnostic";
-
 import { Evo } from "@evo-ninja/evo-agent";
 import {
   Env,
@@ -13,6 +11,7 @@ import {
   LlmApi,
   ContextWindow,
 } from "@evo-ninja/agent-utils";
+import { DebugLog, DebugLlmApi } from "@evo-ninja/agent-debug";
 import { FileSystemWorkspace, FileLogger } from "@evo-ninja/agent-utils-fs";
 import dotenv from "dotenv";
 import readline from "readline";

diff --git a/package.json b/package.json
@@ -17,9 +17,10 @@
     "start": "yarn workspace evo-ninja run start",
     "start:browser": "yarn workspace @evo-ninja/ui run start",
     "start:api": "yarn workspace evo-ninja run start:api",
-    "build": "yarn build:agent-utils && yarn build:agents && yarn build:cli && yarn build:browser",
+    "build": "yarn build:agent-utils && yarn build:agent-debug && yarn build:agents && yarn build:cli && yarn build:browser",
     "build:agent-utils": "yarn workspace @evo-ninja/agent-utils run build && yarn build:agent-utils-fs",
     "build:agent-utils-fs": "yarn workspace @evo-ninja/agent-utils-fs run build",
+    "build:agent-debug": "yarn workspace @evo-ninja/agent-debug run build",
     "build:agents": "yarn build:script-writer && yarn build:dev && yarn build:evo",
     "build:evo": "yarn workspace @evo-ninja/evo-agent run build",
     "build:dev": "yarn workspace @evo-ninja/dev-agent run build",
@@ -28,6 +29,6 @@
     "build:browser": "yarn workspace @evo-ninja/ui run build"
   },
   "devDependencies": {
-    "rimraf": "5.0.1"
+    "rimraf": "~5.0.1"
   }
 }
diff --git a/packages/agent-debug/package.json b/packages/agent-debug/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "@evo-ninja/agent-debug",
+  "version": "0.1.0",
+  "license": "MIT",
+  "main": "./build/index.js",
+  "scripts": {
+    "build": "rimraf build && tsc"
+  },
+  "dependencies": {
+    "@evo-ninja/agent-utils": "~0.1.0"
+  },
+  "devDependencies": {
+    "rimraf": "~5.0.1",
+    "ts-node": "10.9.1",
+    "typescript": "4.9.5"
+  }
+}
diff --git a/apps/cli/src/diagnostic/DebugLlmApi.ts → packages/agent-debug/src/DebugLlmApi.ts b/apps/cli/src/diagnostic/DebugLlmApi.ts → packages/agent-debug/src/DebugLlmApi.ts
@@ -22,8 +22,6 @@ export class DebugLlmApi implements LlmApi {
     functionDefinitions: any[],
     options?: LlmOptions | undefined
   ): Promise<ChatMessage | undefined> {
-    console.log(this.getModel());
-
     const time = new Timer();
     time.start();
 

diff --git a/apps/cli/src/diagnostic/DebugLlmReq.ts → packages/agent-debug/src/DebugLlmReq.ts b/apps/cli/src/diagnostic/DebugLlmReq.ts → packages/agent-debug/src/DebugLlmReq.ts
diff --git a/apps/cli/src/diagnostic/DebugLog.ts → packages/agent-debug/src/DebugLog.ts b/apps/cli/src/diagnostic/DebugLog.ts → packages/agent-debug/src/DebugLog.ts
diff --git a/apps/cli/src/diagnostic/Timer.ts → packages/agent-debug/src/Timer.ts b/apps/cli/src/diagnostic/Timer.ts → packages/agent-debug/src/Timer.ts
diff --git a/apps/cli/src/diagnostic/index.ts → packages/agent-debug/src/index.ts b/apps/cli/src/diagnostic/index.ts → packages/agent-debug/src/index.ts
diff --git a/packages/agent-debug/tsconfig.json b/packages/agent-debug/tsconfig.json
@@ -0,0 +1,10 @@
+{
+  "extends": "../../tsconfig",
+  "compilerOptions": {
+    "outDir": "build"
+  },
+  "include": [
+    "./src/**/*.ts"
+  ],
+  "exclude": []
+}
diff --git a/packages/agent-utils/src/agent/basicFunctionCallLoop.ts b/packages/agent-utils/src/agent/basicFunctionCallLoop.ts
@@ -45,12 +45,12 @@ export async function* basicFunctionCallLoop<TContext extends { llm: LlmApi, cha
 
       result.value.messages.forEach(x => chat.temporary(x));
 
+      const terminate = functionCalled && shouldTerminate(functionCalled, result);
+
       for (let i = 0; i < result.value.outputs.length; i++) {
         const output = result.value.outputs[i];
 
-        if (i === result.value.outputs.length - 1 &&
-          functionCalled && shouldTerminate(functionCalled, result)
-        ) {
+        if (i === result.value.outputs.length - 1 && terminate) {
           return ResultOk(output);
         }
 

diff --git a/packages/dev/.gitignore b/packages/dev/.gitignore
@@ -0,0 +1 @@
+src/__tests__/test-cases/
diff --git a/packages/dev/package.json b/packages/dev/package.json
@@ -9,11 +9,11 @@
   },
   "dependencies": {
     "@polywrap/result": "~0.12.0",
-    "@evo-ninja/agent-utils": "~0.1.0",
-    "openai": "~3.3.0"
+    "@evo-ninja/agent-utils": "~0.1.0"
   },
   "devDependencies": {
     "@types/jest": "29.5.0",
+    "@evo-ninja/agent-debug": "~0.1.0",
     "@evo-ninja/agent-utils-fs": "~0.1.0",
     "gpt-tokenizer": "~2.1.1",
     "jest": "29.5.0",

diff --git a/packages/dev/src/__tests__/dev-agent.spec.ts b/packages/dev/src/__tests__/dev-agent.spec.ts
@@ -1,3 +1,5 @@
+import { DevAgent } from "../DevAgent";
+
 import {
   Env,
   Scripts,
@@ -6,16 +8,14 @@ import {
   ContextWindow,
   LlmApi,
   ConsoleLogger,
-  Logger,
-  InMemoryWorkspace
+  Logger
 } from "@evo-ninja/agent-utils";
-import {
-  FileSystemWorkspace
-} from "@evo-ninja/agent-utils-fs";
+import { FileSystemWorkspace } from "@evo-ninja/agent-utils-fs";
+import { DebugLog, DebugLlmApi } from "@evo-ninja/agent-debug";
+import * as rimraf from "rimraf";
 import dotenv from "dotenv";
 import path from "path";
 import cl100k_base from "gpt-tokenizer/cjs/encoding/cl100k_base";
-import { DevAgent } from "../DevAgent";
 
 dotenv.config({
   path: path.join(__dirname, "../../../../.env")
@@ -25,7 +25,15 @@ jest.setTimeout(120000);
 
 describe('Dev Agent Test Suite', () => {
 
-  function createDevAgent(): DevAgent {
+  function createDevAgent(testName: string): {
+    agent: DevAgent;
+    debugLog: DebugLog;
+  } {
+    const testCaseDir = path.join(__dirname, "test-cases", testName);
+
+    // reset the dir
+    rimraf.sync(testCaseDir);
+
     const env = new Env(process.env as Record<string, string>);
     const logger = new Logger([new ConsoleLogger()], {
       promptUser: () => {
@@ -44,6 +52,11 @@ describe('Dev Agent Test Suite', () => {
       logger
     );
 
+    const debugLog = new DebugLog(
+      new FileSystemWorkspace(path.join(testCaseDir, "./debug"))
+    );
+    const debugLlm = new DebugLlmApi(debugLog, llm);
+
     const contextWindow = new ContextWindow(llm);
     const chat = new Chat(cl100k_base, contextWindow, logger);
 
@@ -53,35 +66,76 @@ describe('Dev Agent Test Suite', () => {
     );
     const scripts = new Scripts(scriptsWorkspace, "./");
 
-    const workspace = new InMemoryWorkspace();
+    const workspace = new FileSystemWorkspace(testCaseDir);
 
-    return new DevAgent(
-      llm,
-      chat,
-      workspace,
-      scripts,
-      logger
-    );
+    return {
+      agent: new DevAgent(
+        debugLlm,
+        chat,
+        workspace,
+        scripts,
+        logger
+      ),
+      debugLog
+    };
   }
 
-  async function runDevAgent(agent: DevAgent, goal: string) {
+  async function runDevAgent(agent: DevAgent, goal: string, debugLog: DebugLog) {
+    debugLog.goalStart(goal);
     const iterator = agent.run(goal);
 
     while (true) {
+      debugLog.stepStart();
       const response = await iterator.next();
+      debugLog.stepEnd();
 
       if (response.done) {
+        if (!response.value.ok) {
+          debugLog.stepError(response.value.error ?? "Unknown error");
+        } else {
+          debugLog.stepLog(JSON.stringify(response.value.value));
+        }
         return response;
       }
     }
   }
 
-  test("tick-tack-toe in python", async () => {
-    const dev = createDevAgent();
-    const response = await runDevAgent(dev, "Build a tick-tack-toe game in python");
+  test("tic-tac-toe", async () => {
+    const { agent, debugLog } = createDevAgent("tic-tac-toe");
+    const response = await runDevAgent(
+      agent,
+      "Build a Tic-Tac-Toe game using a python CLI. Here are the specifications.\n\nThe Grid: The game board is a 3x3 grid, consisting of 3 rows and 3 columns, creating a total of 9 squares.\n\nPlayers: There are two players. One player uses the number \"1\", and the other player uses the number \"2\".\n\nTaking Turns: Players take turns to put their respective numbers (\"1\" or \"2\") in an empty square of the grid. Once a player has placed their number in a square, it cannot be changed or removed.\n\nObjective: The goal is to get three of your numbers in a row, either horizontally, vertically, or diagonally.\n\nEnd of the Game: The game concludes in one of two ways: One player gets three of their numbers in a row (horizontally, vertically, or diagonally) and is declared the winner.\nAll squares on the grid are filled, and no player has three in a row. This situation is a \"draw\" or a \"tie\".\n\nTechnical specifications:\nBuild a file called tic_tac_toe.py. This file will be called through command lines. You will have to prompt users for their move. Player 1 will always start.\nPlayers will input their move in the following format: \"x,y\" where x and y represent the location in the grid (0,0 is top left, 2,2 is bottom right).\n\nYour primary requirement is to halt the game when appropriate and to print only one of these three exact sentences:\n\n\"Player 1 won!\"\n\"Player 2 won!\"\n\"Draw\"\n\nEdge cases: A player can send an incorrect location. Either the location is incorrect or the square is already filled. In this case, this counts as doing nothing, and the player gets prompted for new locations again.\n\n\nYou will be expected to create a python file called tic_tac_toe.py that will run through command lines by using ```python tic_tac_toe.py```.\n\nHere is an example of how your tic_tac_toe.py game will be tested.\n```\nprocess = subprocess.Popen(\n    ['python', 'tic_tac_toe.py'],\n    stdout=subprocess.PIPE,\n    text=True\n)\n\noutput, _ = process.communicate('\\n'.join([\"0,0\", \"1,0\", \"0,1\", \"1,1\", \"0,2\"]))\n\nassert \"Player 1 won!\" in output\n```",
+      debugLog
+    );
+
+    expect(response.value.ok).toBe(true);
+    const sourceCode = agent.workspace.readFileSync("tic_tac_toe.py");
+    expect(sourceCode).toBeTruthy();
+  });
+
+  test("three-sum", async () => {
+    const { agent, debugLog } = createDevAgent("three-sum");
+    const response = await runDevAgent(
+      agent,
+      "Create a three_sum function in a file called sample_code.py. Given an array of integers, return indices of the three numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 20, Because nums[0] + nums[1] + nums[2] = 2 + 7 + 11 = 20, return [0, 1, 2].",
+      debugLog
+    );
+
+    expect(response.value.ok).toBe(true);
+    const sourceCode = agent.workspace.readFileSync("sample_code.py");
+    expect(sourceCode).toBeTruthy();
+  });
+
+  test("file-organizer", async () => {
+    const { agent, debugLog } = createDevAgent("file-organizer");
+    const response = await runDevAgent(
+      agent,
+      "Create a file organizer CLI tool in Python that sorts files in a directory based on their file types (e.g., images, documents, audio) and moves them into these corresponding folders: 'images', 'documents', 'audio'. The entry point will be a python file that can be run this way: python organize_files.py --directory_path=YOUR_DIRECTORY_PATH",
+      debugLog
+    );
 
-    console.log(response);
     expect(response.value.ok).toBe(true);
-    console.log(dev.workspace.readdirSync("./"))
+    const sourceCode = agent.workspace.readFileSync("organize_files.py");
+    expect(sourceCode).toBeTruthy();
   });
 });
diff --git a/packages/dev/src/agent-functions/agent_onGoalAchieved.ts b/packages/dev/src/agent-functions/agent_onGoalAchieved.ts
@@ -4,19 +4,29 @@ import { AgentContext } from "../AgentContext";
 import {
   AgentFunction,
   AgentFunctionResult,
+  AgentOutputType
 } from "@evo-ninja/agent-utils";
 
 const FN_NAME = "agent_onGoalAchieved";
 
 const SUCCESS = (): AgentFunctionResult => ({
-  outputs: [],
+  outputs: [
+    {
+      type: AgentOutputType.Success,
+      title: "[dev] agent_onGoalAchieved"
+    }
+  ],
   messages: []
 });
 
 export const agent_onGoalAchieved: AgentFunction<AgentContext> = {
   definition: {
     name: FN_NAME,
     description: "Informs the user that the goal has been achieved.",
+    parameters: {
+      type: "object",
+      properties: { },
+    }
   },
   buildExecutor(context: AgentContext) {
     return createScriptExecutor(

diff --git a/packages/dev/src/agent-functions/agent_onGoalFailed.ts b/packages/dev/src/agent-functions/agent_onGoalFailed.ts
@@ -4,19 +4,29 @@ import { AgentContext } from "../AgentContext";
 import {
   AgentFunction,
   AgentFunctionResult,
+  AgentOutputType
 } from "@evo-ninja/agent-utils";
 
 const FN_NAME = "agent_onGoalFailed";
 
 const SUCCESS = (): AgentFunctionResult => ({
-  outputs: [],
+  outputs: [
+    {
+      type: AgentOutputType.Error,
+      title: "[dev] agent_onGoalFailed"
+    }
+  ],
   messages: []
 });
 
 export const agent_onGoalFailed: AgentFunction<AgentContext> = {
   definition: {
     name: FN_NAME,
     description: "Informs the user that the agent could not achieve the goal.",
+    parameters: {
+      type: "object",
+      properties: { },
+    }
   },
   buildExecutor(context: AgentContext) {
     return createScriptExecutor(

diff --git a/packages/dev/src/prompts.ts b/packages/dev/src/prompts.ts
@@ -1,8 +1,7 @@
 import { AgentFunctionDefinition } from "@evo-ninja/agent-utils";
 
 export const INITIAL_PROMP = (functions: AgentFunctionDefinition[]) =>
-  `You are an expert software engineer named "dev". You have access to the following functions to accomplish your goal:\n` +
-  functions.map((def) => (`${def.name}: ${def.description}`)).join("\n");
+  `You are an expert software engineer named "dev".`;
 
 export const GOAL_PROMPT = (goal: string) =>
   `You have been asked by the user to achieve the following goal: ${goal}`;