Skip to content

Commit

Permalink
chore: added 3 dev tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
dOrgJelli committed Oct 3, 2023
1 parent 99a48c6 commit 6eb711a
Show file tree
Hide file tree
Showing 18 changed files with 181 additions and 88 deletions.
1 change: 1 addition & 0 deletions apps/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
},
"dependencies": {
"@evo-ninja/evo-agent": "~0.1.0",
"@evo-ninja/agent-debug": "~0.1.0",
"@evo-ninja/agent-utils": "~0.1.0",
"@evo-ninja/agent-utils-fs": "~0.1.0",
"forked-agent-protocol": "0.0.5",
Expand Down
3 changes: 1 addition & 2 deletions apps/cli/src/app.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { DebugLog, DebugLlmApi } from "./diagnostic";

import { Evo } from "@evo-ninja/evo-agent";
import {
Env,
Expand All @@ -13,6 +11,7 @@ import {
LlmApi,
ContextWindow,
} from "@evo-ninja/agent-utils";
import { DebugLog, DebugLlmApi } from "@evo-ninja/agent-debug";
import { FileSystemWorkspace, FileLogger } from "@evo-ninja/agent-utils-fs";
import dotenv from "dotenv";
import readline from "readline";
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
"start": "yarn workspace evo-ninja run start",
"start:browser": "yarn workspace @evo-ninja/ui run start",
"start:api": "yarn workspace evo-ninja run start:api",
"build": "yarn build:agent-utils && yarn build:agents && yarn build:cli && yarn build:browser",
"build": "yarn build:agent-utils && yarn build:agent-debug && yarn build:agents && yarn build:cli && yarn build:browser",
"build:agent-utils": "yarn workspace @evo-ninja/agent-utils run build && yarn build:agent-utils-fs",
"build:agent-utils-fs": "yarn workspace @evo-ninja/agent-utils-fs run build",
"build:agent-debug": "yarn workspace @evo-ninja/agent-debug run build",
"build:agents": "yarn build:script-writer && yarn build:dev && yarn build:evo",
"build:evo": "yarn workspace @evo-ninja/evo-agent run build",
"build:dev": "yarn workspace @evo-ninja/dev-agent run build",
Expand All @@ -28,6 +29,6 @@
"build:browser": "yarn workspace @evo-ninja/ui run build"
},
"devDependencies": {
"rimraf": "5.0.1"
"rimraf": "~5.0.1"
}
}
17 changes: 17 additions & 0 deletions packages/agent-debug/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "@evo-ninja/agent-debug",
"version": "0.1.0",
"license": "MIT",
"main": "./build/index.js",
"scripts": {
"build": "rimraf build && tsc"
},
"dependencies": {
"@evo-ninja/agent-utils": "~0.1.0"
},
"devDependencies": {
"rimraf": "~5.0.1",
"ts-node": "10.9.1",
"typescript": "4.9.5"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ export class DebugLlmApi implements LlmApi {
functionDefinitions: any[],
options?: LlmOptions | undefined
): Promise<ChatMessage | undefined> {
console.log(this.getModel());

const time = new Timer();
time.start();

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 10 additions & 0 deletions packages/agent-debug/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"extends": "../../tsconfig",
"compilerOptions": {
"outDir": "build"
},
"include": [
"./src/**/*.ts"
],
"exclude": []
}
6 changes: 3 additions & 3 deletions packages/agent-utils/src/agent/basicFunctionCallLoop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ export async function* basicFunctionCallLoop<TContext extends { llm: LlmApi, cha

result.value.messages.forEach(x => chat.temporary(x));

const terminate = functionCalled && shouldTerminate(functionCalled, result);

for (let i = 0; i < result.value.outputs.length; i++) {
const output = result.value.outputs[i];

if (i === result.value.outputs.length - 1 &&
functionCalled && shouldTerminate(functionCalled, result)
) {
if (i === result.value.outputs.length - 1 && terminate) {
return ResultOk(output);
}

Expand Down
1 change: 1 addition & 0 deletions packages/dev/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/__tests__/test-cases/
4 changes: 2 additions & 2 deletions packages/dev/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
},
"dependencies": {
"@polywrap/result": "~0.12.0",
"@evo-ninja/agent-utils": "~0.1.0",
"openai": "~3.3.0"
"@evo-ninja/agent-utils": "~0.1.0"
},
"devDependencies": {
"@types/jest": "29.5.0",
"@evo-ninja/agent-debug": "~0.1.0",
"@evo-ninja/agent-utils-fs": "~0.1.0",
"gpt-tokenizer": "~2.1.1",
"jest": "29.5.0",
Expand Down
96 changes: 75 additions & 21 deletions packages/dev/src/__tests__/dev-agent.spec.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { DevAgent } from "../DevAgent";

import {
Env,
Scripts,
Expand All @@ -6,16 +8,14 @@ import {
ContextWindow,
LlmApi,
ConsoleLogger,
Logger,
InMemoryWorkspace
Logger
} from "@evo-ninja/agent-utils";
import {
FileSystemWorkspace
} from "@evo-ninja/agent-utils-fs";
import { FileSystemWorkspace } from "@evo-ninja/agent-utils-fs";
import { DebugLog, DebugLlmApi } from "@evo-ninja/agent-debug";
import * as rimraf from "rimraf";
import dotenv from "dotenv";
import path from "path";
import cl100k_base from "gpt-tokenizer/cjs/encoding/cl100k_base";
import { DevAgent } from "../DevAgent";

dotenv.config({
path: path.join(__dirname, "../../../../.env")
Expand All @@ -25,7 +25,15 @@ jest.setTimeout(120000);

describe('Dev Agent Test Suite', () => {

function createDevAgent(): DevAgent {
function createDevAgent(testName: string): {
agent: DevAgent;
debugLog: DebugLog;
} {
const testCaseDir = path.join(__dirname, "test-cases", testName);

// reset the dir
rimraf.sync(testCaseDir);

const env = new Env(process.env as Record<string, string>);
const logger = new Logger([new ConsoleLogger()], {
promptUser: () => {
Expand All @@ -44,6 +52,11 @@ describe('Dev Agent Test Suite', () => {
logger
);

const debugLog = new DebugLog(
new FileSystemWorkspace(path.join(testCaseDir, "./debug"))
);
const debugLlm = new DebugLlmApi(debugLog, llm);

const contextWindow = new ContextWindow(llm);
const chat = new Chat(cl100k_base, contextWindow, logger);

Expand All @@ -53,35 +66,76 @@ describe('Dev Agent Test Suite', () => {
);
const scripts = new Scripts(scriptsWorkspace, "./");

const workspace = new InMemoryWorkspace();
const workspace = new FileSystemWorkspace(testCaseDir);

return new DevAgent(
llm,
chat,
workspace,
scripts,
logger
);
return {
agent: new DevAgent(
debugLlm,
chat,
workspace,
scripts,
logger
),
debugLog
};
}

async function runDevAgent(agent: DevAgent, goal: string) {
async function runDevAgent(agent: DevAgent, goal: string, debugLog: DebugLog) {
debugLog.goalStart(goal);
const iterator = agent.run(goal);

while (true) {
debugLog.stepStart();
const response = await iterator.next();
debugLog.stepEnd();

if (response.done) {
if (!response.value.ok) {
debugLog.stepError(response.value.error ?? "Unknown error");
} else {
debugLog.stepLog(JSON.stringify(response.value.value));
}
return response;
}
}
}

test("tick-tack-toe in python", async () => {
const dev = createDevAgent();
const response = await runDevAgent(dev, "Build a tick-tack-toe game in python");
test("tic-tac-toe", async () => {
const { agent, debugLog } = createDevAgent("tic-tac-toe");
const response = await runDevAgent(
agent,
"Build a Tic-Tac-Toe game using a python CLI. Here are the specifications.\n\nThe Grid: The game board is a 3x3 grid, consisting of 3 rows and 3 columns, creating a total of 9 squares.\n\nPlayers: There are two players. One player uses the number \"1\", and the other player uses the number \"2\".\n\nTaking Turns: Players take turns to put their respective numbers (\"1\" or \"2\") in an empty square of the grid. Once a player has placed their number in a square, it cannot be changed or removed.\n\nObjective: The goal is to get three of your numbers in a row, either horizontally, vertically, or diagonally.\n\nEnd of the Game: The game concludes in one of two ways: One player gets three of their numbers in a row (horizontally, vertically, or diagonally) and is declared the winner.\nAll squares on the grid are filled, and no player has three in a row. This situation is a \"draw\" or a \"tie\".\n\nTechnical specifications:\nBuild a file called tic_tac_toe.py. This file will be called through command lines. You will have to prompt users for their move. Player 1 will always start.\nPlayers will input their move in the following format: \"x,y\" where x and y represent the location in the grid (0,0 is top left, 2,2 is bottom right).\n\nYour primary requirement is to halt the game when appropriate and to print only one of these three exact sentences:\n\n\"Player 1 won!\"\n\"Player 2 won!\"\n\"Draw\"\n\nEdge cases: A player can send an incorrect location. Either the location is incorrect or the square is already filled. 
In this case, this counts as doing nothing, and the player gets prompted for new locations again.\n\n\nYou will be expected to create a python file called tic_tac_toe.py that will run through command lines by using ```python tic_tac_toe.py```.\n\nHere is an example of how your tic_tac_toe.py game will be tested.\n```\nprocess = subprocess.Popen(\n ['python', 'tic_tac_toe.py'],\n stdout=subprocess.PIPE,\n text=True\n)\n\noutput, _ = process.communicate('\\n'.join([\"0,0\", \"1,0\", \"0,1\", \"1,1\", \"0,2\"]))\n\nassert \"Player 1 won!\" in output\n```",
debugLog
);

expect(response.value.ok).toBe(true);
const sourceCode = agent.workspace.readFileSync("tic_tac_toe.py");
expect(sourceCode).toBeTruthy();
});

test("three-sum", async () => {
const { agent, debugLog } = createDevAgent("three-sum");
const response = await runDevAgent(
agent,
"Create a three_sum function in a file called sample_code.py. Given an array of integers, return indices of the three numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 20, Because nums[0] + nums[1] + nums[2] = 2 + 7 + 11 = 20, return [0, 1, 2].",
debugLog
);

expect(response.value.ok).toBe(true);
const sourceCode = agent.workspace.readFileSync("sample_code.py");
expect(sourceCode).toBeTruthy();
});

test("file-organizer", async () => {
const { agent, debugLog } = createDevAgent("file-organizer");
const response = await runDevAgent(
agent,
"Create a file organizer CLI tool in Python that sorts files in a directory based on their file types (e.g., images, documents, audio) and moves them into these corresponding folders: 'images', 'documents', 'audio'. The entry point will be a python file that can be run this way: python organize_files.py --directory_path=YOUR_DIRECTORY_PATH",
debugLog
);

console.log(response);
expect(response.value.ok).toBe(true);
console.log(dev.workspace.readdirSync("./"))
const sourceCode = agent.workspace.readFileSync("organize_files.py");
expect(sourceCode).toBeTruthy();
});
});
12 changes: 11 additions & 1 deletion packages/dev/src/agent-functions/agent_onGoalAchieved.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,29 @@ import { AgentContext } from "../AgentContext";
import {
AgentFunction,
AgentFunctionResult,
AgentOutputType
} from "@evo-ninja/agent-utils";

const FN_NAME = "agent_onGoalAchieved";

const SUCCESS = (): AgentFunctionResult => ({
outputs: [],
outputs: [
{
type: AgentOutputType.Success,
title: "[dev] agent_onGoalAchieved"
}
],
messages: []
});

export const agent_onGoalAchieved: AgentFunction<AgentContext> = {
definition: {
name: FN_NAME,
description: "Informs the user that the goal has been achieved.",
parameters: {
type: "object",
properties: { },
}
},
buildExecutor(context: AgentContext) {
return createScriptExecutor(
Expand Down
12 changes: 11 additions & 1 deletion packages/dev/src/agent-functions/agent_onGoalFailed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,29 @@ import { AgentContext } from "../AgentContext";
import {
AgentFunction,
AgentFunctionResult,
AgentOutputType
} from "@evo-ninja/agent-utils";

const FN_NAME = "agent_onGoalFailed";

const SUCCESS = (): AgentFunctionResult => ({
outputs: [],
outputs: [
{
type: AgentOutputType.Error,
title: "[dev] agent_onGoalFailed"
}
],
messages: []
});

export const agent_onGoalFailed: AgentFunction<AgentContext> = {
definition: {
name: FN_NAME,
description: "Informs the user that the agent could not achieve the goal.",
parameters: {
type: "object",
properties: { },
}
},
buildExecutor(context: AgentContext) {
return createScriptExecutor(
Expand Down
3 changes: 1 addition & 2 deletions packages/dev/src/prompts.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { AgentFunctionDefinition } from "@evo-ninja/agent-utils";

export const INITIAL_PROMP = (functions: AgentFunctionDefinition[]) =>
`You are an expert software engineer named "dev". You have access to the following functions to accomplish your goal:\n` +
functions.map((def) => (`${def.name}: ${def.description}`)).join("\n");
`You are an expert software engineer named "dev".`;

export const GOAL_PROMPT = (goal: string) =>
`You have been asked by the user to achieve the following goal: ${goal}`;
Expand Down
Loading

0 comments on commit 6eb711a

Please sign in to comment.