Add memory agent and caching functionality 🧠✨ (#761)

* Add memory agent and caching functionality 🧠✨ * Add memory cache support and enhance agent memory management 🧠✨ * Remove incomplete bullet point from system.mdx 📄✂️ * Refactor memory handling: rename `memory` to `disableMemory` 🚀 * Improve GitHub detection regex for system inclusion 🔍 * Refactor workflow parameters and update branch-specific failed run search 🚀🔍 * Fix typo in 'retrieves' across documentation and source files 📚🛠️ * Add agent memory section and disableMemory option 🧠🔧 * Update agents.mdx to clarify `agent_memory` usage and enable instructions 📚🧠 * Remove memory usage and redundant info in system agent configs ✨ * Update workflow scripts for failure handling and memory caching 🔄 * Update guidelines: duplicated exclusion rules 📝
microsoft · Oct 8, 2024 · b8e246a · b8e246a
1 parent 15882aa
commit b8e246a
Show file tree

Hide file tree

Showing 34 changed files with 341 additions and 51 deletions.
diff --git a/.github/workflows/genai-investigator.yml b/.github/workflows/genai-investigator.yml
@@ -38,15 +38,15 @@ jobs:
                   OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }}
                   GENAISCRIPT_VAR_BRANCH: ${{ github.event.workflow_run.head_branch }}
             - name: genaiscript github-agent
-              run: node packages/cli/built/genaiscript.cjs run github-agent -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "run=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
+              run: node packages/cli/built/genaiscript.cjs run github-agent -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
               env:
                   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
                   OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
                   OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }}
                   OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }}
                   GENAISCRIPT_VAR_BRANCH: ${{ github.event.workflow_run.head_branch }}
             - name: genaiscript github-one
-              run: node packages/cli/built/genaiscript.cjs run github-one -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "run=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
+              run: node packages/cli/built/genaiscript.cjs run github-one -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
               env:
                   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
                   OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts
diff --git a/docs/src/components/BuiltinAgents.mdx b/docs/src/components/BuiltinAgents.mdx
@@ -11,4 +11,5 @@ import { LinkCard } from '@astrojs/starlight/components';
 <LinkCard title="agent git" description="query a repository using Git to accomplish tasks. Provide all the context information available to execute git queries." href="/genaiscript/reference/scripts/system#systemagent_git" />
 <LinkCard title="agent github" description="query GitHub to accomplish tasks" href="/genaiscript/reference/scripts/system#systemagent_github" />
 <LinkCard title="agent interpreter" description="run code interpreters for Python, Math. Use this agent to ground computation questions." href="/genaiscript/reference/scripts/system#systemagent_interpreter" />
+<LinkCard title="agent memory" description="queries the memories created by other agent conversations." href="/genaiscript/reference/scripts/system#systemagent_memory" />
 <LinkCard title="agent user_input" description="ask user for input to confirm, select or answer the question in the query. The message should be very clear and provide all the context." href="/genaiscript/reference/scripts/system#systemagent_user_input" />
diff --git a/docs/src/content/docs/reference/scripts/agents.mdx b/docs/src/content/docs/reference/scripts/agents.mdx
@@ -148,3 +148,15 @@ We use the `system` parameter to configure the tools exposed to the LLM. In this
 ```
 
 The full source of this agent is defined in the [system.agent_github](/genaiscript/reference/scripts/system/#systemagent_github) system prompt.
+
+## Agent Memory
+
+The `agent_memory` is a special agent that queries the memories created by other agent conversations. It is used to store and retrieve information from the LLM's memory.
+
+All agents contribute to the conversation memory and use the `agent_memory` tool, unless it is explicitly disabled using `disableMemory`.
+
+```js "disableMemory: true"
+defAgent(..., { disableMemory: true })
+```
+
+To enable agent memory in the top level script, add the `agent_memory` tool.
diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx
@@ -235,7 +235,6 @@ defAgent(
     "query GitHub to accomplish tasks",
     `Your are a helpfull LLM agent that can query GitHub to accomplish tasks. Answer the question in QUERY.
     - Prefer diffing job logs rather downloading entire logs which can be very large.
-    - Pull Requests ar a specialized type of issues.
     `,
     {
         model,
@@ -290,6 +289,57 @@ defAgent(
 `````
 
 
+### `system.agent_memory`
+
+agent that retrieves memories
+
+
+
+
+
+`````js wrap title="system.agent_memory"
+system({
+    title: "agent that retrieves memories",
+})
+
+const cache = await host.cache("agent_memory")
+defAgent(
+    "memory",
+    "queries the memories created by other agent conversations.",
+    async (ctx) => {
+        const memories = await cache.values()
+        ctx.$`Your are a helpfull LLM agent that acts as a knowledge base for memories created by other agents.
+
+    Answer the question in QUERY with the memories in MEMORY.
+
+    - Use the information in MEMORY exclusively to answer the question in QUERY.
+    - If the information in MEMORY is not enough to answer the question in QUERY, respond <NO_MEMORY>.
+    - The memory 
+    `
+        memories.reverse().forEach(
+            ({ agent, query, answer }) =>
+                ctx.def(
+                    "MEMORY",
+                    `${agent}> ${query}?
+                ${answer}
+                `
+                ),
+            {
+                flex: 1,
+            }
+        )
+    },
+    {
+        model: "openai:gpt-4o",
+        flexTokens: 30000,
+        system: ["system"],
+        disableMemory: true,
+    }
+)
+
+`````
+
+
 ### `system.agent_user_input`
 
 Agent that can asks questions to the user.
@@ -1063,7 +1113,7 @@ defTool(
         context.log(`github action list jobs for run ${run_id}`)
         const res = await github.listWorkflowJobs(run_id)
         return CSV.stringify(
-            res.map(({ id, name, status }) => ({ id, name, status })),
+            res.map(({ id, name, conclusion }) => ({ id, name, conclusion })),
             { header: true }
         )
     }

diff --git a/eval/extrism/genaisrc/genaiscript.d.ts b/eval/extrism/genaisrc/genaiscript.d.ts
diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts
diff --git a/packages/auto/genaiscript.d.ts b/packages/auto/genaiscript.d.ts
diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts
@@ -33,6 +33,7 @@ import {
     DOCS_CONFIGURATION_URL,
     TRACE_DETAILS,
     CLI_ENV_VAR_RX,
+    AGENT_MEMORY_CACHE_NAME,
 } from "../../core/src/constants"
 import { isCancelError, errorMessage } from "../../core/src/error"
 import { Fragment, GenerationResult } from "../../core/src/generation"
@@ -72,6 +73,7 @@ import { writeFileSync } from "node:fs"
 import { prettifyMarkdown } from "../../core/src/markdown"
 import { delay } from "es-toolkit"
 import { GenerationStats } from "../../core/src/usage"
+import { MemoryCache } from "../../core/src/cache"
 
 function parseVars(
     vars: string[],
@@ -334,6 +336,7 @@ export async function runScript(
         return fail("runtime error", RUNTIME_ERROR_CODE)
     }
     if (!isQuiet) logVerbose("") // force new line
+
     if (outAnnotations && result.annotations?.length) {
         if (isJSONLFilename(outAnnotations))
             await appendJSONL(outAnnotations, result.annotations)

diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts
@@ -255,3 +255,5 @@ export const CLI_ENV_VAR_RX = /^genaiscript_var_/i
 
 export const GIT_DIFF_MAX_TOKENS = 8000
 export const MAX_TOOL_CONTENT_TOKENS = 4000
+
+export const AGENT_MEMORY_CACHE_NAME = "agent_memory"
diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts
diff --git a/packages/core/src/genaisrc/system.agent_github.genai.mjs b/packages/core/src/genaisrc/system.agent_github.genai.mjs
@@ -9,7 +9,6 @@ defAgent(
     "query GitHub to accomplish tasks",
     `Your are a helpfull LLM agent that can query GitHub to accomplish tasks. Answer the question in QUERY.
     - Prefer diffing job logs rather downloading entire logs which can be very large.
-    - Pull Requests ar a specialized type of issues.
     `,
     {
         model,

diff --git a/packages/core/src/genaisrc/system.agent_memory.genai.mjs b/packages/core/src/genaisrc/system.agent_memory.genai.mjs
@@ -0,0 +1,38 @@
+system({
+    title: "agent that retrieves memories",
+})
+
+const cache = await host.cache("agent_memory")
+defAgent(
+    "memory",
+    "queries the memories created by other agent conversations.",
+    async (ctx) => {
+        const memories = await cache.values()
+        ctx.$`Your are a helpfull LLM agent that acts as a knowledge base for memories created by other agents.
+
+    Answer the question in QUERY with the memories in MEMORY.
+
+    - Use the information in MEMORY exclusively to answer the question in QUERY.
+    - If the information in MEMORY is not enough to answer the question in QUERY, respond <NO_MEMORY>.
+    - The memory 
+    `
+        memories.reverse().forEach(
+            ({ agent, query, answer }) =>
+                ctx.def(
+                    "MEMORY",
+                    `${agent}> ${query}?
+                ${answer}
+                `
+                ),
+            {
+                flex: 1,
+            }
+        )
+    },
+    {
+        model: "openai:gpt-4o",
+        flexTokens: 30000,
+        system: ["system"],
+        disableMemory: true,
+    }
+)
diff --git a/packages/core/src/genaisrc/system.github_actions.genai.mjs b/packages/core/src/genaisrc/system.github_actions.genai.mjs
@@ -89,7 +89,7 @@ defTool(
         context.log(`github action list jobs for run ${run_id}`)
         const res = await github.listWorkflowJobs(run_id)
         return CSV.stringify(
-            res.map(({ id, name, status }) => ({ id, name, status })),
+            res.map(({ id, name, conclusion }) => ({ id, name, conclusion })),
             { header: true }
         )
     }

diff --git a/packages/core/src/github.ts b/packages/core/src/github.ts
@@ -659,23 +659,30 @@ export class GitHubClient implements GitHub {
 
     async listWorkflowJobs(
         run_id: number,
-        options?: GitHubPaginationOptions
+        options?: { filter?: "all" | "latest" } & GitHubPaginationOptions
     ): Promise<GitHubWorkflowJob[]> {
         // Get the jobs for the specified workflow run
         const { client, owner, repo } = await this.client()
-        const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}
+        const {
+            filter,
+            count = GITHUB_REST_PAGE_DEFAULT,
+            ...rest
+        } = options ?? {}
         const ite = client.paginate.iterator(
             client.rest.actions.listJobsForWorkflowRun,
             {
                 owner,
                 repo,
                 run_id,
+                filter,
             }
         )
         const jobs = await paginatorToArray(ite, count, (i) => i.data)
 
         const res: GitHubWorkflowJob[] = []
         for (const job of jobs) {
+            if (job.conclusion === "skipped" || job.conclusion === "cancelled")
+                continue
             const { url: logs_url } =
                 await client.rest.actions.downloadJobLogsForWorkflowRun({
                     owner,
Original file line number	Diff line number	Diff line change
Expand Up		@@ -255,3 +255,5 @@ export const CLI_ENV_VAR_RX = /^genaiscript_var_/i

		export const GIT_DIFF_MAX_TOKENS = 8000
		export const MAX_TOOL_CONTENT_TOKENS = 4000

		export const AGENT_MEMORY_CACHE_NAME = "agent_memory"