Enable caching for LLM requests with configurable cache names #677

Merged · 3 commits · Aug 29, 2024
6 changes: 3 additions & 3 deletions docs/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 14 additions & 23 deletions docs/src/content/docs/reference/scripts/cache.mdx
@@ -7,13 +7,21 @@
---

import { FileTree } from "@astrojs/starlight/components"

Check warning on line 10 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build)

The statement about LLM requests caching has been changed. It was previously stated that LLM requests are cached by default, but the updated content states that they are not cached by default. This is a significant change and should be reviewed for accuracy.

generated by pr-docs-review-commit content_change
LLM requests are cached by default. This means that if a script generates the same prompt for the same model, the cache may be used.
LLM requests are **NOT** cached by default. However, you can turn on LLM request caching from `script` metadata or the CLI arguments.

- the `temperature` is less than 0.5
- the `top_p` is less than 0.5
- no [functions](./functions.md) are used as they introduce randomness
- `seed` is not used
```js "cache: true"
script({
...,
cache: true
})
```

or

```sh "--cache"
npx genaiscript run ... --cache
```

Check notice on line 24 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build)

New content has been added to explain how to enable LLM request caching. This includes a JavaScript code snippet and a shell command. Ensure that these instructions are correct and clear for users.

generated by pr-docs-review-commit content_addition

New code examples have been added to illustrate how to enable LLM request caching. Ensure these examples are correct and clear to the reader.

generated by pr-docs-review-commit code_example_added


The cache is stored in the `.genaiscript/cache/chat.jsonl` file. You can delete this file to clear the cache.
This file is excluded from git by default.
@@ -26,32 +34,15 @@

</FileTree>
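
For illustration, a minimal sketch of clearing the default cache programmatically, assuming a Node.js (ESM) environment and the default `.genaiscript` layout described above; the helper name is made up for this example:

```ts
// Sketch only: removes the default chat cache file described above.
// Assumes a Node.js (ESM) environment and the default .genaiscript layout.
import { rm } from "node:fs/promises"

async function clearChatCache(dir = ".genaiscript/cache"): Promise<void> {
    // force: true makes this a no-op if the file does not exist
    await rm(`${dir}/chat.jsonl`, { force: true })
}

await clearChatCache()
```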

The section on disabling the cache has been removed. If this information is still relevant and useful, consider adding it back to the documentation.

generated by pr-docs-review-commit content_removal

## Disabling

You can always disable the cache using the `cache` option in `script`.

```js
script({
...,
cache: false // always off
})
```

Or using the `--no-cache` flag in the CLI.

```sh
npx genaiscript run .... --no-cache
```

## Custom cache file

Use the `cacheName` option to specify a custom cache file name.
The name will be used to create a file in the `.genaiscript/cache` directory.

```js
script({
...,

The property name in the JavaScript code snippet has been changed from 'cacheName' to 'cache'. This could potentially confuse users if not properly explained in the surrounding text.

generated by pr-docs-review-commit content_change

cacheName: "summary"
cache: "summary"

Check warning on line 45 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build)

The property name for specifying a custom cache file name has been changed from 'cacheName' to 'cache'. This change should be reviewed for accuracy and consistency with the rest of the codebase.
})

Check failure on line 46 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build)

The section on disabling the cache has been removed. This information might be important for users who want to disable caching. Consider adding it back or providing an alternative way to disable caching.

generated by pr-docs-review-commit content_removal

```

6 changes: 3 additions & 3 deletions genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 11 additions & 10 deletions packages/cli/src/run.ts
@@ -101,13 +101,15 @@
exitCode === SUCCESS_ERROR_CODE ||
UNRECOVERABLE_ERROR_CODES.includes(exitCode)
)
break

const delayMs = 2000 * Math.pow(2, r)
console.error(
`error: run failed with ${exitCode}, retry #${r + 1}/${runRetry} in ${delayMs}ms`
)
await delay(delayMs)
if (runRetry > 1) {
console.error(
`error: run failed with ${exitCode}, retry #${r + 1}/${runRetry} in ${delayMs}ms`
)
await delay(delayMs)
}

Check failure on line 112 in packages/cli/src/run.ts (GitHub Actions / build)

The retry logic has been changed to only retry if `runRetry` is greater than 1. This could potentially skip necessary retries if `runRetry` is 1, which might lead to unexpected behavior. Please ensure this is the intended behavior.

generated by pr-review-commit retry_logic

}
process.exit(exitCode)
}
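
For reference, a self-contained sketch of the retry-with-exponential-backoff pattern this hunk implements; the exit codes, retry count, and `runOnce` callback below are placeholders, not the actual CLI internals:

```ts
// Sketch only: exponential backoff with the retry guard discussed above.
// SUCCESS / UNRECOVERABLE exit codes and runOnce are placeholders, not the CLI's real values.
const SUCCESS = 0
const UNRECOVERABLE = [64, 70] // illustrative "do not retry" exit codes

const delay = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms))

async function runWithRetries(
    runOnce: () => Promise<number>,
    runRetry = 1
): Promise<number> {
    let exitCode = -1
    for (let r = 0; r < runRetry; r++) {
        exitCode = await runOnce()
        // stop on success or on failures that retrying cannot fix
        if (exitCode === SUCCESS || UNRECOVERABLE.includes(exitCode)) break
        // only log and wait when more than one attempt was requested
        if (runRetry > 1) {
            const delayMs = 2000 * Math.pow(2, r) // 2s, 4s, 8s, ...
            console.error(
                `error: run failed with ${exitCode}, retry #${r + 1}/${runRetry} in ${delayMs}ms`
            )
            await delay(delayMs)
        }
    }
    return exitCode
}
```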
@@ -156,7 +158,7 @@
const jsSource = options.jsSource

const fail = (msg: string, exitCode: number) => {
logVerbose(msg)
logError(msg)
return { exitCode, result }
}

@@ -303,9 +305,6 @@
return fail("runtime error", RUNTIME_ERROR_CODE)
}
if (!isQuiet) logVerbose("") // force new line
if (result.status !== "success" && result.status !== "cancelled")
logVerbose(result.statusText ?? result.status)

if (outAnnotations && result.annotations?.length) {
if (isJSONLFilename(outAnnotations))
await appendJSONL(outAnnotations, result.annotations)
@@ -485,8 +484,10 @@
}
}
// final fail
if (result.error && !isCancelError(result.error))
return fail(errorMessage(result.error), RUNTIME_ERROR_CODE)
if (result.status !== "success" && result.status !== "cancelled") {
const msg = errorMessage(result.error) ?? result.statusText
return fail(msg, RUNTIME_ERROR_CODE)
}

if (failOnErrors && result.annotations?.some((a) => a.severity === "error"))
return fail("error annotations found", ANNOTATION_ERROR_CODE)

2 changes: 1 addition & 1 deletion packages/core/src/chattypes.ts
@@ -86,7 +86,7 @@ export interface ChatCompletionsOptions {
requestOptions?: Partial<Omit<RequestInit, "signal">>
maxCachedTemperature?: number
maxCachedTopP?: number
cache?: boolean
cache?: boolean | string
cacheName?: string
retry?: number
retryDelay?: number
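Since `cache` now accepts either a boolean or a string, downstream code has to resolve a cache name from it, in the spirit of the `openai.ts` change later in this PR. A rough sketch; the helper and constant names are illustrative only, not the library's API:

```ts
// Sketch: resolving a cache name from the new `boolean | string` option.
// DEFAULT_CACHE and resolveCacheName are illustrative names, not part of the codebase.
const DEFAULT_CACHE = "chat"

function resolveCacheName(
    cache: boolean | string | undefined,
    cacheName?: string
): string | undefined {
    // caching is off by default and when explicitly disabled
    if (cache === undefined || cache === false) return undefined
    // a string selects a custom cache; `true` falls back to the legacy cacheName or the default
    return typeof cache === "string" ? cache : cacheName ?? DEFAULT_CACHE
}

resolveCacheName(true) // "chat"
resolveCacheName("summary") // "summary"
resolveCacheName(false) // undefined, no caching
```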
4 changes: 1 addition & 3 deletions packages/core/src/constants.ts
@@ -2,8 +2,6 @@ export const CHANGE = "change"
export const TRACE_CHUNK = "traceChunk"
export const RECONNECT = "reconnect"
export const OPEN = "open"
export const MAX_CACHED_TEMPERATURE = 0.5
export const MAX_CACHED_TOP_P = 0.5
export const MAX_TOOL_CALLS = 10000

// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
@@ -211,7 +209,7 @@ export const GITHUB_API_VERSION = "2022-11-28"
export const GITHUB_TOKEN = "GITHUB_TOKEN"

export const AI_REQUESTS_CACHE = "airequests"
export const CHAT_CACHE = "chatv2"
export const CHAT_CACHE = "chat"
export const GITHUB_PULL_REQUEST_REVIEWS_CACHE = "prr"
export const GITHUB_PULLREQUEST_REVIEW_COMMENT_LINE_DISTANCE = 5

6 changes: 3 additions & 3 deletions packages/core/src/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 14 additions & 21 deletions packages/core/src/openai.ts
@@ -2,8 +2,6 @@
import { LanguageModelConfiguration, host } from "./host"
import {
AZURE_OPENAI_API_VERSION,
MAX_CACHED_TEMPERATURE,
MAX_CACHED_TOP_P,
MODEL_PROVIDER_OPENAI,
TOOL_ID,
} from "./constants"
@@ -50,13 +48,10 @@
options,
trace
) => {
const { temperature, top_p, seed, tools } = req
const {
requestOptions,
partialCb,
maxCachedTemperature = MAX_CACHED_TEMPERATURE,
maxCachedTopP = MAX_CACHED_TOP_P,
cache: useCache,
cache: cacheOrName,
cacheName,
retry,
retryDelay,
@@ -69,18 +64,12 @@
const { model } = parseModelIdentifier(req.model)
const encoder = await resolveTokenEncoder(model)

const cache = getChatCompletionCache(cacheName)
const caching =
useCache === true || // always use cache
(useCache !== false && // never use cache
seed === undefined && // seed is not cacheable (let the LLM make the run deterministic)
!tools?.length && // assume tools are non-deterministic by default
(isNaN(temperature) ||
isNaN(maxCachedTemperature) ||
temperature < maxCachedTemperature) && // high temperature is not cacheable (it's too random)
(isNaN(top_p) || isNaN(maxCachedTopP) || top_p < maxCachedTopP))
trace.itemValue(`caching`, caching)
const cachedKey = caching
const cache = getChatCompletionCache(
typeof cacheOrName === "string" ? cacheOrName : cacheName
)
trace.itemValue(`caching`, !!cache)
trace.itemValue(`cache`, cache?.name)
const cachedKey = !!cacheOrName
? <ChatCompletionRequestCacheKey>{
...req,
...cfgNoToken,
@@ -160,7 +149,11 @@
try {
body = await r.text()
} catch (e) {}
const { error } = JSON5TryParse(body, {}) as { error: any }
const { error, message } = JSON5TryParse(body, {}) as {
error: any
message: string
}
if (message) trace.error(message)
if (error)
trace.error(undefined, <SerializedError>{
name: error.code,
@@ -169,7 +162,7 @@
})
throw new RequestError(
r.status,
r.statusText,
message ?? error?.message ?? r.statusText,
error,
body,
normalizeInt(r.headers.get("retry-after"))
@@ -263,11 +256,11 @@
responseSoFar: chatResp,
tokensSoFar: numTokens,
responseChunk: progress,
inner
inner,
})
}
pref = chunk
}

Check failure on line 263 in packages/core/src/openai.ts (GitHub Actions / build)

The error handling logic has been changed to include a `message` field. However, the `message` field is not always guaranteed to be present in the response. This could potentially lead to undefined values being used in error messages. Please ensure to handle the case where `message` might be undefined.
}

async function listModels(
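Regarding the annotation above about a possibly missing `message`: a standalone sketch of the fallback chain used when building the error text, assuming an arbitrary error payload shape (the names below are illustrative, not the provider's actual response type):

```ts
// Sketch: choosing the most specific error text available when fields may be missing.
// ErrorPayload is an illustrative shape, not the provider's actual response type.
interface ErrorPayload {
    message?: string
    error?: { code?: string; message?: string }
}

function errorText(payload: ErrorPayload, statusText: string): string {
    // fall back left to right: top-level message, nested error message, HTTP status text
    return payload.message ?? payload.error?.message ?? statusText
}

errorText({}, "Bad Request") // "Bad Request"
errorText({ error: { message: "rate limited" } }, "Too Many Requests") // "rate limited"
```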
2 changes: 1 addition & 1 deletion packages/core/src/server/messages.ts
@@ -67,10 +67,10 @@
maxTokens: string
maxToolCalls: string
maxDataRepairs: string
model: string

Check failure on line 70 in packages/core/src/server/messages.ts (GitHub Actions / build)

The type of `cache` has been changed from `boolean` to `boolean | string`. This could potentially break existing code that expects `cache` to be a boolean. Please ensure that all usage of `cache` has been updated to handle the new type.

generated by pr-review-commit cache_type_change

embeddingsModel: string
csvSeparator: string
cache: boolean
cache: boolean | string
cacheName: string
applyEdits: boolean
failOnErrors: boolean
6 changes: 3 additions & 3 deletions packages/core/src/types/prompt_template.d.ts
@@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions {
seed?: number

/**
* If true, the prompt will be cached. If false, the LLM chat is never cached.
* Leave empty to use the default behavior.
* By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name
*/
cache?: boolean
cache?: boolean | string

/**
* Custom cache name. If not set, the default cache is used.
* @deprecated Use `cache` instead with a string
*/
cacheName?: string
}
3 changes: 1 addition & 2 deletions packages/sample/genaisrc/cache.genai.mts
@@ -1,7 +1,6 @@
script({
model: "openai:gpt-3.5-turbo",
cache: true,
cacheName: "gpt-cache",
cache: "gpt-cache",
tests: [{}, {}], // run twice to trigger caching
})

6 changes: 3 additions & 3 deletions packages/sample/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions packages/sample/genaisrc/node/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions packages/sample/genaisrc/python/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions packages/sample/genaisrc/style/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/sample/genaisrc/summary-of-summary-gpt35.genai.js
@@ -15,7 +15,7 @@ for (const file of env.files) {
_.def("FILE", file)
_.$`Summarize FILE. Be concise.`
},
{ model: "gpt-3.5-turbo", cacheName: "summary_gpt35" }
{ model: "gpt-3.5-turbo", cache: "summary_gpt35" }
)
// save the summary in the main prompt
def("FILE", { filename: file.filename, content: text })
4 changes: 2 additions & 2 deletions packages/sample/genaisrc/summary-of-summary-phi3.genai.js
@@ -5,7 +5,7 @@ script({
tests: {
files: ["src/rag/*.md"],
keywords: ["markdown", "lorem", "microsoft"],
}
},
})

// summarize each files individually
@@ -15,7 +15,7 @@ for (const file of env.files) {
_.def("FILE", file)
_.$`Extract keywords for the contents of FILE.`
},
{ model: "ollama:phi3", cacheName: "summary_phi3" }
{ model: "ollama:phi3", cache: "summary_phi3" }
)
def("FILE", { ...file, content: text })
}