diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e78f60f5..07c57291 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,7 +23,8 @@ jobs:
- name: Download latest llama.cpp release
env:
CI: true
- run: node ./dist/cli/cli.js source download --release latest --skipBuild --noBundle --noUsageExample --updateBinariesReleaseMetadataAndSaveGitBundle
+ # Switched to `b3808` instead of `latest` due to a build failure on the latest version. `b3808` is the previous release.
+ run: node ./dist/cli/cli.js source download --release b3808 --skipBuild --noBundle --noUsageExample --updateBinariesReleaseMetadataAndSaveGitBundle
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
@@ -640,7 +641,7 @@ jobs:
if: |
always() &&
github.event_name == 'push' &&
- (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') &&
+ github.ref == 'refs/heads/master' &&
needs.build.result == 'success' &&
needs.resolve-next-release.result == 'success' &&
needs.resolve-next-release.outputs.next-version != '' &&
@@ -654,7 +655,7 @@ jobs:
concurrency: update-documentation-website-${{ github.ref }}
environment:
name: Documentation website
-# url: "https://node-llama-cpp.withcat.ai"
+ url: "https://node-llama-cpp.withcat.ai"
needs:
- build
- resolve-next-release
@@ -704,32 +705,25 @@ jobs:
git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch
npm run docs:build
- - name: Upload docs
- uses: actions/upload-artifact@v4
+ - name: Upload docs to GitHub Pages
+ uses: actions/upload-pages-artifact@v3
with:
- include-hidden-files: true
- retention-days: 2
- name: "docs-site"
+ name: pages-docs
path: docs-site
-# - name: Upload docs to GitHub Pages
-# uses: actions/upload-pages-artifact@v3
-# with:
-# name: pages-docs
-# path: docs-site
-# - name: Deploy docs to GitHub Pages
-# uses: actions/deploy-pages@v4
-# with:
-# artifact_name: pages-docs
-# - name: Update feed
-# run: |
-# curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"
+ - name: Deploy docs to GitHub Pages
+ uses: actions/deploy-pages@v4
+ with:
+ artifact_name: pages-docs
+ - name: Update feed
+ run: |
+ curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"
update-documentation-website-no-release:
name: Update documentation website - no version release
if: |
always() &&
github.event_name == 'push' &&
- (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') &&
+ github.ref == 'refs/heads/master' &&
needs.build.result == 'success' &&
needs.resolve-next-release.result == 'success' &&
needs.resolve-next-release.outputs.next-version == 'false'
@@ -737,7 +731,7 @@ jobs:
concurrency: update-documentation-website-${{ github.ref }}
environment:
name: Documentation website
-# url: "https://node-llama-cpp.withcat.ai"
+ url: "https://node-llama-cpp.withcat.ai"
needs:
- build
- resolve-next-release
@@ -760,12 +754,12 @@ jobs:
- name: Move artifacts
run: |
mv artifacts/build dist/
-
+
cp -r artifacts/llama.cpp/llama.cpp llama/llama.cpp
-
+
rm -f ./llama/binariesGithubRelease.json
mv artifacts/llama.cpp/binariesGithubRelease.json ./llama/binariesGithubRelease.json
-
+
rm -f ./llama/llama.cpp.info.json
mv artifacts/llama.cpp/llama.cpp.info.json ./llama/llama.cpp.info.json
- name: Resolve docs version
@@ -783,25 +777,18 @@ jobs:
run: |
export DOCS_PACKAGE_VERSION="$(cat ./docsVersion.txt)"
echo "Package version: $DOCS_PACKAGE_VERSION"
-
+
git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch
npm run docs:build
- - name: Upload docs
- uses: actions/upload-artifact@v4
+ - name: Upload docs to GitHub Pages
+ uses: actions/upload-pages-artifact@v3
with:
- include-hidden-files: true
- retention-days: 2
- name: "docs-site"
+ name: pages-docs
path: docs-site
- # - name: Upload docs to GitHub Pages
- # uses: actions/upload-pages-artifact@v3
- # with:
- # name: pages-docs
- # path: docs-site
- # - name: Deploy docs to GitHub Pages
- # uses: actions/deploy-pages@v4
- # with:
- # artifact_name: pages-docs
- # - name: Update feed
- # run: |
- # curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"
+ - name: Deploy docs to GitHub Pages
+ uses: actions/deploy-pages@v4
+ with:
+ artifact_name: pages-docs
+ - name: Update feed
+ run: |
+ curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"
diff --git a/.vitepress/theme/index.ts b/.vitepress/theme/index.ts
index 894b6a06..fc6d4789 100644
--- a/.vitepress/theme/index.ts
+++ b/.vitepress/theme/index.ts
@@ -17,19 +17,19 @@ import type {EnhanceAppContext} from "vitepress";
export default {
extends: Theme,
Layout: () => {
- const text = "v3.0.0 is here!";
+ const text = "v3.0 is here!";
const link = "/blog/v3";
const hideDate = new Date("2025-01-01T00:00:00Z");
return h(LayoutContainer, null, h(Theme.Layout, null, {
- // "home-hero-info-before": () => h(LatestVersionHomeBadge, {
- // type: "desktop",
- // text, link, hideDate
- // }),
- // "home-hero-actions-after": () => h(LatestVersionHomeBadge, {
- // type: "mobile",
- // text, link, hideDate
- // }),
+ "home-hero-info-before": () => h(LatestVersionHomeBadge, {
+ type: "desktop",
+ text, link, hideDate
+ }),
+ "home-hero-actions-after": () => h(LatestVersionHomeBadge, {
+ type: "mobile",
+ text, link, hideDate
+ }),
"doc-after": () => h(CommentsSection)
}));
},
diff --git a/README.md b/README.md
index 2e2417f0..3c996f74 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
-✨ New! [`v3.0.0` is here!](https://github.com/withcatai/node-llama-cpp/pull/105) ✨ (included: function calling, automatic chat wrapper detection, embedding support, and more)
+✨ [`v3.0` is here!](https://node-llama-cpp.withcat.ai/blog/v3) ✨
## Features
* Run LLMs locally on your machine
diff --git a/docs/blog/v3.md b/docs/blog/v3.md
new file mode 100644
index 00000000..6b87ccd4
--- /dev/null
+++ b/docs/blog/v3.md
@@ -0,0 +1,125 @@
+---
+title: node-llama-cpp v3.0
+date: 2024-09-23T22:00:00Z
+author:
+ name: Gilad S.
+ github: giladgd
+category: Release
+description: Learn more about the new features in node-llama-cpp v3.0!
+image:
+ url: https://github.com/user-attachments/assets/c7ed2eab-fb50-426d-9019-aed40147f30e
+ alt: Celebrate
+ width: 3072
+ height: 1536
+---
+[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) 3.0 is finally here.
+
+With [`node-llama-cpp`](https://node-llama-cpp.withcat.ai), you can run large language models locally on your machine using the power of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) with a simple and easy-to-use API.
+
+It includes everything you need, from downloading models, to running them in the most optimized way for your hardware, to integrating them into your projects.
+
+---
+
+## Why `node-llama-cpp`?
+You might be wondering: why choose `node-llama-cpp` over the OpenAI-compatible API of a service running on your machine?
+
+The answer is simple: simplicity, performance, and flexibility.
+
+Let's break it down:
+
+### Simplicity
+To use `node-llama-cpp`, you install it like any other npm package, and you're good to go.
+
+To run your project, all you have to do is `npm install` and `npm start`. That's it.
+
+No installing additional software on your machine, no setting up API keys or environment variables, no setup process at all.
+Everything is self-contained in your project, giving you complete control over it.
+
+With `node-llama-cpp`, you can run large language models on your machine using Node.js and TypeScript, _without_ any Python at all.
+Say goodbye to setup headaches, "it works on my machine" issues, and all other Python-related problems.
+
+While `llama.cpp` is an amazing project, it's also highly technical and can be challenging for beginners.
+`node-llama-cpp` bridges that gap, making `llama.cpp` accessible to everyone, regardless of their experience level.
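+
+Here's a minimal sketch of what that looks like in practice (the model path below is a placeholder; point it at any GGUF model file you have locally):
+```typescript
+import {fileURLToPath} from "url";
+import path from "path";
+import {getLlama, LlamaChatSession} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    // placeholder path; use any GGUF model file you have locally
+    modelPath: path.join(__dirname, "models", "my-model.gguf")
+});
+const context = await model.createContext();
+const session = new LlamaChatSession({
+    contextSequence: context.getSequence()
+});
+
+const answer = await session.prompt("Hi there, how are you?");
+console.log(answer);
+```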
+
+### Performance
+[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) is built on top of [`llama.cpp`](https://github.com/ggerganov/llama.cpp), a highly optimized C++ library for running large language models.
+
+`llama.cpp` supports many compute backends, including Metal, CUDA, and Vulkan. It also uses [Accelerate](https://developer.apple.com/accelerate/) on Mac.
+
+`node-llama-cpp` automatically adapts to your hardware and adjusts the default settings to give you the best performance,
+so you don't _have_ to configure anything to use it.
+
+By using `node-llama-cpp`, you are essentially running models _inside_ your project.
+With no overhead from network calls or data serialization,
+you can more effectively take advantage of the stateful nature of inference operations.
+
+For example, you can prompt a model on top of an existing conversation inference state,
+without re-evaluating the entire history just to process the new prompt.
+
+This reduces the time it takes to start generating a response, and makes more efficient use of your resources.
+
+If you were using an API, you would have to re-evaluate the entire history every time you prompt the model,
+or have the API store the state for you, which can use huge amounts of disk space.
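+
+Here's a sketch of how that looks with a chat session (the model path is a placeholder):
+```typescript
+import {getLlama, LlamaChatSession} from "node-llama-cpp";
+
+const llama = await getLlama();
+const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // placeholder path
+const context = await model.createContext();
+const session = new LlamaChatSession({contextSequence: context.getSequence()});
+
+// the first prompt evaluates the conversation from scratch
+const first = await session.prompt("Give me three facts about llamas");
+
+// this follow-up builds on the existing inference state,
+// so only the new prompt is evaluated, not the entire history
+const followUp = await session.prompt("Now expand on the second fact");
+console.log(followUp);
+```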
+
+### Flexibility
+Since `node-llama-cpp` runs inside your project, you can also deploy it together with your project.
+
+You can run models in your [Electron](../guide/electron.md) app without requiring any additional setup on the user's machine.
+
+You can build libraries that use large language models and distribute them as npm packages,
+or deploy self-contained Docker images and run them on any hardware you want.
+
+You can use [any model you want](../guide/choosing-a-model.md), or even create your own and use it with `node-llama-cpp`.
+
+Download models [as part of `npm install`](../guide/downloading-models.md) or [on-demand from your code](../guide/downloading-models.md#programmatic).
+
+[Tweak inference settings](../guide/chat-session.md#repeat-penalty) to get better results for your particular use case.
+
+`node-llama-cpp` is regularly updated with the latest `llama.cpp` release,
+but you can also [download and build the latest release](../guide/building-from-source.md#download-new-release) at any time with a single command.
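+
+For example, here's a minimal sketch of downloading a model on demand (the model URL below is a placeholder):
+```typescript
+import {fileURLToPath} from "url";
+import path from "path";
+import {createModelDownloader} from "node-llama-cpp";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+const downloader = await createModelDownloader({
+    modelUrl: "https://example.com/model.gguf", // placeholder URL
+    dirPath: path.join(__dirname, "models")
+});
+const modelPath = await downloader.download();
+console.log("Model saved to", modelPath);
+```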
+
+The possibilities are endless.
+You have full control over the models you use, how you use them, and where you use them.
+You can tailor `node-llama-cpp` to your needs in ways that aren't possible with an OpenAI API (at least not efficiently or easily).
+
+## Powerful Features
+`node-llama-cpp` includes everything you need to use large language models in your projects,
+with convenient wrappers for popular tasks (a brief sketch follows the list), such as:
+* [Enforcing a JSON schema](../guide/chat-session.md#response-json-schema) on the output the model generates
+* Providing the model with [functions it can call on demand](../guide/chat-session.md#function-calling) to retrieve information or perform actions, even with some models that don't officially support it
+* [Generating completion](../guide/text-completion.md) for a given text
+* [Embedding text](../guide/embedding.md) for similarity searches or other tasks
+* Much more
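+
+Here's a brief sketch of two of these in action (the model path is a placeholder, and `getDate` is a hypothetical function defined just for this example):
+```typescript
+import {getLlama, LlamaChatSession, defineChatSessionFunction} from "node-llama-cpp";
+
+const llama = await getLlama();
+const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // placeholder path
+const context = await model.createContext();
+const session = new LlamaChatSession({contextSequence: context.getSequence()});
+
+// enforce a JSON schema on the model's output
+const grammar = await llama.createGrammarForJsonSchema({
+    type: "object",
+    properties: {
+        name: {type: "string"},
+        yearFounded: {type: "number"}
+    }
+});
+const res = await session.prompt("Tell me about Node.js", {grammar});
+const info = grammar.parse(res); // a parsed object matching the schema
+console.log(info);
+
+// provide a function the model can call on demand
+const functions = {
+    getDate: defineChatSessionFunction({
+        description: "Retrieve the current date",
+        handler() {
+            return new Date().toLocaleDateString();
+        }
+    })
+};
+const answer = await session.prompt("What's the date today?", {functions});
+console.log(answer);
+```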
+
+## Why Node.js?
+JavaScript is the most popular programming language in the world, and Node.js is the most popular runtime for server-side JavaScript applications.
+Developers choose Node.js for its versatility, reliability, ease of use, forward compatibility, and the vast ecosystem of npm packages.
+
+While Python is currently the go-to language for data science and machine learning,
+the needs of data scientists differ from those of developers building services and applications.
+
+`node-llama-cpp` bridges this gap, making it easier to integrate large language models into Node.js and Electron projects,
+while focusing on the needs of developers building services and applications.
+
+## Try It Out
+`node-llama-cpp` comes with comprehensive documentation, covering everything from installation to advanced usage.
+It's beginner-friendly, with explanations for every step of the way for those who are new to the world of large language models,
+while remaining flexible enough for advanced usage by those with more experience.
+
+Experience the ease of running models on your machine with this single command:
+```shell
+npx -y node-llama-cpp chat
+```
+
+Check out the [getting started guide](../guide/index.md) to learn how to use `node-llama-cpp`.
+
+## Thank You
+`node-llama-cpp` is only possible thanks to the amazing work done on [`llama.cpp`](https://github.com/ggerganov/llama.cpp) by [Georgi Gerganov](https://github.com/ggerganov), [Slaren](https://github.com/slaren) and all the contributors from the community.
+
+## What's next?
+Version 3.0 is a major milestone, but there's plenty more planned for the future.
+
+Check out the [roadmap](https://github.com/orgs/withcatai/projects/1) to see what's coming next,
+and [give `node-llama-cpp` a star on GitHub](https://github.com/withcatai/node-llama-cpp) to support the project.