diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e78f60f5..07c57291 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,8 @@ jobs: - name: Download latest llama.cpp release env: CI: true - run: node ./dist/cli/cli.js source download --release latest --skipBuild --noBundle --noUsageExample --updateBinariesReleaseMetadataAndSaveGitBundle + # Switched to `b3808` instead of `latest` due to a build failure on the latest version. `b3808` is the previous release. + run: node ./dist/cli/cli.js source download --release b3808 --skipBuild --noBundle --noUsageExample --updateBinariesReleaseMetadataAndSaveGitBundle - name: Upload build artifact uses: actions/upload-artifact@v4 with: @@ -640,7 +641,7 @@ jobs: if: | always() && github.event_name == 'push' && - (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') && + github.ref == 'refs/heads/master' && needs.build.result == 'success' && needs.resolve-next-release.result == 'success' && needs.resolve-next-release.outputs.next-version != '' && @@ -654,7 +655,7 @@ jobs: concurrency: update-documentation-website-${{ github.ref }} environment: name: Documentation website -# url: "https://node-llama-cpp.withcat.ai" + url: "https://node-llama-cpp.withcat.ai" needs: - build - resolve-next-release @@ -704,32 +705,25 @@ jobs: git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch npm run docs:build - - name: Upload docs - uses: actions/upload-artifact@v4 + - name: Upload docs to GitHub Pages + uses: actions/upload-pages-artifact@v3 with: - include-hidden-files: true - retention-days: 2 - name: "docs-site" + name: pages-docs path: docs-site -# - name: Upload docs to GitHub Pages -# uses: actions/upload-pages-artifact@v3 -# with: -# name: pages-docs -# path: docs-site -# - name: Deploy docs to GitHub Pages -# uses: actions/deploy-pages@v4 -# with: -# artifact_name: pages-docs -# - name: Update feed -# run: | -# curl -X POST 
"https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom" + - name: Deploy docs to GitHub Pages + uses: actions/deploy-pages@v4 + with: + artifact_name: pages-docs + - name: Update feed + run: | + curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom" update-documentation-website-no-release: name: Update documentation website - no version release if: | always() && github.event_name == 'push' && - (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') && + github.ref == 'refs/heads/master' && needs.build.result == 'success' && needs.resolve-next-release.result == 'success' && needs.resolve-next-release.outputs.next-version == 'false' @@ -737,7 +731,7 @@ jobs: concurrency: update-documentation-website-${{ github.ref }} environment: name: Documentation website -# url: "https://node-llama-cpp.withcat.ai" + url: "https://node-llama-cpp.withcat.ai" needs: - build - resolve-next-release @@ -760,12 +754,12 @@ jobs: - name: Move artifacts run: | mv artifacts/build dist/ - + cp -r artifacts/llama.cpp/llama.cpp llama/llama.cpp - + rm -f ./llama/binariesGithubRelease.json mv artifacts/llama.cpp/binariesGithubRelease.json ./llama/binariesGithubRelease.json - + rm -f ./llama/llama.cpp.info.json mv artifacts/llama.cpp/llama.cpp.info.json ./llama/llama.cpp.info.json - name: Resolve docs version @@ -783,25 +777,18 @@ jobs: run: | export DOCS_PACKAGE_VERSION="$(cat ./docsVersion.txt)" echo "Package version: $DOCS_PACKAGE_VERSION" - + git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch npm run docs:build - - name: Upload docs - uses: actions/upload-artifact@v4 + - name: Upload docs to GitHub Pages + uses: actions/upload-pages-artifact@v3 with: - 
include-hidden-files: true - retention-days: 2 - name: "docs-site" + name: pages-docs path: docs-site - # - name: Upload docs to GitHub Pages - # uses: actions/upload-pages-artifact@v3 - # with: - # name: pages-docs - # path: docs-site - # - name: Deploy docs to GitHub Pages - # uses: actions/deploy-pages@v4 - # with: - # artifact_name: pages-docs - # - name: Update feed - # run: | - # curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom" + - name: Deploy docs to GitHub Pages + uses: actions/deploy-pages@v4 + with: + artifact_name: pages-docs + - name: Update feed + run: | + curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom" diff --git a/.vitepress/theme/index.ts b/.vitepress/theme/index.ts index 894b6a06..fc6d4789 100644 --- a/.vitepress/theme/index.ts +++ b/.vitepress/theme/index.ts @@ -17,19 +17,19 @@ import type {EnhanceAppContext} from "vitepress"; export default { extends: Theme, Layout: () => { - const text = "v3.0.0 is here!"; + const text = "v3.0 is here!"; const link = "/blog/v3"; const hideDate = new Date("2025-01-01T00:00:00Z"); return h(LayoutContainer, null, h(Theme.Layout, null, { - // "home-hero-info-before": () => h(LatestVersionHomeBadge, { - // type: "desktop", - // text, link, hideDate - // }), - // "home-hero-actions-after": () => h(LatestVersionHomeBadge, { - // type: "mobile", - // text, link, hideDate - // }), + "home-hero-info-before": () => h(LatestVersionHomeBadge, { + type: "desktop", + text, link, hideDate + }), + "home-hero-actions-after": () => h(LatestVersionHomeBadge, { + type: "mobile", + text, link, hideDate + }), "doc-after": () => h(CommentsSection) })); }, diff --git a/README.md b/README.md index 
2e2417f0..3c996f74 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ -✨ New! [`v3.0.0` is here!](https://github.com/withcatai/node-llama-cpp/pull/105) ✨ (included: function calling, automatic chat wrapper detection, embedding support, and more) +✨ [`v3.0` is here!](https://node-llama-cpp.withcat.ai/blog/v3) ✨ ## Features * Run LLMs locally on your machine diff --git a/docs/blog/v3.md b/docs/blog/v3.md new file mode 100644 index 00000000..6b87ccd4 --- /dev/null +++ b/docs/blog/v3.md @@ -0,0 +1,125 @@ +--- +title: node-llama-cpp v3.0 +date: 2024-09-23T22:00:00Z +author: + name: Gilad S. + github: giladgd +category: Release +description: Learn more about the new features in node-llama-cpp v3.0! +image: + url: https://github.com/user-attachments/assets/c7ed2eab-fb50-426d-9019-aed40147f30e + alt: Celebrate + width: 3072 + height: 1536 +--- +[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) 3.0 is finally here. + +With [`node-llama-cpp`](https://node-llama-cpp.withcat.ai), you can run large language models locally on your machine using the power of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) with a simple and easy-to-use API. + +It includes everything you need, from downloading models, to running them in the most optimized way for your hardware, and integrating them in your projects. + +--- + +## Why `node-llama-cpp`? +You might be wondering, why choose `node-llama-cpp` over using an OpenAI API of a service running on your machine? + +The answer is simple: simplicity, performance, and flexibility. + +Let's break it down: + +### Simplicity +To use `node-llama-cpp`, you install it like any other npm package, and you're good to go. + +To run your project, all you have to do is `npm install` and `npm start`. That's it. + +No installing additional software on your machine, no setting up API keys or environment variables, no setup process at all. +Everything is self-contained in your project, giving you complete control over it. 
+ +With `node-llama-cpp`, you can run large language models on your machine using Node.js and TypeScript, _without_ any Python at all. +Say goodbye to setup headaches, "it works on my machine" issues, and all other Python-related problems. + +While `llama.cpp` is an amazing project, it's also highly technical and can be challenging for beginners. +`node-llama-cpp` bridges that gap, making `llama.cpp` accessible to everyone, regardless of their experience level. + +### Performance +[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) is built on top of [`llama.cpp`](https://github.com/ggerganov/llama.cpp), a highly optimized C++ library for running large language models. + +`llama.cpp` supports many compute backends, including Metal, CUDA, and Vulkan. It also uses [Accelerate](https://developer.apple.com/accelerate/) on Mac. + +`node-llama-cpp` automatically adapts to your hardware and adjusts the default settings to give you the best performance, +so you don't _have_ to configure anything to use it. + +By using `node-llama-cpp` you are essentially running models _inside_ your project. +With no overhead of network calls or data serialization, +you can more effectively take advantage of the stateful nature of inference operations. + +For example, you can prompt a model on top of an existing conversation inference state, +without re-evaluating the entire history just to process the new prompt. +
+This reduces the time it takes to start generating a response, and makes more efficient use of your resources. + +If you were using an API, you would have to re-evaluate the entire history every time you prompt the model, +or have the API store the state for you, which can use huge amounts of disk space. + +### Flexibility +Since `node-llama-cpp` runs inside your project, you can also deploy it together with your project. +
+You can run models in your [Electron](../guide/electron.md) app without requiring any additional setup on the user's machine. + +You can build libraries that use large language models and distribute them as npm packages, +
+or deploy self-contained Docker images and run them on any hardware you want. + +You can use [any model you want](../guide/choosing-a-model.md), or even create your own and use it with `node-llama-cpp`. +
+Download models [as part of `npm install`](../guide/downloading-models.md) or [on-demand from your code](../guide/downloading-models.md#programmatic). + +[Tweak inference settings](../guide/chat-session.md#repeat-penalty) to get better results for your particular use case. + +`node-llama-cpp` is regularly updated with the latest `llama.cpp` release, +but you can also [download and build the latest release](../guide/building-from-source.md#download-new-release) at any time with a single command. + +The possibilities are endless. +You have full control over the models you use, how you use them, and where you use them. +You can tailor `node-llama-cpp` to your needs in ways that aren't possible with an OpenAI API (at least not efficiently or easily). + +## Powerful Features +`node-llama-cpp` includes a complete suite of everything you need to use large language models in your projects, +with convenient wrappers for popular tasks, such as: +* [Enforcing a JSON schema](../guide/chat-session.md#response-json-schema) on the output the model generates +* Providing the model with [functions it can call on demand](../guide/chat-session.md#function-calling) to retrieve information or perform actions, even with some models that don't officially support it +* [Generating completion](../guide/text-completion.md) for a given text +* [Embedding text](../guide/embedding.md) for similarity searches or other tasks +* Much more + +## Why Node.js? +JavaScript is the most popular programming language in the world, and Node.js is the most popular runtime for JavaScript server-side applications. +Developers choose Node.js for its versatility, reliability, ease of use, forward compatibility, and the vast ecosystem of npm packages. + +While Python is currently the go-to language for data science and machine learning, +the needs of data scientists differ from those of developers building services and applications. 
+ +`node-llama-cpp` bridges this gap, making it easier to integrate large language models into Node.js and Electron projects, +while focusing on the needs of developers building services and applications. + +## Try It Out +`node-llama-cpp` comes with comprehensive documentation, covering everything from installation to advanced usage. +It's beginner-friendly, with explanations for every step of the way for those who are new to the world of large language models, +while still being flexible enough to allow advanced usage for those who are more experienced and knowledgeable. + +Experience the ease of running models on your machine with this single command: +```shell +npx -y node-llama-cpp chat +``` + +Check out the [getting started guide](../guide/index.md) to learn how to use `node-llama-cpp`. + +## Thank You +`node-llama-cpp` is only possible thanks to the amazing work done on [`llama.cpp`](https://github.com/ggerganov/llama.cpp) by [Georgi Gerganov](https://github.com/ggerganov), [Slaren](https://github.com/slaren) and all the contributors from the community. + +## What's next? +Version 3.0 is a major milestone, but there's plenty more planned for the future. + +Check out the [roadmap](https://github.com/orgs/withcatai/projects/1) to see what's coming next, +
+and [give `node-llama-cpp` a star on GitHub](https://github.com/withcatai/node-llama-cpp) to support the project.