From 632b63af7c1a2c722c7620bec51bdfc7340084b2 Mon Sep 17 00:00:00 2001 From: thxCode Date: Mon, 29 Jul 2024 12:55:42 +0800 Subject: [PATCH] fix: embedding Signed-off-by: thxCode --- llama-box/main.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llama-box/main.cpp b/llama-box/main.cpp index a930f6e..9a39e91 100644 --- a/llama-box/main.cpp +++ b/llama-box/main.cpp @@ -725,6 +725,9 @@ struct server_context { bool load_model(const llama_box_params &bparams) { params = bparams.gparams; + // disable embeddings at load time; re-enabled per-batch via llama_set_embeddings + params.embedding = false; + // load multimodal projection model if (!params.mmproj.empty()) { if (params.n_ctx < 2048) { @@ -2439,6 +2442,9 @@ struct server_context { return; } + // make sure we're in the right embedding mode + llama_set_embeddings(ctx, batch_type == 1); + // process the created batch of tokens for (int32_t i = 0; i < batch.n_tokens; i += n_batch) { const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);