From 632b63af7c1a2c722c7620bec51bdfc7340084b2 Mon Sep 17 00:00:00 2001 From: thxCode Date: Mon, 29 Jul 2024 12:55:42 +0800 Subject: [PATCH] fix: embedding Signed-off-by: thxCode --- llama-box/main.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llama-box/main.cpp b/llama-box/main.cpp index a930f6e..9a39e91 100644 --- a/llama-box/main.cpp +++ b/llama-box/main.cpp @@ -725,6 +725,9 @@ struct server_context { bool load_model(const llama_box_params &bparams) { params = bparams.gparams; + // disable embeddings at load time; re-enabled per-batch via llama_set_embeddings + params.embedding = false; + // load multimodal projection model if (!params.mmproj.empty()) { if (params.n_ctx < 2048) { @@ -2439,6 +2442,9 @@ struct server_context { return; } + // make sure we're in the right embedding mode + llama_set_embeddings(ctx, batch_type == 1); + // process the created batch of tokens for (int32_t i = 0; i < batch.n_tokens; i += n_batch) { const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);