Skip to content

Commit

Permalink
feat: distinguish embedding-only model
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <thxcode0824@gmail.com>
  • Loading branch information
thxCode committed Jul 31, 2024
1 parent 7bae61d commit 510a29d
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 4 deletions.
6 changes: 3 additions & 3 deletions llama-box/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ add_custom_target(patch
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
VERBATIM
)
add_dependencies(patch version)
add_dependencies(patch version build_info)
add_dependencies(llama patch)

#
# llama-box
Expand All @@ -82,9 +83,8 @@ if (GGML_MUSA)
set(CMAKE_C_EXTENSIONS OFF)
set(CMAKE_CXX_COMPILER clang++)
set(CMAKE_CXX_EXTENSIONS OFF)
endif()
endif ()
add_executable(${TARGET} main.cpp param.hpp ratelimiter.hpp utils.hpp)
add_dependencies(${TARGET} patch)
target_link_libraries(${TARGET} PRIVATE version common llava ${CMAKE_THREAD_LIBS_INIT})
target_include_directories(${TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
if (WIN32)
Expand Down
16 changes: 16 additions & 0 deletions llama-box/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3396,6 +3396,14 @@ int main(int argc, char **argv) {

const auto handle_completions = [&ctx_server, &res_error](const httplib::Request &req,
httplib::Response &res) {
// llama_supports_embedding_only is added to llama.cpp by patches/embedding.patch.
if (llama_supports_embedding_only(ctx_server.ctx)) {
res.status = httplib::StatusCode::Forbidden_403;
res.set_content("You are not allowed to sample from this model",
"text/plain; charset=utf-8");
return;
}

int tps = 0;
{
const std::string tps_s = req.get_header_value("X-Request-Tokens-Per-Second");
Expand Down Expand Up @@ -3533,6 +3541,14 @@ int main(int argc, char **argv) {

const auto handle_chat_completions = [&ctx_server, &params, &res_error](
const httplib::Request &req, httplib::Response &res) {
// llama_supports_embedding_only is added to llama.cpp by patches/embedding.patch.
if (llama_supports_embedding_only(ctx_server.ctx)) {
res.status = httplib::StatusCode::Forbidden_403;
res.set_content("You are not allowed to sample from this model",
"text/plain; charset=utf-8");
return;
}

int tps = 0;
{
const std::string tps_s = req.get_header_value("X-Request-Tokens-Per-Second");
Expand Down
25 changes: 25 additions & 0 deletions llama-box/patches/embedding.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
diff --git a/include/llama.h b/include/llama.h
index f23355a6..96e16d89 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1178,6 +1178,8 @@ extern "C" {

LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);

+ LLAMA_API bool llama_supports_embedding_only (const struct llama_context * ctx);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/llama.cpp b/src/llama.cpp
index a207451f..f635b15a 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -19158,3 +19158,7 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
fputs(text, stderr);
fflush(stderr);
}
+
+bool llama_supports_embedding_only(const struct llama_context * ctx) {
+ return !ctx->cparams.causal_attn;
+}
2 changes: 1 addition & 1 deletion llama-box/scripts/patch-llama-cpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ if (Git_FOUND)
COMMAND ${GIT_EXECUTABLE} -C ${LLAMA_CPP_DIR} apply --whitespace=nowarn ${PATCH_FILE}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)
message(STATUS "Applied patches")
message(STATUS "Applied patch file ${PATCH_FILE}")
else ()
message(FATAL_ERROR "Failed to apply patches: Cannot apply patch file ${PATCH_FILE}")
endif ()
Expand Down

0 comments on commit 510a29d

Please sign in to comment.