From b3b3f6079dde0a9ab4f6586f1740166d96c0608e Mon Sep 17 00:00:00 2001 From: Christian M Date: Wed, 10 Jan 2024 19:43:51 +0100 Subject: [PATCH] :bug::whale: fix model path parameter and chat-ui env config --- chat-ui.env | 157 ++++++++++++++++++++++++++++++++++++++-- docker-compose.yml | 6 +- src/api/routes/model.rs | 2 +- src/llm/models/mod.rs | 2 +- src/server.rs | 2 +- 5 files changed, 154 insertions(+), 15 deletions(-) diff --git a/chat-ui.env b/chat-ui.env index c801def..a7e6db7 100644 --- a/chat-ui.env +++ b/chat-ui.env @@ -45,24 +45,131 @@ REJECT_UNAUTHORIZED=true # 'name', 'userMessageToken', 'assistantMessageToken' are required MODELS=`[ { - "name": "chat-flame-backend/Mistral", - "displayName": "Mistral-7B-Instruct-v0.1 on chat-flame-backend", + "name": "microsoft/phi-2", + "description": "1.3b and 2.7b LLM with state of the art performance for <10b models", + "websiteUrl": "https://huggingface.co/microsoft/phi-2", + "userMessageToken": "", + "userMessageEndToken": " [/INST] ", + "assistantMessageToken": "", + "assistantMessageEndToken": " [INST] ", + "preprompt": " ", + "chatPromptTemplate" : "Instruct: {{#each messages}}{{#ifUser}}{{content}}\n{{/ifUser}}{{#ifAssistant}}{{content}}\n{{/ifAssistant}}{{/each}}\nOutput:", + "endpoints": [{ + "type" : "tgi", + "url": "http://chat-flame-backend:8080/model/phi-v2/" + }], + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" 
+ } + ], + "parameters": { + "temperature": 0.1, + "top_p": 0.95, + "repetition_penalty": 1.2, + "top_k": 50, + "truncate": 3072, + "max_new_tokens": 1024, + "stop": [""] + } + }, + { + "name": "meta-llama/Llama-2-70b-chat-hf", + "description": "The latest and biggest model from Meta, fine-tuned for chat.", + "websiteUrl": "https://ai.meta.com/llama/", + "userMessageToken": "", + "userMessageEndToken": " [/INST] ", + "assistantMessageToken": "", + "assistantMessageEndToken": " [INST] ", + "preprompt": " ", + "chatPromptTemplate" : "[INST] <<SYS>>\n{{preprompt}}\n<</SYS>>\n\n{{#each messages}}{{#ifUser}}{{content}} [/INST] {{/ifUser}}{{#ifAssistant}}{{content}} [INST] {{/ifAssistant}}{{/each}}", + "endpoints": [{ + "type" : "tgi", + "url": "http://chat-flame-backend:8080/model/70b-chat/" + }], + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" 
+ } + ], + "parameters": { + "temperature": 0.1, + "top_p": 0.95, + "repetition_penalty": 1.2, + "top_k": 50, + "truncate": 3072, + "max_new_tokens": 1024 + } + }, + { + "name": "codellama/CodeLlama-34b-Instruct-hf", + "displayName": "codellama/CodeLlama-34b-Instruct-hf", + "description": "Code Llama, a state of the art code model from Meta.", + "websiteUrl": "https://about.fb.com/news/2023/08/code-llama-ai-for-coding/", + "userMessageToken": "", + "userMessageEndToken": " [/INST] ", + "assistantMessageToken": "", + "assistantMessageEndToken": " [INST] ", + "preprompt": " ", + "chatPromptTemplate" : "[INST] <<SYS>>\n{{preprompt}}\n<</SYS>>\n\n{{#each messages}}{{#ifUser}}{{content}} [/INST] {{/ifUser}}{{#ifAssistant}}{{content}} [INST] {{/ifAssistant}}{{/each}}", + "endpoints": [{ + "type" : "tgi", + "url": "http://chat-flame-backend:8080/model/34b-code/" + }], + "promptExamples": [ + { + "title": "Fibonacci in Python", + "prompt": "Write a python function to calculate the nth fibonacci number." + }, { + "title": "JavaScript promises", + "prompt": "How can I wait for multiple JavaScript promises to fulfill before doing something with their values?" + }, { + "title": "Rust filesystem", + "prompt": "How can I load a file from disk in Rust?" 
+ } + ], + "parameters": { + "temperature": 0.1, + "top_p": 0.95, + "repetition_penalty": 1.2, + "top_k": 50, + "truncate": 4096, + "max_new_tokens": 4096 + } + }, + { + "name": "mistralai/Mistral-7B-Instruct-v0.1", + "displayName": "mistralai/Mistral-7B-Instruct-v0.1", "description": "Mistral 7B is a new Apache 2.0 model, released by Mistral AI that outperforms Llama2 13B in benchmarks.", "websiteUrl": "https://mistral.ai/news/announcing-mistral-7b/", "preprompt": "", "chatPromptTemplate" : "{{#each messages}}{{#ifUser}}[INST] {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}} [/INST]{{/ifUser}}{{#ifAssistant}}{{content}}{{/ifAssistant}}{{/each}}", "parameters": { - "temperature": 0.7, + "temperature": 0.1, "top_p": 0.95, - "repetition_penalty": 1.1, + "repetition_penalty": 1.2, "top_k": 50, "truncate": 3072, - "max_new_tokens": 512, + "max_new_tokens": 1024, "stop": [""] }, "endpoints": [{ "type" : "tgi", - "url": "http://chat-flame-backend:8080" + "url": "http://chat-flame-backend:8080/model/7b-mistral-instruct/" }], "promptExamples": [ { @@ -75,8 +182,42 @@ MODELS=`[ "title": "Assist in a task", "prompt": "How do I make a delicious lemon cheesecake?" 
} - ] - } + ], + "unlisted": true + }, + { + "name": "openchat/openchat-3.5-0106", + "displayName": "openchat/openchat-3.5-0106", + "description": "OpenChat 3.5 is the #1 model on MT-Bench, with only 7B parameters.", + "websiteUrl": "https://huggingface.co/openchat/openchat-3.5-0106", + "preprompt": "", + "chatPromptTemplate" : "{{#each messages}}{{#ifUser}}GPT4 Correct User: {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<|end_of_turn|>GPT4 Correct Assistant:{{/ifUser}}{{#ifAssistant}}{{content}}<|end_of_turn|>{{/ifAssistant}}{{/each}}", + "parameters": { + "temperature": 0.6, + "top_p": 0.95, + "repetition_penalty": 1.2, + "top_k": 50, + "truncate": 6016, + "max_new_tokens": 2048, + "stop": ["<|end_of_turn|>"] + }, + "endpoints": [{ + "type" : "tgi", + "url": "http://chat-flame-backend:8080/model/7b-open-chat-3.5/" + }], + "promptExamples": [ + { + "title": "Write an email from bullet list", + "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + }, { + "title": "Code a snake game", + "prompt": "Code a basic snake game in python, give explanations for each step." + }, { + "title": "Assist in a task", + "prompt": "How do I make a delicious lemon cheesecake?" + } + ] + } ]` OLD_MODELS=`[]`# any removed models, `{ name: string, displayName?: string, id?: string }` diff --git a/docker-compose.yml b/docker-compose.yml index 2bdc71e..a9aaae0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,8 +3,8 @@ version: "3.8" services: chat-flame-backend: build: . 
- ports: - - "8080:8080" + #ports: + # - "8080:8080" environment: - RUST_LOG=debug command: ./chat-flame-backend @@ -25,8 +25,6 @@ services: - ./chat-ui.env:/app/.env.local depends_on: - mongodb - extra_hosts: - - "host.docker.internal:host-gateway" volumes: models: diff --git a/src/api/routes/model.rs b/src/api/routes/model.rs index c1bdb0c..10ba05f 100644 --- a/src/api/routes/model.rs +++ b/src/api/routes/model.rs @@ -29,7 +29,7 @@ use super::{generate_stream::generate_stream_handler, generate_text_handler}; #[utoipa::path( post, tag = "Text Generation Inference", - path = "/model/{model}", + path = "/model/{model}/", params( ("model" = Models, Path, description = "Model to use for generation"), ), diff --git a/src/llm/models/mod.rs b/src/llm/models/mod.rs index fe0c865..e7f0044 100644 --- a/src/llm/models/mod.rs +++ b/src/llm/models/mod.rs @@ -21,7 +21,7 @@ pub enum Models { L7bCode, #[serde(rename = "13b-code")] L13bCode, - #[serde(rename = "32b-code")] + #[serde(rename = "34b-code")] L34bCode, #[serde(rename = "7b-leo")] Leo7b, diff --git a/src/server.rs b/src/server.rs index c27819f..e151714 100644 --- a/src/server.rs +++ b/src/server.rs @@ -38,7 +38,7 @@ pub fn server(config: Config) -> Router { .route("/health", get(get_health_handler)) .route("/info", get(get_info_handler)) .route("/generate_stream", post(generate_stream_handler)) - .route("/model/:model", post(generate_model_handler)) + .route("/model/:model/", post(generate_model_handler)) .with_state(config); let swagger_ui = SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi());