forked from huggingface/lighteval
-
Notifications
You must be signed in to change notification settings - Fork 0
/
endpoint_model.yaml
22 lines (22 loc) · 1.11 KB
/
endpoint_model.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
---
# Example configuration for evaluating a model served through an endpoint.
# `base_params`, `instance` and `generation` are sections of `model`.
model:
  type: "endpoint"  # can be base, tgi, or endpoint

  base_params:
    # Needs to be lower case without special characters.
    # NOTE(review): this value contains an upper-case "B"; confirm whether the
    # consumer normalizes the name, otherwise lower-case it.
    endpoint_name: "llama-2-7B-lighteval"
    model: "meta-llama/Llama-2-7b-hf"
    revision: "main"
    # Can be any of "awq", "eetq", "gptq", "4bit" or "8bit" (will use
    # bitsandbytes), "bfloat16" or "float16".
    dtype: "float16"
    # If true, ignore all params in `instance`, and don't delete the endpoint
    # after evaluation.
    reuse_existing: false

  instance:
    accelerator: "gpu"
    region: "eu-west-1"
    vendor: "aws"
    instance_size: "medium"
    instance_type: "g5.2xlarge"
    framework: "pytorch"
    endpoint_type: "protected"
    # The namespace under which to launch the endpoint. Defaults to the
    # current user's namespace.
    namespace: null
    # Optionally specify the docker image to use when launching the endpoint
    # model. E.g., launching models with later releases of the TGI container
    # with support for newer models.
    image_url: null
    # Optional environment variables to include when launching the endpoint,
    # e.g., `MAX_INPUT_LENGTH: 2048`.
    env_vars: null

  generation:
    add_special_tokens: true