From 6a00ff9205054ba549e71fa7454a0713e15b5e5b Mon Sep 17 00:00:00 2001 From: Viktor Taskov Date: Tue, 29 Aug 2023 15:43:40 +0100 Subject: [PATCH] feat(docs): [SCv1] Add a section about loading custom HuggingFace models (#5105) * [HuggingFace] Add section about loading custom models * Refactor a word * Change custom HF model location in GCS * Refactor SeldonDeployment name in yaml --- doc/source/servers/huggingface.md | 46 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/doc/source/servers/huggingface.md b/doc/source/servers/huggingface.md index d459781928..d0211062be 100644 --- a/doc/source/servers/huggingface.md +++ b/doc/source/servers/huggingface.md @@ -8,12 +8,12 @@ We also support the high performance optimizations provided by the [Transformer The parameters that are available for you to configure include: -| Name | Description | -| ---- | ----------- | -| `task` | The transformer pipeline task | -| `pretrained_model` | The name of the pretrained model in the Hub | +| Name | Description | +|------------------------|---------------------------------------------------------------------| +| `task` | The transformer pipeline task | +| `pretrained_model` | The name of the pretrained model in the Hub | | `pretrained_tokenizer` | Transformer name in Hub if different to the one provided with model | -| `optimum_model` | Boolean to enable loading model with Optimum framework | +| `optimum_model` | Boolean to enable loading model with Optimum framework | ## Simple Example @@ -43,7 +43,7 @@ spec: ## Quantized & Optimized Models with Optimum -You can deploy a HuggingFace model loaded using the Optimum library by using the `optimum_model` parameter +You can deploy a HuggingFace model loaded using the Optimum library by using the `optimum_model` parameter. ```yaml apiVersion: machinelearning.seldon.io/v1alpha2 @@ -70,3 +70,37 @@ spec: replicas: 1 ``` +## Custom Model Example + +You can deploy a custom HuggingFace model by providing the location of the model artefacts using the `modelUri` field. + +```yaml +apiVersion: machinelearning.seldon.io/v1alpha2 +kind: SeldonDeployment +metadata: + name: custom-gpt2-hf-model +spec: + protocol: v2 + predictors: + - graph: + name: transformer + implementation: HUGGINGFACE_SERVER + modelUri: gs://seldon-models/v1.18.0-dev/huggingface/custom-text-generation + parameters: + - name: task + type: STRING + value: text-generation + componentSpecs: + - spec: + containers: + - name: transformer + resources: + limits: + cpu: 1 + memory: 4Gi + requests: + cpu: 100m + memory: 3Gi + name: default + replicas: 1 +```