From 6a00ff9205054ba549e71fa7454a0713e15b5e5b Mon Sep 17 00:00:00 2001
From: Viktor Taskov <viktor.taskov@seldon.io>
Date: Tue, 29 Aug 2023 15:43:40 +0100
Subject: [PATCH] feat(docs): [SCv1] Add a section about loading custom
 HuggingFace models (#5105)

* [HuggingFace] Add section about loading custom models

* Refactor a word

* Change custom HF model location in GCS

* Refactor SeldonDeployment name in yaml
---
 doc/source/servers/huggingface.md | 46 +++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/doc/source/servers/huggingface.md b/doc/source/servers/huggingface.md
index d459781928..d0211062be 100644
--- a/doc/source/servers/huggingface.md
+++ b/doc/source/servers/huggingface.md
@@ -8,12 +8,12 @@ We also support the high performance optimizations provided by the [Transformer
 
 The parameters that are available for you to configure include:
 
-| Name | Description |
-| ---- | ----------- |
-| `task` | The transformer pipeline task |
-| `pretrained_model` | The name of the pretrained model in the Hub |
+| Name                   | Description                                                         |
+|------------------------|---------------------------------------------------------------------|
+| `task`                 | The transformer pipeline task                                       |
+| `pretrained_model`     | The name of the pretrained model in the Hub                         |
 | `pretrained_tokenizer` | Transformer name in Hub if different to the one provided with model |
-| `optimum_model` | Boolean to enable loading model with Optimum framework |
+| `optimum_model`        | Boolean to enable loading model with Optimum framework              |
 
 ## Simple Example
 
@@ -43,7 +43,7 @@ spec:
 
 ## Quantized & Optimized Models with Optimum
 
-You can deploy a HuggingFace model loaded using the Optimum library by using the `optimum_model` parameter
+You can deploy a HuggingFace model loaded using the Optimum library by using the `optimum_model` parameter.
 
 ```yaml
 apiVersion: machinelearning.seldon.io/v1alpha2
@@ -70,3 +70,37 @@ spec:
     replicas: 1
 ```
 
+## Custom Model Example
+
+You can deploy a custom HuggingFace model by providing the location of the model artefacts using the `modelUri` field.
+
+```yaml
+apiVersion: machinelearning.seldon.io/v1alpha2
+kind: SeldonDeployment
+metadata:
+  name: custom-gpt2-hf-model
+spec:
+  protocol: v2
+  predictors:
+  - graph:
+      name: transformer
+      implementation: HUGGINGFACE_SERVER
+      # Location of the custom model artefacts.
+      modelUri: gs://seldon-models/v1.18.0-dev/huggingface/custom-text-generation
+      parameters:
+      - name: task  # the transformer pipeline task
+        type: STRING
+        value: text-generation
+    componentSpecs:
+    - spec:
+        containers:
+        - name: transformer  # same name as the graph node above
+          resources:
+            limits:
+              cpu: 1
+              memory: 4Gi
+            requests:
+              cpu: 100m
+              memory: 3Gi
+    name: default
+```