update plugin spec

higress-group · Sep 13, 2024 · 9ce08aa · 9ce08aa
1 parent 6aa5900
commit 9ce08aa
Show file tree

Hide file tree

Showing 41 changed files with 415 additions and 5 deletions.
diff --git a/backend/sdk/src/main/resources/plugins/ai-agent/spec.yaml b/backend/sdk/src/main/resources/plugins/ai-agent/spec.yaml
@@ -13,6 +13,7 @@ info:
   version: 1.0.0
   contact:
     name: xingyunyang01
+  gatewayMinVersion: ""
 spec:
   phase: UNSPECIFIED_PHASE
   priority: 20

diff --git a/backend/sdk/src/main/resources/plugins/ai-cache/spec.yaml b/backend/sdk/src/main/resources/plugins/ai-cache/spec.yaml
@@ -13,6 +13,7 @@ info:
   version: 1.0.0
   contact:
     name: johnlanni
+  gatewayMinVersion: "2.0.0"
 spec:
   phase: AUTHN
   priority: 10

diff --git a/backend/sdk/src/main/resources/plugins/ai-data-masking/README.md b/backend/sdk/src/main/resources/plugins/ai-data-masking/README.md
@@ -0,0 +1,151 @@
+---
+title: AI 数据脱敏
+keywords: [higress,ai data masking]
+description: AI 数据脱敏插件配置参考
+---
+
+## 功能说明
+
+  对请求/返回中的敏感词拦截、替换
+
+![image](https://img.alicdn.com/imgextra/i4/O1CN0156Wtko1T9JO0RiWow_!!6000000002339-0-tps-1314-638.jpg)
+
+### 处理数据范围
+  - openai协议：请求/返回对话内容
+  - jsonpath：只处理指定字段
+  - raw：整个请求/返回body
+
+### 敏感词拦截
+  - 处理数据范围中出现敏感词直接拦截，返回预设错误信息
+  - 支持系统内置敏感词库和自定义敏感词
+
+### 敏感词替换
+  - 将请求数据中出现的敏感词替换为脱敏字符串，传递给后端服务。可保证敏感数据不出域
+  - 部分脱敏数据在后端服务返回后可进行还原
+  - 自定义规则支持标准正则和grok规则，替换字符串支持变量替换
+
+## 运行属性
+
+- 插件执行阶段：`认证阶段`
+- 插件执行优先级：`991`
+
+## 配置字段
+
+| 名称 | 数据类型 | 默认值 | 描述 |
+| -------- | --------  | -------- | -------- |
+|  deny_openai            | bool            | true  |  对openai协议进行拦截 |
+|  deny_jsonpath          | array of string |   []  |  对指定jsonpath拦截 |
+|  deny_raw               | bool            | false |  对原始body拦截 |
+|  system_deny            | bool            | true  |  开启内置拦截规则  |
+|  deny_code              | int             | 200   |  拦截时http状态码   |
+|  deny_message           | string          | 提问或回答中包含敏感词，已被屏蔽 |  拦截时ai返回消息   |
+|  deny_raw_message       | string          | {"errmsg":"提问或回答中包含敏感词，已被屏蔽"} |  非openai拦截时返回内容   |
+|  deny_content_type      | string          | application/json  |  非openai拦截时返回content_type头 |
+|  deny_words             | array of string | []    |  自定义敏感词列表  |
+|  replace_roles          | array           |   -   |  自定义敏感词正则替换  |
+|  replace_roles.regex    | string          |   -   |  规则正则(内置GROK规则) |
+|  replace_roles.type     | [replace, hash] |   -   |  替换类型  |
+|  replace_roles.restore  | bool            | false |  是否恢复  |
+|  replace_roles.value    | string          |   -   |  替换值（支持正则变量）  |
+
+## 配置示例
+
+```yaml
+system_deny: true
+deny_openai: true
+deny_jsonpath:
+  - "$.messages[*].content"
+deny_raw: true
+deny_code: 200
+deny_message: "提问或回答中包含敏感词，已被屏蔽"
+deny_raw_message: "{\"errmsg\":\"提问或回答中包含敏感词，已被屏蔽\"}"
+deny_content_type: "application/json"
+deny_words: 
+  - "自定义敏感词1"
+  - "自定义敏感词2"
+replace_roles:
+  - regex: "%{MOBILE}"
+    type: "replace"
+    value: "****"
+    # 手机号  13800138000 -> ****
+  - regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
+    type: "replace"
+    restore: true
+    value: "****@$domain"
+    # 电子邮箱  admin@gmail.com -> ****@gmail.com
+  - regex: "%{IP}"
+    type: "replace"
+    restore: true
+    value: "***.***.***.***"
+    # ip 192.168.0.1 -> ***.***.***.***
+  - regex: "%{IDCARD}"
+    type: "replace"
+    value: "****"
+    # 身份证号 110000000000000000 -> ****
+  - regex: "sk-[0-9a-zA-Z]*"
+    restore: true
+    type: "hash"
+    # hash sk-12345 -> 9cb495455da32f41567dab1d07f1973d
+    # hash后的值提供给大模型，从大模型返回的数据中会将hash值还原为原始值
+```
+
+## 敏感词替换样例
+
+### 用户请求内容
+
+  请将 `curl http://172.20.5.14/api/openai/v1/chat/completions -H "Authorization: sk-12345" -H "Auth: test@gmail.com"` 改成post方式
+
+### 处理后请求大模型内容
+
+  请将 `curl http://***.***.***.***/api/openai/v1/chat/completions -H "Authorization: 48a7e98a91d93896d8dac522c5853948" -H "Auth: ****@gmail.com"` 改成post方式
+
+### 大模型返回内容
+
+  您想要将一个 `curl` 的 GET 请求转换为 POST 请求，并且这个请求是向一个特定的 API 发送数据。下面是修改后的 `curl` 命令，以 POST 方式发送：
+
+```sh
+curl -X POST \
+     -H "Authorization: 48a7e98a91d93896d8dac522c5853948" \
+     -H "Auth: ****@gmail.com" \
+     -H "Content-Type: application/json" \
+     -d '{"key":"value"}' \
+     http://***.***.***.***/api/openai/v1/chat/completions
+```
+
+这里做了如下几个修改:
+
+- `-X POST` 设置请求方式为 POST。
+- `-H "Content-Type: application/json"` 设置请求头中的 `Content-Type` 为 `application/json`，这通常用来告诉服务器您发送的数据格式是 JSON。
+- `-d '{"key":"value"}'` 这里设置了要发送的数据，`'{"key":"value"}'` 是一个简单的 JSON 对象示例。您需要将其替换为您实际想要发送的数据。
+
+请注意，您需要将 `"key":"value"` 替换为您实际要发送的数据内容。如果您的 API 接受不同的数据结构或者需要特定的字段，请根据实际情况调整这部分内容。
+
+### 处理后返回用户内容
+
+  您想要将一个 `curl` 的 GET 请求转换为 POST 请求，并且这个请求是向一个特定的 API 发送数据。下面是修改后的 `curl` 命令，以 POST 方式发送：
+
+```sh
+curl -X POST \
+     -H "Authorization: sk-12345" \
+     -H "Auth: test@gmail.com" \
+     -H "Content-Type: application/json" \
+     -d '{"key":"value"}' \
+     http://172.20.5.14/api/openai/v1/chat/completions
+```
+
+这里做了如下几个修改:
+
+- `-X POST` 设置请求方式为 POST。
+- `-H "Content-Type: application/json"` 设置请求头中的 `Content-Type` 为 `application/json`，这通常用来告诉服务器您发送的数据格式是 JSON。
+- `-d '{"key":"value"}'` 这里设置了要发送的数据，`'{"key":"value"}'` 是一个简单的 JSON 对象示例。您需要将其替换为您实际想要发送的数据。
+
+请注意，您需要将 `"key":"value"` 替换为您实际要发送的数据内容。如果您的 API 接受不同的数据结构或者需要特定的字段，请根据实际情况调整这部分内容。
+
+
+## 相关说明
+
+ - 流模式中如果脱敏后的词被多个chunk拆分，可能无法进行还原
+ - 流模式中，如果敏感词语被多个chunk拆分，可能会有敏感词的一部分返回给用户的情况
+ - grok 内置规则列表 https://help.aliyun.com/zh/sls/user-guide/grok-patterns
+ - 内置敏感词库数据来源 https://github.com/houbb/sensitive-word/tree/master/src/main/resources
+
diff --git a/backend/sdk/src/main/resources/plugins/ai-data-masking/README_EN.md b/backend/sdk/src/main/resources/plugins/ai-data-masking/README_EN.md
@@ -0,0 +1,131 @@
+---
+title: AI Data Masking
+keywords: [higress, ai data masking]
+description: AI Data Masking Plugin Configuration Reference
+---
+## Function Description
+  Interception and replacement of sensitive words in requests/responses
+![image](https://img.alicdn.com/imgextra/i4/O1CN0156Wtko1T9JO0RiWow_!!6000000002339-0-tps-1314-638.jpg)
+
+### Data Handling Scope
+  - openai protocol: Request/response conversation content
+  - jsonpath: Only process specified fields
+  - raw: Entire request/response body
+
+### Sensitive Word Interception
+  - Block the request/response when a sensitive word appears within the data handling scope, and return a preset error message
+  - Supports system's built-in sensitive word library and custom sensitive words
+
+### Sensitive Word Replacement
+  - Replace sensitive words in request data with masked strings before passing to back-end services. Ensures that sensitive data does not leave the domain
+  - Some masked data can be restored after being returned by the back-end service
+  - Custom rules support standard regular expressions and grok rules, and replacement strings support variable substitution
+
+## Execution Properties
+Plugin Execution Phase: `Authentication Phase`  
+Plugin Execution Priority: `991`
+
+## Configuration Fields
+| Name                   | Data Type       | Default Value | Description                          |
+| ---------------------- | ---------------- | -------------- | ------------------------------------ |
+|  deny_openai           | bool             | true           |  Intercept openai protocol          |
+|  deny_jsonpath         | array of string  |   []           |  Intercept specified jsonpath       |
+|  deny_raw              | bool             | false          |  Intercept raw body                 |
+|  system_deny           | bool             | true           |  Enable built-in interception rules  |
+|  deny_code             | int              | 200            |  HTTP status code when intercepted   |
+|  deny_message          | string           | Sensitive words found in the question or answer have been blocked | Message returned by the AI when a request is intercepted |
+|  deny_raw_message      | string           | {"errmsg":"Sensitive words found in the question or answer have been blocked"} | Content returned when a non-openai request is intercepted |
+|  deny_content_type     | string           | application/json | Content-Type header returned when a non-openai request is intercepted |
+|  deny_words            | array of string  | []             | Custom sensitive word list           |
+|  replace_roles         | array            |   -            | Custom sensitive word regex replacement |
+|  replace_roles.regex   | string           |   -            | Rule regex (built-in GROK rules are supported) |
+|  replace_roles.type    | [replace, hash]  |   -            | Replacement type                     |
+|  replace_roles.restore  | bool             | false          | Whether to restore                   |
+|  replace_roles.value    | string          |   -            | Replacement value (supports regex variables) |
+
+## Configuration Example
+```yaml
+system_deny: true
+deny_openai: true
+deny_jsonpath:
+  - "$.messages[*].content"
+deny_raw: true
+deny_code: 200
+deny_message: "Sensitive words found in the question or answer have been blocked"
+deny_raw_message: "{\"errmsg\":\"Sensitive words found in the question or answer have been blocked\"}"
+deny_content_type: "application/json"
+deny_words:
+  - "Custom sensitive word 1"
+  - "Custom sensitive word 2"
+replace_roles:
+  - regex: "%{MOBILE}"
+    type: "replace"
+    value: "****"
+    # Mobile number  13800138000 -> ****
+  - regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
+    type: "replace"
+    restore: true
+    value: "****@$domain"
+    # Email  admin@gmail.com -> ****@gmail.com
+  - regex: "%{IP}"
+    type: "replace"
+    restore: true
+    value: "***.***.***.***"
+    # IP 192.168.0.1 -> ***.***.***.***
+  - regex: "%{IDCARD}"
+    type: "replace"
+    value: "****"
+    # ID card number 110000000000000000 -> ****
+  - regex: "sk-[0-9a-zA-Z]*"
+    restore: true
+    type: "hash"
+    # hash sk-12345 -> 9cb495455da32f41567dab1d07f1973d
+    # The hashed value is provided to the large model, and the hash value will be restored to the original value from the data returned by the large model
+```
+
+## Sensitive Word Replacement Example
+### User Request Content
+  Please change `curl http://172.20.5.14/api/openai/v1/chat/completions -H "Authorization: sk-12345" -H "Auth: test@gmail.com"` to POST method
+
+### Processed Request Content Sent to the Large Model
+  Please change `curl http://***.***.***.***/api/openai/v1/chat/completions -H "Authorization: 48a7e98a91d93896d8dac522c5853948" -H "Auth: ****@gmail.com"` to POST method
+
+### Large Model Returned Content
+  You want to convert a `curl` GET request to a POST request, and this request is sending data to a specific API. Below is the modified `curl` command to send as POST:
+```sh
+curl -X POST \
+     -H "Authorization: 48a7e98a91d93896d8dac522c5853948" \
+     -H "Auth: ****@gmail.com" \
+     -H "Content-Type: application/json" \
+     -d '{"key":"value"}' \
+     http://***.***.***.***/api/openai/v1/chat/completions
+```
+The following modifications were made:
+- `-X POST` sets the request method to POST.
+- `-H "Content-Type: application/json"` sets the `Content-Type` in the request header to `application/json`, which is typically used to inform the server that the data you are sending is in JSON format.
+- `-d '{"key":"value"}'` sets the data to be sent, where `'{"key":"value"}'` is a simple example of a JSON object. You need to replace it with the actual data you want to send.
+
+Please note that you need to replace `"key":"value"` with the actual data content you want to send. If your API accepts a different data structure or requires specific fields, please adjust this part according to your actual situation.
+
+### Processed Content Returned to the User
+  You want to convert a `curl` GET request to a POST request, and this request is sending data to a specific API. Below is the modified `curl` command to send as POST:
+```sh
+curl -X POST \
+     -H "Authorization: sk-12345" \
+     -H "Auth: test@gmail.com" \
+     -H "Content-Type: application/json" \
+     -d '{"key":"value"}' \
+     http://172.20.5.14/api/openai/v1/chat/completions
+```
+The following modifications were made:
+- `-X POST` sets the request method to POST.
+- `-H "Content-Type: application/json"` sets the `Content-Type` in the request header to `application/json`, which is typically used to inform the server that the data you are sending is in JSON format.
+- `-d '{"key":"value"}'` sets the data to be sent, where `'{"key":"value"}'` is a simple example of a JSON object. You need to replace it with the actual data you want to send.
+
+Please note that you need to replace `"key":"value"` with the actual data content you want to send. If your API accepts a different data structure or requires specific fields, please adjust this part according to your actual situation.
+
+## Related Notes
+ - In streaming mode, if the masked words are split across multiple chunks, restoration may not be possible
+ - In streaming mode, if sensitive words are split across multiple chunks, there may be cases where part of the sensitive word is returned to the user
+ - Grok built-in rule list: https://help.aliyun.com/zh/sls/user-guide/grok-patterns
+ - Built-in sensitive word library data source: https://github.com/houbb/sensitive-word/tree/master/src/main/resources
diff --git a/backend/sdk/src/main/resources/plugins/ai-data-masking/spec.yaml b/backend/sdk/src/main/resources/plugins/ai-data-masking/spec.yaml
@@ -0,0 +1,92 @@
+apiVersion: 1.0.0
+info:
+  type: oss
+  category: ai
+  image: platform_wasm/ai-data-masking
+  name: ai-data-masking
+  title: AI data masking
+  x-title-i18n:
+    zh-CN: AI 数据脱敏
+  description: Intercept, replace, and restore sensitive information in requests/responses.
+  x-description-i18n:
+    zh-CN: 对请求/响应中的敏感信息进行拦截、替换、还原
+  iconUrl: https://img.alicdn.com/imgextra/i1/O1CN018iKKih1iVx287RltL_!!6000000004419-2-tps-42-42.png
+  version: 1.0.0
+  contact:
+    name: johnlanni
+  gatewayMinVersion: "2.0.0"
+spec:
+  phase: AUTHN
+  priority: 991
+  configSchema:
+    openAPIV3Schema:
+      type: object
+      example:
+        system_deny: true
+        deny_openai: true
+        deny_jsonpath:
+          - "$.messages[*].content"
+        deny_message: "Question or answer contains sensitive words and has been blocked."
+        deny_words:
+          - "Custom Sensitive Word 1"
+          - "Custom Sensitive Word 2"
+        replace_roles:
+          - regex: "%{MOBILE}"
+            type: "replace"
+            value: "****"
+            # Phone number: 13800138000 -> ****
+          - regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
+            type: "replace"
+            restore: true
+            value: "****@$domain"
+            # Email:  admin@gmail.com -> ****@gmail.com
+          - regex: "%{IP}"
+            type: "replace"
+            restore: true
+            value: "***.***.***.***"
+            # IP: 192.168.0.1 -> ***.***.***.***
+          - regex: "%{IDCARD}"
+            type: "replace"
+            value: "****"
+            # ID card: 110000000000000000 -> ****
+          - regex: "sk-[0-9a-zA-Z]*"
+            restore: true
+            type: "hash"
+            # hash sk-12345 -> 9cb495455da32f41567dab1d07f1973d
+            # The hashed value is provided to the large model; in the data returned by the large model, the hash value is restored to its original value.
+  routeConfigSchema:
+    openAPIV3Schema:
+      type: object
+      example:
+        system_deny: true
+        deny_openai: true
+        deny_jsonpath:
+          - "$.messages[*].content"
+        deny_message: "Question or answer contains sensitive words and has been blocked."
+        deny_words:
+          - "Custom Sensitive Word 1"
+          - "Custom Sensitive Word 2"
+        replace_roles:
+          - regex: "%{MOBILE}"
+            type: "replace"
+            value: "****"
+            # Phone number: 13800138000 -> ****
+          - regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
+            type: "replace"
+            restore: true
+            value: "****@$domain"
+            # Email:  admin@gmail.com -> ****@gmail.com
+          - regex: "%{IP}"
+            type: "replace"
+            restore: true
+            value: "***.***.***.***"
+            # IP: 192.168.0.1 -> ***.***.***.***
+          - regex: "%{IDCARD}"
+            type: "replace"
+            value: "****"
+            # ID card: 110000000000000000 -> ****
+          - regex: "sk-[0-9a-zA-Z]*"
+            restore: true
+            type: "hash"
+            # hash sk-12345 -> 9cb495455da32f41567dab1d07f1973d
+            # The hashed value is provided to the large model; in the data returned by the large model, the hash value is restored to its original value.
diff --git a/backend/sdk/src/main/resources/plugins/ai-history/spec.yaml b/backend/sdk/src/main/resources/plugins/ai-history/spec.yaml
@@ -1,5 +1,6 @@
 apiVersion: 1.0.0
 info:
+  gatewayMinVersion: "2.0.0"
   type: oss
   category: ai
   name: ai-history