Update TGI specs (+ define TextToSpeech independently) (#915)
This PR pulls the latest changes from the TGI specs. In particular:
- updated `ChatCompletionInputMessageContent` => it now supports a single message (as before), an array of messages, and **sending an image as input** (for VLMs); see the sketch after this list
- new input: `stream_options`
- the `tools` and `tool_choice` inputs have been updated (see the sketch after the `inference.ts` diff below)
- more documented fields
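
For illustration, here is a minimal sketch of the new message-content and `stream_options` shapes, assuming the updated types are re-exported from `@huggingface/tasks` and that `ChatCompletionInput` keeps its existing `messages` field (the URL and prompt are made-up values):

```ts
import type { ChatCompletionInput, ChatCompletionInputMessage } from "@huggingface/tasks";

// A user turn whose content is an array of chunks: one text chunk and one image chunk (VLM use case).
const messages: ChatCompletionInputMessage[] = [
  {
    role: "user",
    content: [
      { type: "text", text: "What is shown in this image?" },
      { type: "image_url", image_url: { url: "https://example.com/cat.png" } },
    ],
  },
];

// Request body using the new `stream_options` input: with `include_usage: true`,
// a final chunk carrying token usage statistics is streamed before the `data: [DONE]` message.
const payload: ChatCompletionInput = {
  messages,
  stream: true,
  stream_options: { include_usage: true },
};
```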

**Note:** the TGI specs have a `guideline` input parameter for chat completion. I removed it from the official specs as it is a very specific parameter that is likely to be removed in the near future (it was there to fix a chat template issue for `ShieldGemma` models). More context
[here](https://huggingface.slack.com/archives/C05CFK1HM0T/p1726822675255589?thread_ts=1724852501.099809&cid=C05CFK1HM0T)
(internal).

**Note:** I also took the opportunity to define the `TextToSpeech` specs independently from `TextToAudio` (otherwise the semi-automatic scripts do not work properly). Nothing has changed in the specs themselves.

I'll open a related PR on the `huggingface_hub` side to reflect this change.
Wauplin committed Sep 24, 2024
1 parent 5a92211 commit 1aae6ac
Showing 12 changed files with 444 additions and 105 deletions.
packages/tasks/scripts/inference-tei-import.ts (3 additions, 1 deletion)
@@ -73,7 +73,9 @@ async function _extractAndAdapt(task: string, mainComponentName: string, type: "
}

// Add reference to components to export (and scan it too)
const newRef = camelFullName + ref.replace(camelName, "");
let newRef = camelFullName + ref.replace(camelName, "");
// remove duplicated InputInput or OutputOutput in naming
newRef = newRef.replace("InputInput", "Input").replace("OutputOutput", "Output");
if (!filteredComponents[newRef]) {
components[ref]["title"] = newRef; // Rename title to avoid conflicts
filteredComponents[newRef] = components[ref];
packages/tasks/scripts/inference-tgi-import.ts (3 additions, 1 deletion)
@@ -66,7 +66,9 @@ async function _extractAndAdapt(task: string, mainComponentName: string, type: "
}

// Add reference to components to export (and scan it too)
const newRef = camelFullName + ref.replace(camelName, "");
let newRef = camelFullName + ref.replace(camelName, "");
// remove duplicated InputInput or OutputOutput in naming
newRef = newRef.replace("InputInput", "Input").replace("OutputOutput", "Output");
if (!filteredComponents[newRef]) {
components[ref]["title"] = newRef; // Rename title to avoid conflicts
filteredComponents[newRef] = components[ref];
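
For context, a minimal sketch of what the added line does in both import scripts; the component names below are hypothetical values, not taken from the actual specs:

```ts
// Concatenating the camel-cased names can yield a duplicated segment, e.g.
// "TextToSpeechInput" + "InputGenerationParameters" (hypothetical values):
let newRef = "TextToSpeechInput" + "InputGenerationParameters";
// The added replace() calls collapse the duplication in the exported type name:
newRef = newRef.replace("InputInput", "Input").replace("OutputOutput", "Output");
console.log(newRef); // "TextToSpeechInputGenerationParameters"
```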
packages/tasks/src/tasks/chat-completion/inference.ts (66 additions, 21 deletions)
@@ -49,7 +49,7 @@ export interface ChatCompletionInput {
* [UNUSED] ID of the model to use. See the model endpoint compatibility table for details
* on which models work with the Chat API.
*/
model: string;
model?: string;
/**
* UNUSED
* How many chat completion choices to generate for each input message. Note that you will
@@ -63,12 +63,14 @@
* increasing the model's likelihood to talk about new topics
*/
presence_penalty?: number;
response_format?: ChatCompletionInputGrammarType;
seed?: number;
/**
* Up to 4 sequences where the API will stop generating further tokens.
*/
stop?: string[];
stream?: boolean;
stream_options?: ChatCompletionInputStreamOptions;
/**
* What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
* output more random, while
@@ -77,7 +79,7 @@
* We generally recommend altering this or `top_p` but not both.
*/
temperature?: number;
tool_choice?: ChatCompletionInputToolType;
tool_choice?: ChatCompletionInputTool;
/**
* A prompt to be appended before the tools
*/
@@ -87,7 +89,7 @@
* Use this to provide a list of
* functions the model may generate JSON inputs for.
*/
tools?: ChatCompletionInputTool[];
tools?: ToolElement[];
/**
* An integer between 0 and 5 specifying the number of most likely tokens to return at each
* token position, each with
@@ -105,40 +107,78 @@
}

export interface ChatCompletionInputMessage {
content?: string;
content: ChatCompletionInputMessageContent;
name?: string;
role: string;
tool_calls?: ChatCompletionInputToolCall[];
[property: string]: unknown;
}

export interface ChatCompletionInputToolCall {
function: ChatCompletionInputFunctionDefinition;
id: number;
type: string;
export type ChatCompletionInputMessageContent = ChatCompletionInputMessageChunk[] | string;

export interface ChatCompletionInputMessageChunk {
image_url?: ChatCompletionInputURL;
text?: string;
type: ChatCompletionInputMessageChunkType;
[property: string]: unknown;
}

export interface ChatCompletionInputFunctionDefinition {
arguments: unknown;
description?: string;
name: string;
export interface ChatCompletionInputURL {
url: string;
[property: string]: unknown;
}

export type ChatCompletionInputToolType = "OneOf" | ChatCompletionInputToolTypeObject;
export type ChatCompletionInputMessageChunkType = "text" | "image_url";

export interface ChatCompletionInputToolTypeObject {
FunctionName: string;
export interface ChatCompletionInputGrammarType {
type: ChatCompletionInputGrammarTypeType;
/**
* A string that represents a [JSON Schema](https://json-schema.org/).
*
* JSON Schema is a declarative language that allows to annotate JSON documents
* with types and descriptions.
*/
value: unknown;
[property: string]: unknown;
}

export type ChatCompletionInputGrammarTypeType = "json" | "regex";

export interface ChatCompletionInputStreamOptions {
/**
* If set, an additional chunk will be streamed before the data: [DONE] message. The usage
* field on this chunk shows the token usage statistics for the entire request, and the
* choices field will always be an empty array. All other chunks will also include a usage
* field, but with a null value.
*/
include_usage: boolean;
[property: string]: unknown;
}

export interface ChatCompletionInputTool {
export type ChatCompletionInputTool = ChatCompletionInputToolType | string;

export interface ChatCompletionInputToolType {
function?: ChatCompletionInputFunctionName;
[property: string]: unknown;
}

export interface ChatCompletionInputFunctionName {
name: string;
[property: string]: unknown;
}

export interface ToolElement {
function: ChatCompletionInputFunctionDefinition;
type: string;
[property: string]: unknown;
}

export interface ChatCompletionInputFunctionDefinition {
arguments: unknown;
description?: string;
name: string;
[property: string]: unknown;
}

/**
* Chat Completion Output.
*
Expand All @@ -151,7 +191,6 @@ export interface ChatCompletionOutput {
created: number;
id: string;
model: string;
object: string;
system_fingerprint: string;
usage: ChatCompletionOutputUsage;
[property: string]: unknown;
@@ -185,15 +224,14 @@ export interface ChatCompletionOutputTopLogprob {

export interface ChatCompletionOutputMessage {
content?: string;
name?: string;
role: string;
tool_calls?: ChatCompletionOutputToolCall[];
[property: string]: unknown;
}

export interface ChatCompletionOutputToolCall {
function: ChatCompletionOutputFunctionDefinition;
id: number;
id: string;
type: string;
[property: string]: unknown;
}
@@ -224,8 +262,8 @@ export interface ChatCompletionStreamOutput {
created: number;
id: string;
model: string;
object: string;
system_fingerprint: string;
usage?: ChatCompletionStreamOutputUsage;
[property: string]: unknown;
}

@@ -275,3 +313,10 @@ export interface ChatCompletionStreamOutputTopLogprob {
token: string;
[property: string]: unknown;
}

export interface ChatCompletionStreamOutputUsage {
completion_tokens: number;
prompt_tokens: number;
total_tokens: number;
[property: string]: unknown;
}
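
For illustration, a hedged sketch of the updated `tools`, `tool_choice`, and `response_format` shapes, using the interfaces defined above in this file; the weather tool, the `"auto"` string, and the JSON Schemas are made-up example values, not part of the spec:

```ts
const tools: ToolElement[] = [
  {
    type: "function",
    function: {
      name: "get_current_weather",
      description: "Get the current weather for a given location",
      // `arguments` is typed as `unknown`; a JSON-Schema-like object is one plausible payload
      arguments: {
        type: "object",
        properties: { location: { type: "string" } },
        required: ["location"],
      },
    },
  },
];

// `tool_choice` now accepts either an object naming a specific function...
const pickWeather: ChatCompletionInputTool = { function: { name: "get_current_weather" } };
// ...or a plain string (e.g. "auto"), since `ChatCompletionInputTool` is a union with `string`.
const autoChoice: ChatCompletionInputTool = "auto";

// `response_format` uses the grammar type defined above; `value` holds e.g. a JSON Schema.
const responseFormat: ChatCompletionInputGrammarType = {
  type: "json",
  value: { type: "object", properties: { answer: { type: "string" } } },
};
```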