diff --git a/packages/tasks/package.json b/packages/tasks/package.json
index 752c1a7ff..06ea1cc0b 100644
--- a/packages/tasks/package.json
+++ b/packages/tasks/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/tasks",
 	"packageManager": "pnpm@8.10.5",
-	"version": "0.11.11",
+	"version": "0.11.12",
 	"description": "List of ML tasks for huggingface.co/tasks",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index cf5b256f6..f9c096095 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -170,6 +170,57 @@ export const diffusers = (model: ModelData): string[] => {
 	}
 };
 
+/**
+ * Build the DiffusionKit usage snippets for a model.
+ *
+ * @param model - Model metadata; `id` is interpolated into the pipeline snippet and the
+ * presence of the "flux" tag selects the Flux variant over the Stable Diffusion 3 one.
+ * @returns Two Python snippets: pipeline construction, then image generation.
+ */
+export const diffusionkit = (model: ModelData): string[] => {
+	const isFlux = model.tags.includes("flux");
+
+	// The model id is wrapped in double quotes so the generated Python receives a string
+	// literal; an unquoted id such as org/name is not a valid Python string argument.
+	const sd3Snippet = `# Pipeline for Stable Diffusion 3
+from diffusionkit.mlx import DiffusionPipeline
+
+pipeline = DiffusionPipeline(
+	shift=3.0,
+	use_t5=False,
+	model_version="${model.id}",
+	low_memory_mode=True,
+	a16=True,
+	w16=True,
+)`;
+
+	const fluxSnippet = `# Pipeline for Flux
+from diffusionkit.mlx import FluxPipeline
+
+pipeline = FluxPipeline(
+  shift=1.0,
+  model_version="${model.id}",
+  low_memory_mode=True,
+  a16=True,
+  w16=True,
+)`;
+
+	// Flux models get 4 steps with CFG weight 0; all other models get 50 steps with CFG weight 5.
+	const generateSnippet = `# Image Generation
+HEIGHT = 512
+WIDTH = 512
+NUM_STEPS = ${isFlux ? 4 : 50}
+CFG_WEIGHT = ${isFlux ? 0 : 5}
+
+image, _ = pipeline.generate_image(
+  "a photo of a cat",
+  cfg_weight=CFG_WEIGHT,
+  num_steps=NUM_STEPS,
+  latent_size=(HEIGHT // 8, WIDTH // 8),
+)`;
+
+	return [isFlux ? fluxSnippet : sd3Snippet, generateSnippet];
+};
+
 export const cartesia_pytorch = (model: ModelData): string[] => [
 	`# pip install --no-binary :all: cartesia-pytorch
 from cartesia_pytorch import ReneLMHeadModel
diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts
index 0860f9352..71b8747b9 100644
--- a/packages/tasks/src/model-libraries.ts
+++ b/packages/tasks/src/model-libraries.ts
@@ -155,7 +155,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
 		repoName: "deepforest",
 		docsUrl: "https://deepforest.readthedocs.io/en/latest/",
 		repoUrl: "https://github.com/weecology/DeepForest",
-		countDownloads: `path_extension:"pt"`,
+		countDownloads: `path_extension:"pt" OR path_extension:"pl"`,
 	},
 	"depth-anything-v2": {
 		prettyLabel: "DepthAnythingV2",
@@ -181,6 +181,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
 		filter: true,
 		/// diffusers has its own more complex "countDownloads" query
 	},
+	diffusionkit: {
+		prettyLabel: "DiffusionKit",
+		repoName: "DiffusionKit",
+		repoUrl: "https://github.com/argmaxinc/DiffusionKit",
+		snippets: snippets.diffusionkit,
+	},
 	doctr: {
 		prettyLabel: "docTR",
 		repoName: "doctr",
diff --git a/packages/tasks/src/tasks/audio-classification/inference.ts b/packages/tasks/src/tasks/audio-classification/inference.ts
index ee61c7052..b8ed3f50f 100644
--- a/packages/tasks/src/tasks/audio-classification/inference.ts
+++ b/packages/tasks/src/tasks/audio-classification/inference.ts
@@ -8,9 +8,10 @@
  */
 export interface AudioClassificationInput {
 	/**
-	 * The input audio data
+	 * The input audio data as a base64-encoded string. If no `parameters` are provided, you can
+	 * also provide the audio data as a raw bytes payload.
 	 */
-	inputs: unknown;
+	inputs: string;
 	/**
 	 * Additional inference parameters
 	 */
diff --git a/packages/tasks/src/tasks/audio-classification/spec/input.json b/packages/tasks/src/tasks/audio-classification/spec/input.json
index d0372bb68..0bfeb69cd 100644
--- a/packages/tasks/src/tasks/audio-classification/spec/input.json
+++ b/packages/tasks/src/tasks/audio-classification/spec/input.json
@@ -6,7 +6,8 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input audio data"
+			"description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
+			"type": "string"
 		},
 		"parameters": {
 			"description": "Additional inference parameters",
diff --git a/packages/tasks/src/tasks/audio-classification/spec/output.json b/packages/tasks/src/tasks/audio-classification/spec/output.json
index dac7a9225..f1f2dfe8e 100644
--- a/packages/tasks/src/tasks/audio-classification/spec/output.json
+++ b/packages/tasks/src/tasks/audio-classification/spec/output.json
@@ -5,6 +5,7 @@
 	"description": "Outputs for Audio Classification inference",
 	"type": "array",
 	"items": {
+		"type": "object",
 		"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
 	}
 }
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
index dfc501519..c8ef9b9bc 100644
--- a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
+++ b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
@@ -9,9 +9,10 @@
  */
 export interface AutomaticSpeechRecognitionInput {
 	/**
-	 * The input audio data
+	 * The input audio data as a base64-encoded string. If no `parameters` are provided, you can
+	 * also provide the audio data as a raw bytes payload.
 	 */
-	inputs: unknown;
+	inputs: string;
 	/**
 	 * Additional inference parameters
 	 */
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
index 691c7f4b7..a618c68a3 100644
--- a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
+++ b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
@@ -6,7 +6,8 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input audio data"
+			"description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
+			"type": "string"
 		},
 		"parameters": {
 			"description": "Additional inference parameters",
diff --git a/packages/tasks/src/tasks/common-definitions.json b/packages/tasks/src/tasks/common-definitions.json
index f78d3d9e4..744893e4d 100644
--- a/packages/tasks/src/tasks/common-definitions.json
+++ b/packages/tasks/src/tasks/common-definitions.json
@@ -7,17 +7,7 @@
 			"title": "ClassificationOutputTransform",
 			"type": "string",
 			"description": "The function to apply to the model outputs in order to retrieve the scores.",
-			"oneOf": [
-				{
-					"const": "sigmoid"
-				},
-				{
-					"const": "softmax"
-				},
-				{
-					"const": "none"
-				}
-			]
+			"enum": ["sigmoid", "softmax", "none"]
 		},
 		"ClassificationOutput": {
 			"title": "ClassificationOutput",
@@ -84,16 +74,9 @@
 					"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
 				},
 				"early_stopping": {
+					"type": ["boolean", "string"],
 					"description": "Controls the stopping condition for beam-based methods.",
-					"oneOf": [
-						{
-							"type": "boolean"
-						},
-						{
-							"const": "never",
-							"type": "string"
-						}
-					]
+					"enum": ["never", true, false]
 				},
 				"num_beams": {
 					"type": "integer",
diff --git a/packages/tasks/src/tasks/image-classification/inference.ts b/packages/tasks/src/tasks/image-classification/inference.ts
index e0689d887..bd56a7d31 100644
--- a/packages/tasks/src/tasks/image-classification/inference.ts
+++ b/packages/tasks/src/tasks/image-classification/inference.ts
@@ -8,9 +8,10 @@
  */
 export interface ImageClassificationInput {
 	/**
-	 * The input image data
+	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
+	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: unknown;
+	inputs: string;
 	/**
 	 * Additional inference parameters
 	 */
diff --git a/packages/tasks/src/tasks/image-classification/spec/input.json b/packages/tasks/src/tasks/image-classification/spec/input.json
index a8cd4273c..cf0b30ec5 100644
--- a/packages/tasks/src/tasks/image-classification/spec/input.json
+++ b/packages/tasks/src/tasks/image-classification/spec/input.json
@@ -6,7 +6,8 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input image data"
+			"type": "string",
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
 		},
 		"parameters": {
 			"description": "Additional inference parameters",
diff --git a/packages/tasks/src/tasks/image-classification/spec/output.json b/packages/tasks/src/tasks/image-classification/spec/output.json
index 2a3264bce..3ababaf63 100644
--- a/packages/tasks/src/tasks/image-classification/spec/output.json
+++ b/packages/tasks/src/tasks/image-classification/spec/output.json
@@ -5,6 +5,7 @@
 	"title": "ImageClassificationOutput",
 	"type": "array",
 	"items": {
+		"type": "object",
 		"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
 	}
 }
diff --git a/packages/tasks/src/tasks/image-segmentation/inference.ts b/packages/tasks/src/tasks/image-segmentation/inference.ts
index 02db5cb90..4ccd36e41 100644
--- a/packages/tasks/src/tasks/image-segmentation/inference.ts
+++ b/packages/tasks/src/tasks/image-segmentation/inference.ts
@@ -8,9 +8,10 @@
  */
 export interface ImageSegmentationInput {
 	/**
-	 * The input image data
+	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
+	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: unknown;
+	inputs: string;
 	/**
 	 * Additional inference parameters
 	 */
@@ -41,6 +42,9 @@ export interface ImageSegmentationParameters {
 	threshold?: number;
 	[property: string]: unknown;
 }
+/**
+ * Segmentation task to be performed, depending on model capabilities.
+ */
 export type ImageSegmentationSubtask = "instance" | "panoptic" | "semantic";
 export type ImageSegmentationOutput = ImageSegmentationOutputElement[];
 /**
@@ -50,15 +54,15 @@ export type ImageSegmentationOutput = ImageSegmentationOutputElement[];
  */
 export interface ImageSegmentationOutputElement {
 	/**
-	 * The label of the predicted segment
+	 * The label of the predicted segment.
 	 */
 	label: string;
 	/**
-	 * The corresponding mask as a black-and-white image
+	 * The corresponding mask as a black-and-white image (base64-encoded).
 	 */
-	mask: unknown;
+	mask: string;
 	/**
-	 * The score or confidence degreee the model has
+	 * The score or confidence degree the model has.
 	 */
 	score?: number;
 	[property: string]: unknown;
diff --git a/packages/tasks/src/tasks/image-segmentation/spec/input.json b/packages/tasks/src/tasks/image-segmentation/spec/input.json
index 500793554..697f8959b 100644
--- a/packages/tasks/src/tasks/image-segmentation/spec/input.json
+++ b/packages/tasks/src/tasks/image-segmentation/spec/input.json
@@ -6,7 +6,8 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input image data"
+			"type": "string",
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
 		},
 		"parameters": {
 			"description": "Additional inference parameters",
@@ -31,17 +32,7 @@
 					"title": "ImageSegmentationSubtask",
 					"type": "string",
 					"description": "Segmentation task to be performed, depending on model capabilities.",
-					"oneOf": [
-						{
-							"const": "instance"
-						},
-						{
-							"const": "panoptic"
-						},
-						{
-							"const": "semantic"
-						}
-					]
+					"enum": ["instance", "panoptic", "semantic"]
 				},
 				"threshold": {
 					"type": "number",
diff --git a/packages/tasks/src/tasks/image-segmentation/spec/output.json b/packages/tasks/src/tasks/image-segmentation/spec/output.json
index b20aa415e..6fa5b0d8e 100644
--- a/packages/tasks/src/tasks/image-segmentation/spec/output.json
+++ b/packages/tasks/src/tasks/image-segmentation/spec/output.json
@@ -10,14 +10,15 @@
 		"properties": {
 			"label": {
 				"type": "string",
-				"description": "The label of the predicted segment"
+				"description": "The label of the predicted segment."
 			},
 			"mask": {
-				"description": "The corresponding mask as a black-and-white image"
+				"type": "string",
+				"description": "The corresponding mask as a black-and-white image (base64-encoded)."
 			},
 			"score": {
 				"type": "number",
-				"description": "The score or confidence degreee the model has"
+				"description": "The score or confidence degree the model has."
 			}
 		},
 		"required": ["label", "mask"]
diff --git a/packages/tasks/src/tasks/image-to-image/inference.ts b/packages/tasks/src/tasks/image-to-image/inference.ts
index bf732e070..8ba34b5ff 100644
--- a/packages/tasks/src/tasks/image-to-image/inference.ts
+++ b/packages/tasks/src/tasks/image-to-image/inference.ts
@@ -9,9 +9,10 @@
  */
 export interface ImageToImageInput {
 	/**
-	 * The input image data
+	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
+	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: unknown;
+	inputs: string;
 	/**
 	 * Additional inference parameters
 	 */
@@ -40,14 +41,14 @@ export interface ImageToImageParameters {
 	 */
 	num_inference_steps?: number;
 	/**
-	 * The size in pixel of the output image
+	 * The size in pixel of the output image.
 	 */
 	target_size?: TargetSize;
 	[property: string]: unknown;
 }
 
 /**
- * The size in pixel of the output image
+ * The size in pixel of the output image.
  */
 export interface TargetSize {
 	height: number;
@@ -60,7 +61,7 @@ export interface TargetSize {
  */
 export interface ImageToImageOutput {
 	/**
-	 * The output image
+	 * The output image returned as raw bytes in the payload.
 	 */
 	image?: unknown;
 	[property: string]: unknown;
diff --git a/packages/tasks/src/tasks/image-to-image/spec/input.json b/packages/tasks/src/tasks/image-to-image/spec/input.json
index 873e1f20d..23695c6b1 100644
--- a/packages/tasks/src/tasks/image-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/image-to-image/spec/input.json
@@ -6,7 +6,8 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input image data"
+			"type": "string",
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
 		},
 		"parameters": {
 			"description": "Additional inference parameters",
@@ -36,7 +37,7 @@
 				},
 				"target_size": {
 					"type": "object",
-					"description": "The size in pixel of the output image",
+					"description": "The size in pixel of the output image.",
 					"properties": {
 						"width": {
 							"type": "integer"
diff --git a/packages/tasks/src/tasks/image-to-image/spec/output.json b/packages/tasks/src/tasks/image-to-image/spec/output.json
index af4eff804..043544e75 100644
--- a/packages/tasks/src/tasks/image-to-image/spec/output.json
+++ b/packages/tasks/src/tasks/image-to-image/spec/output.json
@@ -6,7 +6,7 @@
 	"type": "object",
 	"properties": {
 		"image": {
-			"description": "The output image"
+			"description": "The output image returned as raw bytes in the payload."
 		}
 	}
 }
diff --git a/packages/tasks/src/tasks/index.ts b/packages/tasks/src/tasks/index.ts
index 7e17e9373..a72bb9c88 100644
--- a/packages/tasks/src/tasks/index.ts
+++ b/packages/tasks/src/tasks/index.ts
@@ -73,12 +73,13 @@ export type * from "./table-question-answering/inference";
 export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference";
 export type { TextToAudioParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
 export type * from "./token-classification/inference";
+// The parameter types are re-exported too, so consumers can name the type of `TranslationInput.parameters`.
 export type {
-	Text2TextGenerationParameters,
-	Text2TextGenerationTruncationStrategy,
 	TranslationInput,
 	TranslationOutput,
+	TranslationParameters,
+	TranslationTruncationStrategy,
 } from "./translation/inference";
 export type {
 	ClassificationOutputTransform,
 	TextClassificationInput,
diff --git a/packages/tasks/src/tasks/object-detection/inference.ts b/packages/tasks/src/tasks/object-detection/inference.ts
index d1765ad00..d117dcb0b 100644
--- a/packages/tasks/src/tasks/object-detection/inference.ts
+++ b/packages/tasks/src/tasks/object-detection/inference.ts
@@ -8,9 +8,10 @@
  */
 export interface ObjectDetectionInput {
 	/**
-	 * The input image data
+	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
+	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: unknown;
+	inputs: string;
 	/**
 	 * Additional inference parameters
 	 */
@@ -34,9 +35,21 @@ export interface ObjectDetectionParameters {
  * image.
  */
 export interface BoundingBox {
+	/**
+	 * The x-coordinate of the bottom-right corner of the bounding box.
+	 */
 	xmax: number;
+	/**
+	 * The x-coordinate of the top-left corner of the bounding box.
+	 */
 	xmin: number;
+	/**
+	 * The y-coordinate of the bottom-right corner of the bounding box.
+	 */
 	ymax: number;
+	/**
+	 * The y-coordinate of the top-left corner of the bounding box.
+	 */
 	ymin: number;
 	[property: string]: unknown;
 }
@@ -51,11 +64,11 @@ export interface ObjectDetectionOutputElement {
 	 */
 	box: BoundingBox;
 	/**
-	 * The predicted label for the bounding box
+	 * The predicted label for the bounding box.
 	 */
 	label: string;
 	/**
-	 * The associated score / probability
+	 * The associated score / probability.
 	 */
 	score: number;
 	[property: string]: unknown;
diff --git a/packages/tasks/src/tasks/object-detection/spec/input.json b/packages/tasks/src/tasks/object-detection/spec/input.json
index b694f2fa5..d00deefec 100644
--- a/packages/tasks/src/tasks/object-detection/spec/input.json
+++ b/packages/tasks/src/tasks/object-detection/spec/input.json
@@ -6,7 +6,8 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input image data"
+			"type": "string",
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
 		},
 		"parameters": {
 			"description": "Additional inference parameters",
diff --git a/packages/tasks/src/tasks/object-detection/spec/output.json b/packages/tasks/src/tasks/object-detection/spec/output.json
index 20c92d5d3..8d91f1078 100644
--- a/packages/tasks/src/tasks/object-detection/spec/output.json
+++ b/packages/tasks/src/tasks/object-detection/spec/output.json
@@ -9,11 +9,11 @@
 		"properties": {
 			"label": {
 				"type": "string",
-				"description": "The predicted label for the bounding box"
+				"description": "The predicted label for the bounding box."
 			},
 			"score": {
 				"type": "number",
-				"description": "The associated score / probability"
+				"description": "The associated score / probability."
 			},
 			"box": {
 				"$ref": "#/$defs/BoundingBox",
@@ -28,16 +28,20 @@
 			"title": "BoundingBox",
 			"properties": {
 				"xmin": {
-					"type": "integer"
+					"type": "integer",
+					"description": "The x-coordinate of the top-left corner of the bounding box."
 				},
 				"xmax": {
-					"type": "integer"
+					"type": "integer",
+					"description": "The x-coordinate of the bottom-right corner of the bounding box."
 				},
 				"ymin": {
-					"type": "integer"
+					"type": "integer",
+					"description": "The y-coordinate of the top-left corner of the bounding box."
 				},
 				"ymax": {
-					"type": "integer"
+					"type": "integer",
+					"description": "The y-coordinate of the bottom-right corner of the bounding box."
 				}
 			},
 			"required": ["xmin", "xmax", "ymin", "ymax"]
diff --git a/packages/tasks/src/tasks/summarization/inference.ts b/packages/tasks/src/tasks/summarization/inference.ts
index 2b674184f..a08e25230 100644
--- a/packages/tasks/src/tasks/summarization/inference.ts
+++ b/packages/tasks/src/tasks/summarization/inference.ts
@@ -6,43 +6,44 @@
 
 /**
  * Inputs for Summarization inference
- *
- * Inputs for Text2text Generation inference
  */
 export interface SummarizationInput {
 	/**
-	 * The input text data
+	 * The input text to summarize.
 	 */
 	inputs: string;
 	/**
-	 * Additional inference parameters
+	 * Additional inference parameters.
 	 */
-	parameters?: Text2TextGenerationParameters;
+	parameters?: SummarizationParameters;
 	[property: string]: unknown;
 }
 
 /**
- * Additional inference parameters
+ * Additional inference parameters.
  *
- * Additional inference parameters for Text2text Generation
+ * Additional inference parameters for summarization.
  */
-export interface Text2TextGenerationParameters {
+export interface SummarizationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
 	clean_up_tokenization_spaces?: boolean;
 	/**
-	 * Additional parametrization of the text generation algorithm
+	 * Additional parametrization of the text generation algorithm.
 	 */
 	generate_parameters?: { [key: string]: unknown };
 	/**
-	 * The truncation strategy to use
+	 * The truncation strategy to use.
 	 */
-	truncation?: Text2TextGenerationTruncationStrategy;
+	truncation?: SummarizationTruncationStrategy;
 	[property: string]: unknown;
 }
 
-export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
+/**
+ * The truncation strategy to use.
+ */
+export type SummarizationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
 
 /**
  * Outputs of inference for the Summarization task
diff --git a/packages/tasks/src/tasks/summarization/spec/input.json b/packages/tasks/src/tasks/summarization/spec/input.json
index 629da31ea..d33152857 100644
--- a/packages/tasks/src/tasks/summarization/spec/input.json
+++ b/packages/tasks/src/tasks/summarization/spec/input.json
@@ -1,7 +1,42 @@
 {
-	"$ref": "/inference/schemas/text2text-generation/input.json",
 	"$id": "/inference/schemas/summarization/input.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Summarization inference",
 	"title": "SummarizationInput",
-	"description": "Inputs for Summarization inference"
+	"type": "object",
+	"properties": {
+		"inputs": {
+			"description": "The input text to summarize.",
+			"type": "string"
+		},
+		"parameters": {
+			"description": "Additional inference parameters.",
+			"$ref": "#/$defs/SummarizationParameters"
+		}
+	},
+	"$defs": {
+		"SummarizationParameters": {
+			"title": "SummarizationParameters",
+			"description": "Additional inference parameters for summarization.",
+			"type": "object",
+			"properties": {
+				"clean_up_tokenization_spaces": {
+					"type": "boolean",
+					"description": "Whether to clean up the potential extra spaces in the text output."
+				},
+				"truncation": {
+					"title": "SummarizationTruncationStrategy",
+					"type": "string",
+					"description": "The truncation strategy to use.",
+					"enum": ["do_not_truncate", "longest_first", "only_first", "only_second"]
+				},
+				"generate_parameters": {
+					"title": "generateParameters",
+					"type": "object",
+					"description": "Additional parametrization of the text generation algorithm."
+				}
+			}
+		}
+	},
+	"required": ["inputs"]
 }
diff --git a/packages/tasks/src/tasks/text-classification/spec/output.json b/packages/tasks/src/tasks/text-classification/spec/output.json
index 704b82225..2bf3def35 100644
--- a/packages/tasks/src/tasks/text-classification/spec/output.json
+++ b/packages/tasks/src/tasks/text-classification/spec/output.json
@@ -5,6 +5,7 @@
 	"title": "TextClassificationOutput",
 	"type": "array",
 	"items": {
+		"type": "object",
 		"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
 	}
 }
diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts
index 4997165b8..b2e735746 100644
--- a/packages/tasks/src/tasks/text-to-image/inference.ts
+++ b/packages/tasks/src/tasks/text-to-image/inference.ts
@@ -9,7 +9,7 @@
  */
 export interface TextToImageInput {
 	/**
-	 * The input text data (sometimes called "prompt"
+	 * The input text data (sometimes called "prompt")
 	 */
 	inputs: string;
 	/**
@@ -64,7 +64,7 @@ export interface TargetSize {
  */
 export interface TextToImageOutput {
 	/**
-	 * The generated image
+	 * The generated image returned as raw bytes in the payload.
 	 */
 	image: unknown;
 	[property: string]: unknown;
diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json
index 49acc7ed3..467b848f6 100644
--- a/packages/tasks/src/tasks/text-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/text-to-image/spec/input.json
@@ -6,7 +6,7 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input text data (sometimes called \"prompt\"",
+			"description": "The input text data (sometimes called \"prompt\")",
 			"type": "string"
 		},
 		"parameters": {
diff --git a/packages/tasks/src/tasks/text-to-image/spec/output.json b/packages/tasks/src/tasks/text-to-image/spec/output.json
index ff952a3a3..f90a1eee1 100644
--- a/packages/tasks/src/tasks/text-to-image/spec/output.json
+++ b/packages/tasks/src/tasks/text-to-image/spec/output.json
@@ -6,7 +6,7 @@
 	"type": "object",
 	"properties": {
 		"image": {
-			"description": "The generated image"
+			"description": "The generated image returned as raw bytes in the payload."
 		}
 	},
 	"required": ["image"]
diff --git a/packages/tasks/src/tasks/translation/inference.ts b/packages/tasks/src/tasks/translation/inference.ts
index d5ea57eed..a78c7e940 100644
--- a/packages/tasks/src/tasks/translation/inference.ts
+++ b/packages/tasks/src/tasks/translation/inference.ts
@@ -6,43 +6,54 @@
 
 /**
  * Inputs for Translation inference
- *
- * Inputs for Text2text Generation inference
  */
 export interface TranslationInput {
 	/**
-	 * The input text data
+	 * The text to translate.
 	 */
 	inputs: string;
 	/**
 	 * Additional inference parameters
 	 */
-	parameters?: Text2TextGenerationParameters;
+	parameters?: TranslationParameters;
 	[property: string]: unknown;
 }
 
 /**
  * Additional inference parameters
  *
- * Additional inference parameters for Text2text Generation
+ * Additional inference parameters for Translation
  */
-export interface Text2TextGenerationParameters {
+export interface TranslationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
 	clean_up_tokenization_spaces?: boolean;
 	/**
-	 * Additional parametrization of the text generation algorithm
+	 * Additional parametrization of the text generation algorithm.
 	 */
 	generate_parameters?: { [key: string]: unknown };
 	/**
-	 * The truncation strategy to use
+	 * The source language of the text. Required for models that can translate from multiple
+	 * languages.
+	 */
+	src_lang?: string;
+	/**
+	 * Target language to translate to. Required for models that can translate to multiple
+	 * languages.
 	 */
-	truncation?: Text2TextGenerationTruncationStrategy;
+	tgt_lang?: string;
+	/**
+	 * The truncation strategy to use.
+	 */
+	truncation?: TranslationTruncationStrategy;
 	[property: string]: unknown;
 }
 
-export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
+/**
+ * The truncation strategy to use.
+ */
+export type TranslationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
 
 /**
  * Outputs of inference for the Translation task
diff --git a/packages/tasks/src/tasks/translation/spec/input.json b/packages/tasks/src/tasks/translation/spec/input.json
index 0695bc672..0c2d196cf 100644
--- a/packages/tasks/src/tasks/translation/spec/input.json
+++ b/packages/tasks/src/tasks/translation/spec/input.json
@@ -1,7 +1,50 @@
 {
-	"$ref": "/inference/schemas/text2text-generation/input.json",
 	"$id": "/inference/schemas/translation/input.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Translation inference",
 	"title": "TranslationInput",
-	"description": "Inputs for Translation inference"
+	"type": "object",
+	"properties": {
+		"inputs": {
+			"description": "The text to translate.",
+			"type": "string"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/TranslationParameters"
+		}
+	},
+	"$defs": {
+		"TranslationParameters": {
+			"title": "TranslationParameters",
+			"description": "Additional inference parameters for Translation",
+			"type": "object",
+			"properties": {
+				"src_lang": {
+					"type": "string",
+					"description": "The source language of the text. Required for models that can translate from multiple languages."
+				},
+				"tgt_lang": {
+					"type": "string",
+					"description": "Target language to translate to. Required for models that can translate to multiple languages."
+				},
+				"clean_up_tokenization_spaces": {
+					"type": "boolean",
+					"description": "Whether to clean up the potential extra spaces in the text output."
+				},
+				"truncation": {
+					"title": "TranslationTruncationStrategy",
+					"type": "string",
+					"description": "The truncation strategy to use.",
+					"enum": ["do_not_truncate", "longest_first", "only_first", "only_second"]
+				},
+				"generate_parameters": {
+					"title": "generateParameters",
+					"type": "object",
+					"description": "Additional parametrization of the text generation algorithm."
+				}
+			}
+		}
+	},
+	"required": ["inputs"]
 }
diff --git a/packages/tasks/src/tasks/zero-shot-classification/spec/output.json b/packages/tasks/src/tasks/zero-shot-classification/spec/output.json
index 83ed1098f..1b5ac0cc3 100644
--- a/packages/tasks/src/tasks/zero-shot-classification/spec/output.json
+++ b/packages/tasks/src/tasks/zero-shot-classification/spec/output.json
@@ -5,6 +5,7 @@
 	"title": "ZeroShotClassificationOutput",
 	"type": "array",
 	"items": {
+		"type": "object",
 		"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
 	}
 }