Skip to content

Commit

Permalink
Merge branch 'main' into add-keypoint-detection
Browse files Browse the repository at this point in the history
  • Loading branch information
merveenoyan authored Aug 31, 2024
2 parents e8551a4 + 94cb7fe commit dbe94fa
Show file tree
Hide file tree
Showing 31 changed files with 249 additions and 107 deletions.
2 changes: 1 addition & 1 deletion packages/tasks/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@huggingface/tasks",
"packageManager": "pnpm@8.10.5",
"version": "0.11.11",
"version": "0.11.12",
"description": "List of ML tasks for huggingface.co/tasks",
"repository": "https://github.com/huggingface/huggingface.js.git",
"publishConfig": {
Expand Down
42 changes: 42 additions & 0 deletions packages/tasks/src/model-libraries-snippets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,48 @@ export const diffusers = (model: ModelData): string[] => {
}
};

/**
 * Builds the Python usage snippets displayed for DiffusionKit models.
 *
 * The pipeline class and the sampling defaults depend on whether the model is tagged `flux`:
 * flux models get `FluxPipeline` with 4 steps and a CFG weight of 0; every other model gets
 * the Stable Diffusion 3 `DiffusionPipeline` with 50 steps and a CFG weight of 5.
 *
 * @param model - Model whose `id` is interpolated into the snippet and whose `tags` select the pipeline.
 * @returns Two snippets, in order: pipeline construction, then image generation.
 */
export const diffusionkit = (model: ModelData): string[] => {
	// Evaluated once; it drives both the pipeline choice and the sampling defaults.
	const isFlux = model.tags.includes("flux");

	// The model id must be emitted as a quoted Python string literal: interpolated bare,
	// `model_version=org/name` is not valid Python.
	const modelVersion = `"${model.id}"`;

	const sd3Snippet = `# Pipeline for Stable Diffusion 3
from diffusionkit.mlx import DiffusionPipeline
pipeline = DiffusionPipeline(
shift=3.0,
use_t5=False,
model_version=${modelVersion},
low_memory_mode=True,
a16=True,
w16=True,
)`;

	const fluxSnippet = `# Pipeline for Flux
from diffusionkit.mlx import FluxPipeline
pipeline = FluxPipeline(
shift=1.0,
model_version=${modelVersion},
low_memory_mode=True,
a16=True,
w16=True,
)`;

	const generateSnippet = `# Image Generation
HEIGHT = 512
WIDTH = 512
NUM_STEPS = ${isFlux ? 4 : 50}
CFG_WEIGHT = ${isFlux ? 0 : 5}
image, _ = pipeline.generate_image(
"a photo of a cat",
cfg_weight=CFG_WEIGHT,
num_steps=NUM_STEPS,
latent_size=(HEIGHT // 8, WIDTH // 8),
)`;

	return [isFlux ? fluxSnippet : sd3Snippet, generateSnippet];
};

export const cartesia_pytorch = (model: ModelData): string[] => [
`# pip install --no-binary :all: cartesia-pytorch
from cartesia_pytorch import ReneLMHeadModel
Expand Down
8 changes: 7 additions & 1 deletion packages/tasks/src/model-libraries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
repoName: "deepforest",
docsUrl: "https://deepforest.readthedocs.io/en/latest/",
repoUrl: "https://github.com/weecology/DeepForest",
countDownloads: `path_extension:"pt"`,
countDownloads: `path_extension:"pt" OR path_extension:"pl"`,
},
"depth-anything-v2": {
prettyLabel: "DepthAnythingV2",
Expand All @@ -181,6 +181,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
filter: true,
/// diffusers has its own more complex "countDownloads" query
},
diffusionkit: {
prettyLabel: "DiffusionKit",
repoName: "DiffusionKit",
repoUrl: "https://github.com/argmaxinc/DiffusionKit",
snippets: snippets.diffusionkit,
},
doctr: {
prettyLabel: "docTR",
repoName: "doctr",
Expand Down
5 changes: 3 additions & 2 deletions packages/tasks/src/tasks/audio-classification/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
*/
export interface AudioClassificationInput {
/**
* The input audio data
* The input audio data as a base64-encoded string. If no `parameters` are provided, you can
* also provide the audio data as a raw bytes payload.
*/
inputs: unknown;
inputs: string;
/**
* Additional inference parameters
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"type": "object",
"properties": {
"inputs": {
"description": "The input audio data"
"description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
"type": "string"
},
"parameters": {
"description": "Additional inference parameters",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"description": "Outputs for Audio Classification inference",
"type": "array",
"items": {
"type": "object",
"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
*/
export interface AutomaticSpeechRecognitionInput {
/**
* The input audio data
* The input audio data as a base64-encoded string. If no `parameters` are provided, you can
* also provide the audio data as a raw bytes payload.
*/
inputs: unknown;
inputs: string;
/**
* Additional inference parameters
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"type": "object",
"properties": {
"inputs": {
"description": "The input audio data"
"description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
"type": "string"
},
"parameters": {
"description": "Additional inference parameters",
Expand Down
23 changes: 3 additions & 20 deletions packages/tasks/src/tasks/common-definitions.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,7 @@
"title": "ClassificationOutputTransform",
"type": "string",
"description": "The function to apply to the model outputs in order to retrieve the scores.",
"oneOf": [
{
"const": "sigmoid"
},
{
"const": "softmax"
},
{
"const": "none"
}
]
"enum": ["sigmoid", "softmax", "none"]
},
"ClassificationOutput": {
"title": "ClassificationOutput",
Expand Down Expand Up @@ -84,16 +74,9 @@
"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
},
"early_stopping": {
"type": ["boolean", "string"],
"description": "Controls the stopping condition for beam-based methods.",
"oneOf": [
{
"type": "boolean"
},
{
"const": "never",
"type": "string"
}
]
"enum": ["never", true, false]
},
"num_beams": {
"type": "integer",
Expand Down
5 changes: 3 additions & 2 deletions packages/tasks/src/tasks/image-classification/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
*/
export interface ImageClassificationInput {
/**
* The input image data
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
* also provide the image data as a raw bytes payload.
*/
inputs: unknown;
inputs: string;
/**
* Additional inference parameters
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"type": "object",
"properties": {
"inputs": {
"description": "The input image data"
"type": "string",
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
},
"parameters": {
"description": "Additional inference parameters",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"title": "ImageClassificationOutput",
"type": "array",
"items": {
"type": "object",
"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
}
}
16 changes: 10 additions & 6 deletions packages/tasks/src/tasks/image-segmentation/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
*/
export interface ImageSegmentationInput {
/**
* The input image data
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
* also provide the image data as a raw bytes payload.
*/
inputs: unknown;
inputs: string;
/**
* Additional inference parameters
*/
Expand Down Expand Up @@ -41,6 +42,9 @@ export interface ImageSegmentationParameters {
threshold?: number;
[property: string]: unknown;
}
/**
* Segmentation task to be performed, depending on model capabilities.
*/
export type ImageSegmentationSubtask = "instance" | "panoptic" | "semantic";
export type ImageSegmentationOutput = ImageSegmentationOutputElement[];
/**
Expand All @@ -50,15 +54,15 @@ export type ImageSegmentationOutput = ImageSegmentationOutputElement[];
*/
export interface ImageSegmentationOutputElement {
/**
* The label of the predicted segment
* The label of the predicted segment.
*/
label: string;
/**
* The corresponding mask as a black-and-white image
* The corresponding mask as a black-and-white image (base64-encoded).
*/
mask: unknown;
mask: string;
/**
* The score or confidence degreee the model has
* The score or confidence degree the model has.
*/
score?: number;
[property: string]: unknown;
Expand Down
15 changes: 3 additions & 12 deletions packages/tasks/src/tasks/image-segmentation/spec/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"type": "object",
"properties": {
"inputs": {
"description": "The input image data"
"type": "string",
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
},
"parameters": {
"description": "Additional inference parameters",
Expand All @@ -31,17 +32,7 @@
"title": "ImageSegmentationSubtask",
"type": "string",
"description": "Segmentation task to be performed, depending on model capabilities.",
"oneOf": [
{
"const": "instance"
},
{
"const": "panoptic"
},
{
"const": "semantic"
}
]
"enum": ["instance", "panoptic", "semantic"]
},
"threshold": {
"type": "number",
Expand Down
7 changes: 4 additions & 3 deletions packages/tasks/src/tasks/image-segmentation/spec/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
"properties": {
"label": {
"type": "string",
"description": "The label of the predicted segment"
"description": "The label of the predicted segment."
},
"mask": {
"description": "The corresponding mask as a black-and-white image"
"type": "string",
"description": "The corresponding mask as a black-and-white image (base64-encoded)."
},
"score": {
"type": "number",
"description": "The score or confidence degreee the model has"
"description": "The score or confidence degree the model has."
}
},
"required": ["label", "mask"]
Expand Down
11 changes: 6 additions & 5 deletions packages/tasks/src/tasks/image-to-image/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
*/
export interface ImageToImageInput {
/**
* The input image data
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
* also provide the image data as a raw bytes payload.
*/
inputs: unknown;
inputs: string;
/**
* Additional inference parameters
*/
Expand Down Expand Up @@ -40,14 +41,14 @@ export interface ImageToImageParameters {
*/
num_inference_steps?: number;
/**
* The size in pixel of the output image
* The size in pixel of the output image.
*/
target_size?: TargetSize;
[property: string]: unknown;
}

/**
* The size in pixel of the output image
* The size in pixel of the output image.
*/
export interface TargetSize {
height: number;
Expand All @@ -60,7 +61,7 @@ export interface TargetSize {
*/
export interface ImageToImageOutput {
/**
* The output image
* The output image returned as raw bytes in the payload.
*/
image?: unknown;
[property: string]: unknown;
Expand Down
5 changes: 3 additions & 2 deletions packages/tasks/src/tasks/image-to-image/spec/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"type": "object",
"properties": {
"inputs": {
"description": "The input image data"
"type": "string",
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
},
"parameters": {
"description": "Additional inference parameters",
Expand Down Expand Up @@ -36,7 +37,7 @@
},
"target_size": {
"type": "object",
"description": "The size in pixel of the output image",
"description": "The size in pixel of the output image.",
"properties": {
"width": {
"type": "integer"
Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/image-to-image/spec/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"type": "object",
"properties": {
"image": {
"description": "The output image"
"description": "The output image returned as raw bytes in the payload."
}
}
}
7 changes: 1 addition & 6 deletions packages/tasks/src/tasks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,7 @@ export type * from "./table-question-answering/inference";
export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference";
export type { TextToAudioParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
export type * from "./token-classification/inference";
export type {
Text2TextGenerationParameters,
Text2TextGenerationTruncationStrategy,
TranslationInput,
TranslationOutput,
} from "./translation/inference";
export type { TranslationInput, TranslationOutput } from "./translation/inference";
export type {
ClassificationOutputTransform,
TextClassificationInput,
Expand Down
Loading

0 comments on commit dbe94fa

Please sign in to comment.