From d57fc815a5f0610cacc30d0039b474eeac5b0208 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Thu, 15 Feb 2024 16:47:56 +0100 Subject: [PATCH] Simpler support for new model libraries (#482) close this [internal issue](https://github.com/huggingface/moon-landing/issues/8791) (ignore the unrelated README changes) --- README.md | 81 ++-- packages/hub/README.md | 2 +- packages/tasks/README.md | 2 +- packages/tasks/src/index.ts | 7 +- packages/tasks/src/library-to-tasks.ts | 2 +- ...lements.ts => model-libraries-snippets.ts} | 334 ++-------------- packages/tasks/src/model-libraries.ts | 369 +++++++++++++++--- 7 files changed, 420 insertions(+), 377 deletions(-) rename packages/tasks/src/{library-ui-elements.ts => model-libraries-snippets.ts} (61%) diff --git a/README.md b/README.md index 7af69a57f..bc30a6916 100644 --- a/README.md +++ b/README.md @@ -10,20 +10,30 @@

```ts +// Programmatically interact with the Hub + +await createRepo({ + repo: {type: "model", name: "my-user/nlp-model"}, + credentials: {accessToken: HF_TOKEN} +}); + +await uploadFile({ + repo: "my-user/nlp-model", + credentials: {accessToken: HF_TOKEN}, + // Can work with native File in browsers + file: { + path: "pytorch_model.bin", + content: new Blob(...) + } +}); + +// Use hosted inference + await inference.translation({ model: 't5-base', inputs: 'My name is Wolfgang and I live in Berlin' }) -await hf.translation({ - model: "facebook/nllb-200-distilled-600M", - inputs: "how is the weather like in Gaborone", - parameters : { - src_lang: "eng_Latn", - tgt_lang: "sot_Latn" - } -}) - await inference.textToImage({ model: 'stabilityai/stable-diffusion-2', inputs: 'award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]', @@ -31,13 +41,15 @@ await inference.textToImage({ negative_prompt: 'blurry', } }) + +// and much more… ``` # Hugging Face JS libraries This is a collection of JS libraries to interact with the Hugging Face API, with TS types included. 
-- [@huggingface/inference](packages/inference/README.md): Use Inference Endpoints (serverless) to make calls to 100,000+ Machine Learning models +- [@huggingface/inference](packages/inference/README.md): Use Inference Endpoints (serverless or dedicated) to make calls to 100,000+ Machine Learning models - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface @@ -130,30 +142,6 @@ await inference.imageToText({ const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2'); const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'}); ``` -### @huggingface/agents example - -```ts -import {HfAgent, LLMFromHub, defaultTools} from '@huggingface/agents'; - -const HF_TOKEN = "hf_..."; - -const agent = new HfAgent( - HF_TOKEN, - LLMFromHub(HF_TOKEN), - [...defaultTools] -); - - -// you can generate the code, inspect it and then run it -const code = await agent.generateCode("Draw a picture of a cat wearing a top hat. Then caption the picture and read it out loud."); -console.log(code); -const messages = await agent.evaluateCode(code) -console.log(messages); // contains the data - -// or you can run the code directly, however you can't check that the code is safe to execute this way, use at your own risk. -const messages = await agent.run("Draw a picture of a cat wearing a top hat. 
Then caption the picture and read it out loud.") -console.log(messages); -``` ### @huggingface/hub examples @@ -184,6 +172,31 @@ await deleteFiles({ }); ``` +### @huggingface/agents example + +```ts +import {HfAgent, LLMFromHub, defaultTools} from '@huggingface/agents'; + +const HF_TOKEN = "hf_..."; + +const agent = new HfAgent( + HF_TOKEN, + LLMFromHub(HF_TOKEN), + [...defaultTools] +); + + +// you can generate the code, inspect it and then run it +const code = await agent.generateCode("Draw a picture of a cat wearing a top hat. Then caption the picture and read it out loud."); +console.log(code); +const messages = await agent.evaluateCode(code) +console.log(messages); // contains the data + +// or you can run the code directly, however you can't check that the code is safe to execute this way, use at your own risk. +const messages = await agent.run("Draw a picture of a cat wearing a top hat. Then caption the picture and read it out loud.") +console.log(messages); +``` + There are more features of course, check each library's README! diff --git a/packages/hub/README.md b/packages/hub/README.md index 6a5cb3f4e..9ce14e2ba 100644 --- a/packages/hub/README.md +++ b/packages/hub/README.md @@ -1,6 +1,6 @@ # 🤗 Hugging Face Hub API -Official utilities to use the Hugging Face hub API, still very experimental. +Official utilities to use the Hugging Face Hub API. ## Install diff --git a/packages/tasks/README.md b/packages/tasks/README.md index 3c5639b5d..91724b29a 100644 --- a/packages/tasks/README.md +++ b/packages/tasks/README.md @@ -25,7 +25,7 @@ This package contains the definition files (written in Typescript) for the huggi - **pipeline types** a.k.a. **task types** (used to determine which widget to display on the model page, and which inference API to run) - **default widget inputs** (when they aren't provided in the model card) -- definitions and UI elements for **third party libraries**. 
+- definitions and UI elements for **model libraries** (and soon for **dataset libraries**). Please add to any of those definitions by opening a PR. Thanks 🔥 diff --git a/packages/tasks/src/index.ts b/packages/tasks/src/index.ts index d4173be09..e5cd667a6 100644 --- a/packages/tasks/src/index.ts +++ b/packages/tasks/src/index.ts @@ -1,5 +1,4 @@ export { LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS } from "./library-to-tasks"; -export { MODEL_LIBRARIES_UI_ELEMENTS } from "./library-ui-elements"; export { MAPPING_DEFAULT_WIDGET } from "./default-widget-inputs"; export type { TaskData, TaskDemo, TaskDemoEntry, ExampleRepo } from "./tasks"; export * from "./tasks"; @@ -14,8 +13,8 @@ export { SUBTASK_TYPES, PIPELINE_TYPES_SET, } from "./pipelines"; -export { ModelLibrary, ALL_DISPLAY_MODEL_LIBRARY_KEYS } from "./model-libraries"; -export type { ModelLibraryKey } from "./model-libraries"; +export { ALL_DISPLAY_MODEL_LIBRARY_KEYS, ALL_MODEL_LIBRARY_KEYS, MODEL_LIBRARIES_UI_ELEMENTS } from "./model-libraries"; +export type { LibraryUiElement, ModelLibraryKey } from "./model-libraries"; export type { ModelData, TransformersInfo } from "./model-data"; export type { WidgetExample, @@ -41,5 +40,3 @@ export { InferenceDisplayability } from "./model-data"; import * as snippets from "./snippets"; export { snippets }; - -export type { LibraryUiElement } from "./library-ui-elements"; diff --git a/packages/tasks/src/library-to-tasks.ts b/packages/tasks/src/library-to-tasks.ts index b7ddcc381..3430f30ba 100644 --- a/packages/tasks/src/library-to-tasks.ts +++ b/packages/tasks/src/library-to-tasks.ts @@ -3,7 +3,7 @@ import type { PipelineType } from "./pipelines"; /** * Mapping from library name (excluding Transformers) to its supported tasks. - * Inference Endpoints (serverless) should be disabled for all other (library, task) pairs beyond this mapping. + * Inference API (serverless) should be disabled for all other (library, task) pairs beyond this mapping. 
* As an exception, we assume Transformers supports all inference tasks. * This mapping is generated automatically by "python-api-export-tasks" action in huggingface/api-inference-community repo upon merge. * Ref: https://github.com/huggingface/api-inference-community/pull/158 diff --git a/packages/tasks/src/library-ui-elements.ts b/packages/tasks/src/model-libraries-snippets.ts similarity index 61% rename from packages/tasks/src/library-ui-elements.ts rename to packages/tasks/src/model-libraries-snippets.ts index ae8571a9e..242119e03 100644 --- a/packages/tasks/src/library-ui-elements.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -1,35 +1,7 @@ import type { ModelData } from "./model-data"; -import type { ModelLibraryKey } from "./model-libraries"; const TAG_CUSTOM_CODE = "custom_code"; -/** - * Elements configurable by a model library. - */ -export interface LibraryUiElement { - /** - * Name displayed on the main - * call-to-action button on the model page. - */ - btnLabel: string; - /** - * Repo name - */ - repoName: string; - /** - * URL to library's repo - */ - repoUrl: string; - /** - * URL to library's docs - */ - docsUrl?: string; - /** - * Code snippet displayed on model page - */ - snippets: (model: ModelData) => string[]; -} - function nameWithoutNamespace(modelId: string): string { const splitted = modelId.split("/"); return splitted.length === 1 ? 
splitted[0] : splitted[1]; @@ -37,7 +9,7 @@ function nameWithoutNamespace(modelId: string): string { //#region snippets -const adapters = (model: ModelData) => [ +export const adapters = (model: ModelData): string[] => [ `from adapters import AutoAdapterModel model = AutoAdapterModel.from_pretrained("${model.config?.adapter_transformers?.model_name}") @@ -60,14 +32,14 @@ predictor_input = {"passage": "My name is Wolfgang and I live in Berlin", "quest predictions = predictor.predict_json(predictor_input)`, ]; -const allennlp = (model: ModelData) => { +export const allennlp = (model: ModelData): string[] => { if (model.tags?.includes("question-answering")) { return allennlpQuestionAnswering(model); } return allennlpUnknown(model); }; -const asteroid = (model: ModelData) => [ +export const asteroid = (model: ModelData): string[] => [ `from asteroid.models import BaseModel model = BaseModel.from_pretrained("${model.id}")`, @@ -77,7 +49,7 @@ function get_base_diffusers_model(model: ModelData): string { return model.cardData?.base_model?.toString() ?? 
"fill-in-base-model"; } -const bertopic = (model: ModelData) => [ +export const bertopic = (model: ModelData): string[] => [ `from bertopic import BERTopic model = BERTopic.load("${model.id}")`, @@ -112,7 +84,7 @@ pipeline = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)} pipeline.load_textual_inversion("${model.id}")`, ]; -const diffusers = (model: ModelData) => { +export const diffusers = (model: ModelData): string[] => { if (model.tags?.includes("controlnet")) { return diffusers_controlnet(model); } else if (model.tags?.includes("lora")) { @@ -124,7 +96,7 @@ const diffusers = (model: ModelData) => { } }; -const espnetTTS = (model: ModelData) => [ +export const espnetTTS = (model: ModelData): string[] => [ `from espnet2.bin.tts_inference import Text2Speech model = Text2Speech.from_pretrained("${model.id}") @@ -132,7 +104,7 @@ model = Text2Speech.from_pretrained("${model.id}") speech, *_ = model("text to generate speech from")`, ]; -const espnetASR = (model: ModelData) => [ +export const espnetASR = (model: ModelData): string[] => [ `from espnet2.bin.asr_inference import Speech2Text model = Speech2Text.from_pretrained( @@ -145,7 +117,7 @@ text, *_ = model(speech)[0]`, const espnetUnknown = () => [`unknown model type (must be text-to-speech or automatic-speech-recognition)`]; -const espnet = (model: ModelData) => { +export const espnet = (model: ModelData): string[] => { if (model.tags?.includes("text-to-speech")) { return espnetTTS(model); } else if (model.tags?.includes("automatic-speech-recognition")) { @@ -154,7 +126,7 @@ const espnet = (model: ModelData) => { return espnetUnknown(); }; -const fairseq = (model: ModelData) => [ +export const fairseq = (model: ModelData): string[] => [ `from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub models, cfg, task = load_model_ensemble_and_task_from_hf_hub( @@ -162,27 +134,27 @@ models, cfg, task = load_model_ensemble_and_task_from_hf_hub( )`, ]; -const flair = (model: 
ModelData) => [ +export const flair = (model: ModelData): string[] => [ `from flair.models import SequenceTagger tagger = SequenceTagger.load("${model.id}")`, ]; -const keras = (model: ModelData) => [ +export const keras = (model: ModelData): string[] => [ `from huggingface_hub import from_pretrained_keras model = from_pretrained_keras("${model.id}") `, ]; -const open_clip = (model: ModelData) => [ +export const open_clip = (model: ModelData): string[] => [ `import open_clip model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms('hf-hub:${model.id}') tokenizer = open_clip.get_tokenizer('hf-hub:${model.id}')`, ]; -const paddlenlp = (model: ModelData) => { +export const paddlenlp = (model: ModelData): string[] => { if (model.config?.architectures?.[0]) { const architecture = model.config.architectures[0]; return [ @@ -206,7 +178,7 @@ const paddlenlp = (model: ModelData) => { } }; -const pyannote_audio_pipeline = (model: ModelData) => [ +export const pyannote_audio_pipeline = (model: ModelData): string[] => [ `from pyannote.audio import Pipeline pipeline = Pipeline.from_pretrained("${model.id}") @@ -223,7 +195,7 @@ waveform, sample_rate = Audio().crop("file.wav", excerpt) pipeline({"waveform": waveform, "sample_rate": sample_rate})`, ]; -const pyannote_audio_model = (model: ModelData) => [ +const pyannote_audio_model = (model: ModelData): string[] => [ `from pyannote.audio import Model, Inference model = Model.from_pretrained("${model.id}") @@ -238,14 +210,14 @@ excerpt = Segment(start=2.0, end=5.0) inference.crop("file.wav", excerpt)`, ]; -const pyannote_audio = (model: ModelData) => { +export const pyannote_audio = (model: ModelData): string[] => { if (model.tags?.includes("pyannote-audio-pipeline")) { return pyannote_audio_pipeline(model); } return pyannote_audio_model(model); }; -const tensorflowttsTextToMel = (model: ModelData) => [ +const tensorflowttsTextToMel = (model: ModelData): string[] => [ `from tensorflow_tts.inference import 
AutoProcessor, TFAutoModel processor = AutoProcessor.from_pretrained("${model.id}") @@ -253,7 +225,7 @@ model = TFAutoModel.from_pretrained("${model.id}") `, ]; -const tensorflowttsMelToWav = (model: ModelData) => [ +const tensorflowttsMelToWav = (model: ModelData): string[] => [ `from tensorflow_tts.inference import TFAutoModel model = TFAutoModel.from_pretrained("${model.id}") @@ -261,14 +233,14 @@ audios = model.inference(mels) `, ]; -const tensorflowttsUnknown = (model: ModelData) => [ +const tensorflowttsUnknown = (model: ModelData): string[] => [ `from tensorflow_tts.inference import TFAutoModel model = TFAutoModel.from_pretrained("${model.id}") `, ]; -const tensorflowtts = (model: ModelData) => { +export const tensorflowtts = (model: ModelData): string[] => { if (model.tags?.includes("text-to-mel")) { return tensorflowttsTextToMel(model); } else if (model.tags?.includes("mel-to-wav")) { @@ -277,7 +249,7 @@ const tensorflowtts = (model: ModelData) => { return tensorflowttsUnknown(model); }; -const timm = (model: ModelData) => [ +export const timm = (model: ModelData): string[] => [ `import timm model = timm.create_model("hf_hub:${model.id}", pretrained=True)`, @@ -319,7 +291,7 @@ model = joblib.load( ]; }; -const sklearn = (model: ModelData) => { +export const sklearn = (model: ModelData): string[] => { if (model.tags?.includes("skops")) { const skopsmodelFile = model.config?.sklearn?.filename; const skopssaveFormat = model.config?.sklearn?.model_format; @@ -336,29 +308,29 @@ const sklearn = (model: ModelData) => { } }; -const fastai = (model: ModelData) => [ +export const fastai = (model: ModelData): string[] => [ `from huggingface_hub import from_pretrained_fastai learn = from_pretrained_fastai("${model.id}")`, ]; -const sampleFactory = (model: ModelData) => [ +export const sampleFactory = (model: ModelData): string[] => [ `python -m sample_factory.huggingface.load_from_hub -r ${model.id} -d ./train_dir`, ]; -const sentenceTransformers = (model: ModelData) 
=> [ +export const sentenceTransformers = (model: ModelData): string[] => [ `from sentence_transformers import SentenceTransformer model = SentenceTransformer("${model.id}")`, ]; -const setfit = (model: ModelData) => [ +export const setfit = (model: ModelData): string[] => [ `from setfit import SetFitModel model = SetFitModel.from_pretrained("${model.id}")`, ]; -const spacy = (model: ModelData) => [ +export const spacy = (model: ModelData): string[] => [ `!pip install https://huggingface.co/${model.id}/resolve/main/${nameWithoutNamespace(model.id)}-any-py3-none-any.whl # Using spacy.load(). @@ -370,13 +342,13 @@ import ${nameWithoutNamespace(model.id)} nlp = ${nameWithoutNamespace(model.id)}.load()`, ]; -const span_marker = (model: ModelData) => [ +export const span_marker = (model: ModelData): string[] => [ `from span_marker import SpanMarkerModel model = SpanMarkerModel.from_pretrained("${model.id}")`, ]; -const stanza = (model: ModelData) => [ +export const stanza = (model: ModelData): string[] => [ `import stanza stanza.download("${nameWithoutNamespace(model.id).replace("stanza-", "")}") @@ -399,7 +371,7 @@ const speechBrainMethod = (speechbrainInterface: string) => { } }; -const speechbrain = (model: ModelData) => { +export const speechbrain = (model: ModelData): string[] => { const speechbrainInterface = model.config?.speechbrain?.interface; if (speechbrainInterface === undefined) { return [`# interface not specified in config.json`]; @@ -419,7 +391,7 @@ model.${speechbrainMethod}("file.wav")`, ]; }; -const transformers = (model: ModelData) => { +export const transformers = (model: ModelData): string[] => { const info = model.transformersInfo; if (!info) { return [`# ⚠️ Type of model unknown`]; @@ -461,7 +433,7 @@ const transformers = (model: ModelData) => { return [autoSnippet]; }; -const transformersJS = (model: ModelData) => { +export const transformersJS = (model: ModelData): string[] => { if (!model.pipeline_tag) { return [`// ⚠️ Unknown pipeline tag`]; 
} @@ -492,7 +464,7 @@ const peftTask = (peftTaskType?: string) => { } }; -const peft = (model: ModelData) => { +export const peft = (model: ModelData): string[] => { const { base_model_name: peftBaseModel, task_type: peftTaskType } = model.config?.peft ?? {}; const pefttask = peftTask(peftTaskType); if (!pefttask) { @@ -512,14 +484,14 @@ model = PeftModel.from_pretrained(model, "${model.id}")`, ]; }; -const fasttext = (model: ModelData) => [ +export const fasttext = (model: ModelData): string[] => [ `from huggingface_hub import hf_hub_download import fasttext model = fasttext.load_model(hf_hub_download("${model.id}", "model.bin"))`, ]; -const stableBaselines3 = (model: ModelData) => [ +export const stableBaselines3 = (model: ModelData): string[] => [ `from huggingface_sb3 import load_from_hub checkpoint = load_from_hub( repo_id="${model.id}", @@ -541,9 +513,11 @@ transcriptions = asr_model.transcribe(["file.wav"])`, } }; -const mlAgents = (model: ModelData) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`]; +export const mlAgents = (model: ModelData): string[] => [ + `mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`, +]; -const sentis = (/* model: ModelData */) => [ +export const sentis = (/* model: ModelData */): string[] => [ `string modelName = "[Your model name here].sentis"; Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName); IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model); @@ -551,14 +525,14 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model); `, ]; -const mlx = (model: ModelData) => [ +export const mlx = (model: ModelData): string[] => [ `pip install huggingface_hub hf_transfer -export HF_HUB_ENABLE_HF_TRANSFER=1 +export HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download --local-dir ${nameWithoutNamespace(model.id)} ${model.id}`, ]; -const nemo = (model: ModelData) => { +export const nemo = (model: 
ModelData): string[] => { let command: string[] | undefined = undefined; // Resolve the tag to a nemo domain/sub-domain if (model.tags?.includes("automatic-speech-recognition")) { @@ -568,232 +542,10 @@ const nemo = (model: ModelData) => { return command ?? [`# tag did not correspond to a valid NeMo domain.`]; }; -const pythae = (model: ModelData) => [ +export const pythae = (model: ModelData): string[] => [ `from pythae.models import AutoModel model = AutoModel.load_from_hf_hub("${model.id}")`, ]; //#endregion - -export const MODEL_LIBRARIES_UI_ELEMENTS: Partial<Record<ModelLibraryKey, LibraryUiElement>> = { - "adapter-transformers": { - btnLabel: "Adapters", - repoName: "adapters", - repoUrl: "https://github.com/Adapter-Hub/adapters", - docsUrl: "https://huggingface.co/docs/hub/adapters", - snippets: adapters, - }, - allennlp: { - btnLabel: "AllenNLP", - repoName: "AllenNLP", - repoUrl: "https://github.com/allenai/allennlp", - docsUrl: "https://huggingface.co/docs/hub/allennlp", - snippets: allennlp, - }, - asteroid: { - btnLabel: "Asteroid", - repoName: "Asteroid", - repoUrl: "https://github.com/asteroid-team/asteroid", - docsUrl: "https://huggingface.co/docs/hub/asteroid", - snippets: asteroid, - }, - bertopic: { - btnLabel: "BERTopic", - repoName: "BERTopic", - repoUrl: "https://github.com/MaartenGr/BERTopic", - snippets: bertopic, - }, - diffusers: { - btnLabel: "Diffusers", - repoName: "🤗/diffusers", - repoUrl: "https://github.com/huggingface/diffusers", - docsUrl: "https://huggingface.co/docs/hub/diffusers", - snippets: diffusers, - }, - espnet: { - btnLabel: "ESPnet", - repoName: "ESPnet", - repoUrl: "https://github.com/espnet/espnet", - docsUrl: "https://huggingface.co/docs/hub/espnet", - snippets: espnet, - }, - fairseq: { - btnLabel: "Fairseq", - repoName: "fairseq", - repoUrl: "https://github.com/pytorch/fairseq", - snippets: fairseq, - }, - flair: { - btnLabel: "Flair", - repoName: "Flair", - repoUrl: "https://github.com/flairNLP/flair", - docsUrl: "https://huggingface.co/docs/hub/flair", - 
snippets: flair, - }, - keras: { - btnLabel: "Keras", - repoName: "Keras", - repoUrl: "https://github.com/keras-team/keras", - docsUrl: "https://huggingface.co/docs/hub/keras", - snippets: keras, - }, - mlx: { - btnLabel: "MLX", - repoName: "MLX", - repoUrl: "https://github.com/ml-explore/mlx-examples/tree/main", - snippets: mlx, - }, - nemo: { - btnLabel: "NeMo", - repoName: "NeMo", - repoUrl: "https://github.com/NVIDIA/NeMo", - snippets: nemo, - }, - open_clip: { - btnLabel: "OpenCLIP", - repoName: "OpenCLIP", - repoUrl: "https://github.com/mlfoundations/open_clip", - snippets: open_clip, - }, - paddlenlp: { - btnLabel: "paddlenlp", - repoName: "PaddleNLP", - repoUrl: "https://github.com/PaddlePaddle/PaddleNLP", - docsUrl: "https://huggingface.co/docs/hub/paddlenlp", - snippets: paddlenlp, - }, - peft: { - btnLabel: "PEFT", - repoName: "PEFT", - repoUrl: "https://github.com/huggingface/peft", - snippets: peft, - }, - "pyannote-audio": { - btnLabel: "pyannote.audio", - repoName: "pyannote-audio", - repoUrl: "https://github.com/pyannote/pyannote-audio", - snippets: pyannote_audio, - }, - "sentence-transformers": { - btnLabel: "sentence-transformers", - repoName: "sentence-transformers", - repoUrl: "https://github.com/UKPLab/sentence-transformers", - docsUrl: "https://huggingface.co/docs/hub/sentence-transformers", - snippets: sentenceTransformers, - }, - setfit: { - btnLabel: "setfit", - repoName: "setfit", - repoUrl: "https://github.com/huggingface/setfit", - docsUrl: "https://huggingface.co/docs/hub/setfit", - snippets: setfit, - }, - sklearn: { - btnLabel: "Scikit-learn", - repoName: "Scikit-learn", - repoUrl: "https://github.com/scikit-learn/scikit-learn", - snippets: sklearn, - }, - fastai: { - btnLabel: "fastai", - repoName: "fastai", - repoUrl: "https://github.com/fastai/fastai", - docsUrl: "https://huggingface.co/docs/hub/fastai", - snippets: fastai, - }, - spacy: { - btnLabel: "spaCy", - repoName: "spaCy", - repoUrl: "https://github.com/explosion/spaCy", - 
docsUrl: "https://huggingface.co/docs/hub/spacy", - snippets: spacy, - }, - "span-marker": { - btnLabel: "SpanMarker", - repoName: "SpanMarkerNER", - repoUrl: "https://github.com/tomaarsen/SpanMarkerNER", - docsUrl: "https://huggingface.co/docs/hub/span_marker", - snippets: span_marker, - }, - speechbrain: { - btnLabel: "speechbrain", - repoName: "speechbrain", - repoUrl: "https://github.com/speechbrain/speechbrain", - docsUrl: "https://huggingface.co/docs/hub/speechbrain", - snippets: speechbrain, - }, - stanza: { - btnLabel: "Stanza", - repoName: "stanza", - repoUrl: "https://github.com/stanfordnlp/stanza", - docsUrl: "https://huggingface.co/docs/hub/stanza", - snippets: stanza, - }, - tensorflowtts: { - btnLabel: "TensorFlowTTS", - repoName: "TensorFlowTTS", - repoUrl: "https://github.com/TensorSpeech/TensorFlowTTS", - snippets: tensorflowtts, - }, - timm: { - btnLabel: "timm", - repoName: "pytorch-image-models", - repoUrl: "https://github.com/rwightman/pytorch-image-models", - docsUrl: "https://huggingface.co/docs/hub/timm", - snippets: timm, - }, - transformers: { - btnLabel: "Transformers", - repoName: "🤗/transformers", - repoUrl: "https://github.com/huggingface/transformers", - docsUrl: "https://huggingface.co/docs/hub/transformers", - snippets: transformers, - }, - "transformers.js": { - btnLabel: "Transformers.js", - repoName: "transformers.js", - repoUrl: "https://github.com/xenova/transformers.js", - docsUrl: "https://huggingface.co/docs/hub/transformers-js", - snippets: transformersJS, - }, - fasttext: { - btnLabel: "fastText", - repoName: "fastText", - repoUrl: "https://fasttext.cc/", - snippets: fasttext, - }, - "sample-factory": { - btnLabel: "sample-factory", - repoName: "sample-factory", - repoUrl: "https://github.com/alex-petrenko/sample-factory", - docsUrl: "https://huggingface.co/docs/hub/sample-factory", - snippets: sampleFactory, - }, - "stable-baselines3": { - btnLabel: "stable-baselines3", - repoName: "stable-baselines3", - repoUrl: 
"https://github.com/huggingface/huggingface_sb3", - docsUrl: "https://huggingface.co/docs/hub/stable-baselines3", - snippets: stableBaselines3, - }, - "ml-agents": { - btnLabel: "ml-agents", - repoName: "ml-agents", - repoUrl: "https://github.com/Unity-Technologies/ml-agents", - docsUrl: "https://huggingface.co/docs/hub/ml-agents", - snippets: mlAgents, - }, - "unity-sentis": { - btnLabel: "unity-sentis", - repoName: "unity-sentis", - repoUrl: "https://github.com/Unity-Technologies/sentis-samples", - snippets: sentis, - }, - pythae: { - btnLabel: "pythae", - repoName: "pythae", - repoUrl: "https://github.com/clementchadebec/benchmark_VAE", - snippets: pythae, - }, -} as const; diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 59123724a..e12602e1a 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -1,52 +1,333 @@ +import * as snippets from "./model-libraries-snippets"; +import type { ModelData } from "./model-data"; + +/** + * Elements configurable by a model library. + */ +export interface LibraryUiElement { + /** + * Pretty name of the library. + * displayed in tags, and on the main + * call-to-action button on the model page. + */ + prettyLabel: string; + /** + * Repo name of the library's (usually on GitHub) code repo + */ + repoName: string; + /** + * URL to library's (usually on GitHub) code repo + */ + repoUrl: string; + /** + * URL to library's docs + */ + docsUrl?: string; + /** + * Code snippet(s) displayed on model page + */ + snippets?: (model: ModelData) => string[]; + /** + * should we display this library in hf.co/models filter + * (only for popular libraries with > 100 models) + */ + filter?: boolean; +} + /** * Add your new library here. * * This is for modeling (= architectures) libraries, not for file formats (like ONNX, etc). - * File formats live in an enum inside the internal codebase. 
+ * (unlike libraries, file formats live in an enum inside the internal codebase.) + * + * Doc on how to add a library to the Hub: + * + * https://huggingface.co/docs/hub/models-adding-libraries + * + * /!\ IMPORTANT + * + * The key you choose is the tag your models have in their library_name on the Hub. */ -export enum ModelLibrary { - "adapter-transformers" = "Adapters", - "allennlp" = "allenNLP", - "asteroid" = "Asteroid", - "bertopic" = "BERTopic", - "diffusers" = "Diffusers", - "doctr" = "docTR", - "espnet" = "ESPnet", - "fairseq" = "Fairseq", - "flair" = "Flair", - "keras" = "Keras", - "k2" = "K2", - "mlx" = "MLX", - "nemo" = "NeMo", - "open_clip" = "OpenCLIP", - "paddlenlp" = "PaddleNLP", - "peft" = "PEFT", - "pyannote-audio" = "pyannote.audio", - "sample-factory" = "Sample Factory", - "sentence-transformers" = "Sentence Transformers", - "setfit" = "SetFit", - "sklearn" = "Scikit-learn", - "spacy" = "spaCy", - "span-marker" = "SpanMarker", - "speechbrain" = "speechbrain", - "tensorflowtts" = "TensorFlowTTS", - "timm" = "Timm", - "fastai" = "fastai", - "transformers" = "Transformers", - "transformers.js" = "Transformers.js", - "stanza" = "Stanza", - "fasttext" = "fastText", - "stable-baselines3" = "Stable-Baselines3", - "ml-agents" = "Unity ML-Agents", - "pythae" = "Pythae", - "mindspore" = "MindSpore", - "unity-sentis" = "Unity Sentis", -} -export type ModelLibraryKey = keyof typeof ModelLibrary; +export const MODEL_LIBRARIES_UI_ELEMENTS = { + "adapter-transformers": { + prettyLabel: "Adapters", + repoName: "adapters", + repoUrl: "https://github.com/Adapter-Hub/adapters", + docsUrl: "https://huggingface.co/docs/hub/adapters", + snippets: snippets.adapters, + filter: true, + }, + allennlp: { + prettyLabel: "AllenNLP", + repoName: "AllenNLP", + repoUrl: "https://github.com/allenai/allennlp", + docsUrl: "https://huggingface.co/docs/hub/allennlp", + snippets: snippets.allennlp, + filter: true, + }, + asteroid: { + prettyLabel: "Asteroid", + repoName: "Asteroid", 
+ repoUrl: "https://github.com/asteroid-team/asteroid", + docsUrl: "https://huggingface.co/docs/hub/asteroid", + snippets: snippets.asteroid, + filter: true, + }, + bertopic: { + prettyLabel: "BERTopic", + repoName: "BERTopic", + repoUrl: "https://github.com/MaartenGr/BERTopic", + snippets: snippets.bertopic, + filter: true, + }, + diffusers: { + prettyLabel: "Diffusers", + repoName: "🤗/diffusers", + repoUrl: "https://github.com/huggingface/diffusers", + docsUrl: "https://huggingface.co/docs/hub/diffusers", + snippets: snippets.diffusers, + filter: true, + }, + doctr: { + prettyLabel: "docTR", + repoName: "doctr", + repoUrl: "https://github.com/mindee/doctr", + }, + espnet: { + prettyLabel: "ESPnet", + repoName: "ESPnet", + repoUrl: "https://github.com/espnet/espnet", + docsUrl: "https://huggingface.co/docs/hub/espnet", + snippets: snippets.espnet, + filter: true, + }, + fairseq: { + prettyLabel: "Fairseq", + repoName: "fairseq", + repoUrl: "https://github.com/pytorch/fairseq", + snippets: snippets.fairseq, + filter: true, + }, + fastai: { + prettyLabel: "fastai", + repoName: "fastai", + repoUrl: "https://github.com/fastai/fastai", + docsUrl: "https://huggingface.co/docs/hub/fastai", + snippets: snippets.fastai, + filter: true, + }, + fasttext: { + prettyLabel: "fastText", + repoName: "fastText", + repoUrl: "https://fasttext.cc/", + snippets: snippets.fasttext, + filter: true, + }, + flair: { + prettyLabel: "Flair", + repoName: "Flair", + repoUrl: "https://github.com/flairNLP/flair", + docsUrl: "https://huggingface.co/docs/hub/flair", + snippets: snippets.flair, + filter: true, + }, + keras: { + prettyLabel: "Keras", + repoName: "Keras", + repoUrl: "https://github.com/keras-team/keras", + docsUrl: "https://huggingface.co/docs/hub/keras", + snippets: snippets.keras, + filter: true, + }, + k2: { + prettyLabel: "K2", + repoName: "k2", + repoUrl: "https://github.com/k2-fsa/k2", + }, + mindspore: { + prettyLabel: "MindSpore", + repoName: "mindspore", + repoUrl: 
"https://github.com/mindspore-ai/mindspore", + }, + "ml-agents": { + prettyLabel: "ml-agents", + repoName: "ml-agents", + repoUrl: "https://github.com/Unity-Technologies/ml-agents", + docsUrl: "https://huggingface.co/docs/hub/ml-agents", + snippets: snippets.mlAgents, + filter: true, + }, + mlx: { + prettyLabel: "MLX", + repoName: "MLX", + repoUrl: "https://github.com/ml-explore/mlx-examples/tree/main", + snippets: snippets.mlx, + filter: true, + }, + nemo: { + prettyLabel: "NeMo", + repoName: "NeMo", + repoUrl: "https://github.com/NVIDIA/NeMo", + snippets: snippets.nemo, + filter: true, + }, + open_clip: { + prettyLabel: "OpenCLIP", + repoName: "OpenCLIP", + repoUrl: "https://github.com/mlfoundations/open_clip", + snippets: snippets.open_clip, + filter: true, + }, + paddlenlp: { + prettyLabel: "paddlenlp", + repoName: "PaddleNLP", + repoUrl: "https://github.com/PaddlePaddle/PaddleNLP", + docsUrl: "https://huggingface.co/docs/hub/paddlenlp", + snippets: snippets.paddlenlp, + filter: true, + }, + peft: { + prettyLabel: "PEFT", + repoName: "PEFT", + repoUrl: "https://github.com/huggingface/peft", + snippets: snippets.peft, + filter: true, + }, + "pyannote-audio": { + prettyLabel: "pyannote.audio", + repoName: "pyannote-audio", + repoUrl: "https://github.com/pyannote/pyannote-audio", + snippets: snippets.pyannote_audio, + filter: true, + }, + pythae: { + prettyLabel: "pythae", + repoName: "pythae", + repoUrl: "https://github.com/clementchadebec/benchmark_VAE", + snippets: snippets.pythae, + filter: true, + }, + "sample-factory": { + prettyLabel: "sample-factory", + repoName: "sample-factory", + repoUrl: "https://github.com/alex-petrenko/sample-factory", + docsUrl: "https://huggingface.co/docs/hub/sample-factory", + snippets: snippets.sampleFactory, + filter: true, + }, + "sentence-transformers": { + prettyLabel: "sentence-transformers", + repoName: "sentence-transformers", + repoUrl: "https://github.com/UKPLab/sentence-transformers", + docsUrl: 
"https://huggingface.co/docs/hub/sentence-transformers", + snippets: snippets.sentenceTransformers, + filter: true, + }, + setfit: { + prettyLabel: "setfit", + repoName: "setfit", + repoUrl: "https://github.com/huggingface/setfit", + docsUrl: "https://huggingface.co/docs/hub/setfit", + snippets: snippets.setfit, + filter: true, + }, + sklearn: { + prettyLabel: "Scikit-learn", + repoName: "Scikit-learn", + repoUrl: "https://github.com/scikit-learn/scikit-learn", + snippets: snippets.sklearn, + filter: true, + }, + spacy: { + prettyLabel: "spaCy", + repoName: "spaCy", + repoUrl: "https://github.com/explosion/spaCy", + docsUrl: "https://huggingface.co/docs/hub/spacy", + snippets: snippets.spacy, + filter: true, + }, + "span-marker": { + prettyLabel: "SpanMarker", + repoName: "SpanMarkerNER", + repoUrl: "https://github.com/tomaarsen/SpanMarkerNER", + docsUrl: "https://huggingface.co/docs/hub/span_marker", + snippets: snippets.span_marker, + filter: true, + }, + speechbrain: { + prettyLabel: "speechbrain", + repoName: "speechbrain", + repoUrl: "https://github.com/speechbrain/speechbrain", + docsUrl: "https://huggingface.co/docs/hub/speechbrain", + snippets: snippets.speechbrain, + filter: true, + }, + "stable-baselines3": { + prettyLabel: "stable-baselines3", + repoName: "stable-baselines3", + repoUrl: "https://github.com/huggingface/huggingface_sb3", + docsUrl: "https://huggingface.co/docs/hub/stable-baselines3", + snippets: snippets.stableBaselines3, + filter: true, + }, + stanza: { + prettyLabel: "Stanza", + repoName: "stanza", + repoUrl: "https://github.com/stanfordnlp/stanza", + docsUrl: "https://huggingface.co/docs/hub/stanza", + snippets: snippets.stanza, + filter: true, + }, + tensorflowtts: { + prettyLabel: "TensorFlowTTS", + repoName: "TensorFlowTTS", + repoUrl: "https://github.com/TensorSpeech/TensorFlowTTS", + snippets: snippets.tensorflowtts, + }, + timm: { + prettyLabel: "timm", + repoName: "pytorch-image-models", + repoUrl: 
"https://github.com/rwightman/pytorch-image-models", + docsUrl: "https://huggingface.co/docs/hub/timm", + snippets: snippets.timm, + filter: true, + }, + transformers: { + prettyLabel: "Transformers", + repoName: "🤗/transformers", + repoUrl: "https://github.com/huggingface/transformers", + docsUrl: "https://huggingface.co/docs/hub/transformers", + snippets: snippets.transformers, + filter: true, + }, + "transformers.js": { + prettyLabel: "Transformers.js", + repoName: "transformers.js", + repoUrl: "https://github.com/xenova/transformers.js", + docsUrl: "https://huggingface.co/docs/hub/transformers-js", + snippets: snippets.transformersJS, + filter: true, + }, + "unity-sentis": { + prettyLabel: "unity-sentis", + repoName: "unity-sentis", + repoUrl: "https://github.com/Unity-Technologies/sentis-samples", + snippets: snippets.sentis, + filter: true, + }, +} satisfies Record; + +export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS; -export const ALL_MODEL_LIBRARY_KEYS = Object.keys(ModelLibrary) as ModelLibraryKey[]; +export const ALL_MODEL_LIBRARY_KEYS = Object.keys(MODEL_LIBRARIES_UI_ELEMENTS) as ModelLibraryKey[]; -export const ALL_DISPLAY_MODEL_LIBRARY_KEYS = ALL_MODEL_LIBRARY_KEYS.filter( - (k) => !["doctr", "k2", "mindspore", "tensorflowtts"].includes(k) -); +export const ALL_DISPLAY_MODEL_LIBRARY_KEYS = ( + Object.entries(MODEL_LIBRARIES_UI_ELEMENTS as Record) as [ + ModelLibraryKey, + LibraryUiElement, + ][] +) + // eslint-disable-next-line @typescript-eslint/no-unused-vars + .filter(([_, v]) => v.filter) + .map(([k]) => k);