From 9cf62545ef07745d9577b02056ba47ed71c23a79 Mon Sep 17 00:00:00 2001
From: Devin Ha <33089471+DevinTDHa@users.noreply.github.com>
Date: Fri, 24 May 2024 10:26:52 +0200
Subject: [PATCH 1/3] SPARKNLP-1036: Onnx Example notebooks (#14234)
* SPARKNLP-1036: Fix dev python kernel names
* SPARKNLP-1036: Bump transformers version
* SPARKNLP-1036: Fix Colab buttons
* SPARKNLP-1036: Pin onnx version for compatibility
* SPARKNLP-1036: Upgrade Spark version
* SPARKNLP-1036: Minor Fixes
* SPARKNLP-1036: Clean Metadata
* SPARKNLP-1036: Add/Adjust Documentation
- Note for supported Spark Version of Annotators
- added missing Documentation for BGEEmbeddings
---
docs/en/annotator_entries/BGEEmbeddings.md | 160 +
docs/en/annotators.md | 1 +
docs/en/transformer_entries/E5Embeddings.md | 2 +
.../en/transformer_entries/MPNetEmbeddings.md | 2 +
...HuggingFace_ONNX_in_Spark_NLP_ALBERT.ipynb | 1180 ++--
...Spark_NLP_AlbertForQuestionAnswering.ipynb | 4 +-
..._NLP_AlbertForSequenceClassification.ipynb | 4 +-
...ark_NLP_AlbertForTokenClassification.ipynb | 4 +-
.../HuggingFace_ONNX_in_Spark_NLP_BERT.ipynb | 7 +-
.../HuggingFace_ONNX_in_Spark_NLP_BGE.ipynb | 5484 +++++++--------
...n_Spark_NLP_BertForQuestionAnswering.ipynb | 4 +-
...rk_NLP_BertForSequenceClassification.ipynb | 4 +-
...Spark_NLP_BertForTokenClassification.ipynb | 4 +-
...rk_NLP_BertForZeroShotClassification.ipynb | 4791 ++++++-------
..._in_Spark_NLP_BertSentenceEmbeddings.ipynb | 5 +-
.../HuggingFace_ONNX_in_Spark_NLP_CLIP.ipynb | 17 +-
...gingFace_ONNX_in_Spark_NLP_CamemBERT.ipynb | 4607 ++++++------
...rk_NLP_CamemBertForQuestionAnswering.ipynb | 245 +-
...P_CamemBertForSequenceClassification.ipynb | 277 +-
..._NLP_CamemBertForTokenClassification.ipynb | 275 +-
...uggingFace_ONNX_in_Spark_NLP_DeBERTa.ipynb | 4 +-
...park_NLP_DeBertaForQuestionAnswering.ipynb | 6038 ++++++++--------
...NLP_DeBertaForSequenceClassification.ipynb | 4 +-
...rk_NLP_DeBertaForTokenClassification.ipynb | 6232 ++++++++---------
...ingFace_ONNX_in_Spark_NLP_DistilBERT.ipynb | 6 +-
...k_NLP_DistilBertForQuestionAnswering.ipynb | 4 +-
..._DistilBertForSequenceClassification.ipynb | 4 +-
...NLP_DistilBertForTokenClassification.ipynb | 6 +-
.../HuggingFace_ONNX_in_Spark_NLP_E5.ipynb | 916 ++-
.../HuggingFace_ONNX_in_Spark_NLP_MPNet.ipynb | 1466 ++--
..._Spark_NLP_MPNetForQuestionAnswering.ipynb | 11 +-
...k_NLP_MPNetForSequenceClassification.ipynb | 2 +-
...HuggingFace_ONNX_in_Spark_NLP_Marian.ipynb | 2 +-
...uggingFace_ONNX_in_Spark_NLP_RoBERTa.ipynb | 4 +-
...park_NLP_RoBertaForQuestionAnswering.ipynb | 4 +-
...NLP_RoBertaForSequenceClassification.ipynb | 5570 +++++++--------
...rk_NLP_RoBertaForTokenClassification.ipynb | 6222 ++++++++--------
.../HuggingFace_ONNX_in_Spark_NLP_T5.ipynb | 2 +-
...uggingFace_ONNX_in_Spark_NLP_Whisper.ipynb | 1243 ++--
...ngFace_ONNX_in_Spark_NLP_XLM_RoBERTa.ipynb | 4601 ++++++------
...k_NLP_XlmRoBertaForQuestionAnswering.ipynb | 4616 ++++++------
..._XlmRoBertaForSequenceClassification.ipynb | 4093 ++++++-----
...NLP_XlmRoBertaForTokenClassification.ipynb | 4036 ++++++-----
...ark_NLP_XlmRoBertaSentenceEmbeddings.ipynb | 9 +-
...Spark_NLP_AlbertForQuestionAnswering.ipynb | 4 +-
.../annotator/embeddings/bge_embeddings.py | 2 +
.../annotator/embeddings/e5_embeddings.py | 2 +
.../annotator/embeddings/mpnet_embeddings.py | 2 +
.../nlp/embeddings/BGEEmbeddings.scala | 4 +-
.../nlp/embeddings/E5Embeddings.scala | 2 +
.../nlp/embeddings/MPNetEmbeddings.scala | 2 +
51 files changed, 30360 insertions(+), 31830 deletions(-)
create mode 100644 docs/en/annotator_entries/BGEEmbeddings.md
diff --git a/docs/en/annotator_entries/BGEEmbeddings.md b/docs/en/annotator_entries/BGEEmbeddings.md
new file mode 100644
index 00000000000000..7e006a5e7789a8
--- /dev/null
+++ b/docs/en/annotator_entries/BGEEmbeddings.md
@@ -0,0 +1,160 @@
+{%- capture title -%}
+BGEEmbeddings
+{%- endcapture -%}
+
+{%- capture description -%}
+Sentence embeddings using BGE.
+
+BGE, or BAAI General Embeddings, a model that can map any text to a low-dimensional dense
+vector which can be used for tasks like retrieval, classification, clustering, or semantic
+search.
+
+Note that this annotator is only supported for Spark Versions 3.4 and up.
+
+Pretrained models can be loaded with `pretrained` of the companion object:
+
+```scala
+val embeddings = BGEEmbeddings.pretrained()
+ .setInputCols("document")
+ .setOutputCol("embeddings")
+```
+
+The default model is `"bge_base"`, if no name is provided.
+
+For available pretrained models please see the
+[Models Hub](https://sparknlp.org/models?q=BGE).
+
+For extended examples of usage, see
+[BGEEmbeddingsTestSpec](https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/embeddings/BGEEmbeddingsTestSpec.scala).
+
+**Sources** :
+
+[C-Pack: Packaged Resources To Advance General Chinese Embedding](https://arxiv.org/pdf/2309.07597)
+
+[BGE Github Repository](https://github.com/FlagOpen/FlagEmbedding)
+
+**Paper abstract**
+
+*We introduce C-Pack, a package of resources that significantly advance the field of general
+Chinese embeddings. C-Pack includes three critical resources. 1) C-MTEB is a comprehensive
+benchmark for Chinese text embeddings covering 6 tasks and 35 datasets. 2) C-MTP is a massive
+text embedding dataset curated from labeled and unlabeled Chinese corpora for training
+embedding models. 3) C-TEM is a family of embedding models covering multiple sizes. Our models
+outperform all prior Chinese text embeddings on C-MTEB by up to +10% upon the time of the
+release. We also integrate and optimize the entire suite of training methods for C-TEM. Along
+with our resources on general Chinese embedding, we release our data and models for English
+text embeddings. The English models achieve stateof-the-art performance on the MTEB benchmark;
+meanwhile, our released English data is 2 times larger than the Chinese data. All these
+resources are made publicly available at https://github.com/FlagOpen/FlagEmbedding.*
+{%- endcapture -%}
+
+{%- capture input_anno -%}
+DOCUMENT
+{%- endcapture -%}
+
+{%- capture output_anno -%}
+SENTENCE_EMBEDDINGS
+{%- endcapture -%}
+
+{%- capture python_example -%}
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+documentAssembler = DocumentAssembler() \
+ .setInputCol("text") \
+ .setOutputCol("document")
+embeddings = BGEEmbeddings.pretrained() \
+ .setInputCols(["document"]) \
+ .setOutputCol("bge_embeddings")
+embeddingsFinisher = EmbeddingsFinisher() \
+ .setInputCols(["bge_embeddings"]) \
+ .setOutputCols("finished_embeddings") \
+ .setOutputAsVector(True)
+pipeline = Pipeline().setStages([
+ documentAssembler,
+ embeddings,
+ embeddingsFinisher
+])
+data = spark.createDataFrame([["query: how much protein should a female eat",
+"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day." + \
+"But, as you can see from this chart, you'll need to increase that if you're expecting or training for a" + \
+"marathon. Check out the chart below to see how much protein you should be eating each day.",
+]]).toDF("text")
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(finished_embeddings) as result").show(5, 80)
++--------------------------------------------------------------------------------+
+| result|
++--------------------------------------------------------------------------------+
+|[[8.0190285E-4, -0.005974853, -0.072875895, 0.007944068, 0.026059335, -0.0080...|
+|[[0.050514214, 0.010061974, -0.04340176, -0.020937217, 0.05170225, 0.01157857...|
++--------------------------------------------------------------------------------+
+{%- endcapture -%}
+
+{%- capture scala_example -%}
+import spark.implicits._
+import com.johnsnowlabs.nlp.base.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.embeddings.BGEEmbeddings
+import com.johnsnowlabs.nlp.EmbeddingsFinisher
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+ .setInputCol("text")
+ .setOutputCol("document")
+
+val embeddings = BGEEmbeddings.pretrained("bge_base", "en")
+ .setInputCols("document")
+ .setOutputCol("bge_embeddings")
+
+val embeddingsFinisher = new EmbeddingsFinisher()
+ .setInputCols("bge_embeddings")
+ .setOutputCols("finished_embeddings")
+ .setOutputAsVector(true)
+
+val pipeline = new Pipeline().setStages(Array(
+ documentAssembler,
+ embeddings,
+ embeddingsFinisher
+))
+
+val data = Seq("query: how much protein should a female eat",
+"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day." +
+But, as you can see from this chart, you'll need to increase that if you're expecting or training for a" +
+marathon. Check out the chart below to see how much protein you should be eating each day."
+
+).toDF("text")
+val result = pipeline.fit(data).transform(data)
+
+result.selectExpr("explode(finished_embeddings) as result").show(1, 80)
++--------------------------------------------------------------------------------+
+| result|
++--------------------------------------------------------------------------------+
+|[[8.0190285E-4, -0.005974853, -0.072875895, 0.007944068, 0.026059335, -0.0080...|
+|[[0.050514214, 0.010061974, -0.04340176, -0.020937217, 0.05170225, 0.01157857...|
++--------------------------------------------------------------------------------+
+{%- endcapture -%}
+
+{%- capture api_link -%}
+[BGEEmbeddings](/api/com/johnsnowlabs/nlp/embeddings/BGEEmbeddings)
+{%- endcapture -%}
+
+{%- capture python_api_link -%}
+[BGEEmbeddings](/api/python/reference/autosummary/sparknlp/annotator/embeddings/bge_embeddings/index.html#sparknlp.annotator.embeddings.bge_embeddings.BGEEmbeddings)
+{%- endcapture -%}
+
+{%- capture source_link -%}
+[BGEEmbeddings](https://github.com/JohnSnowLabs/spark-nlp/tree/master/src/main/scala/com/johnsnowlabs/nlp/embeddings/BGEEmbeddings.scala)
+{%- endcapture -%}
+
+{% include templates/anno_template.md
+title=title
+description=description
+input_anno=input_anno
+output_anno=output_anno
+python_example=python_example
+scala_example=scala_example
+api_link=api_link
+python_api_link=python_api_link
+source_link=source_link
+%}
\ No newline at end of file
diff --git a/docs/en/annotators.md b/docs/en/annotators.md
index 858a07d0a06336..b65eae52cc7f12 100644
--- a/docs/en/annotators.md
+++ b/docs/en/annotators.md
@@ -45,6 +45,7 @@ There are two types of Annotators:
{:.table-model-big}
|Annotator|Description|Version |
|---|---|---|
+{% include templates/anno_table_entry.md path="" name="BGEEmbeddings" summary="Sentence embeddings using BGE."%}
{% include templates/anno_table_entry.md path="" name="BigTextMatcher" summary="Annotator to match exact phrases (by token) provided in a file against a Document."%}
{% include templates/anno_table_entry.md path="" name="Chunk2Doc" summary="Converts a `CHUNK` type column back into `DOCUMENT`. Useful when trying to re-tokenize or do further analysis on a `CHUNK` result."%}
{% include templates/anno_table_entry.md path="" name="ChunkEmbeddings" summary="This annotator utilizes WordEmbeddings, BertEmbeddings etc. to generate chunk embeddings from either Chunker, NGramGenerator, or NerConverter outputs."%}
diff --git a/docs/en/transformer_entries/E5Embeddings.md b/docs/en/transformer_entries/E5Embeddings.md
index 04fa9482ed52d3..d3f0ec14da9fcc 100644
--- a/docs/en/transformer_entries/E5Embeddings.md
+++ b/docs/en/transformer_entries/E5Embeddings.md
@@ -8,6 +8,8 @@ Sentence embeddings using E5.
E5, an instruction-finetuned text embedding model that can generate text embeddings tailored
to any task (e.g., classification, retrieval, clustering, text evaluation, etc.)
+Note that this annotator is only supported for Spark Versions 3.4 and up.
+
Pretrained models can be loaded with `pretrained` of the companion object:
```scala
diff --git a/docs/en/transformer_entries/MPNetEmbeddings.md b/docs/en/transformer_entries/MPNetEmbeddings.md
index a4c8b4df22c968..843078363277b2 100644
--- a/docs/en/transformer_entries/MPNetEmbeddings.md
+++ b/docs/en/transformer_entries/MPNetEmbeddings.md
@@ -10,6 +10,8 @@ Understanding by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu. MPNet a
pre-training method, named masked and permuted language modeling, to inherit the advantages of
masked language modeling and permuted language modeling for natural language understanding.
+Note that this annotator is only supported for Spark Versions 3.4 and up.
+
Pretrained models can be loaded with `pretrained` of the companion object:
```scala
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ALBERT.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ALBERT.ipynb
index 10c920bfea02be..08b77ed29e3841 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ALBERT.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ALBERT.ipynb
@@ -41,16 +41,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "bgAl5Z8JLROz",
- "outputId": "0e3bf64c-a97d-465c-8357-47e6b1409135",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "bgAl5Z8JLROz",
+ "outputId": "0e3bf64c-a97d-465c-8357-47e6b1409135"
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m69.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
@@ -119,7 +119,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
]
},
{
@@ -137,8 +137,6 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "VHQU-UIHLRO3",
- "outputId": "9cbb1d48-174a-4194-c475-dc616ec8076e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 423,
@@ -199,12 +197,14 @@
"a27e457fcb9746c7ad4a8a694f76e367",
"220b5d00bae9489fb70ce469d4103d08"
]
- }
+ },
+ "id": "VHQU-UIHLRO3",
+ "outputId": "9cbb1d48-174a-4194-c475-dc616ec8076e"
},
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
@@ -215,43 +215,43 @@
]
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "config.json: 0%| | 0.00/684 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "a56550fad97d49d4a2b9fddf205ed42f",
"version_major": 2,
- "version_minor": 0,
- "model_id": "a56550fad97d49d4a2b9fddf205ed42f"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "config.json: 0%| | 0.00/684 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Framework not specified. Using pt to export the model.\n"
]
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/47.4M [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "144772b83e3a451ea737692bb03cda53",
"version_major": 2,
- "version_minor": 0,
- "model_id": "144772b83e3a451ea737692bb03cda53"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/47.4M [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.dense.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.dense.weight']\n",
"- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
@@ -259,50 +259,50 @@
]
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "1d8985f6823e4249a55a6ed9d4c1a57d",
"version_major": 2,
- "version_minor": 0,
- "model_id": "1d8985f6823e4249a55a6ed9d4c1a57d"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "spiece.model: 0%| | 0.00/760k [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "7173408a02654a869335fd1a1a34cd04",
"version_major": 2,
- "version_minor": 0,
- "model_id": "7173408a02654a869335fd1a1a34cd04"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "spiece.model: 0%| | 0.00/760k [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/1.31M [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "b214680a132f4584b97200442feeca41",
"version_major": 2,
- "version_minor": 0,
- "model_id": "b214680a132f4584b97200442feeca41"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/1.31M [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
@@ -340,16 +340,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "iUmHRgbvLRO4",
- "outputId": "6cd365e0-8516-4486-892e-b4acf73d3a76",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "iUmHRgbvLRO4",
+ "outputId": "6cd365e0-8516-4486-892e-b4acf73d3a76"
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"total 45912\n",
"drwxr-xr-x 2 root root 4096 Feb 29 23:27 assets\n",
@@ -369,16 +369,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "AcV5pbwGLRO5",
- "outputId": "8b52aa43-1fc3-4bfd-f946-f4c750011d39",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "AcV5pbwGLRO5",
+ "outputId": "8b52aa43-1fc3-4bfd-f946-f4c750011d39"
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"total 744\n",
"-rw-r--r-- 1 root root 760289 Feb 29 23:27 spiece.model\n"
@@ -405,16 +405,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "bRAvyXTlLRO-",
- "outputId": "b736dcf3-43f7-43b4-f4ed-78061c22d413",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "bRAvyXTlLRO-",
+ "outputId": "b736dcf3-43f7-43b4-f4ed-78061c22d413"
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Installing PySpark 3.2.3 and Spark NLP 5.3.0\n",
"setup Colab for PySpark 3.2.3 and Spark NLP 5.3.0\n",
@@ -541,16 +541,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "yBcjQkDHLRPG",
- "outputId": "db2e1ffc-5fcc-4c7c-8df4-30cba05a0619",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "yBcjQkDHLRPG",
+ "outputId": "db2e1ffc-5fcc-4c7c-8df4-30cba05a0619"
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"total 44432\n",
"-rw-r--r-- 1 root root 44729685 Feb 29 23:29 albert_onnx\n",
@@ -613,16 +613,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "_IU5yE9RLRPH",
- "outputId": "d1b711cc-10d6-4347-d46d-11e476ffa4c7",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "_IU5yE9RLRPH",
+ "outputId": "d1b711cc-10d6-4347-d46d-11e476ffa4c7"
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"+--------------------+\n",
"| embeddings|\n",
@@ -668,9 +668,10 @@
}
],
"metadata": {
+ "accelerator": "GPU",
"colab": {
- "provenance": [],
- "gpuType": "T4"
+ "gpuType": "T4",
+ "provenance": []
},
"kernelspec": {
"display_name": "Python 3",
@@ -690,32 +691,10 @@
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
- "a56550fad97d49d4a2b9fddf205ed42f": {
+ "0246b916ec2c4bb78696f27b68ed57c0": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_fa42a964f576462ea192e4c47d33beab",
- "IPY_MODEL_82ac68dec79f49229d2f6e0a29d39675",
- "IPY_MODEL_5ceb0727c2334bf0b1be26d3c90008d5"
- ],
- "layout": "IPY_MODEL_3955fdd6066248cdac68ec6796365d59"
- }
- },
- "fa42a964f576462ea192e4c47d33beab": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -727,61 +706,31 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_baffc75590e2427b8f0a4ed4b7901d74",
+ "layout": "IPY_MODEL_64ac18474257437f916941bb3f09bf53",
"placeholder": "",
- "style": "IPY_MODEL_8cf156ee9c4945e997a815618b93ebc3",
- "value": "config.json: 100%"
- }
- },
- "82ac68dec79f49229d2f6e0a29d39675": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d2bd7f515c994a529fb6685addb34ae5",
- "max": 684,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_fafa5ff8a6104816b08636f3061c559e",
- "value": 684
+ "style": "IPY_MODEL_a8806c8148944ab0baf055a59cc5d9d5",
+ "value": "tokenizer.json: 100%"
}
},
- "5ceb0727c2334bf0b1be26d3c90008d5": {
+ "044e6d9d96a045da801ea3d8ce29793f": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7fe574626264422db3174e1ca84384a7",
- "placeholder": "",
- "style": "IPY_MODEL_a7d2489cbb2640d9b7bf87ba6b765fa4",
- "value": " 684/684 [00:00<00:00, 948B/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "3955fdd6066248cdac68ec6796365d59": {
+ "097a57f6fa1b4a5391f865815e2701c1": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -830,10 +779,10 @@
"width": null
}
},
- "baffc75590e2427b8f0a4ed4b7901d74": {
+ "0b380fc0d74a48ac81313a216457a304": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -882,203 +831,78 @@
"width": null
}
},
- "8cf156ee9c4945e997a815618b93ebc3": {
+ "0c8db63e710c48b8a548c21a1f110407": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0b380fc0d74a48ac81313a216457a304",
+ "max": 25,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_86aba96ffdc6405f879cfde1fd368966",
+ "value": 25
}
},
- "d2bd7f515c994a529fb6685addb34ae5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "144772b83e3a451ea737692bb03cda53": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b809d2becb73412ea175b1158f8d909f",
+ "IPY_MODEL_20a3c7f59f2c4c2d81562b65509e3164",
+ "IPY_MODEL_b7c43e15175e445687d1d3ebebca06b7"
+ ],
+ "layout": "IPY_MODEL_7d6e37ab07d54c5e82001a848951e049"
}
},
- "fafa5ff8a6104816b08636f3061c559e": {
+ "1d8985f6823e4249a55a6ed9d4c1a57d": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "7fe574626264422db3174e1ca84384a7": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a7d2489cbb2640d9b7bf87ba6b765fa4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "144772b83e3a451ea737692bb03cda53": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
- "IPY_MODEL_b809d2becb73412ea175b1158f8d909f",
- "IPY_MODEL_20a3c7f59f2c4c2d81562b65509e3164",
- "IPY_MODEL_b7c43e15175e445687d1d3ebebca06b7"
+ "IPY_MODEL_5643873b3ae9471fb35f8aa9e9072dc3",
+ "IPY_MODEL_0c8db63e710c48b8a548c21a1f110407",
+ "IPY_MODEL_f0b60625b1fd4ad1aa4ec454d5df5dea"
],
- "layout": "IPY_MODEL_7d6e37ab07d54c5e82001a848951e049"
- }
- },
- "b809d2becb73412ea175b1158f8d909f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5533075d191d475fbb6d1a8efc4df459",
- "placeholder": "",
- "style": "IPY_MODEL_c2753ed1af9a4c72a455e35950f5a3f6",
- "value": "model.safetensors: 100%"
+ "layout": "IPY_MODEL_5e428f65b0b24093a258ec6acb2620af"
}
},
"20a3c7f59f2c4c2d81562b65509e3164": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1099,31 +923,41 @@
"value": 47372894
}
},
- "b7c43e15175e445687d1d3ebebca06b7": {
+ "20c5f5081dc348a498055dfc64d1374c": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2a1be62b7eda40f5aa090f56730554bf",
- "placeholder": "",
- "style": "IPY_MODEL_044e6d9d96a045da801ea3d8ce29793f",
- "value": " 47.4M/47.4M [00:00<00:00, 87.0MB/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
}
},
- "7d6e37ab07d54c5e82001a848951e049": {
+ "220b5d00bae9489fb70ce469d4103d08": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2a1be62b7eda40f5aa090f56730554bf": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1172,10 +1006,10 @@
"width": null
}
},
- "5533075d191d475fbb6d1a8efc4df459": {
+ "2b62760901154f2da48bbb4e3b6cbc00": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1224,10 +1058,10 @@
"width": null
}
},
- "c2753ed1af9a4c72a455e35950f5a3f6": {
+ "34bb9f1060b741c5b25901e4aa888bd7": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -1239,10 +1073,10 @@
"description_width": ""
}
},
- "c22f638b036b442eadd4467a228d3aa6": {
+ "3955fdd6066248cdac68ec6796365d59": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1291,26 +1125,34 @@
"width": null
}
},
- "b5abf8f24bd644ab83a6db16ecdca4d1": {
+ "498157d1ca414ef4b7f95d2848d56262": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_549aa270fee54565a6d9a8ba2fb90766",
+ "max": 760289,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_20c5f5081dc348a498055dfc64d1374c",
+ "value": 760289
}
},
- "2a1be62b7eda40f5aa090f56730554bf": {
+ "5381ebd352db4cde9ac1983b85961754": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1359,92 +1201,135 @@
"width": null
}
},
- "044e6d9d96a045da801ea3d8ce29793f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
+ "549aa270fee54565a6d9a8ba2fb90766": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "1d8985f6823e4249a55a6ed9d4c1a57d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_5643873b3ae9471fb35f8aa9e9072dc3",
- "IPY_MODEL_0c8db63e710c48b8a548c21a1f110407",
- "IPY_MODEL_f0b60625b1fd4ad1aa4ec454d5df5dea"
- ],
- "layout": "IPY_MODEL_5e428f65b0b24093a258ec6acb2620af"
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "5643873b3ae9471fb35f8aa9e9072dc3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
+ "5533075d191d475fbb6d1a8efc4df459": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f55883529cad4f2a89cc669f7fb6539b",
- "placeholder": "",
- "style": "IPY_MODEL_9c944fa3c5e741b68cd176d3bc0b6c08",
- "value": "tokenizer_config.json: 100%"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "0c8db63e710c48b8a548c21a1f110407": {
+ "5643873b3ae9471fb35f8aa9e9072dc3": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
+ "_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_0b380fc0d74a48ac81313a216457a304",
- "max": 25,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_86aba96ffdc6405f879cfde1fd368966",
- "value": 25
+ "layout": "IPY_MODEL_f55883529cad4f2a89cc669f7fb6539b",
+ "placeholder": "",
+ "style": "IPY_MODEL_9c944fa3c5e741b68cd176d3bc0b6c08",
+ "value": "tokenizer_config.json: 100%"
}
},
- "f0b60625b1fd4ad1aa4ec454d5df5dea": {
+ "5ceb0727c2334bf0b1be26d3c90008d5": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1456,16 +1341,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_9dd156bd2244453c85a6b07ffec9c674",
+ "layout": "IPY_MODEL_7fe574626264422db3174e1ca84384a7",
"placeholder": "",
- "style": "IPY_MODEL_efcf2b26d3f644cb8fb5113bfcf4b2c3",
- "value": " 25.0/25.0 [00:00<00:00, 1.67kB/s]"
+ "style": "IPY_MODEL_a7d2489cbb2640d9b7bf87ba6b765fa4",
+ "value": " 684/684 [00:00<00:00, 948B/s]"
}
},
"5e428f65b0b24093a258ec6acb2620af": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1514,10 +1399,31 @@
"width": null
}
},
- "f55883529cad4f2a89cc669f7fb6539b": {
+ "63649ded14a04f889f3b1025ea804e32": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a27e457fcb9746c7ad4a8a694f76e367",
+ "placeholder": "",
+ "style": "IPY_MODEL_220b5d00bae9489fb70ce469d4103d08",
+ "value": " 1.31M/1.31M [00:00<00:00, 5.75MB/s]"
+ }
+ },
+ "64ac18474257437f916941bb3f09bf53": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1566,25 +1472,32 @@
"width": null
}
},
- "9c944fa3c5e741b68cd176d3bc0b6c08": {
+ "7173408a02654a869335fd1a1a34cd04": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_eab38496db0d4fa88d45348f917f2224",
+ "IPY_MODEL_498157d1ca414ef4b7f95d2848d56262",
+ "IPY_MODEL_82f917f1e0bd450db76b1d8f05cb774a"
+ ],
+ "layout": "IPY_MODEL_b01a0ae6e4424365b5d8f9643ca8b93b"
}
},
- "0b380fc0d74a48ac81313a216457a304": {
+ "7d6e37ab07d54c5e82001a848951e049": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1633,26 +1546,10 @@
"width": null
}
},
- "86aba96ffdc6405f879cfde1fd368966": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "9dd156bd2244453c85a6b07ffec9c674": {
+ "7fe574626264422db3174e1ca84384a7": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1701,68 +1598,10 @@
"width": null
}
},
- "efcf2b26d3f644cb8fb5113bfcf4b2c3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7173408a02654a869335fd1a1a34cd04": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_eab38496db0d4fa88d45348f917f2224",
- "IPY_MODEL_498157d1ca414ef4b7f95d2848d56262",
- "IPY_MODEL_82f917f1e0bd450db76b1d8f05cb774a"
- ],
- "layout": "IPY_MODEL_b01a0ae6e4424365b5d8f9643ca8b93b"
- }
- },
- "eab38496db0d4fa88d45348f917f2224": {
+ "82ac68dec79f49229d2f6e0a29d39675": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5381ebd352db4cde9ac1983b85961754",
- "placeholder": "",
- "style": "IPY_MODEL_9cb5f993dd2f40889590fa9465794efe",
- "value": "spiece.model: 100%"
- }
- },
- "498157d1ca414ef4b7f95d2848d56262": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1775,18 +1614,18 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_549aa270fee54565a6d9a8ba2fb90766",
- "max": 760289,
+ "layout": "IPY_MODEL_d2bd7f515c994a529fb6685addb34ae5",
+ "max": 684,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_20c5f5081dc348a498055dfc64d1374c",
- "value": 760289
+ "style": "IPY_MODEL_fafa5ff8a6104816b08636f3061c559e",
+ "value": 684
}
},
"82f917f1e0bd450db76b1d8f05cb774a": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -1804,10 +1643,26 @@
"value": " 760k/760k [00:00<00:00, 4.68MB/s]"
}
},
- "b01a0ae6e4424365b5d8f9643ca8b93b": {
+ "86aba96ffdc6405f879cfde1fd368966": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "88f1436f2b4d4b268f1c7cba52578a6b": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1856,10 +1711,55 @@
"width": null
}
},
- "5381ebd352db4cde9ac1983b85961754": {
+ "8cf156ee9c4945e997a815618b93ebc3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9c944fa3c5e741b68cd176d3bc0b6c08": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9cb5f993dd2f40889590fa9465794efe": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9dd156bd2244453c85a6b07ffec9c674": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1908,25 +1808,10 @@
"width": null
}
},
- "9cb5f993dd2f40889590fa9465794efe": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "549aa270fee54565a6d9a8ba2fb90766": {
+ "a27e457fcb9746c7ad4a8a694f76e367": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -1975,26 +1860,62 @@
"width": null
}
},
- "20c5f5081dc348a498055dfc64d1374c": {
+ "a56550fad97d49d4a2b9fddf205ed42f": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_fa42a964f576462ea192e4c47d33beab",
+ "IPY_MODEL_82ac68dec79f49229d2f6e0a29d39675",
+ "IPY_MODEL_5ceb0727c2334bf0b1be26d3c90008d5"
+ ],
+ "layout": "IPY_MODEL_3955fdd6066248cdac68ec6796365d59"
+ }
+ },
+ "a7d2489cbb2640d9b7bf87ba6b765fa4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "88f1436f2b4d4b268f1c7cba52578a6b": {
+ "a8806c8148944ab0baf055a59cc5d9d5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b01a0ae6e4424365b5d8f9643ca8b93b": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2043,25 +1964,10 @@
"width": null
}
},
- "34bb9f1060b741c5b25901e4aa888bd7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
"b214680a132f4584b97200442feeca41": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2080,55 +1986,47 @@
"layout": "IPY_MODEL_097a57f6fa1b4a5391f865815e2701c1"
}
},
- "0246b916ec2c4bb78696f27b68ed57c0": {
+ "b5abf8f24bd644ab83a6db16ecdca4d1": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_64ac18474257437f916941bb3f09bf53",
- "placeholder": "",
- "style": "IPY_MODEL_a8806c8148944ab0baf055a59cc5d9d5",
- "value": "tokenizer.json: 100%"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
}
},
- "ff4944bcf015410a89e8cfce3bb485c6": {
+ "b7c43e15175e445687d1d3ebebca06b7": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
+ "_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_2b62760901154f2da48bbb4e3b6cbc00",
- "max": 1312669,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_ba87161ff85445a78101da82e07fce25",
- "value": 1312669
+ "layout": "IPY_MODEL_2a1be62b7eda40f5aa090f56730554bf",
+ "placeholder": "",
+ "style": "IPY_MODEL_044e6d9d96a045da801ea3d8ce29793f",
+ "value": " 47.4M/47.4M [00:00<00:00, 87.0MB/s]"
}
},
- "63649ded14a04f889f3b1025ea804e32": {
+ "b809d2becb73412ea175b1158f8d909f": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2140,16 +2038,32 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_a27e457fcb9746c7ad4a8a694f76e367",
+ "layout": "IPY_MODEL_5533075d191d475fbb6d1a8efc4df459",
"placeholder": "",
- "style": "IPY_MODEL_220b5d00bae9489fb70ce469d4103d08",
- "value": " 1.31M/1.31M [00:00<00:00, 5.75MB/s]"
+ "style": "IPY_MODEL_c2753ed1af9a4c72a455e35950f5a3f6",
+ "value": "model.safetensors: 100%"
}
},
- "097a57f6fa1b4a5391f865815e2701c1": {
+ "ba87161ff85445a78101da82e07fce25": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "baffc75590e2427b8f0a4ed4b7901d74": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2198,10 +2112,10 @@
"width": null
}
},
- "64ac18474257437f916941bb3f09bf53": {
+ "c22f638b036b442eadd4467a228d3aa6": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2250,10 +2164,10 @@
"width": null
}
},
- "a8806c8148944ab0baf055a59cc5d9d5": {
+ "c2753ed1af9a4c72a455e35950f5a3f6": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -2265,10 +2179,10 @@
"description_width": ""
}
},
- "2b62760901154f2da48bbb4e3b6cbc00": {
+ "d2bd7f515c994a529fb6685addb34ae5": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2317,26 +2231,67 @@
"width": null
}
},
- "ba87161ff85445a78101da82e07fce25": {
+ "eab38496db0d4fa88d45348f917f2224": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5381ebd352db4cde9ac1983b85961754",
+ "placeholder": "",
+ "style": "IPY_MODEL_9cb5f993dd2f40889590fa9465794efe",
+ "value": "spiece.model: 100%"
+ }
+ },
+ "efcf2b26d3f644cb8fb5113bfcf4b2c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "a27e457fcb9746c7ad4a8a694f76e367": {
+ "f0b60625b1fd4ad1aa4ec454d5df5dea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9dd156bd2244453c85a6b07ffec9c674",
+ "placeholder": "",
+ "style": "IPY_MODEL_efcf2b26d3f644cb8fb5113bfcf4b2c3",
+ "value": " 25.0/25.0 [00:00<00:00, 1.67kB/s]"
+ }
+ },
+ "f55883529cad4f2a89cc669f7fb6539b": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2385,25 +2340,70 @@
"width": null
}
},
- "220b5d00bae9489fb70ce469d4103d08": {
+ "fa42a964f576462ea192e4c47d33beab": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_baffc75590e2427b8f0a4ed4b7901d74",
+ "placeholder": "",
+ "style": "IPY_MODEL_8cf156ee9c4945e997a815618b93ebc3",
+ "value": "config.json: 100%"
+ }
+ },
+ "fafa5ff8a6104816b08636f3061c559e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
+ },
+ "ff4944bcf015410a89e8cfce3bb485c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2b62760901154f2da48bbb4e3b6cbc00",
+ "max": 1312669,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ba87161ff85445a78101da82e07fce25",
+ "value": 1312669
+ }
}
}
- },
- "accelerator": "GPU"
+ }
},
"nbformat": 4,
"nbformat_minor": 0
-}
\ No newline at end of file
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForQuestionAnswering.ipynb
index 24fda0354440ba..2e1d1eb3fa184a 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForQuestionAnswering.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -75,7 +75,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForSequenceClassification.ipynb
index 80c341c4b9773d..01ebb3f4b2844c 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForSequenceClassification.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -75,7 +75,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForTokenClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForTokenClassification.ipynb
index 441e82771e5c31..a03552490a1071 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForTokenClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForTokenClassification.ipynb
@@ -35,7 +35,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -74,7 +74,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BERT.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BERT.ipynb
index 2eb92dde43fe45..887ee3c86dabd7 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BERT.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BERT.ipynb
@@ -28,7 +28,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
]
},
{
@@ -70,7 +70,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
]
},
{
@@ -510,8 +510,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "pygments_lexer": "ipython3"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BGE.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BGE.ipynb
index 4a32637359f498..5d74216027be58 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BGE.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BGE.ipynb
@@ -1,2790 +1,2806 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BGE.ipynb)\n",
+ "\n",
+ "# Import ONNX BGE models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support for this annotator was introduced in `Spark NLP 5.2.1`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
+ "- You can import models for BGE from HuggingFace and they have to be in `Sentence Similarity` category. Meaning, you cannot use BGE models trained/fine-tuned on a specific task such as token/sequence classification."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "B7vB-Wi_07h8"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BGE.ipynb)\n",
- "\n",
- "# Import ONNX BGE models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support for this annotator was introduced in `Spark NLP 5.2.1`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
- "- You can import models for BGE from HuggingFace and they have to be in `Sentence Similarity` category. Meaning, you cannot use BGE models trained/fine-tuned on a specific task such as token/sequence classification."
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting install\n",
+ " Downloading install-1.3.5-py3-none-any.whl (3.2 kB)\n",
+ "Collecting optimum[onnxruntime]\n",
+ " Downloading optimum-1.18.1-py3-none-any.whl (410 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.0/410.0 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting coloredlogs (from optimum[onnxruntime])\n",
+ " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (1.12)\n",
+ "Requirement already satisfied: transformers[sentencepiece]<4.40.0,>=4.26.0 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (4.38.2)\n",
+ "Requirement already satisfied: torch>=1.11 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (2.2.1+cu121)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (24.0)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (1.25.2)\n",
+ "Requirement already satisfied: huggingface-hub>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (0.20.3)\n",
+ "Collecting datasets (from optimum[onnxruntime])\n",
+ " Downloading datasets-2.18.0-py3-none-any.whl (510 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m25.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting onnx (from optimum[onnxruntime])\n",
+ " Downloading onnx-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m63.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting onnxruntime>=1.11.0 (from optimum[onnxruntime])\n",
+ " Downloading onnxruntime-1.17.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m68.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting evaluate (from optimum[onnxruntime])\n",
+ " Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: protobuf>=3.20.1 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (3.20.3)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (3.13.4)\n",
+ "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (14.0.2)\n",
+ "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (0.6)\n",
+ "Collecting dill<0.3.9,>=0.3.0 (from datasets->optimum[onnxruntime])\n",
+ " Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (2.0.3)\n",
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (2.31.0)\n",
+ "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (4.66.2)\n",
+ "Collecting xxhash (from datasets->optimum[onnxruntime])\n",
+ " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting multiprocess (from datasets->optimum[onnxruntime])\n",
+ " Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: fsspec[http]<=2024.2.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (2023.6.0)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (3.9.3)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (6.0.1)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.8.0->optimum[onnxruntime]) (4.11.0)\n",
+ "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.11.0->optimum[onnxruntime]) (24.3.25)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11->optimum[onnxruntime]) (3.3)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11->optimum[onnxruntime]) (3.1.3)\n",
+ "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
+ "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
+ "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
+ "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
+ "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
+ "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
+ "Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
+ "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
+ "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
+ "Collecting nvidia-nccl-cu12==2.19.3 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n",
+ "Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
+ "Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11->optimum[onnxruntime]) (2.2.0)\n",
+ "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11->optimum[onnxruntime])\n",
+ " Using cached nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]<4.40.0,>=4.26.0->optimum[onnxruntime]) (2023.12.25)\n",
+ "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]<4.40.0,>=4.26.0->optimum[onnxruntime]) (0.15.2)\n",
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]<4.40.0,>=4.26.0->optimum[onnxruntime]) (0.4.2)\n",
+ "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]<4.40.0,>=4.26.0->optimum[onnxruntime]) (0.1.99)\n",
+ "Collecting humanfriendly>=9.1 (from coloredlogs->optimum[onnxruntime])\n",
+ " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting responses<0.19 (from evaluate->optimum[onnxruntime])\n",
+ " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->optimum[onnxruntime]) (1.3.0)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (23.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (1.4.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (6.0.5)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (1.9.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (4.0.3)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (3.6)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (2.0.7)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (2024.2.2)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11->optimum[onnxruntime]) (2.1.5)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum[onnxruntime]) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum[onnxruntime]) (2023.4)\n",
+ "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum[onnxruntime]) (2024.1)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets->optimum[onnxruntime]) (1.16.0)\n",
+ "Installing collected packages: xxhash, onnx, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, install, humanfriendly, dill, responses, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, coloredlogs, onnxruntime, nvidia-cusolver-cu12, datasets, evaluate, optimum\n",
+ "Successfully installed coloredlogs-15.0.1 datasets-2.18.0 dill-0.3.8 evaluate-0.4.1 humanfriendly-10.0 install-1.3.5 multiprocess-0.70.16 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105 onnx-1.16.0 onnxruntime-1.17.1 optimum-1.18.1 responses-0.18.0 xxhash-3.4.1\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install --upgrade-strategy eager install optimum[onnxruntime]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [BAAI/bge-base-en](https://huggingface.co/BAAI/bge-base-en) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "6Kdh7FHM07h_"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "cec3087741c14a54a25fa6228c28aae3",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/719 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "rZQyTwjC07h_"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export the model.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "7XQhDa9s07iA",
- "outputId": "a91bb22e-88d4-498b-cb77-e729a015ed86"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ef655c7c3a08470fa85d00bc55b2bb51",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Requirement already satisfied: install in /usr/local/lib/python3.10/dist-packages (1.3.5)\n",
- "Collecting optimum[onnxruntime]\n",
- " Downloading optimum-1.16.1-py3-none-any.whl (403 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting coloredlogs (from optimum[onnxruntime])\n",
- " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (1.12)\n",
- "Requirement already satisfied: transformers[sentencepiece]>=4.26.0 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (4.35.2)\n",
- "Requirement already satisfied: torch>=1.9 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (2.1.0+cu121)\n",
- "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (23.2)\n",
- "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (1.23.5)\n",
- "Requirement already satisfied: huggingface-hub>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (0.19.4)\n",
- "Collecting datasets (from optimum[onnxruntime])\n",
- " Downloading datasets-2.16.1-py3-none-any.whl (507 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting onnx (from optimum[onnxruntime])\n",
- " Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting onnxruntime>=1.11.0 (from optimum[onnxruntime])\n",
- " Downloading onnxruntime-1.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.4 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m70.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting evaluate (from optimum[onnxruntime])\n",
- " Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: protobuf>=3.20.1 in /usr/local/lib/python3.10/dist-packages (from optimum[onnxruntime]) (3.20.3)\n",
- "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (3.13.1)\n",
- "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (10.0.1)\n",
- "Collecting pyarrow-hotfix (from datasets->optimum[onnxruntime])\n",
- " Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
- "Collecting dill<0.3.8,>=0.3.0 (from datasets->optimum[onnxruntime])\n",
- " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (1.5.3)\n",
- "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (2.31.0)\n",
- "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (4.66.1)\n",
- "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (3.4.1)\n",
- "Collecting multiprocess (from datasets->optimum[onnxruntime])\n",
- " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (2023.6.0)\n",
- "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (3.9.1)\n",
- "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum[onnxruntime]) (6.0.1)\n",
- "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.8.0->optimum[onnxruntime]) (4.5.0)\n",
- "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.11.0->optimum[onnxruntime]) (23.5.26)\n",
- "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->optimum[onnxruntime]) (3.2.1)\n",
- "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->optimum[onnxruntime]) (3.1.2)\n",
- "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.9->optimum[onnxruntime]) (2.1.0)\n",
- "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.26.0->optimum[onnxruntime]) (2023.6.3)\n",
- "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.26.0->optimum[onnxruntime]) (0.15.0)\n",
- "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.26.0->optimum[onnxruntime]) (0.4.1)\n",
- "Collecting sentencepiece!=0.1.92,>=0.1.91 (from transformers[sentencepiece]>=4.26.0->optimum[onnxruntime])\n",
- " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m53.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting humanfriendly>=9.1 (from coloredlogs->optimum[onnxruntime])\n",
- " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting responses<0.19 (from evaluate->optimum[onnxruntime])\n",
- " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
- "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->optimum[onnxruntime]) (1.3.0)\n",
- "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (23.1.0)\n",
- "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (6.0.4)\n",
- "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (1.9.4)\n",
- "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (1.4.1)\n",
- "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (1.3.1)\n",
- "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum[onnxruntime]) (4.0.3)\n",
- "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (3.3.2)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (3.6)\n",
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (2.0.7)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->optimum[onnxruntime]) (2023.11.17)\n",
- "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.9->optimum[onnxruntime]) (2.1.3)\n",
- "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum[onnxruntime]) (2.8.2)\n",
- "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum[onnxruntime]) (2023.3.post1)\n",
- "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets->optimum[onnxruntime]) (1.16.0)\n",
- "Installing collected packages: sentencepiece, pyarrow-hotfix, onnx, humanfriendly, dill, responses, multiprocess, coloredlogs, onnxruntime, datasets, evaluate, optimum\n",
- "Successfully installed coloredlogs-15.0.1 datasets-2.16.1 dill-0.3.7 evaluate-0.4.1 humanfriendly-10.0 multiprocess-0.70.15 onnx-1.15.0 onnxruntime-1.16.3 optimum-1.16.1 pyarrow-hotfix-0.6 responses-0.18.0 sentencepiece-0.1.99\n"
- ]
- }
- ],
- "source": [
- "!pip install --upgrade-strategy eager install optimum[onnxruntime]"
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/438M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "UXcTvyA007iA"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4d9593b4c3f843b3bc142b3177d88ec2",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [BAAI/bge-base-en](https://huggingface.co/BAAI/bge-base-en) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n"
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/366 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 311,
- "referenced_widgets": [
- "b6aeecd601db44c4b8d92abef34156c2",
- "6f2b77719e3344b1954eaaf46e7d512e",
- "73b4a51894114681abdc74131668e43e",
- "2c75bce84ce749c797446623bfcfaf39",
- "48cff192c62444a292343467b1eec46a",
- "6ba1b72f64d94cd6878bce77ca47172b",
- "bf2876273c1b409f969cbee51441309b",
- "a4ca89a620284318996aa3ef1e1b9d28",
- "26970bab519f4c2293bb2d88455909a7",
- "bc73afc3091d40ca9c61d9352611a7a7",
- "0e2c74dfa0364df6a4eb7d0b5f41e694",
- "a14c903e07e84eebbef3d4da3f9ffb53",
- "563eba9b7c924ba9ad0e8abdb139a8e2",
- "82f71bd255b6416f9808f5805f017a26",
- "1c126c817ec941c58914112992764f24",
- "394c30efbf924843869e9073a95af196",
- "fdb324b3829f4a0791ebcfc11499062a",
- "d0d4e25e98f941ad91481b1d3e0aeb7b",
- "3e8ceaae9fe7468a9e14b2bbab3ece53",
- "34a28ead887e450b802ea8be16c0f592",
- "086dbc2868c0488b9d9dc5d3c8972133",
- "9221a3d70fb644749e9828ea60b6ee3d",
- "b94853f0f9c6437f82170bf67e9debc9",
- "0dac17a4e0304f33b1ad997590a663ad",
- "9f785d2b81c54695997e370210a61226",
- "26fa5822a4974257b8862581a60e3179",
- "cf7cfc607b2041a993df35b525aa0d45",
- "fa84fc0acea44fde89aa9ffd93a5aa9a",
- "0b9da9243f9e49bdb6411c131a7e4682",
- "cec343ddabf94dd0832e4ac7aec14ecc",
- "82e0ea7d46b5465ebc8faad3beab43b5",
- "fb7485359aa74e82963d8255b41cf81f",
- "aee26846c0634c9b8ae61b4dc14ed675",
- "e1e339b3d7a14048b0fc35b7d05cb60a",
- "b0a1cd96c31848b78b150258b0637a82",
- "84ba32d3a129477591e78264ddf22cb7",
- "8bdeba24a5b0463abcaa0213674b388b",
- "82052250508f40b483fd5d5a2e3bc316",
- "5bc65236ab3443eb89b761452f5f776c",
- "30696b2ee77846149f4b0d88f9d1cc32",
- "7c03e098f4aa4e61bc13da78d6b7b7d8",
- "de6ef93762a047998bb0d888d2a2720a",
- "291ce98198874d7ebac0affca544861c",
- "749855c3e9ac4566a509d834204ab0e4",
- "82438bb56d4841eaa7e4ff609e79edd9",
- "2aff7e730cce4d468136bab9c735d2e9",
- "517eb17af21949908b597e76f1c835b4",
- "0600b51130bc4f5bbe68e38bb5662ce1",
- "0b0f81687aac4432bdc06a75bcc53320",
- "f013cfd30cd948159d6a3216dc00ab94",
- "21f7d6700ffe4ce09d67147c17d13f20",
- "69ac10476cd5458a8b06333a559c9d28",
- "2e3a3b76638c44d481569a7d16d71cb7",
- "e0112abb7d0643f981094fb8ed67390f",
- "0d7d08ec18b04866ae4c9f0870d8d8f6",
- "85542508f8fa4545a8b48829d5b69ded",
- "1338f4e57b554fafba3d0fb2acf59b02",
- "bfcf2b27861a4c7b9451d6dc6ef600b6",
- "fdf60696d8be49ae9f9d98200b7c35e8",
- "3febcb4b4101454eafbe2be02b06d7b4",
- "fcaf78ec75af4b76a95de9d73194516d",
- "4b21dc70cd214d64ad2da330f38e1638",
- "66652fb210ff46cea975b7f02c34df5d",
- "7fa5ffc59fb14dc39feac160a10bce66",
- "03a985766f57423991c4a416d487b451",
- "a40c77d69fe641c298616c62381faaf1"
- ]
- },
- "id": "2jnG-ReQ07iA",
- "outputId": "c11a6199-b561-4c8e-e9d1-ebf62db3ef72"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "98bdb7fefacf4f69ae6476621725637d",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/719 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "b6aeecd601db44c4b8d92abef34156c2"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/438M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "a14c903e07e84eebbef3d4da3f9ffb53"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/366 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "b94853f0f9c6437f82170bf67e9debc9"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "e1e339b3d7a14048b0fc35b7d05cb60a"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/711k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "82438bb56d4841eaa7e4ff609e79edd9"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/125 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "85542508f8fa4545a8b48829d5b69ded"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
- "\n",
- "MODEL_NAME = \"BAAI/bge-base-en\"\n",
- "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(EXPORT_PATH)\n",
- "\n",
- "# Create directory for assets and move the tokenizer files.\n",
- "# A separate folder is needed for Spark NLP.\n",
- "!mkdir {EXPORT_PATH}/assets\n",
- "!mv {EXPORT_PATH}/vocab.txt {EXPORT_PATH}/assets/"
+ "text/plain": [
+ "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "94jRIamN07iB"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "04fc855de92a4ac7bde136daffa9df93",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/711k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "id": "LEv_GmQZ07iB",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "f80b15d9-3c83-41e5-f40c-11102bfb7b03"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ff2effbb2e10466e850209f56c354636",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 426312\n",
- "drwxr-xr-x 2 root root 4096 Jan 1 09:23 assets\n",
- "-rw-r--r-- 1 root root 735 Jan 1 09:23 config.json\n",
- "-rw-r--r-- 1 root root 435811540 Jan 1 09:23 model.onnx\n",
- "-rw-r--r-- 1 root root 695 Jan 1 09:23 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 1242 Jan 1 09:23 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 711396 Jan 1 09:23 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}"
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/125 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "05FGr0W_07iB",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "6f6336d4-7ec4-472a-e2ce-49b4d28a3a7e"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 228\n",
- "-rw-r--r-- 1 root root 231508 Jan 1 09:23 vocab.txt\n"
- ]
- }
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.2.1+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
+ "\n",
+ "MODEL_NAME = \"BAAI/bge-base-en\"\n",
+ "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(EXPORT_PATH)\n",
+ "\n",
+ "# Create directory for assets and move the tokenizer files.\n",
+ "# A separate folder is needed for Spark NLP.\n",
+ "!mkdir {EXPORT_PATH}/assets\n",
+ "!mv {EXPORT_PATH}/vocab.txt {EXPORT_PATH}/assets/"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 426316\n",
+ "drwxr-xr-x 2 root root 4096 Apr 12 11:13 assets\n",
+ "-rw-r--r-- 1 root root 735 Apr 12 11:13 config.json\n",
+ "-rw-r--r-- 1 root root 435811541 Apr 12 11:13 model.onnx\n",
+ "-rw-r--r-- 1 root root 695 Apr 12 11:13 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 1242 Apr 12 11:13 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 711396 Apr 12 11:13 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 228\n",
+ "-rw-r--r-- 1 root root 231508 Apr 12 11:13 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save BGE in Spark NLP\n",
+ "\n",
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script\n",
+ "- However, we need to upgrade Spark to a more recent version to use this annotator."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m568.4/568.4 kB\u001b[0m \u001b[31m38.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting pyspark==3.4.1\n",
+ " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting py4j==0.10.9.7 (from pyspark==3.4.1)\n",
+ " Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.5/200.5 kB\u001b[0m \u001b[31m27.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hBuilding wheels for collected packages: pyspark\n",
+ " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285388 sha256=62465da1460fcdc99650dde11bbf8f2ea59eed17293c05cc491293d1f701c682\n",
+ " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n",
+ "Successfully built pyspark\n",
+ "Installing collected packages: py4j, pyspark\n",
+ " Attempting uninstall: py4j\n",
+ " Found existing installation: py4j 0.10.9.5\n",
+ " Uninstalling py4j-0.10.9.5:\n",
+ " Successfully uninstalled py4j-0.10.9.5\n",
+ " Attempting uninstall: pyspark\n",
+ " Found existing installation: pyspark 3.2.3\n",
+ " Uninstalling pyspark-3.2.3:\n",
+ " Successfully uninstalled pyspark-3.2.3\n",
+ "Successfully installed py4j-0.10.9.7 pyspark-3.4.1\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n",
+ "! pip install -U pyspark==3.4.1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/lib/python3.10/subprocess.py:1796: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n",
+ " self.pid = _posixsubprocess.fork_exec(\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `E5Embeddings` which allows us to load the ONNX model\n",
+ "- Most params will be set automatically. They can also be set later after loading the model in `E5Embeddings` during runtime, so don't worry about setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "# All these params should be identical to the original ONNX model\n",
+ "BGE = BGEEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
+ " .setInputCols([\"document\"])\\\n",
+ " .setOutputCol(\"bge\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
sparknlp.annotator.embeddings.bge_embeddings.BGEEmbeddings
def __init__(classname='com.johnsnowlabs.nlp.embeddings.BGEEmbeddings', java_model=None)
/usr/local/lib/python3.10/dist-packages/sparknlp/annotator/embeddings/bge_embeddings.pySentence embeddings using BGE.\n",
+ "\n",
+ " BGE, or BAAI General Embeddings, a model that can map any text to a low-dimensional dense \n",
+ "vector which can be used for tasks like retrieval, classification, clustering, or semantic search.\n",
+ "\n",
+ "Pretrained models can be loaded with `pretrained` of the companion object:\n",
+ "\n",
+ " >>> embeddings = BGEEmbeddings.pretrained() \\\n",
+ " ... .setInputCols(["document"]) \\\n",
+ " ... .setOutputCol("bge_embeddings")\n",
+ "\n",
+ "\n",
+ " The default model is ``"bge_base"``, if no name is provided.\n",
+ "\n",
+ " For available pretrained models please see the\n",
+ " `Models Hub <https://sparknlp.org/models?q=BGE>`__.\n",
+ "\n",
+ "\n",
+ " ====================== ======================\n",
+ " Input Annotation types Output Annotation type\n",
+ " ====================== ======================\n",
+ " ``DOCUMENT`` ``SENTENCE_EMBEDDINGS``\n",
+ " ====================== ======================\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " batchSize\n",
+ " Size of every batch , by default 8\n",
+ " dimension\n",
+ " Number of embedding dimensions, by default 768\n",
+ " caseSensitive\n",
+ " Whether to ignore case in tokens for embeddings matching, by default False\n",
+ " maxSentenceLength\n",
+ " Max sentence length to process, by default 512\n",
+ " configProtoBytes\n",
+ " ConfigProto from tensorflow, serialized into byte array.\n",
+ "\n",
+ " References\n",
+ " ----------\n",
+ " `C-Pack: Packaged Resources To Advance General Chinese Embedding <https://arxiv.org/pdf/2309.07597>`__\n",
+ " `BGE Github Repository <https://github.com/FlagOpen/FlagEmbedding>`__\n",
+ "\n",
+ " **Paper abstract**\n",
+ "\n",
+ " *We introduce C-Pack, a package of resources that significantly advance the field of general\n",
+ " Chinese embeddings. C-Pack includes three critical resources. \n",
+ " 1) C-MTEB is a comprehensive benchmark for Chinese text embeddings covering 6 tasks and 35 datasets.\n",
+ " 2) C-MTP is a massive text embedding dataset curated from labeled and unlabeled Chinese corpora\n",
+ " for training embedding models.\n",
+ " 3) C-TEM is a family of embedding models covering multiple sizes.\n",
+ " Our models outperform all prior Chinese text embeddings on C-MTEB by up to +10% upon the \n",
+ " time of the release. We also integrate and optimize the entire suite of training methods for\n",
+ " C-TEM. Along with our resources on general Chinese embedding, we release our data and models for\n",
+ " English text embeddings. The English models achieve stateof-the-art performance on the MTEB\n",
+ " benchmark; meanwhile, our released English data is 2 times larger than the Chinese data. All\n",
+ " these resources are made publicly available at https://github.com/FlagOpen/FlagEmbedding.*\n",
+ "\n",
+ " Examples\n",
+ " --------\n",
+ " >>> import sparknlp\n",
+ " >>> from sparknlp.base import *\n",
+ " >>> from sparknlp.annotator import *\n",
+ " >>> from pyspark.ml import Pipeline\n",
+ " >>> documentAssembler = DocumentAssembler() \\\n",
+ " ... .setInputCol("text") \\\n",
+ " ... .setOutputCol("document")\n",
+ " >>> embeddings = BGEEmbeddings.pretrained() \\\n",
+ " ... .setInputCols(["document"]) \\\n",
+ " ... .setOutputCol("bge_embeddings")\n",
+ " >>> embeddingsFinisher = EmbeddingsFinisher() \\\n",
+ " ... .setInputCols(["bge_embeddings"]) \\\n",
+ " ... .setOutputCols("finished_embeddings") \\\n",
+ " ... .setOutputAsVector(True)\n",
+ " >>> pipeline = Pipeline().setStages([\n",
+ " ... documentAssembler,\n",
+ " ... embeddings,\n",
+ " ... embeddingsFinisher\n",
+ " ... ])\n",
+ " >>> data = spark.createDataFrame([["query: how much protein should a female eat",\n",
+ " ... "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day." + ... "But, as you can see from this chart, you'll need to increase that if you're expecting or training for a" + ... "marathon. Check out the chart below to see how much protein you should be eating each day.",\n",
+ " ... ]]).toDF("text")\n",
+ " >>> result = pipeline.fit(data).transform(data)\n",
+ " >>> result.selectExpr("explode(finished_embeddings) as result").show(5, 80)\n",
+ " +--------------------------------------------------------------------------------+\n",
+ " | result|\n",
+ " +--------------------------------------------------------------------------------+\n",
+ " |[[8.0190285E-4, -0.005974853, -0.072875895, 0.007944068, 0.026059335, -0.0080...|\n",
+ " |[[0.050514214, 0.010061974, -0.04340176, -0.020937217, 0.05170225, 0.01157857...|\n",
+ " +--------------------------------------------------------------------------------+\n",
+ "
\n",
+ " \n",
+ "
"
],
- "source": [
- "!ls -l {EXPORT_PATH}/assets"
+ "text/plain": [
+ "sparknlp.annotator.embeddings.bge_embeddings.BGEEmbeddings"
]
- },
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "BGEEmbeddings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "BGE.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your ONNX BGE model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "8U9fAdyP07iB"
- },
- "source": [
- "## Import and Save BGE in Spark NLP\n",
- "\n",
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 425676\n",
+ "-rw-r--r-- 1 root root 435878171 Apr 12 11:18 bge_onnx\n",
+ "drwxr-xr-x 3 root root 4096 Apr 12 11:18 fields\n",
+ "drwxr-xr-x 2 root root 4096 Apr 12 11:17 metadata\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {MODEL_NAME}_spark_nlp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny E5 model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "\n",
+ "from sparknlp.base import *\n",
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "document_assembler = DocumentAssembler()\\\n",
+ " .setInputCol(\"text\")\\\n",
+ " .setOutputCol(\"document\")\n",
+ "\n",
+ "BGE_loaded = BGEEmbeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
+ " .setInputCols([\"document\"])\\\n",
+ " .setOutputCol(\"bge\")\\\n",
+ "\n",
+ "pipeline = Pipeline(\n",
+ " stages = [\n",
+ " document_assembler,\n",
+ " BGE_loaded\n",
+ " ])\n",
+ "\n",
+ "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
+ "model = pipeline.fit(data)\n",
+ "result = model.transform(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "source": [],
- "metadata": {
- "id": "lsEhkReSVKT1"
- },
- "execution_count": null,
- "outputs": []
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+\n",
+ "| embeddings|\n",
+ "+--------------------+\n",
+ "|[-0.03762533, 0.0...|\n",
+ "+--------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "result.selectExpr(\"explode(bge.embeddings) as embeddings\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of E5 models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "04fc855de92a4ac7bde136daffa9df93": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_fbe3133d96334b3588ccdc747a4dc860",
+ "IPY_MODEL_78e7ad669cda4b0db40de79feec10453",
+ "IPY_MODEL_ee18a8efc7564117b7243dcf266b6930"
+ ],
+ "layout": "IPY_MODEL_e4fdba14d81f4709b4377885faacbc84"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "Tqi7wiwq07iC",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "1c02ea38-a45e-443d-aa87-7911413026fa"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Collecting pyspark\n",
- " Downloading pyspark-3.5.0.tar.gz (316.9 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.9/316.9 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "Requirement already satisfied: py4j==0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pyspark) (0.10.9.7)\n",
- "Building wheels for collected packages: pyspark\n",
- " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for pyspark: filename=pyspark-3.5.0-py2.py3-none-any.whl size=317425345 sha256=540db909df4d61fe0e14a45bce69a7e9f9ca01fb84b9d9eccb028c1238573047\n",
- " Stored in directory: /root/.cache/pip/wheels/41/4e/10/c2cf2467f71c678cfc8a6b9ac9241e5e44a01940da8fbb17fc\n",
- "Successfully built pyspark\n",
- "Installing collected packages: pyspark\n",
- "Successfully installed pyspark-3.5.0\n",
- "Processing ./spark_nlp-5.2.1-py2.py3-none-any.whl\n",
- "Installing collected packages: spark-nlp\n",
- "Successfully installed spark-nlp-5.2.1\n"
- ]
- }
+ "05843a7fbe90487395d085273a862474": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_602e54ecffd94d599fa3bc0164c61b95",
+ "max": 125,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f01819dfb6a487d9fd1e9c64eaa4a2f",
+ "value": 125
+ }
+ },
+ "1ba47727b2d34d2cb19c2eacc94df028": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1c33fe0eff454bc69582ef43ac6d6cc5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2f01819dfb6a487d9fd1e9c64eaa4a2f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "31a9b57e1773438dbeecf2ddf33ef99d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "38683dbe19364b19a005b184a924a72c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9ad4628cf6674e57955dc441145fda6d",
+ "max": 719,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_7b1b0c121332424687393b0352878e3d",
+ "value": 719
+ }
+ },
+ "4be5b724e5044ed5a617e9a763bc252e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e8c9a619bf2a4e1a8ab88d04e8571f36",
+ "placeholder": "",
+ "style": "IPY_MODEL_b7f88f02a2994b78b3896ff819d319ad",
+ "value": " 232k/232k [00:00<00:00, 10.4MB/s]"
+ }
+ },
+ "4d9593b4c3f843b3bc142b3177d88ec2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_8d563f1b8401418cb650099070bd9b89",
+ "IPY_MODEL_da3e4bc985454dfeac9072005ecc1476",
+ "IPY_MODEL_9e8ebe9169c64d95a732686184fed177"
],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_641bde89ac824839868903e74e921989"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "SZhqrGJg07iC"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "4dfad0c536844190889d5a09c0efde78": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1c33fe0eff454bc69582ef43ac6d6cc5",
+ "max": 231508,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_8de47e3311e24ee6b6b54054852ae7e7",
+ "value": 231508
+ }
},
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "Pe2IVIs407iC"
- },
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()"
- ]
+ "508795d94bb648eb91c40fa786b7ae90": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "iJ0nwY_g07iC"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `E5Embeddings` which allows us to load the ONNX model\n",
- "- Most params will be set automatically. They can also be set later after loading the model in `E5Embeddings` during runtime, so don't worry about setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
- ]
+ "5127f6c9f27f45a281e3de25b798f442": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "2ArVWTRr07iD"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "\n",
- "# All these params should be identical to the original ONNX model\n",
- "BGE = BGEEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
- " .setInputCols([\"document\"])\\\n",
- " .setOutputCol(\"bge\")"
- ]
+ "53590c0136a44efb94ed26e30594552e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ee0fa667215148858fd66cc301d1ada1",
+ "placeholder": "",
+ "style": "IPY_MODEL_faeec3bcda1b4682a9c5754394e7515c",
+ "value": " 438M/438M [00:04<00:00, 33.6MB/s]"
+ }
},
- {
- "cell_type": "code",
- "source": [
- "BGEEmbeddings"
+ "561900c921f04ab0870bd3b32badfbe1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "574b1a52edf94c24b5bc9990a91a74da": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5b9976ca08014c5ebe13c299b280bdb1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5c9c70b7f00c4afb958815a69aee0c69": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5d5dbec6000a4ed1b2dca640d685bace": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "602e54ecffd94d599fa3bc0164c61b95": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "607fb88de6f44dd682878653c85a539e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "641bde89ac824839868903e74e921989": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "648d3d6bc327494cb2ccf0bf3286eb36": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_31a9b57e1773438dbeecf2ddf33ef99d",
+ "placeholder": "",
+ "style": "IPY_MODEL_a181a880c31b4e6993a7a2fc5cc5927a",
+ "value": "vocab.txt: 100%"
+ }
+ },
+ "6b37614be8144aaa8a370cfae6b4ab75": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6c1a99fe6ec84ca7afd06ed5be648eeb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6ebe12e78b624355a69d62c827d7f085": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7095df5dce6d4bb68fa0d87aab86f78f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "72ec65cd646f47b7b8787313ed308c4a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "78e7ad669cda4b0db40de79feec10453": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_508795d94bb648eb91c40fa786b7ae90",
+ "max": 711396,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_5d5dbec6000a4ed1b2dca640d685bace",
+ "value": 711396
+ }
+ },
+ "7998f7bec2c342c8b9d0622db8b47397": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "7b10f1b428904bc0905e54fc030a91e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "7b1b0c121332424687393b0352878e3d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "87e79a2bd1814d05b72f339f9d28a9f7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "88ccf9763afd48019f621b8953211a92": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_574b1a52edf94c24b5bc9990a91a74da",
+ "placeholder": "",
+ "style": "IPY_MODEL_7095df5dce6d4bb68fa0d87aab86f78f",
+ "value": "config.json: 100%"
+ }
+ },
+ "8af20d759d014ca78d2328346ba9c57b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f0fc8b2e6ff840309c9a219ea217fcc0",
+ "placeholder": "",
+ "style": "IPY_MODEL_95c146a715a641f6996ecd333a78881d",
+ "value": " 719/719 [00:00<00:00, 9.30kB/s]"
+ }
+ },
+ "8d563f1b8401418cb650099070bd9b89": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c4c1fec0be994ac89622f441e4ff3354",
+ "placeholder": "",
+ "style": "IPY_MODEL_7b10f1b428904bc0905e54fc030a91e8",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "8de47e3311e24ee6b6b54054852ae7e7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "942546f9f9d34fc6b978d3efadeddb06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "95c146a715a641f6996ecd333a78881d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "98ab41762ba1448a884693fa58dae307": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "98bdb7fefacf4f69ae6476621725637d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_648d3d6bc327494cb2ccf0bf3286eb36",
+ "IPY_MODEL_4dfad0c536844190889d5a09c0efde78",
+ "IPY_MODEL_4be5b724e5044ed5a617e9a763bc252e"
],
- "metadata": {
- "id": "zWw5xrO-53xk",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "58f812d5-e2a8-4ab0-cc37-1fa496213458"
- },
- "execution_count": 8,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "sparknlp.annotator.embeddings.bge_embeddings.BGEEmbeddings"
- ]
- },
- "metadata": {},
- "execution_count": 8
- }
- ]
+ "layout": "IPY_MODEL_e84a92e1d713426c8fe341a623904c1c"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "F9TmAQx_07iD"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "9ad4628cf6674e57955dc441145fda6d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "id": "KsM5Ect607iD"
- },
- "outputs": [],
- "source": [
- "BGE.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
- ]
+ "9e8ebe9169c64d95a732686184fed177": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1ba47727b2d34d2cb19c2eacc94df028",
+ "placeholder": "",
+ "style": "IPY_MODEL_7998f7bec2c342c8b9d0622db8b47397",
+ "value": " 366/366 [00:00<00:00, 19.4kB/s]"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "-mSnWope07iD"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "a181a880c31b4e6993a7a2fc5cc5927a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "id": "AhmAF6KZ07iD"
- },
- "outputs": [],
- "source": [
- "!rm -rf {EXPORT_PATH}"
- ]
+ "a483a2cc4c7449db85992600938d1fd4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_87e79a2bd1814d05b72f339f9d28a9f7",
+ "max": 437955512,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_98ab41762ba1448a884693fa58dae307",
+ "value": 437955512
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "207B-CAx07iE"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your ONNX BGE model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "a6bd88a906a74c2483fb5f1ef48ff445": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "id": "vnWXwenx07iE",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "156ad10b-73c5-489d-e961-d9f3688b2705"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 425676\n",
- "-rw-r--r-- 1 root root 435878170 Jan 1 09:41 bge_onnx\n",
- "drwxr-xr-x 3 root root 4096 Jan 1 09:41 fields\n",
- "drwxr-xr-x 2 root root 4096 Jan 1 09:41 metadata\n"
- ]
- }
+ "a87d5e5fa0a94fdc959435575a1f0b21": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a95909b2bc4841f4bd64dc9f4732d9c8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6ebe12e78b624355a69d62c827d7f085",
+ "placeholder": "",
+ "style": "IPY_MODEL_5c9c70b7f00c4afb958815a69aee0c69",
+ "value": " 125/125 [00:00<00:00, 9.31kB/s]"
+ }
+ },
+ "b7f88f02a2994b78b3896ff819d319ad": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c4c1fec0be994ac89622f441e4ff3354": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "caf7c06fa3b04d33ad38ffcac64c7dfe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cec3087741c14a54a25fa6228c28aae3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_88ccf9763afd48019f621b8953211a92",
+ "IPY_MODEL_38683dbe19364b19a005b184a924a72c",
+ "IPY_MODEL_8af20d759d014ca78d2328346ba9c57b"
],
- "source": [
- "! ls -l {MODEL_NAME}_spark_nlp"
- ]
+ "layout": "IPY_MODEL_dd80da9f70c041c186b2fe9a347e05ec"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "LUlYEn6c07iE"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny E5 model 😊"
- ]
+ "d6ed52dfbff341dba71ec72853407d80": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_561900c921f04ab0870bd3b32badfbe1",
+ "placeholder": "",
+ "style": "IPY_MODEL_edf3057d83c84f71afbd77a839fb1a77",
+ "value": "special_tokens_map.json: 100%"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "Ltaki7iL07iE"
- },
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "\n",
- "from sparknlp.base import *\n",
- "from sparknlp.annotator import *\n",
- "\n",
- "document_assembler = DocumentAssembler()\\\n",
- " .setInputCol(\"text\")\\\n",
- " .setOutputCol(\"document\")\n",
- "\n",
- "BGE_loaded = BGEEmbeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
- " .setInputCols([\"document\"])\\\n",
- " .setOutputCol(\"bge\")\\\n",
- "\n",
- "pipeline = Pipeline(\n",
- " stages = [\n",
- " document_assembler,\n",
- " BGE_loaded\n",
- " ])\n",
- "\n",
- "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
- "model = pipeline.fit(data)\n",
- "result = model.transform(data)"
- ]
+ "da3e4bc985454dfeac9072005ecc1476": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_caf7c06fa3b04d33ad38ffcac64c7dfe",
+ "max": 366,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_607fb88de6f44dd682878653c85a539e",
+ "value": 366
+ }
},
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "id": "xwdX1Ler07iE",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "e26c6baa-2952-47af-e386-fdd9d103b5bf"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "+--------------------+\n",
- "| embeddings|\n",
- "+--------------------+\n",
- "|[-0.18189742, -0....|\n",
- "+--------------------+\n",
- "\n"
- ]
- }
+ "dd80da9f70c041c186b2fe9a347e05ec": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e4fdba14d81f4709b4377885faacbc84": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e84a92e1d713426c8fe341a623904c1c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e8c9a619bf2a4e1a8ab88d04e8571f36": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e8fa9fb8456a4a499ae3ab348a89f0e5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6c1a99fe6ec84ca7afd06ed5be648eeb",
+ "placeholder": "",
+ "style": "IPY_MODEL_a87d5e5fa0a94fdc959435575a1f0b21",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "edf3057d83c84f71afbd77a839fb1a77": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ee0fa667215148858fd66cc301d1ada1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ee18a8efc7564117b7243dcf266b6930": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_72ec65cd646f47b7b8787313ed308c4a",
+ "placeholder": "",
+ "style": "IPY_MODEL_942546f9f9d34fc6b978d3efadeddb06",
+ "value": " 711k/711k [00:00<00:00, 26.5MB/s]"
+ }
+ },
+ "ef655c7c3a08470fa85d00bc55b2bb51": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e8fa9fb8456a4a499ae3ab348a89f0e5",
+ "IPY_MODEL_a483a2cc4c7449db85992600938d1fd4",
+ "IPY_MODEL_53590c0136a44efb94ed26e30594552e"
],
- "source": [
- "result.selectExpr(\"explode(bge.embeddings) as embeddings\").show()"
- ]
+ "layout": "IPY_MODEL_6b37614be8144aaa8a370cfae6b4ab75"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "BQSavS4z07iF"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of E5 models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
+ "f0fc8b2e6ff840309c9a219ea217fcc0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "source": [],
- "metadata": {
- "id": "F20-61Pd_rvB"
- },
- "execution_count": null,
- "outputs": []
- }
- ],
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4"
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.16"
- },
- "accelerator": "GPU",
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "b6aeecd601db44c4b8d92abef34156c2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_6f2b77719e3344b1954eaaf46e7d512e",
- "IPY_MODEL_73b4a51894114681abdc74131668e43e",
- "IPY_MODEL_2c75bce84ce749c797446623bfcfaf39"
- ],
- "layout": "IPY_MODEL_48cff192c62444a292343467b1eec46a"
- }
- },
- "6f2b77719e3344b1954eaaf46e7d512e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_6ba1b72f64d94cd6878bce77ca47172b",
- "placeholder": "",
- "style": "IPY_MODEL_bf2876273c1b409f969cbee51441309b",
- "value": "config.json: 100%"
- }
- },
- "73b4a51894114681abdc74131668e43e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_a4ca89a620284318996aa3ef1e1b9d28",
- "max": 719,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_26970bab519f4c2293bb2d88455909a7",
- "value": 719
- }
- },
- "2c75bce84ce749c797446623bfcfaf39": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_bc73afc3091d40ca9c61d9352611a7a7",
- "placeholder": "",
- "style": "IPY_MODEL_0e2c74dfa0364df6a4eb7d0b5f41e694",
- "value": " 719/719 [00:00<00:00, 40.7kB/s]"
- }
- },
- "48cff192c62444a292343467b1eec46a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6ba1b72f64d94cd6878bce77ca47172b": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "bf2876273c1b409f969cbee51441309b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "a4ca89a620284318996aa3ef1e1b9d28": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "26970bab519f4c2293bb2d88455909a7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "bc73afc3091d40ca9c61d9352611a7a7": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0e2c74dfa0364df6a4eb7d0b5f41e694": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "a14c903e07e84eebbef3d4da3f9ffb53": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_563eba9b7c924ba9ad0e8abdb139a8e2",
- "IPY_MODEL_82f71bd255b6416f9808f5805f017a26",
- "IPY_MODEL_1c126c817ec941c58914112992764f24"
- ],
- "layout": "IPY_MODEL_394c30efbf924843869e9073a95af196"
- }
- },
- "563eba9b7c924ba9ad0e8abdb139a8e2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fdb324b3829f4a0791ebcfc11499062a",
- "placeholder": "",
- "style": "IPY_MODEL_d0d4e25e98f941ad91481b1d3e0aeb7b",
- "value": "model.safetensors: 100%"
- }
- },
- "82f71bd255b6416f9808f5805f017a26": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3e8ceaae9fe7468a9e14b2bbab3ece53",
- "max": 437955512,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_34a28ead887e450b802ea8be16c0f592",
- "value": 437955512
- }
- },
- "1c126c817ec941c58914112992764f24": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_086dbc2868c0488b9d9dc5d3c8972133",
- "placeholder": "",
- "style": "IPY_MODEL_9221a3d70fb644749e9828ea60b6ee3d",
- "value": " 438M/438M [00:11<00:00, 39.8MB/s]"
- }
- },
- "394c30efbf924843869e9073a95af196": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "fdb324b3829f4a0791ebcfc11499062a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d0d4e25e98f941ad91481b1d3e0aeb7b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3e8ceaae9fe7468a9e14b2bbab3ece53": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "34a28ead887e450b802ea8be16c0f592": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "086dbc2868c0488b9d9dc5d3c8972133": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9221a3d70fb644749e9828ea60b6ee3d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b94853f0f9c6437f82170bf67e9debc9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_0dac17a4e0304f33b1ad997590a663ad",
- "IPY_MODEL_9f785d2b81c54695997e370210a61226",
- "IPY_MODEL_26fa5822a4974257b8862581a60e3179"
- ],
- "layout": "IPY_MODEL_cf7cfc607b2041a993df35b525aa0d45"
- }
- },
- "0dac17a4e0304f33b1ad997590a663ad": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fa84fc0acea44fde89aa9ffd93a5aa9a",
- "placeholder": "",
- "style": "IPY_MODEL_0b9da9243f9e49bdb6411c131a7e4682",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "9f785d2b81c54695997e370210a61226": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_cec343ddabf94dd0832e4ac7aec14ecc",
- "max": 366,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_82e0ea7d46b5465ebc8faad3beab43b5",
- "value": 366
- }
- },
- "26fa5822a4974257b8862581a60e3179": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fb7485359aa74e82963d8255b41cf81f",
- "placeholder": "",
- "style": "IPY_MODEL_aee26846c0634c9b8ae61b4dc14ed675",
- "value": " 366/366 [00:00<00:00, 26.2kB/s]"
- }
- },
- "cf7cfc607b2041a993df35b525aa0d45": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "fa84fc0acea44fde89aa9ffd93a5aa9a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0b9da9243f9e49bdb6411c131a7e4682": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "cec343ddabf94dd0832e4ac7aec14ecc": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "82e0ea7d46b5465ebc8faad3beab43b5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "fb7485359aa74e82963d8255b41cf81f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "aee26846c0634c9b8ae61b4dc14ed675": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "e1e339b3d7a14048b0fc35b7d05cb60a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_b0a1cd96c31848b78b150258b0637a82",
- "IPY_MODEL_84ba32d3a129477591e78264ddf22cb7",
- "IPY_MODEL_8bdeba24a5b0463abcaa0213674b388b"
- ],
- "layout": "IPY_MODEL_82052250508f40b483fd5d5a2e3bc316"
- }
- },
- "b0a1cd96c31848b78b150258b0637a82": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5bc65236ab3443eb89b761452f5f776c",
- "placeholder": "",
- "style": "IPY_MODEL_30696b2ee77846149f4b0d88f9d1cc32",
- "value": "vocab.txt: 100%"
- }
- },
- "84ba32d3a129477591e78264ddf22cb7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7c03e098f4aa4e61bc13da78d6b7b7d8",
- "max": 231508,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_de6ef93762a047998bb0d888d2a2720a",
- "value": 231508
- }
- },
- "8bdeba24a5b0463abcaa0213674b388b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_291ce98198874d7ebac0affca544861c",
- "placeholder": "",
- "style": "IPY_MODEL_749855c3e9ac4566a509d834204ab0e4",
- "value": " 232k/232k [00:00<00:00, 1.42MB/s]"
- }
- },
- "82052250508f40b483fd5d5a2e3bc316": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "5bc65236ab3443eb89b761452f5f776c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "30696b2ee77846149f4b0d88f9d1cc32": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7c03e098f4aa4e61bc13da78d6b7b7d8": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "de6ef93762a047998bb0d888d2a2720a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "291ce98198874d7ebac0affca544861c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "749855c3e9ac4566a509d834204ab0e4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "82438bb56d4841eaa7e4ff609e79edd9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_2aff7e730cce4d468136bab9c735d2e9",
- "IPY_MODEL_517eb17af21949908b597e76f1c835b4",
- "IPY_MODEL_0600b51130bc4f5bbe68e38bb5662ce1"
- ],
- "layout": "IPY_MODEL_0b0f81687aac4432bdc06a75bcc53320"
- }
- },
- "2aff7e730cce4d468136bab9c735d2e9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f013cfd30cd948159d6a3216dc00ab94",
- "placeholder": "",
- "style": "IPY_MODEL_21f7d6700ffe4ce09d67147c17d13f20",
- "value": "tokenizer.json: 100%"
- }
- },
- "517eb17af21949908b597e76f1c835b4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_69ac10476cd5458a8b06333a559c9d28",
- "max": 711396,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_2e3a3b76638c44d481569a7d16d71cb7",
- "value": 711396
- }
- },
- "0600b51130bc4f5bbe68e38bb5662ce1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e0112abb7d0643f981094fb8ed67390f",
- "placeholder": "",
- "style": "IPY_MODEL_0d7d08ec18b04866ae4c9f0870d8d8f6",
- "value": " 711k/711k [00:00<00:00, 2.18MB/s]"
- }
- },
- "0b0f81687aac4432bdc06a75bcc53320": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f013cfd30cd948159d6a3216dc00ab94": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "21f7d6700ffe4ce09d67147c17d13f20": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "69ac10476cd5458a8b06333a559c9d28": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2e3a3b76638c44d481569a7d16d71cb7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "e0112abb7d0643f981094fb8ed67390f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0d7d08ec18b04866ae4c9f0870d8d8f6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "85542508f8fa4545a8b48829d5b69ded": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_1338f4e57b554fafba3d0fb2acf59b02",
- "IPY_MODEL_bfcf2b27861a4c7b9451d6dc6ef600b6",
- "IPY_MODEL_fdf60696d8be49ae9f9d98200b7c35e8"
- ],
- "layout": "IPY_MODEL_3febcb4b4101454eafbe2be02b06d7b4"
- }
- },
- "1338f4e57b554fafba3d0fb2acf59b02": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fcaf78ec75af4b76a95de9d73194516d",
- "placeholder": "",
- "style": "IPY_MODEL_4b21dc70cd214d64ad2da330f38e1638",
- "value": "special_tokens_map.json: 100%"
- }
- },
- "bfcf2b27861a4c7b9451d6dc6ef600b6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_66652fb210ff46cea975b7f02c34df5d",
- "max": 125,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_7fa5ffc59fb14dc39feac160a10bce66",
- "value": 125
- }
- },
- "fdf60696d8be49ae9f9d98200b7c35e8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_03a985766f57423991c4a416d487b451",
- "placeholder": "",
- "style": "IPY_MODEL_a40c77d69fe641c298616c62381faaf1",
- "value": " 125/125 [00:00<00:00, 5.69kB/s]"
- }
- },
- "3febcb4b4101454eafbe2be02b06d7b4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "fcaf78ec75af4b76a95de9d73194516d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4b21dc70cd214d64ad2da330f38e1638": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "66652fb210ff46cea975b7f02c34df5d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7fa5ffc59fb14dc39feac160a10bce66": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "03a985766f57423991c4a416d487b451": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a40c77d69fe641c298616c62381faaf1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "faeec3bcda1b4682a9c5754394e7515c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fbe3133d96334b3588ccdc747a4dc860": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5127f6c9f27f45a281e3de25b798f442",
+ "placeholder": "",
+ "style": "IPY_MODEL_5b9976ca08014c5ebe13c299b280bdb1",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "ff2effbb2e10466e850209f56c354636": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d6ed52dfbff341dba71ec72853407d80",
+ "IPY_MODEL_05843a7fbe90487395d085273a862474",
+ "IPY_MODEL_a95909b2bc4841f4bd64dc9f4732d9c8"
+ ],
+ "layout": "IPY_MODEL_a6bd88a906a74c2483fb5f1ef48ff445"
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForQuestionAnswering.ipynb
index 7972181e127b7e..4827be2fa9373f 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForQuestionAnswering.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -116,7 +116,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForSequenceClassification.ipynb
index 7f27dd9ef33c49..9d29064eb44abc 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForSequenceClassification.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -121,7 +121,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForTokenClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForTokenClassification.ipynb
index 43b9511376ff72..a42aa763837c37 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForTokenClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForTokenClassification.ipynb
@@ -6,7 +6,7 @@
"source": [
"![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
"\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace%20ONNX%20in%20Spark%20NLP%20-%20AlbertForTokenClassification.ipynb)"
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace%20ONNX%20in%20Spark%20NLP%20-%20BertForTokenClassification.ipynb)"
]
},
{
@@ -116,7 +116,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForZeroShotClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForZeroShotClassification.ipynb
index 00bdadfdf0f31a..5fc57b584455a8 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForZeroShotClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForZeroShotClassification.ipynb
@@ -1,2532 +1,2383 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForZeroShotClassification.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX BertForZeroShotClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `BertForZeroShotClassification ` is only available since in `Spark NLP 5.2.4` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import BERT models trained/fine-tuned for zero shot classification via `BertForSequenceClassification` or `TFBertForSequenceClassification`. These models are usually under `Zero-Shot Classification` category and have `bert` in their labels\n",
+ "- Reference: [TFBertForSequenceClassification](https://huggingface.co/transformers/model_doc/bert.html#tfbertforsequenceclassification)\n",
+ "- Some [example models](https://huggingface.co/models?pipeline_tag=zero-shot-classification&sort=downloads&search=bert)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- Albert uses SentencePiece, so we will have to install that as well"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "tIhDjN37_WEc"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertForZeroShotClassification.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.5/402.5 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m41.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m95.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m79.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m92.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m72.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m88.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m72.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m79.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m104.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m87.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m104.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m77.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m104.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m26.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m42.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m76.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m85.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m41.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m110.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m50.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m105.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m73.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m59.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m97.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m98.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m40.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m60.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m64.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m71.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m91.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m465.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [NbAiLab/nb-bert-base-mnli](https://huggingface.co/NbAiLab/nb-bert-base-mnli) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "jtvJkUO7_WEf"
- },
- "source": [
- "## Import ONNX BertForZeroShotClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `BertForZeroShotClassification ` is only available since in `Spark NLP 5.2.4` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import BERT models trained/fine-tuned for zero shot classification via `BertForSequenceClassification` or `TFBertForSequenceClassification`. These models are usually under `Zero-Shot Classification` category and have `bert` in their labels\n",
- "- Reference: [TFBertForSequenceClassification](https://huggingface.co/transformers/model_doc/bert.html#tfbertforsequenceclassification)\n",
- "- Some [example models](https://huggingface.co/models?pipeline_tag=zero-shot-classification&sort=downloads&search=bert)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "Biy6z0oM_WEg"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "dc2010ab9c8b4ce2a61fd2f5bf584466",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/639 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "ey3A3ToN_WEh"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
- "- Albert uses SentencePiece, so we will have to install that as well"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export to ONNX.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "UFhSwQbI_WEh",
- "outputId": "ede67969-5420-41c5-9b69-a82937749bb4",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3a713cbb11ee47ca8a1c5e09c5e2134c",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.5/402.5 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m41.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m95.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m79.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m92.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m72.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m88.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m72.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m79.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m104.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m87.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m104.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m77.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m104.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m26.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m42.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m76.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m85.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m41.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m110.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m50.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m105.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m73.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m59.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m97.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m98.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m40.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m60.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m64.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m71.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m91.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m465.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/438M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "27pTsBbz_WEj"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "46ce6bbe733144b493460694864e042f",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [NbAiLab/nb-bert-base-mnli](https://huggingface.co/NbAiLab/nb-bert-base-mnli) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/48.0 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "hbJ9I-2f_WEk",
- "outputId": "d224905d-1609-41d5-a33d-3796925f7f6c",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 405,
- "referenced_widgets": [
- "dc2010ab9c8b4ce2a61fd2f5bf584466",
- "557b68d5c6bd47c586571bb6d03fb75b",
- "fb27d436ed9b41b3813afdabc7e7168e",
- "608a47e426c44acea7e60f8b386e0677",
- "d046f93abb3945c9beec40f5b5e7034d",
- "2650006b821e493abfb0005f680b4bbb",
- "5ee1cbef2ffe4cc2b1af445726a25871",
- "cc1743bea22342ff9f81223b231ac387",
- "4d4920f93b2c4f1c96670ac9396fb244",
- "a273ab182aab40e79a1f3b8517452322",
- "31926af1bcb341f1805e31c9f6a105a6",
- "3a713cbb11ee47ca8a1c5e09c5e2134c",
- "f9a44a68c55546a6bd1425f649e84fce",
- "a65ebf8907284391bd098d8906997a65",
- "5a00dcdd1b3840538bbdffe4c9dfccac",
- "fea490f5d6ce4b0cb044aed1fcac40c6",
- "31e8635f8ed543fe96980f24c1977435",
- "bd84a229a7e2423e9afff844346027a6",
- "b637bec487a949dbad7283eedc25cf51",
- "081a21e0131647a4a934b80ce35f99fe",
- "8d554fc5ff7e4a149fccda1e5d197326",
- "a581cd1016fb4d9083039b55162d03c8",
- "46ce6bbe733144b493460694864e042f",
- "892540858af848e2989805c7facf9eaa",
- "8bc01972359142c6b20c98fee0608c26",
- "9fb30845b7e245829867afeaac10e8a1",
- "a04cbc3089c24b8d9056ed56dcb383fe",
- "698707314ce945198dbf494154d0d3d9",
- "76891611ea974bed8ce9825197d6ede6",
- "4a79b68ed33047edb71fcc08d4e098ad",
- "110a9c4ba8144b23bec988991c50ea69",
- "1b35a6c060fb45bb8a7c50e4e58b3e70",
- "7cfb8103d0914f8db95a98d0298241c1",
- "4bbc0cfc3e194e58b688d1ddad08e355",
- "05e2cc6a1b8d445b9f4c75b8eafc8569",
- "2f9b2816622b4ea78d333b28d2e1d528",
- "eac29afa1787461d80b19843a322b35b",
- "600ef6e0875047feb0edeb29f1e2cec4",
- "9ff55de49cfb4c5da242f79c29d765c0",
- "2ca6b6198d1247a086ad6c77ef98745d",
- "5c9ee9d2bfff4dd9bf111523b15f28b8",
- "4fc274d64c994e19851775ff1f5b7bfd",
- "98ccf59935c148af9266163fa8e12f36",
- "18cba1a7ebf54e389b0512644fd4eb8d",
- "ef611b4c4adf4cdfb9e118d93fbe346a",
- "ce68fc9368cb4860a3c65fb33a3b9362",
- "a82f20d3bade4075a67de714763267d3",
- "2b7c0e9b54a04512be2914256c92a8b0",
- "f96f6adfbec643c5aed81d1d3df80daa",
- "2e6d65b7f9e943c294b604769f714bdb",
- "e6ca865f09b440f0b646f74b85a930c2",
- "7c20f9b4aa514244b413026b484773be",
- "8c3c8bc1c60c4be1b2ce6eb91fb3d80c",
- "374ab98ad46c4518b0800c58d12702b5",
- "fc725112860647558d1c3614ada8be40"
- ]
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4bbc0cfc3e194e58b688d1ddad08e355",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/639 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "dc2010ab9c8b4ce2a61fd2f5bf584466"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/438M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "3a713cbb11ee47ca8a1c5e09c5e2134c"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/48.0 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "46ce6bbe733144b493460694864e042f"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "4bbc0cfc3e194e58b688d1ddad08e355"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/112 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "ef611b4c4adf4cdfb9e118d93fbe346a"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForSequenceClassification\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'aloxatel/bert-base-mnli'\n",
- "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForSequenceClassification.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(EXPORT_PATH)"
+ "text/plain": [
+ "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "0DY3p-DH_WEo"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ef611b4c4adf4cdfb9e118d93fbe346a",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/112 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "CVyszhaS_WEp",
- "outputId": "88ddabc8-f31a-4065-e98a-cdbfdfecf8cd",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 428876\n",
- "-rw-r--r-- 1 root root 813 Feb 1 10:15 config.json\n",
- "-rw-r--r-- 1 root root 438204942 Feb 1 10:15 model.onnx\n",
- "-rw-r--r-- 1 root root 125 Feb 1 10:15 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 366 Feb 1 10:15 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 711396 Feb 1 10:15 tokenizer.json\n",
- "-rw-r--r-- 1 root root 231508 Feb 1 10:15 vocab.txt\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForSequenceClassification\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'aloxatel/bert-base-mnli'\n",
+ "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForSequenceClassification.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(EXPORT_PATH)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "HKR22Lek_WEp"
- },
- "source": [
- "- As you can see, we need to move `vocabs.txt` from the tokenizer to assets folder which Spark NLP will look for\n",
- "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 428876\n",
+ "-rw-r--r-- 1 root root 813 Feb 1 10:15 config.json\n",
+ "-rw-r--r-- 1 root root 438204942 Feb 1 10:15 model.onnx\n",
+ "-rw-r--r-- 1 root root 125 Feb 1 10:15 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 366 Feb 1 10:15 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 711396 Feb 1 10:15 tokenizer.json\n",
+ "-rw-r--r-- 1 root root 231508 Feb 1 10:15 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `vocabs.txt` from the tokenizer to assets folder which Spark NLP will look for\n",
+ "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {EXPORT_PATH}/assets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get label2id dictionary\n",
+ "labels = ort_model.config.id2label\n",
+ "# sort the dictionary based on the id\n",
+ "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
+ "\n",
+ "with open(EXPORT_PATH + '/assets/labels.txt', 'w') as f:\n",
+ " f.write('\\n'.join(labels))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {EXPORT_PATH}/vocab.txt {EXPORT_PATH}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `vocab.txt` and `labels.txt` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "0Fz5W4s5_WEq"
- },
- "outputs": [],
- "source": [
- "!mkdir {EXPORT_PATH}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/aloxatel/bert-base-mnli:\n",
+ "total 428652\n",
+ "drwxr-xr-x 2 root root 4096 Feb 1 10:15 assets\n",
+ "-rw-r--r-- 1 root root 813 Feb 1 10:15 config.json\n",
+ "-rw-r--r-- 1 root root 438204942 Feb 1 10:15 model.onnx\n",
+ "-rw-r--r-- 1 root root 125 Feb 1 10:15 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 366 Feb 1 10:15 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 711396 Feb 1 10:15 tokenizer.json\n",
+ "\n",
+ "onnx_models/aloxatel/bert-base-mnli/assets:\n",
+ "total 232\n",
+ "-rw-r--r-- 1 root root 32 Feb 1 10:15 labels.txt\n",
+ "-rw-r--r-- 1 root root 231508 Feb 1 10:15 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save BertForZeroShotClassification in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "fFR7rLmp_WEq"
- },
- "outputs": [],
- "source": [
- "# get label2id dictionary\n",
- "labels = ort_model.config.id2label\n",
- "# sort the dictionary based on the id\n",
- "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
- "\n",
- "with open(EXPORT_PATH + '/assets/labels.txt', 'w') as f:\n",
- " f.write('\\n'.join(labels))"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--2023-09-29 19:41:03-- http://setup.johnsnowlabs.com/colab.sh\n",
+ "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n",
+ "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n",
+ "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
+ "Location: https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n",
+ "--2023-09-29 19:41:04-- https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n",
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n",
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1191 (1.2K) [text/plain]\n",
+ "Saving to: ‘STDOUT’\n",
+ "\n",
+ "- 100%[===================>] 1.16K --.-KB/s in 0s \n",
+ "\n",
+ "2023-09-29 19:41:04 (106 MB/s) - written to stdout [1191/1191]\n",
+ "\n",
+ "Installing PySpark 3.2.3 and Spark NLP 5.1.2\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.2\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.3/536.3 kB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4vOLnld4_WEq"
- },
- "outputs": [],
- "source": [
- "!mv {EXPORT_PATH}/vocab.txt {EXPORT_PATH}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `BertForZeroShotClassification` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `BertForZeroShotClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "zero_shot_classifier = BertForZeroShotClassification.loadSavedModel(\n",
+ " EXPORT_PATH,\n",
+ " spark\n",
+ " )\\\n",
+ " .setInputCols([\"document\", \"token\"]) \\\n",
+ " .setOutputCol(\"class\") \\\n",
+ " .setCandidateLabels([\"urgent\", \"mobile\", \"travel\", \"movie\", \"music\", \"sport\", \"weather\", \"technology\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "zero_shot_classifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(MODEL_NAME))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your BertForZeroShotClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "hNPl3hqx_WEr"
- },
- "source": [
- "Voila! We have our `vocab.txt` and `labels.txt` inside assets directory"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 429464\n",
+ "-rw-r--r-- 1 root root 439759046 Sep 29 19:42 bert_classification_onnx\n",
+ "drwxr-xr-x 4 root root 4096 Sep 29 19:42 fields\n",
+ "drwxr-xr-x 2 root root 4096 Sep 29 19:42 metadata\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {MODEL_NAME}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BertForZeroShotClassification model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "zero_shot_classifier_loaded = BertForZeroShotClassification.load(\"./{}_spark_nlp_onnx\".format(MODEL_NAME))\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"class\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see what labels were used to train this model via `getClasses` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "uQNah_q7_WEr",
- "outputId": "157c7f9e-9568-494c-d7c4-aa90d49942ee",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "onnx_models/aloxatel/bert-base-mnli:\n",
- "total 428652\n",
- "drwxr-xr-x 2 root root 4096 Feb 1 10:15 assets\n",
- "-rw-r--r-- 1 root root 813 Feb 1 10:15 config.json\n",
- "-rw-r--r-- 1 root root 438204942 Feb 1 10:15 model.onnx\n",
- "-rw-r--r-- 1 root root 125 Feb 1 10:15 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 366 Feb 1 10:15 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 711396 Feb 1 10:15 tokenizer.json\n",
- "\n",
- "onnx_models/aloxatel/bert-base-mnli/assets:\n",
- "total 232\n",
- "-rw-r--r-- 1 root root 32 Feb 1 10:15 labels.txt\n",
- "-rw-r--r-- 1 root root 231508 Feb 1 10:15 vocab.txt\n"
- ]
- }
- ],
- "source": [
- "!ls -lR {EXPORT_PATH}"
+ "data": {
+ "text/plain": [
+ "['NEU', 'POS', 'NEG']"
]
- },
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# .getClasses was introduced in spark-nlp==3.4.0\n",
+ "zero_shot_classifier_loaded.getClasses()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "0s4GZ9SD_WEr"
- },
- "source": [
- "## Import and Save BertForZeroShotClassification in Spark NLP\n"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+------------------+------+\n",
+ "| text|result|\n",
+ "+------------------+------+\n",
+ "|Te quiero. Te amo.| [POS]|\n",
+ "+------------------+------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sparknlp.base import *\n",
+ "from sparknlp.annotator import *\n",
+ "from pyspark.ml import Pipeline, PipelineModel\n",
+ "\n",
+ "document_assembler = DocumentAssembler() \\\n",
+ " .setInputCol(\"text\") \\\n",
+ " .setOutputCol(\"document\")\n",
+ "\n",
+ "tokenizer = Tokenizer().setInputCols(\"document\").setOutputCol(\"token\")\n",
+ "\n",
+ "pipeline = Pipeline(stages=[\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " zero_shot_classifier_loaded\n",
+ "])\n",
+ "\n",
+ "text = [[\"I have a problem with my iphone that needs to be resolved asap!!\"],\n",
+ " [\"Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.\"],\n",
+ " [\"I have a phone and I love it!\"],\n",
+ " [\"I really want to visit Germany and I am planning to go there next year.\"],\n",
+ " [\"Let's watch some movies tonight! I am in the mood for a horror movie.\"],\n",
+ " [\"Have you watched the match yesterday? It was a great game!\"],\n",
+ " [\"We need to harry up and get to the airport. We are going to miss our flight!\"]]\n",
+ "\n",
+ "# create a DataFrame in PySpark\n",
+ "inputDataset = spark.createDataFrame(text, [\"text\"])\n",
+ "model = pipeline.fit(inputDataset)\n",
+ "model.transform(inputDataset).select(\"class.result\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `BertForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "05e2cc6a1b8d445b9f4c75b8eafc8569": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9ff55de49cfb4c5da242f79c29d765c0",
+ "placeholder": "",
+ "style": "IPY_MODEL_2ca6b6198d1247a086ad6c77ef98745d",
+ "value": "vocab.txt: 100%"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "7JnUm4oH_WEr"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "081a21e0131647a4a934b80ce35f99fe": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "nkuttlrp_WEr",
- "outputId": "b1b0d012-62ea-4567-e9e5-3f8f466ceda3"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "--2023-09-29 19:41:03-- http://setup.johnsnowlabs.com/colab.sh\n",
- "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n",
- "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n",
- "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
- "Location: https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n",
- "--2023-09-29 19:41:04-- https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n",
- "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n",
- "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
- "HTTP request sent, awaiting response... 200 OK\n",
- "Length: 1191 (1.2K) [text/plain]\n",
- "Saving to: ‘STDOUT’\n",
- "\n",
- "- 100%[===================>] 1.16K --.-KB/s in 0s \n",
- "\n",
- "2023-09-29 19:41:04 (106 MB/s) - written to stdout [1191/1191]\n",
- "\n",
- "Installing PySpark 3.2.3 and Spark NLP 5.1.2\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.2\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.3/536.3 kB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
+ "110a9c4ba8144b23bec988991c50ea69": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "18cba1a7ebf54e389b0512644fd4eb8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1b35a6c060fb45bb8a7c50e4e58b3e70": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2650006b821e493abfb0005f680b4bbb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2b7c0e9b54a04512be2914256c92a8b0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_374ab98ad46c4518b0800c58d12702b5",
+ "placeholder": "",
+ "style": "IPY_MODEL_fc725112860647558d1c3614ada8be40",
+ "value": " 112/112 [00:00<00:00, 7.23kB/s]"
+ }
+ },
+ "2ca6b6198d1247a086ad6c77ef98745d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2e6d65b7f9e943c294b604769f714bdb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2f9b2816622b4ea78d333b28d2e1d528": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5c9ee9d2bfff4dd9bf111523b15f28b8",
+ "max": 231508,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4fc274d64c994e19851775ff1f5b7bfd",
+ "value": 231508
+ }
+ },
+ "31926af1bcb341f1805e31c9f6a105a6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "31e8635f8ed543fe96980f24c1977435": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "374ab98ad46c4518b0800c58d12702b5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3a713cbb11ee47ca8a1c5e09c5e2134c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_f9a44a68c55546a6bd1425f649e84fce",
+ "IPY_MODEL_a65ebf8907284391bd098d8906997a65",
+ "IPY_MODEL_5a00dcdd1b3840538bbdffe4c9dfccac"
],
- "source": [
- "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_fea490f5d6ce4b0cb044aed1fcac40c6"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "7_MYXgjf_WEs"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "46ce6bbe733144b493460694864e042f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_892540858af848e2989805c7facf9eaa",
+ "IPY_MODEL_8bc01972359142c6b20c98fee0608c26",
+ "IPY_MODEL_9fb30845b7e245829867afeaac10e8a1"
+ ],
+ "layout": "IPY_MODEL_a04cbc3089c24b8d9056ed56dcb383fe"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "kOc3hrRI_WEs",
- "outputId": "53287f60-6d7e-46aa-8845-d4789596b0a6"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
+ "4a79b68ed33047edb71fcc08d4e098ad": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4bbc0cfc3e194e58b688d1ddad08e355": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_05e2cc6a1b8d445b9f4c75b8eafc8569",
+ "IPY_MODEL_2f9b2816622b4ea78d333b28d2e1d528",
+ "IPY_MODEL_eac29afa1787461d80b19843a322b35b"
],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "layout": "IPY_MODEL_600ef6e0875047feb0edeb29f1e2cec4"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "FA3w5kEt_WEs"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `BertForZeroShotClassification` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `BertForZeroShotClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
- ]
+ "4d4920f93b2c4f1c96670ac9396fb244": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "ab2CrizU_WEs"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "zero_shot_classifier = BertForZeroShotClassification.loadSavedModel(\n",
- " '{}/saved_model/1'.format(MODEL_NAME),\n",
- " spark\n",
- " )\\\n",
- " .setInputCols([\"document\", \"token\"]) \\\n",
- " .setOutputCol(\"class\") \\\n",
- " .setCandidateLabels([\"urgent\", \"mobile\", \"travel\", \"movie\", \"music\", \"sport\", \"weather\", \"technology\"])"
- ]
+ "4fc274d64c994e19851775ff1f5b7bfd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "paajUy-T_WEs"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "557b68d5c6bd47c586571bb6d03fb75b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2650006b821e493abfb0005f680b4bbb",
+ "placeholder": "",
+ "style": "IPY_MODEL_5ee1cbef2ffe4cc2b1af445726a25871",
+ "value": "config.json: 100%"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "DiXFayTa_WEs"
- },
- "outputs": [],
- "source": [
- "zero_shot_classifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(MODEL_NAME))"
- ]
+ "5a00dcdd1b3840538bbdffe4c9dfccac": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8d554fc5ff7e4a149fccda1e5d197326",
+ "placeholder": "",
+ "style": "IPY_MODEL_a581cd1016fb4d9083039b55162d03c8",
+ "value": " 438M/438M [00:27<00:00, 17.0MB/s]"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "5yTVmF8r_WEt"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "5c9ee9d2bfff4dd9bf111523b15f28b8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1PPJw45m_WEt"
- },
- "outputs": [],
- "source": [
- "!rm -rf {EXPORT_PATH}"
- ]
+ "5ee1cbef2ffe4cc2b1af445726a25871": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "F5uVLNjp_WEt"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your BertForZeroShotClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "600ef6e0875047feb0edeb29f1e2cec4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "XPs8R23U_WEt",
- "outputId": "b302795a-74be-4859-96b8-dfefe9fe5b69"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 429464\n",
- "-rw-r--r-- 1 root root 439759046 Sep 29 19:42 bert_classification_onnx\n",
- "drwxr-xr-x 4 root root 4096 Sep 29 19:42 fields\n",
- "drwxr-xr-x 2 root root 4096 Sep 29 19:42 metadata\n"
- ]
- }
- ],
- "source": [
- "! ls -l {MODEL_NAME}_spark_nlp_onnx"
- ]
+ "608a47e426c44acea7e60f8b386e0677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a273ab182aab40e79a1f3b8517452322",
+ "placeholder": "",
+ "style": "IPY_MODEL_31926af1bcb341f1805e31c9f6a105a6",
+ "value": " 639/639 [00:00<00:00, 33.1kB/s]"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "AEsYTR2T_WEt"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny BertForZeroShotClassification model 😊"
- ]
+ "698707314ce945198dbf494154d0d3d9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "yozw4m76_WEt"
- },
- "outputs": [],
- "source": [
- "sequenceClassifier_loaded = BertForZeroShotClassification.load(\"./{}_spark_nlp_onnx\".format(MODEL_NAME))\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"class\")"
- ]
+ "76891611ea974bed8ce9825197d6ede6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "rATPyeeR_WEu"
- },
- "source": [
- "You can see what labels were used to train this model via `getClasses` function:"
- ]
+ "7c20f9b4aa514244b413026b484773be": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "fMNJ9mfr_WEu",
- "outputId": "46d0ab80-fac2-4cb7-e091-fa8895e31217"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['NEU', 'POS', 'NEG']"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "7cfb8103d0914f8db95a98d0298241c1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "892540858af848e2989805c7facf9eaa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_698707314ce945198dbf494154d0d3d9",
+ "placeholder": "",
+ "style": "IPY_MODEL_76891611ea974bed8ce9825197d6ede6",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "8bc01972359142c6b20c98fee0608c26": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4a79b68ed33047edb71fcc08d4e098ad",
+ "max": 48,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_110a9c4ba8144b23bec988991c50ea69",
+ "value": 48
+ }
+ },
+ "8c3c8bc1c60c4be1b2ce6eb91fb3d80c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "8d554fc5ff7e4a149fccda1e5d197326": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "98ccf59935c148af9266163fa8e12f36": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9fb30845b7e245829867afeaac10e8a1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1b35a6c060fb45bb8a7c50e4e58b3e70",
+ "placeholder": "",
+ "style": "IPY_MODEL_7cfb8103d0914f8db95a98d0298241c1",
+ "value": " 48.0/48.0 [00:00<00:00, 3.08kB/s]"
+ }
+ },
+ "9ff55de49cfb4c5da242f79c29d765c0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a04cbc3089c24b8d9056ed56dcb383fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a273ab182aab40e79a1f3b8517452322": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a581cd1016fb4d9083039b55162d03c8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a65ebf8907284391bd098d8906997a65": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b637bec487a949dbad7283eedc25cf51",
+ "max": 437961724,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_081a21e0131647a4a934b80ce35f99fe",
+ "value": 437961724
+ }
+ },
+ "a82f20d3bade4075a67de714763267d3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7c20f9b4aa514244b413026b484773be",
+ "max": 112,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_8c3c8bc1c60c4be1b2ce6eb91fb3d80c",
+ "value": 112
+ }
+ },
+ "b637bec487a949dbad7283eedc25cf51": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "bd84a229a7e2423e9afff844346027a6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cc1743bea22342ff9f81223b231ac387": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ce68fc9368cb4860a3c65fb33a3b9362": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e6d65b7f9e943c294b604769f714bdb",
+ "placeholder": "",
+ "style": "IPY_MODEL_e6ca865f09b440f0b646f74b85a930c2",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "d046f93abb3945c9beec40f5b5e7034d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "dc2010ab9c8b4ce2a61fd2f5bf584466": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_557b68d5c6bd47c586571bb6d03fb75b",
+ "IPY_MODEL_fb27d436ed9b41b3813afdabc7e7168e",
+ "IPY_MODEL_608a47e426c44acea7e60f8b386e0677"
],
- "source": [
- "# .getClasses was introduced in spark-nlp==3.4.0\n",
- "sequenceClassifier_loaded.getClasses()"
- ]
+ "layout": "IPY_MODEL_d046f93abb3945c9beec40f5b5e7034d"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "wqNxl8_E_WEu"
- },
- "source": [
- "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
- ]
+ "e6ca865f09b440f0b646f74b85a930c2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "SAulQjDX_WEu",
- "outputId": "fef295e4-0b21-48fa-af0c-139579c50527"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+------------------+------+\n",
- "| text|result|\n",
- "+------------------+------+\n",
- "|Te quiero. Te amo.| [POS]|\n",
- "+------------------+------+\n",
- "\n"
- ]
- }
+ "eac29afa1787461d80b19843a322b35b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_98ccf59935c148af9266163fa8e12f36",
+ "placeholder": "",
+ "style": "IPY_MODEL_18cba1a7ebf54e389b0512644fd4eb8d",
+ "value": " 232k/232k [00:00<00:00, 952kB/s]"
+ }
+ },
+ "ef611b4c4adf4cdfb9e118d93fbe346a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ce68fc9368cb4860a3c65fb33a3b9362",
+ "IPY_MODEL_a82f20d3bade4075a67de714763267d3",
+ "IPY_MODEL_2b7c0e9b54a04512be2914256c92a8b0"
],
- "source": [
- "from sparknlp.base import *\n",
- "from sparknlp.annotator import *\n",
- "from pyspark.ml import Pipeline, PipelineModel\n",
- "\n",
- "document_assembler = DocumentAssembler() \\\n",
- " .setInputCol(\"text\") \\\n",
- " .setOutputCol(\"document\")\n",
- "\n",
- "tokenizer = Tokenizer().setInputCols(\"document\").setOutputCol(\"token\")\n",
- "\n",
- "pipeline = Pipeline(stages=[\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " zero_shot_classifier_loaded\n",
- "])\n",
- "\n",
- "text = [[\"I have a problem with my iphone that needs to be resolved asap!!\"],\n",
- " [\"Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.\"],\n",
- " [\"I have a phone and I love it!\"],\n",
- " [\"I really want to visit Germany and I am planning to go there next year.\"],\n",
- " [\"Let's watch some movies tonight! I am in the mood for a horror movie.\"],\n",
- " [\"Have you watched the match yesterday? It was a great game!\"],\n",
- " [\"We need to harry up and get to the airport. We are going to miss our flight!\"]]\n",
- "\n",
- "# create a DataFrame in PySpark\n",
- "inputDataset = spark.createDataFrame(text, [\"text\"])\n",
- "model = pipeline.fit(inputDataset)\n",
- "model.transform(inputDataset).select(\"class.result\").show()"
- ]
+ "layout": "IPY_MODEL_f96f6adfbec643c5aed81d1d3df80daa"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "JzEwOk48_WEu"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `BertForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4"
+ "f96f6adfbec643c5aed81d1d3df80daa": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
+ "f9a44a68c55546a6bd1425f649e84fce": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_31e8635f8ed543fe96980f24c1977435",
+ "placeholder": "",
+ "style": "IPY_MODEL_bd84a229a7e2423e9afff844346027a6",
+ "value": "model.safetensors: 100%"
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
+ "fb27d436ed9b41b3813afdabc7e7168e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cc1743bea22342ff9f81223b231ac387",
+ "max": 639,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4d4920f93b2c4f1c96670ac9396fb244",
+ "value": 639
+ }
},
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "dc2010ab9c8b4ce2a61fd2f5bf584466": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_557b68d5c6bd47c586571bb6d03fb75b",
- "IPY_MODEL_fb27d436ed9b41b3813afdabc7e7168e",
- "IPY_MODEL_608a47e426c44acea7e60f8b386e0677"
- ],
- "layout": "IPY_MODEL_d046f93abb3945c9beec40f5b5e7034d"
- }
- },
- "557b68d5c6bd47c586571bb6d03fb75b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2650006b821e493abfb0005f680b4bbb",
- "placeholder": "",
- "style": "IPY_MODEL_5ee1cbef2ffe4cc2b1af445726a25871",
- "value": "config.json: 100%"
- }
- },
- "fb27d436ed9b41b3813afdabc7e7168e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_cc1743bea22342ff9f81223b231ac387",
- "max": 639,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_4d4920f93b2c4f1c96670ac9396fb244",
- "value": 639
- }
- },
- "608a47e426c44acea7e60f8b386e0677": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_a273ab182aab40e79a1f3b8517452322",
- "placeholder": "",
- "style": "IPY_MODEL_31926af1bcb341f1805e31c9f6a105a6",
- "value": " 639/639 [00:00<00:00, 33.1kB/s]"
- }
- },
- "d046f93abb3945c9beec40f5b5e7034d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2650006b821e493abfb0005f680b4bbb": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "5ee1cbef2ffe4cc2b1af445726a25871": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "cc1743bea22342ff9f81223b231ac387": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4d4920f93b2c4f1c96670ac9396fb244": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "a273ab182aab40e79a1f3b8517452322": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "31926af1bcb341f1805e31c9f6a105a6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3a713cbb11ee47ca8a1c5e09c5e2134c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_f9a44a68c55546a6bd1425f649e84fce",
- "IPY_MODEL_a65ebf8907284391bd098d8906997a65",
- "IPY_MODEL_5a00dcdd1b3840538bbdffe4c9dfccac"
- ],
- "layout": "IPY_MODEL_fea490f5d6ce4b0cb044aed1fcac40c6"
- }
- },
- "f9a44a68c55546a6bd1425f649e84fce": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_31e8635f8ed543fe96980f24c1977435",
- "placeholder": "",
- "style": "IPY_MODEL_bd84a229a7e2423e9afff844346027a6",
- "value": "model.safetensors: 100%"
- }
- },
- "a65ebf8907284391bd098d8906997a65": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b637bec487a949dbad7283eedc25cf51",
- "max": 437961724,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_081a21e0131647a4a934b80ce35f99fe",
- "value": 437961724
- }
- },
- "5a00dcdd1b3840538bbdffe4c9dfccac": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8d554fc5ff7e4a149fccda1e5d197326",
- "placeholder": "",
- "style": "IPY_MODEL_a581cd1016fb4d9083039b55162d03c8",
- "value": " 438M/438M [00:27<00:00, 17.0MB/s]"
- }
- },
- "fea490f5d6ce4b0cb044aed1fcac40c6": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "31e8635f8ed543fe96980f24c1977435": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "bd84a229a7e2423e9afff844346027a6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b637bec487a949dbad7283eedc25cf51": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "081a21e0131647a4a934b80ce35f99fe": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "8d554fc5ff7e4a149fccda1e5d197326": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a581cd1016fb4d9083039b55162d03c8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "46ce6bbe733144b493460694864e042f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_892540858af848e2989805c7facf9eaa",
- "IPY_MODEL_8bc01972359142c6b20c98fee0608c26",
- "IPY_MODEL_9fb30845b7e245829867afeaac10e8a1"
- ],
- "layout": "IPY_MODEL_a04cbc3089c24b8d9056ed56dcb383fe"
- }
- },
- "892540858af848e2989805c7facf9eaa": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_698707314ce945198dbf494154d0d3d9",
- "placeholder": "",
- "style": "IPY_MODEL_76891611ea974bed8ce9825197d6ede6",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "8bc01972359142c6b20c98fee0608c26": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_4a79b68ed33047edb71fcc08d4e098ad",
- "max": 48,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_110a9c4ba8144b23bec988991c50ea69",
- "value": 48
- }
- },
- "9fb30845b7e245829867afeaac10e8a1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1b35a6c060fb45bb8a7c50e4e58b3e70",
- "placeholder": "",
- "style": "IPY_MODEL_7cfb8103d0914f8db95a98d0298241c1",
- "value": " 48.0/48.0 [00:00<00:00, 3.08kB/s]"
- }
- },
- "a04cbc3089c24b8d9056ed56dcb383fe": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "698707314ce945198dbf494154d0d3d9": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "76891611ea974bed8ce9825197d6ede6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "4a79b68ed33047edb71fcc08d4e098ad": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "110a9c4ba8144b23bec988991c50ea69": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "1b35a6c060fb45bb8a7c50e4e58b3e70": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7cfb8103d0914f8db95a98d0298241c1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "4bbc0cfc3e194e58b688d1ddad08e355": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_05e2cc6a1b8d445b9f4c75b8eafc8569",
- "IPY_MODEL_2f9b2816622b4ea78d333b28d2e1d528",
- "IPY_MODEL_eac29afa1787461d80b19843a322b35b"
- ],
- "layout": "IPY_MODEL_600ef6e0875047feb0edeb29f1e2cec4"
- }
- },
- "05e2cc6a1b8d445b9f4c75b8eafc8569": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9ff55de49cfb4c5da242f79c29d765c0",
- "placeholder": "",
- "style": "IPY_MODEL_2ca6b6198d1247a086ad6c77ef98745d",
- "value": "vocab.txt: 100%"
- }
- },
- "2f9b2816622b4ea78d333b28d2e1d528": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5c9ee9d2bfff4dd9bf111523b15f28b8",
- "max": 231508,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_4fc274d64c994e19851775ff1f5b7bfd",
- "value": 231508
- }
- },
- "eac29afa1787461d80b19843a322b35b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_98ccf59935c148af9266163fa8e12f36",
- "placeholder": "",
- "style": "IPY_MODEL_18cba1a7ebf54e389b0512644fd4eb8d",
- "value": " 232k/232k [00:00<00:00, 952kB/s]"
- }
- },
- "600ef6e0875047feb0edeb29f1e2cec4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9ff55de49cfb4c5da242f79c29d765c0": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2ca6b6198d1247a086ad6c77ef98745d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "5c9ee9d2bfff4dd9bf111523b15f28b8": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4fc274d64c994e19851775ff1f5b7bfd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "98ccf59935c148af9266163fa8e12f36": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "18cba1a7ebf54e389b0512644fd4eb8d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ef611b4c4adf4cdfb9e118d93fbe346a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_ce68fc9368cb4860a3c65fb33a3b9362",
- "IPY_MODEL_a82f20d3bade4075a67de714763267d3",
- "IPY_MODEL_2b7c0e9b54a04512be2914256c92a8b0"
- ],
- "layout": "IPY_MODEL_f96f6adfbec643c5aed81d1d3df80daa"
- }
- },
- "ce68fc9368cb4860a3c65fb33a3b9362": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2e6d65b7f9e943c294b604769f714bdb",
- "placeholder": "",
- "style": "IPY_MODEL_e6ca865f09b440f0b646f74b85a930c2",
- "value": "special_tokens_map.json: 100%"
- }
- },
- "a82f20d3bade4075a67de714763267d3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7c20f9b4aa514244b413026b484773be",
- "max": 112,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_8c3c8bc1c60c4be1b2ce6eb91fb3d80c",
- "value": 112
- }
- },
- "2b7c0e9b54a04512be2914256c92a8b0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_374ab98ad46c4518b0800c58d12702b5",
- "placeholder": "",
- "style": "IPY_MODEL_fc725112860647558d1c3614ada8be40",
- "value": " 112/112 [00:00<00:00, 7.23kB/s]"
- }
- },
- "f96f6adfbec643c5aed81d1d3df80daa": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2e6d65b7f9e943c294b604769f714bdb": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e6ca865f09b440f0b646f74b85a930c2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7c20f9b4aa514244b413026b484773be": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8c3c8bc1c60c4be1b2ce6eb91fb3d80c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "374ab98ad46c4518b0800c58d12702b5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "fc725112860647558d1c3614ada8be40": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "fc725112860647558d1c3614ada8be40": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- "accelerator": "GPU"
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ "fea490f5d6ce4b0cb044aed1fcac40c6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertSentenceEmbeddings.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertSentenceEmbeddings.ipynb
index ce8caba7472d61..6716957245597a 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertSentenceEmbeddings.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_BertSentenceEmbeddings.ipynb
@@ -414,9 +414,8 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python [conda env:sparknlp_dev]",
- "language": "python",
- "name": "conda-env-sparknlp_dev-py"
+ "display_name": "Python 3",
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CLIP.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CLIP.ipynb
index 4310d452eba8af..f9d70656a7f95c 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CLIP.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CLIP.ipynb
@@ -88,8 +88,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- ],
+ "outputs": [],
"source": [
"! optimum-cli export onnx --model {MODEL_NAME} {EXPORT_PATH}"
]
@@ -314,8 +313,8 @@
],
"source": [
"from PIL import Image\n",
- "\n",
- "Image.open(\"../../../../src/test/resources/image/egyptian_cat.jpeg\")"
+ "!wget https://github.com/JohnSnowLabs/spark-nlp/raw/master/src/test/resources/image/egyptian_cat.jpeg\n",
+ "Image.open(\"egyptian_cat.jpeg\")"
]
},
{
@@ -352,7 +351,7 @@
"imageDF = spark.read \\\n",
" .format(\"image\") \\\n",
" .option(\"dropInvalid\", value = True) \\\n",
- " .load(\"../../../../src/test/resources/image/egyptian_cat.jpeg\")\n",
+ " .load(\"egyptian_cat.jpeg\")\n",
"\n",
"imageAssembler = ImageAssembler() \\\n",
" .setInputCol(\"image\") \\\n",
@@ -387,10 +386,10 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
-},
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
"language_info": {
"codemirror_mode": {
"name": "ipython",
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBERT.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBERT.ipynb
index 5c04b3e61de50d..acf34540b2246a 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBERT.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBERT.ipynb
@@ -1,2422 +1,2285 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBERT.ipynb)\n",
+ "\n",
+ "# Import ONNX CamemBERT models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
+ "- You can import models for CamemBERT from HuggingFace and they have to be in `Fill Mask` category. Meaning, you cannot use CamemBERT models trained/fine-tuned on a specific task such as token/sequence classification."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "7Ba7C4s3YxPm"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBert.ipynb)\n",
- "\n",
- "# Import ONNX CamemBERT models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
- "- You can import models for CamemBERT from HuggingFace and they have to be in `Fill Mask` category. Meaning, you cannot use CamemBERT models trained/fine-tuned on a specific task such as token/sequence classification."
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m407.1/407.1 kB\u001b[0m \u001b[31m24.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m60.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m38.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m69.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.7/536.7 kB\u001b[0m \u001b[31m40.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m84.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m112.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m107.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m98.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m103.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m110.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m110.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m107.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m85.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m105.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m100.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m108.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m106.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m94.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m101.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m84.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m77.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m102.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m85.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m110.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m35.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m88.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m61.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m69.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m75.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m82.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m67.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m108.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m95.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m30.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m67.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m82.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m69.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m34.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m83.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [camembert-base](https://huggingface.co/camembert-base) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n",
+ "- In addition to the CamemBERT model, we also need to save the `CamembertTokenizer`. This is the same for every model, these are assets (saved in `/assets`) needed for tokenization inside Spark NLP."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "GksUloJ0YxPo"
- },
- "source": [
- "## Export and Save HuggingFace model"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "vrxtF-pPYxPp"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "566c95aa32904d73923ed7e93d99d373",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "text/plain": [
+ "config.json: 0%| | 0.00/508 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "qIXsovz5YxPp",
- "outputId": "0dcf22f9-1192-4c41-c739-f6eaa0d5d81c",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m407.1/407.1 kB\u001b[0m \u001b[31m24.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m60.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m38.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m69.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.7/536.7 kB\u001b[0m \u001b[31m40.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m84.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m112.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m107.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m98.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m103.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m110.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m110.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m107.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m85.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m105.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m100.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m108.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m106.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m94.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m101.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m84.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m77.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m102.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m85.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m110.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m35.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m88.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m61.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m69.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m75.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m82.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m67.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m108.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m95.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m30.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m67.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m82.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m69.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m34.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m83.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export the model.\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "RF5us7X6YxPr"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "20bb65ce09c440658155d6f1f2590a09",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [camembert-base](https://huggingface.co/camembert-base) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n",
- "- In addition to the CamemBERT model, we also need to save the `CamembertTokenizer`. This is the same for every model, these are assets (saved in `/assets`) needed for tokenization inside Spark NLP."
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/445M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "id": "dBcg3fFiYxPs",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 457,
- "referenced_widgets": [
- "566c95aa32904d73923ed7e93d99d373",
- "453d1e3c486f4b9aa7e2d9acecf4176b",
- "377ca31e8f0c4cfe8d027e73f1908612",
- "a1f8086b1b4d40cbac31618adffb1b11",
- "2c16c911327041379eae3410825174ce",
- "ed985a8db8234b90917695713da4a13a",
- "8816bd1505374989a4b8135ead3b5abf",
- "7f4455090e7746f5b4be7b3ac8b3b589",
- "1696e83714ed4c5ca3231eabad56665a",
- "d95e1ea826cd4edbac988692aba618b1",
- "6c33e478484d4866b6b4d44bc1af9c3b",
- "20bb65ce09c440658155d6f1f2590a09",
- "c2d1126afc674192a56f52668380ab94",
- "3ca16338e2c14772a57d0c1e5de2668f",
- "ebc68a01a2354c55953779f328523c1b",
- "e7a68b8fdfbd41999a3cec53c187f1ef",
- "9d84925c75e546a683a580e58d8e763f",
- "3d0b2b4f4dce4697811f861bc0ea2df1",
- "7984e7fac83e4001815c440c7b7919ad",
- "1991b5c440194f9a8729a01d382af5c3",
- "85ede09e39074fd3a9bf08c6bc68b753",
- "d676d382842d4784add17dfca06348a2",
- "4a8d4f7ef796473a91c5bb1f7b0db38c",
- "bf6c1dbba58b41448a3350c8967fabfc",
- "8d52a81376024e90a0d4bfef55285ca9",
- "8c8a79b44687450f95884ea9a809d6e4",
- "cd091521a65c47d19bd40d45b1bf4895",
- "1c02b6ac167543109981ef1908a2ce8f",
- "6dc758d8cb124f96873fac98d745a2f2",
- "18b50db31cdf49b1ab28e045e71ed1d8",
- "d0ce9537c09b4321a08d71164049e753",
- "6fc8723befd549b6aabe9a84da74b04a",
- "ab012f044d614acba8d1b2330ecae31d",
- "477fd05f4c8f49fdb4002d47582f6313",
- "d78a70686e9348aa9c8a654c352915f4",
- "26cba613ef664b97a445b206887f231b",
- "0b8eb9f25faa4993802ed33f05eaf91a",
- "97a5876e545b4c24b7498683bb5876b5",
- "69610c818dfa442c86f9af8a04e6019f",
- "c6ecfecbc45144c385ce89057e86e627",
- "0ad83b11b2f94abb84c84a7029356683",
- "11907ede30534508a2793241e515a818",
- "e1c692cb85b64ae6999e2febcb92e556",
- "932203a42244430f903ef45fc3d70f76",
- "2f9b57a9d24848b4ac7d3d254a445251",
- "be26390927f246348533f0704c18fe60",
- "47bd91449e67415cbbbca16e9f9de31f",
- "b57f460a026541c28ab83c06c0d047f8",
- "ed1cc1cce2d84c10babf49a9239028f5",
- "7ce7766a45494859a5cdcb668994c3f6",
- "879960f4c4e34d07928275c812674dd6",
- "247ea5e1983b4c3885408e364a3ccdc2",
- "dddfc0e2eadc494aa0412e67f894730b",
- "19403be31a184b3d96a4e92c2c00140e",
- "0f08885250af4f419b14dab6df6932f7"
- ]
- },
- "outputId": "b3c51860-06a3-4234-81f0-3b84ed5de80d"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/508 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "566c95aa32904d73923ed7e93d99d373"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export the model.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/445M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "20bb65ce09c440658155d6f1f2590a09"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Some weights of the model checkpoint at camembert-base were not used when initializing CamembertModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']\n",
- "- This IS expected if you are initializing CamembertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing CamembertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "4a8d4f7ef796473a91c5bb1f7b0db38c"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "sentencepiece.bpe.model: 0%| | 0.00/811k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "477fd05f4c8f49fdb4002d47582f6313"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/1.40M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "2f9b57a9d24848b4ac7d3d254a445251"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
- "\n",
- "MODEL_NAME = \"camembert-base\"\n",
- "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(EXPORT_PATH)\n",
- "\n",
- "# Create directory for assets and move the tokenizer files.\n",
- "# A separate folder is needed for Spark NLP.\n",
- "!mkdir {EXPORT_PATH}/assets\n",
- "!mv {EXPORT_PATH}/sentencepiece.bpe.model {EXPORT_PATH}/assets/"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of the model checkpoint at camembert-base were not used when initializing CamembertModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']\n",
+ "- This IS expected if you are initializing CamembertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing CamembertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "oQqUoZMPYxPt"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4a8d4f7ef796473a91c5bb1f7b0db38c",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "id": "Bf-9lbe6YxPt",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "1b658055-07fc-433b-f85f-849d4c35dea2"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "477fd05f4c8f49fdb4002d47582f6313",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 432436\n",
- "drwxr-xr-x 2 root root 4096 Mar 1 01:03 assets\n",
- "-rw-r--r-- 1 root root 673 Mar 1 01:03 config.json\n",
- "-rw-r--r-- 1 root root 440372299 Mar 1 01:03 model.onnx\n",
- "-rw-r--r-- 1 root root 354 Mar 1 01:03 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 491 Mar 1 01:03 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 2418800 Mar 1 01:03 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}"
+ "text/plain": [
+ "sentencepiece.bpe.model: 0%| | 0.00/811k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "Txj99l8kYxPu",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "96e3700e-7ea7-4f34-da55-deacf727ea59"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2f9b57a9d24848b4ac7d3d254a445251",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 792\n",
- "-rw-r--r-- 1 root root 810912 Mar 1 01:03 sentencepiece.bpe.model\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}/assets"
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/1.40M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "hFmXVv8YYxPu"
- },
- "source": [
- "## Import and Save CamemBERT in Spark NLP\n",
- "\n",
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
+ "\n",
+ "MODEL_NAME = \"camembert-base\"\n",
+ "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(EXPORT_PATH)\n",
+ "\n",
+ "# Create directory for assets and move the tokenizer files.\n",
+ "# A separate folder is needed for Spark NLP.\n",
+ "!mkdir {EXPORT_PATH}/assets\n",
+ "!mv {EXPORT_PATH}/sentencepiece.bpe.model {EXPORT_PATH}/assets/"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "dYGONeQ_YxPu",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "a45cbf0b-f8c1-44e1-c5a3-b45ba6d8b110"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.3.0\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.0\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m564.8/564.8 kB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
- ],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 432436\n",
+ "drwxr-xr-x 2 root root 4096 Mar 1 01:03 assets\n",
+ "-rw-r--r-- 1 root root 673 Mar 1 01:03 config.json\n",
+ "-rw-r--r-- 1 root root 440372299 Mar 1 01:03 model.onnx\n",
+ "-rw-r--r-- 1 root root 354 Mar 1 01:03 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 491 Mar 1 01:03 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 2418800 Mar 1 01:03 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "AlqjcksvYxPv"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 792\n",
+ "-rw-r--r-- 1 root root 810912 Mar 1 01:03 sentencepiece.bpe.model\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save CamemBERT in Spark NLP\n",
+ "\n",
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "XyGjm38dYxPv"
- },
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.3.0\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.0\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m564.8/564.8 kB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `CamemBertEmbeddings` which allows us to load the ONNX model\n",
+ "- Most params will be set automatically. They can also be set later after loading the model in `CamemBertEmbeddings` during runtime, so don't worry about setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bound the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n",
+ "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want!\n",
+ "- The `dimension` param is is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "# All these params should be identical to the original ONNX model\n",
+ "camembert = CamemBertEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"camembert\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setDimension(768)\\\n",
+ " .setStorageRef('camembert_base')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "camembert.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your ONNX CamemBERT model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "xVh0x6TiYxPv"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `CamemBertEmbeddings` which allows us to load the ONNX model\n",
- "- Most params will be set automatically. They can also be set later after loading the model in `CamemBertEmbeddings` during runtime, so don't worry about setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bound the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n",
- "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want!\n",
- "- The `dimension` param is is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 430920\n",
+ "-rw-r--r-- 1 root root 440439641 Mar 1 01:05 camembert_onnx\n",
+ "-rw-r--r-- 1 root root 810912 Mar 1 01:05 camembert_spp\n",
+ "drwxr-xr-x 2 root root 4096 Mar 1 01:05 metadata\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {MODEL_NAME}_spark_nlp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny CamemBERT model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "\n",
+ "from sparknlp.base import *\n",
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "document_assembler = DocumentAssembler()\\\n",
+ " .setInputCol(\"text\")\\\n",
+ " .setOutputCol(\"document\")\n",
+ "\n",
+ "tokenizer = Tokenizer()\\\n",
+ " .setInputCols([\"document\"])\\\n",
+ " .setOutputCol(\"token\")\n",
+ "\n",
+ "camembert_loaded = CamemBertEmbeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"camembert\")\\\n",
+ "\n",
+ "pipeline = Pipeline(\n",
+ " stages = [\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " camembert_loaded\n",
+ " ])\n",
+ "\n",
+ "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
+ "model = pipeline.fit(data)\n",
+ "result = model.transform(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "NY5PvPq9YxPv"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "\n",
- "# All these params should be identical to the original ONNX model\n",
- "camembert = CamemBertEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"camembert\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setDimension(768)\\\n",
- " .setStorageRef('camembert_base')"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+\n",
+ "| embeddings|\n",
+ "+--------------------+\n",
+ "|[-0.049330253, 0....|\n",
+ "|[0.003116008, 0.1...|\n",
+ "|[-0.021312904, -0...|\n",
+ "|[0.046165787, 0.0...|\n",
+ "|[0.09459148, 0.07...|\n",
+ "|[0.071022525, 0.2...|\n",
+ "|[0.08610784, -0.3...|\n",
+ "|[0.20012067, 0.49...|\n",
+ "|[0.10958594, -0.0...|\n",
+ "|[0.19859709, 0.09...|\n",
+ "|[0.09361851, 0.21...|\n",
+ "|[0.12071304, 0.41...|\n",
+ "|[0.12088075, 0.41...|\n",
+ "|[0.034318373, -0....|\n",
+ "|[0.02465238, 0.16...|\n",
+ "|[-0.019737713, 0....|\n",
+ "|[0.08724952, -0.0...|\n",
+ "|[-0.02866838, 0.2...|\n",
+ "|[-0.047727797, 0....|\n",
+ "|[0.07970655, -0.0...|\n",
+ "+--------------------+\n",
+ "only showing top 20 rows\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "result.selectExpr(\"explode(camembert.embeddings) as embeddings\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of CamemBERT models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "0ad83b11b2f94abb84c84a7029356683": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "QzK06tJbYxPw"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "0b8eb9f25faa4993802ed33f05eaf91a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e1c692cb85b64ae6999e2febcb92e556",
+ "placeholder": "",
+ "style": "IPY_MODEL_932203a42244430f903ef45fc3d70f76",
+ "value": " 811k/811k [00:00<00:00, 4.83MB/s]"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "id": "TkLhBJTaYxPw"
- },
- "outputs": [],
- "source": [
- "camembert.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
- ]
+ "0f08885250af4f419b14dab6df6932f7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "olWYb2EoYxPw"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "11907ede30534508a2793241e515a818": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "id": "kVUjfeKoYxPw"
- },
- "outputs": [],
- "source": [
- "!rm -rf {EXPORT_PATH}"
- ]
+ "1696e83714ed4c5ca3231eabad56665a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "1-Yh1zoJYxPw"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your ONNX CamemBERT model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "18b50db31cdf49b1ab28e045e71ed1d8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "id": "qLlgnpPiYxPw",
- "outputId": "74867d22-b90b-406e-8f5a-294de2a6063c",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 430920\n",
- "-rw-r--r-- 1 root root 440439641 Mar 1 01:05 camembert_onnx\n",
- "-rw-r--r-- 1 root root 810912 Mar 1 01:05 camembert_spp\n",
- "drwxr-xr-x 2 root root 4096 Mar 1 01:05 metadata\n"
- ]
- }
+ "19403be31a184b3d96a4e92c2c00140e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1991b5c440194f9a8729a01d382af5c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "1c02b6ac167543109981ef1908a2ce8f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "20bb65ce09c440658155d6f1f2590a09": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_c2d1126afc674192a56f52668380ab94",
+ "IPY_MODEL_3ca16338e2c14772a57d0c1e5de2668f",
+ "IPY_MODEL_ebc68a01a2354c55953779f328523c1b"
],
- "source": [
- "! ls -l {MODEL_NAME}_spark_nlp"
- ]
+ "layout": "IPY_MODEL_e7a68b8fdfbd41999a3cec53c187f1ef"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "aJCkVxTxYxPx"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny CamemBERT model 😊"
- ]
+ "247ea5e1983b4c3885408e364a3ccdc2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "id": "6DQmw6ySYxPx"
- },
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "\n",
- "from sparknlp.base import *\n",
- "from sparknlp.annotator import *\n",
- "\n",
- "document_assembler = DocumentAssembler()\\\n",
- " .setInputCol(\"text\")\\\n",
- " .setOutputCol(\"document\")\n",
- "\n",
- "tokenizer = Tokenizer()\\\n",
- " .setInputCols([\"document\"])\\\n",
- " .setOutputCol(\"token\")\n",
- "\n",
- "camembert_loaded = CamemBertEmbeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"camembert\")\\\n",
- "\n",
- "pipeline = Pipeline(\n",
- " stages = [\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " camembert_loaded\n",
- " ])\n",
- "\n",
- "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
- "model = pipeline.fit(data)\n",
- "result = model.transform(data)"
- ]
+ "26cba613ef664b97a445b206887f231b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0ad83b11b2f94abb84c84a7029356683",
+ "max": 810912,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_11907ede30534508a2793241e515a818",
+ "value": 810912
+ }
},
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "X7cKqpDCYxPx",
- "outputId": "54e87d8e-0ab4-4e7b-be30-e947f1e98992",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "+--------------------+\n",
- "| embeddings|\n",
- "+--------------------+\n",
- "|[-0.049330253, 0....|\n",
- "|[0.003116008, 0.1...|\n",
- "|[-0.021312904, -0...|\n",
- "|[0.046165787, 0.0...|\n",
- "|[0.09459148, 0.07...|\n",
- "|[0.071022525, 0.2...|\n",
- "|[0.08610784, -0.3...|\n",
- "|[0.20012067, 0.49...|\n",
- "|[0.10958594, -0.0...|\n",
- "|[0.19859709, 0.09...|\n",
- "|[0.09361851, 0.21...|\n",
- "|[0.12071304, 0.41...|\n",
- "|[0.12088075, 0.41...|\n",
- "|[0.034318373, -0....|\n",
- "|[0.02465238, 0.16...|\n",
- "|[-0.019737713, 0....|\n",
- "|[0.08724952, -0.0...|\n",
- "|[-0.02866838, 0.2...|\n",
- "|[-0.047727797, 0....|\n",
- "|[0.07970655, -0.0...|\n",
- "+--------------------+\n",
- "only showing top 20 rows\n",
- "\n"
- ]
- }
+ "2c16c911327041379eae3410825174ce": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2f9b57a9d24848b4ac7d3d254a445251": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_be26390927f246348533f0704c18fe60",
+ "IPY_MODEL_47bd91449e67415cbbbca16e9f9de31f",
+ "IPY_MODEL_b57f460a026541c28ab83c06c0d047f8"
],
- "source": [
- "result.selectExpr(\"explode(camembert.embeddings) as embeddings\").show()"
- ]
+ "layout": "IPY_MODEL_ed1cc1cce2d84c10babf49a9239028f5"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0HO06arnYxPx"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of CamemBERT models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4"
+ "377ca31e8f0c4cfe8d027e73f1908612": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7f4455090e7746f5b4be7b3ac8b3b589",
+ "max": 508,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1696e83714ed4c5ca3231eabad56665a",
+ "value": 508
+ }
},
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
+ "3ca16338e2c14772a57d0c1e5de2668f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7984e7fac83e4001815c440c7b7919ad",
+ "max": 445008750,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1991b5c440194f9a8729a01d382af5c3",
+ "value": 445008750
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "3d0b2b4f4dce4697811f861bc0ea2df1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "453d1e3c486f4b9aa7e2d9acecf4176b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ed985a8db8234b90917695713da4a13a",
+ "placeholder": "",
+ "style": "IPY_MODEL_8816bd1505374989a4b8135ead3b5abf",
+ "value": "config.json: 100%"
+ }
+ },
+ "477fd05f4c8f49fdb4002d47582f6313": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d78a70686e9348aa9c8a654c352915f4",
+ "IPY_MODEL_26cba613ef664b97a445b206887f231b",
+ "IPY_MODEL_0b8eb9f25faa4993802ed33f05eaf91a"
+ ],
+ "layout": "IPY_MODEL_97a5876e545b4c24b7498683bb5876b5"
+ }
+ },
+ "47bd91449e67415cbbbca16e9f9de31f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_247ea5e1983b4c3885408e364a3ccdc2",
+ "max": 1395301,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_dddfc0e2eadc494aa0412e67f894730b",
+ "value": 1395301
+ }
+ },
+ "4a8d4f7ef796473a91c5bb1f7b0db38c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_bf6c1dbba58b41448a3350c8967fabfc",
+ "IPY_MODEL_8d52a81376024e90a0d4bfef55285ca9",
+ "IPY_MODEL_8c8a79b44687450f95884ea9a809d6e4"
+ ],
+ "layout": "IPY_MODEL_cd091521a65c47d19bd40d45b1bf4895"
+ }
},
- "accelerator": "GPU",
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "566c95aa32904d73923ed7e93d99d373": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_453d1e3c486f4b9aa7e2d9acecf4176b",
- "IPY_MODEL_377ca31e8f0c4cfe8d027e73f1908612",
- "IPY_MODEL_a1f8086b1b4d40cbac31618adffb1b11"
- ],
- "layout": "IPY_MODEL_2c16c911327041379eae3410825174ce"
- }
- },
- "453d1e3c486f4b9aa7e2d9acecf4176b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ed985a8db8234b90917695713da4a13a",
- "placeholder": "",
- "style": "IPY_MODEL_8816bd1505374989a4b8135ead3b5abf",
- "value": "config.json: 100%"
- }
- },
- "377ca31e8f0c4cfe8d027e73f1908612": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7f4455090e7746f5b4be7b3ac8b3b589",
- "max": 508,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_1696e83714ed4c5ca3231eabad56665a",
- "value": 508
- }
- },
- "a1f8086b1b4d40cbac31618adffb1b11": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d95e1ea826cd4edbac988692aba618b1",
- "placeholder": "",
- "style": "IPY_MODEL_6c33e478484d4866b6b4d44bc1af9c3b",
- "value": " 508/508 [00:00<00:00, 34.9kB/s]"
- }
- },
- "2c16c911327041379eae3410825174ce": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ed985a8db8234b90917695713da4a13a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8816bd1505374989a4b8135ead3b5abf": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7f4455090e7746f5b4be7b3ac8b3b589": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1696e83714ed4c5ca3231eabad56665a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "d95e1ea826cd4edbac988692aba618b1": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6c33e478484d4866b6b4d44bc1af9c3b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "20bb65ce09c440658155d6f1f2590a09": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_c2d1126afc674192a56f52668380ab94",
- "IPY_MODEL_3ca16338e2c14772a57d0c1e5de2668f",
- "IPY_MODEL_ebc68a01a2354c55953779f328523c1b"
- ],
- "layout": "IPY_MODEL_e7a68b8fdfbd41999a3cec53c187f1ef"
- }
- },
- "c2d1126afc674192a56f52668380ab94": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9d84925c75e546a683a580e58d8e763f",
- "placeholder": "",
- "style": "IPY_MODEL_3d0b2b4f4dce4697811f861bc0ea2df1",
- "value": "model.safetensors: 100%"
- }
- },
- "3ca16338e2c14772a57d0c1e5de2668f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7984e7fac83e4001815c440c7b7919ad",
- "max": 445008750,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_1991b5c440194f9a8729a01d382af5c3",
- "value": 445008750
- }
- },
- "ebc68a01a2354c55953779f328523c1b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_85ede09e39074fd3a9bf08c6bc68b753",
- "placeholder": "",
- "style": "IPY_MODEL_d676d382842d4784add17dfca06348a2",
- "value": " 445M/445M [00:02<00:00, 237MB/s]"
- }
- },
- "e7a68b8fdfbd41999a3cec53c187f1ef": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9d84925c75e546a683a580e58d8e763f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3d0b2b4f4dce4697811f861bc0ea2df1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7984e7fac83e4001815c440c7b7919ad": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1991b5c440194f9a8729a01d382af5c3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "85ede09e39074fd3a9bf08c6bc68b753": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d676d382842d4784add17dfca06348a2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "4a8d4f7ef796473a91c5bb1f7b0db38c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_bf6c1dbba58b41448a3350c8967fabfc",
- "IPY_MODEL_8d52a81376024e90a0d4bfef55285ca9",
- "IPY_MODEL_8c8a79b44687450f95884ea9a809d6e4"
- ],
- "layout": "IPY_MODEL_cd091521a65c47d19bd40d45b1bf4895"
- }
- },
- "bf6c1dbba58b41448a3350c8967fabfc": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1c02b6ac167543109981ef1908a2ce8f",
- "placeholder": "",
- "style": "IPY_MODEL_6dc758d8cb124f96873fac98d745a2f2",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "8d52a81376024e90a0d4bfef55285ca9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_18b50db31cdf49b1ab28e045e71ed1d8",
- "max": 25,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_d0ce9537c09b4321a08d71164049e753",
- "value": 25
- }
- },
- "8c8a79b44687450f95884ea9a809d6e4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_6fc8723befd549b6aabe9a84da74b04a",
- "placeholder": "",
- "style": "IPY_MODEL_ab012f044d614acba8d1b2330ecae31d",
- "value": " 25.0/25.0 [00:00<00:00, 127B/s]"
- }
- },
- "cd091521a65c47d19bd40d45b1bf4895": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1c02b6ac167543109981ef1908a2ce8f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6dc758d8cb124f96873fac98d745a2f2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "18b50db31cdf49b1ab28e045e71ed1d8": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d0ce9537c09b4321a08d71164049e753": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "6fc8723befd549b6aabe9a84da74b04a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ab012f044d614acba8d1b2330ecae31d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "477fd05f4c8f49fdb4002d47582f6313": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_d78a70686e9348aa9c8a654c352915f4",
- "IPY_MODEL_26cba613ef664b97a445b206887f231b",
- "IPY_MODEL_0b8eb9f25faa4993802ed33f05eaf91a"
- ],
- "layout": "IPY_MODEL_97a5876e545b4c24b7498683bb5876b5"
- }
- },
- "d78a70686e9348aa9c8a654c352915f4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_69610c818dfa442c86f9af8a04e6019f",
- "placeholder": "",
- "style": "IPY_MODEL_c6ecfecbc45144c385ce89057e86e627",
- "value": "sentencepiece.bpe.model: 100%"
- }
- },
- "26cba613ef664b97a445b206887f231b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0ad83b11b2f94abb84c84a7029356683",
- "max": 810912,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_11907ede30534508a2793241e515a818",
- "value": 810912
- }
- },
- "0b8eb9f25faa4993802ed33f05eaf91a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e1c692cb85b64ae6999e2febcb92e556",
- "placeholder": "",
- "style": "IPY_MODEL_932203a42244430f903ef45fc3d70f76",
- "value": " 811k/811k [00:00<00:00, 4.83MB/s]"
- }
- },
- "97a5876e545b4c24b7498683bb5876b5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "69610c818dfa442c86f9af8a04e6019f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c6ecfecbc45144c385ce89057e86e627": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "0ad83b11b2f94abb84c84a7029356683": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "11907ede30534508a2793241e515a818": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "e1c692cb85b64ae6999e2febcb92e556": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "932203a42244430f903ef45fc3d70f76": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "2f9b57a9d24848b4ac7d3d254a445251": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_be26390927f246348533f0704c18fe60",
- "IPY_MODEL_47bd91449e67415cbbbca16e9f9de31f",
- "IPY_MODEL_b57f460a026541c28ab83c06c0d047f8"
- ],
- "layout": "IPY_MODEL_ed1cc1cce2d84c10babf49a9239028f5"
- }
- },
- "be26390927f246348533f0704c18fe60": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7ce7766a45494859a5cdcb668994c3f6",
- "placeholder": "",
- "style": "IPY_MODEL_879960f4c4e34d07928275c812674dd6",
- "value": "tokenizer.json: 100%"
- }
- },
- "47bd91449e67415cbbbca16e9f9de31f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_247ea5e1983b4c3885408e364a3ccdc2",
- "max": 1395301,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_dddfc0e2eadc494aa0412e67f894730b",
- "value": 1395301
- }
- },
- "b57f460a026541c28ab83c06c0d047f8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_19403be31a184b3d96a4e92c2c00140e",
- "placeholder": "",
- "style": "IPY_MODEL_0f08885250af4f419b14dab6df6932f7",
- "value": " 1.40M/1.40M [00:00<00:00, 5.08MB/s]"
- }
- },
- "ed1cc1cce2d84c10babf49a9239028f5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7ce7766a45494859a5cdcb668994c3f6": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "879960f4c4e34d07928275c812674dd6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "247ea5e1983b4c3885408e364a3ccdc2": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "dddfc0e2eadc494aa0412e67f894730b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "19403be31a184b3d96a4e92c2c00140e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0f08885250af4f419b14dab6df6932f7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "566c95aa32904d73923ed7e93d99d373": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_453d1e3c486f4b9aa7e2d9acecf4176b",
+ "IPY_MODEL_377ca31e8f0c4cfe8d027e73f1908612",
+ "IPY_MODEL_a1f8086b1b4d40cbac31618adffb1b11"
+ ],
+ "layout": "IPY_MODEL_2c16c911327041379eae3410825174ce"
+ }
+ },
+ "69610c818dfa442c86f9af8a04e6019f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6c33e478484d4866b6b4d44bc1af9c3b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6dc758d8cb124f96873fac98d745a2f2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6fc8723befd549b6aabe9a84da74b04a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7984e7fac83e4001815c440c7b7919ad": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7ce7766a45494859a5cdcb668994c3f6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7f4455090e7746f5b4be7b3ac8b3b589": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "85ede09e39074fd3a9bf08c6bc68b753": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "879960f4c4e34d07928275c812674dd6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "8816bd1505374989a4b8135ead3b5abf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "8c8a79b44687450f95884ea9a809d6e4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6fc8723befd549b6aabe9a84da74b04a",
+ "placeholder": "",
+ "style": "IPY_MODEL_ab012f044d614acba8d1b2330ecae31d",
+ "value": " 25.0/25.0 [00:00<00:00, 127B/s]"
+ }
+ },
+ "8d52a81376024e90a0d4bfef55285ca9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_18b50db31cdf49b1ab28e045e71ed1d8",
+ "max": 25,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d0ce9537c09b4321a08d71164049e753",
+ "value": 25
+ }
+ },
+ "932203a42244430f903ef45fc3d70f76": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "97a5876e545b4c24b7498683bb5876b5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9d84925c75e546a683a580e58d8e763f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a1f8086b1b4d40cbac31618adffb1b11": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d95e1ea826cd4edbac988692aba618b1",
+ "placeholder": "",
+ "style": "IPY_MODEL_6c33e478484d4866b6b4d44bc1af9c3b",
+ "value": " 508/508 [00:00<00:00, 34.9kB/s]"
+ }
+ },
+ "ab012f044d614acba8d1b2330ecae31d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b57f460a026541c28ab83c06c0d047f8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_19403be31a184b3d96a4e92c2c00140e",
+ "placeholder": "",
+ "style": "IPY_MODEL_0f08885250af4f419b14dab6df6932f7",
+ "value": " 1.40M/1.40M [00:00<00:00, 5.08MB/s]"
+ }
+ },
+ "be26390927f246348533f0704c18fe60": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7ce7766a45494859a5cdcb668994c3f6",
+ "placeholder": "",
+ "style": "IPY_MODEL_879960f4c4e34d07928275c812674dd6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "bf6c1dbba58b41448a3350c8967fabfc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1c02b6ac167543109981ef1908a2ce8f",
+ "placeholder": "",
+ "style": "IPY_MODEL_6dc758d8cb124f96873fac98d745a2f2",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "c2d1126afc674192a56f52668380ab94": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9d84925c75e546a683a580e58d8e763f",
+ "placeholder": "",
+ "style": "IPY_MODEL_3d0b2b4f4dce4697811f861bc0ea2df1",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "c6ecfecbc45144c385ce89057e86e627": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cd091521a65c47d19bd40d45b1bf4895": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d0ce9537c09b4321a08d71164049e753": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d676d382842d4784add17dfca06348a2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d78a70686e9348aa9c8a654c352915f4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_69610c818dfa442c86f9af8a04e6019f",
+ "placeholder": "",
+ "style": "IPY_MODEL_c6ecfecbc45144c385ce89057e86e627",
+ "value": "sentencepiece.bpe.model: 100%"
+ }
+ },
+ "d95e1ea826cd4edbac988692aba618b1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "dddfc0e2eadc494aa0412e67f894730b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "e1c692cb85b64ae6999e2febcb92e556": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e7a68b8fdfbd41999a3cec53c187f1ef": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ebc68a01a2354c55953779f328523c1b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_85ede09e39074fd3a9bf08c6bc68b753",
+ "placeholder": "",
+ "style": "IPY_MODEL_d676d382842d4784add17dfca06348a2",
+ "value": " 445M/445M [00:02<00:00, 237MB/s]"
+ }
+ },
+ "ed1cc1cce2d84c10babf49a9239028f5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ed985a8db8234b90917695713da4a13a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForQuestionAnswering.ipynb
index 0aa530ff44ed50..68b32b12391e99 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForQuestionAnswering.ipynb
@@ -2,9 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "metadata": {
- "id": "vfU3Ee88cwGj"
- },
+ "metadata": {},
"source": [
"![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
"\n",
@@ -13,9 +11,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "fM_4ix0mcwGm"
- },
+ "metadata": {},
"source": [
"## Import ONNX CamemBertForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀\n",
"\n",
@@ -30,34 +26,24 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "EVzmVKX8cwGn"
- },
+ "metadata": {},
"source": [
"## Export and Save HuggingFace model"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "WDSalCHsd9-z"
- },
+ "metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- CamembertTokenizer requires the `SentencePiece` library, so we install that as well"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "qSx09sNyegma",
- "outputId": "c12f53f3-970e-40b8-c092-e67153a21a7a"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -130,14 +116,12 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "uFkFe1YUewJR"
- },
+ "metadata": {},
"source": [
"- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
"- We'll use [illuin/camembert-base-fquad](https://huggingface.co/illuin/camembert-base-fquad) model from HuggingFace as an example and load it as a `ORTModelForQuestionAnswering`, representing an ONNX model."
@@ -145,72 +129,8 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 281,
- "referenced_widgets": [
- "60824b807eb34823970f0afe669fab77",
- "579cc0e3f0b74666b269eba1095fd54a",
- "cc4728272f11453ca0c03d2c3840ac0d",
- "2e012432b76d46bca6dc05df6e1f78d1",
- "1164fd1351b741dead98629581dbceea",
- "bf5e3cf99e994011aea18983e0794770",
- "b1c76170057e409f811eb164f74b991e",
- "2cd9b87a2a4b4562b29cea3938d7b60a",
- "3fa71888d6a84f7998ea340e0984aea4",
- "ea092b510afa465192742a0baf2ed85e",
- "c5dbc3bb275e419f8209451e521efad6",
- "b394db14253a4f0c8d5611698cf427b7",
- "83bebd8cf8f04a63a58d89fe8a09954e",
- "2466e7029af64b1c9bedd7ce35163899",
- "674cfd9ff77d4147bd56978528448e90",
- "0e2fa741d4744549be91d54239a056db",
- "9e09160e16ec4b5fbf57af47ecf30c01",
- "76261617f60340a58bd7db1ee6889b54",
- "f376a02795b343ffb2f1045b414d8bdb",
- "04c11c2902274294b9bdf935912013a0",
- "e2caa4055a114b00ac0136a5e52cc517",
- "8b94d0f7b9404cb5ac7d37dd6989d1fe",
- "0fd98d8510ab4a05bdddac8727f9a618",
- "93e4f9fb19a34c95a189871d94f8b845",
- "b9957383a64b46b7bff6762bb21cda34",
- "f473d9d2086742b38e24daf86d839827",
- "14bf9182226d4663b917ee435b8014d2",
- "c168bda2ae104316b404bfc532265780",
- "0d42197f53404e98b4cc1536842e3e04",
- "4d34bf4e3e334d65b908c6cfc0e3b6c2",
- "077e7918ec344c60b3041714077f7e76",
- "eda670714b574ccb81f1614edd771cc4",
- "b0535795047546de90a690a7467a35b3",
- "5186a7123c064af58f948eeca9249972",
- "e3230d96b9e1458eb4abcad151da533b",
- "47ea1c464c7f4e088025238f9af6d462",
- "0595b9a3e25b4c97a204c8024da0bf28",
- "1f1f591e62f94390908fb4660a97de0a",
- "7363cd559e2947dbba473f13fbb2e282",
- "78a2a618f96b4b27886a969ed8749bcc",
- "895b5df6613f4d7db780ba93307bf6fd",
- "92170245c9984fa78a4c0be174345aec",
- "f9dd34dd913742a2ac469207964d40a8",
- "eb1a55e3c53749db853fd8465634766c",
- "7224240d590a460b9e7396e783eb17bb",
- "6c33510efcf949b4bd0f0b7c68901da9",
- "c891ff3fd87e4c1ba700c234e76f239d",
- "dabe0a4be0204c989b582a531c56c439",
- "c8e7e8365da44f8fbe15057040b8d6f9",
- "0db6c31436ff403d8a44e197ac435b38",
- "937af17f98d24828a022d92106e4d901",
- "a410f3289fb046a6a4d33422892bedf1",
- "94b04d2fba604d6f9d09001778242f48",
- "6ec5e976fb0d473ebe21ed6ea312714a",
- "3982f5c5417e47259d51fb9cfa2d3224"
- ]
- },
- "id": "FtWcH9nycwGq",
- "outputId": "f4156389-8a42-4617-c3c7-1a729a42d5a3"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -316,23 +236,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "zwiHGrMzcwGr"
- },
+ "metadata": {},
"source": [
"Let's have a look inside these two directories and see what we are dealing with:"
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "DNv4wNUjcwGs",
- "outputId": "d3a4bd83-efd1-4bc2-c360-b6ed527f8cc8"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -354,19 +266,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "A729i3lKcwGt"
- },
+ "metadata": {},
"source": [
"- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to `assets` folder which Spark NLP will look for"
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "YeAxCPRucwGs"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!mkdir {ONNX_MODEL}/assets"
@@ -374,10 +282,8 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "iGYjyVVscwGt"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
@@ -385,23 +291,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "jDnAwQNzcwGt"
- },
+ "metadata": {},
"source": [
"Voila! We have our `spiece.model` inside assets directory"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "I0bHNH4DcwGu",
- "outputId": "c504743a-c117-4cbe-9592-d693647b3cba"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -428,18 +326,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "9YqU2MCUcwGu"
- },
+ "metadata": {},
"source": [
"## Import and Save CamemBertForQuestionAnswering in Spark NLP\n"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "DY8O4gmNcwGu"
- },
+ "metadata": {},
"source": [
"- Let's install and setup Spark NLP in Google Colab\n",
"- This part is pretty easy via our simple script"
@@ -447,14 +341,8 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "sf3trd1AcwGu",
- "outputId": "f052434f-b296-4fc8-82b7-460eae529c2c"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -476,23 +364,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "c20mfw2XcwGv"
- },
+ "metadata": {},
"source": [
"Let's start Spark with Spark NLP included via our simple `start()` function"
]
},
{
"cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "hpyY43a1cwGv",
- "outputId": "267e6909-8cea-4008-9acd-e8e05f41089f"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -512,9 +392,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "MphL0pwXcwGv"
- },
+ "metadata": {},
"source": [
"- Let's use `loadSavedModel` functon in `CamemBertForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n",
"- Most params can be set later when you are loading this model in `CamemBertForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
@@ -524,10 +402,8 @@
},
{
"cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "EYtqFJdicwGv"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"from sparknlp.annotator import *\n",
@@ -545,19 +421,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "onlY3f49cwGw"
- },
+ "metadata": {},
"source": [
"- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
]
},
{
"cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "id": "r8acHJGIcwGw"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"spanClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
@@ -565,19 +437,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "zDktkvATcwGw"
- },
+ "metadata": {},
"source": [
"Let's clean up stuff we don't need anymore"
]
},
{
"cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "QcB2MXA_cwGx"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!rm -rf {ONNX_MODEL}"
@@ -585,9 +453,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "Z1onLYQQcwGx"
- },
+ "metadata": {},
"source": [
"Awesome 😎 !\n",
"\n",
@@ -596,14 +462,8 @@
},
{
"cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "lBGyJYtdcwGy",
- "outputId": "0d75b5aa-32cc-4751-bca7-29573c8143fc"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -622,23 +482,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "dnrCzOhBcwGy"
- },
+ "metadata": {},
"source": [
"Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny CamemBertForQuestionAnswering model in Spark NLP 🚀 pipeline!"
]
},
{
"cell_type": "code",
- "execution_count": 23,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "6pRIq0DycwGz",
- "outputId": "35b44310-ee12-4c28-8b2c-c7d4af52ca72"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -670,7 +522,7 @@
"context = \"Mon nom est Wolfgang et je vis à Berlin\"\n",
"question = \"Où est-ce que je vis?\"\n",
"\n",
- "example = spark.createDataFrame([[question, contexilluin/camembert-base-fquadt]]).toDF(\"question\", \"context\")\n",
+ "example = spark.createDataFrame([[question, context]]).toDF(\"question\", \"context\")\n",
"result = pipeline.fit(example).transform(example)\n",
"\n",
"result.select(\"question\", \"answer.result\").show(truncate=False)"
@@ -678,9 +530,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "tpTcy12BcwGz"
- },
+ "metadata": {},
"source": [
"That's it! You can now go wild and use hundreds of `CamemBertForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀\n"
]
@@ -706,8 +556,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "pygments_lexer": "ipython3"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForSequenceClassification.ipynb
index aebc1ead26991d..4fb7a0de126606 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForSequenceClassification.ipynb
@@ -2,20 +2,16 @@
"cells": [
{
"cell_type": "markdown",
- "metadata": {
- "id": "Kz2JVabcBeK6"
- },
+ "metadata": {},
"source": [
"![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
"\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_AlbertForSequenceClassification.ipynb)"
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForSequenceClassification.ipynb)"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "LGmftFTVBeK_"
- },
+ "metadata": {},
"source": [
"## Import ONNX CamemBertForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
"\n",
@@ -30,34 +26,24 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "C6h2JYiqBeLB"
- },
+ "metadata": {},
"source": [
"## Export and Save HuggingFace model"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "VG7MbbfwBeLB"
- },
+ "metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- CamembertTokenizer requires the `SentencePiece` library, so we install that as well"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "O-v9Ox3RBeLC",
- "outputId": "5ab09375-e283-4110-98b8-d315adeabaed"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -126,14 +112,12 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "vX94VKVqDBys"
- },
+ "metadata": {},
"source": [
"- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
"- We'll use [tblard/tf-allocine](https://huggingface.co/tblard/tf-allocine) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
@@ -141,72 +125,8 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 368,
- "referenced_widgets": [
- "1fc8a82b2cc049eebf6cf548c4d8bc31",
- "d38d929335154784a2f64c4c9a867bde",
- "6baf6b23f8b340fdb485a1822d3fbb17",
- "74779f1b332e4d87b4e74f54a45b061d",
- "f4ebd094b530442e8f801e86ac62e266",
- "f943daa5bd4c4477bc0b96ae7266d0ef",
- "19f767de7d6441a78112982a4b2e5689",
- "f8b33f71a72c4cdeafbb6dcb56fada3d",
- "7e1ba263493a42e5898c90dcfc76ea9a",
- "bf75a87261e548c0ab3a13ba713fcd3f",
- "312e8c30d9264f0799b251ca9b5ddaec",
- "8f61c2b8a66c4d57b232b75670077d16",
- "03f9cff31e0849328fbc0bcdae68504a",
- "b1febffea0fc4a29a1ddaa089b46f963",
- "b9ce748b6bca4a4fb87ee78d123cec4f",
- "fd60da5ac18443ba84fa1a3019d4ec17",
- "670125413ddd416dba9691a0eecd5bf2",
- "e1f215647df14777b76565f9e17b6b42",
- "6ee0d336f8cf4e54bc9b16b102a6f6a6",
- "d6b5087105694b1d9d31ca752d7a1a02",
- "5a1688724bf541df9a663b4e3fbb9c4a",
- "158a4e71807c4035898f571d090196c4",
- "eedcb59087aa463787ccfb431491612e",
- "03b2bf55225a4f3b90c58c6a2aebd8d6",
- "e03bab197a814f979d1f580da8cfd34f",
- "5af2961b3ba3480cb89f8ac474f017b6",
- "73ebf2da268a447ebc76b53a46efe7ae",
- "1db65061f028451585293514adcd15a0",
- "e0c9bff56942449aab8ebde097afe65a",
- "df6fbe6070e545fea98371b38b1a7968",
- "a855a3255f064400a72323cd7012f00e",
- "4940c4b39d1e4f1699f2a831310b92b3",
- "19f001d8e3be4674b582cbeaa6769ac1",
- "5e86f09721874d8db44266e8efdd756d",
- "941982def08449689fa10b9060df21dd",
- "04b9564aab874822883cd67bab2693da",
- "b649e106a52c45369c60f52dc29858fe",
- "971fce9704734bbcbff90ee730238a53",
- "a649d60a24c0459a8910631de2d4ba09",
- "21303552af4540beaefa93507328127d",
- "0aab68e6209743efbe733ab76d22d522",
- "f89e56bb9b174fc9886578897f29a129",
- "06507c6d96ce42139c5b07b6071fb187",
- "5e6208deee5e4ee891d2b79ee36c4b3c",
- "69ae18d4f80843889ed37540ee99f6ee",
- "8e73282bb7c7443dbb8b50fb291f7596",
- "abd0b36bd6714517bb480f8bbb212ed1",
- "8e76d740007c406fa93bfe1260ac73f4",
- "a3d2d1e0d5b84a158ed651509cd7f71e",
- "0f95972368cf4c9bafcbae7bd7e6af17",
- "9ba93d6ff0f64bb19bcb968b9a5d2786",
- "239ee55351684b15a86fc7b1216c084a",
- "f3f27505e47d4d609433fc39898daa01",
- "b6f5e8a982904d5e83e734d3efe3e9ca",
- "aa05663dbc2446358f023015125340e0"
- ]
- },
- "id": "1KO14D7FDYbV",
- "outputId": "96c0057a-d0f4-43c4-8cd0-f88f08b31a4d"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -323,23 +243,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "M9DSQEJgBeLE"
- },
+ "metadata": {},
"source": [
"Let's have a look inside these two directories and see what we are dealing with:"
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "eQMgq6JyBeLF",
- "outputId": "64ff36e5-91c1-4078-d616-1761300a106b"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -361,9 +273,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "j2vb2xNZBeLG"
- },
+ "metadata": {},
"source": [
"- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to assets folder which Spark NLP will look for\n",
"- In addition to vocabs, we also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
@@ -371,10 +281,8 @@
},
{
"cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "QCzfmhNwFg9J"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!mkdir {ONNX_MODEL}/assets"
@@ -382,10 +290,8 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "8762MVaBBeLG"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"# get label2id dictionary\n",
@@ -399,10 +305,8 @@
},
{
"cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "id": "aDlALBQrBeLG"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
@@ -410,23 +314,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "96PWQ6MYBeLG"
- },
+ "metadata": {},
"source": [
"Voila! We have our `sentencepiece.bpe.model` and `labels.txt` inside assets directory"
]
},
{
"cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "onqDAch3BeLH",
- "outputId": "17b0d23f-6653-4610-8145-5fbfd229d714"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -454,18 +350,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "HNqY0JX7BeLH"
- },
+ "metadata": {},
"source": [
"## Import and Save CamemBertForSequenceClassification in Spark NLP\n"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "KBftz1gHBeLH"
- },
+ "metadata": {},
"source": [
"- Let's install and setup Spark NLP in Google Colab\n",
"- This part is pretty easy via our simple script"
@@ -473,14 +365,8 @@
},
{
"cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "CNsH1BkpBeLH",
- "outputId": "50ba7045-a29e-4501-b2c7-4fcd2894c6b5"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -518,23 +404,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "1GFZq_URBeLI"
- },
+ "metadata": {},
"source": [
"Let's start Spark with Spark NLP included via our simple `start()` function"
]
},
{
"cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "5Ti3X-BJBeLI",
- "outputId": "c4fef397-1747-4def-e3c8-f84d88b4840d"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -554,9 +432,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "4RPu9eDfBeLI"
- },
+ "metadata": {},
"source": [
"- Let's use `loadSavedModel` functon in `CamemBertForSequenceClassification` which allows us to load TensorFlow model in SavedModel format\n",
"- Most params can be set later when you are loading this model in `CamemBertForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
@@ -566,10 +442,8 @@
},
{
"cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "gFIeDIyVBeLI"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"from sparknlp.annotator import *\n",
@@ -586,19 +460,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "iDgUKFgyBeLI"
- },
+ "metadata": {},
"source": [
"- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
]
},
{
"cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "id": "x5wvcWXrBeLI"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
@@ -606,19 +476,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "vSjvf8woBeLI"
- },
+ "metadata": {},
"source": [
"Let's clean up stuff we don't need anymore"
]
},
{
"cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "Blq6dSf1BeLJ"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!rm -rf {ONNX_MODEL}"
@@ -626,9 +492,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "NR0Dge9gBeLJ"
- },
+ "metadata": {},
"source": [
"Awesome 😎 !\n",
"\n",
@@ -637,14 +501,8 @@
},
{
"cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "xKvpIhytBeLJ",
- "outputId": "46a0c1ff-c423-483a-d97d-fa66080a055c"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -664,19 +522,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "GylPMYsEBeLJ"
- },
+ "metadata": {},
"source": [
"Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny CamemBertForSequenceClassification model 😊"
]
},
{
"cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "id": "PeY7xqh7BeLJ"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"sequenceClassifier_loaded = CamemBertForSequenceClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
@@ -686,23 +540,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "ZRNL6MuYBeLJ"
- },
+ "metadata": {},
"source": [
"You can see what labels were used to train this model via `getClasses` function:"
]
},
{
"cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "wkYTgXO5BeLJ",
- "outputId": "d85f21b9-b545-4033-a67c-85b3fef033ea"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -710,7 +556,7 @@
"['NEGATIVE', 'POSITIVE']"
]
},
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -722,23 +568,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "1ROrCoh7BeLK"
- },
+ "metadata": {},
"source": [
"This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
]
},
{
"cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "752d8NgmBeLK",
- "outputId": "869ea71a-88fe-45bc-890b-7600c1d4c285"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -785,9 +623,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "jdXhBn3wBeLK"
- },
+ "metadata": {},
"source": [
"That's it! You can now go wild and use hundreds of `CamemBertForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
]
@@ -813,8 +649,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "pygments_lexer": "ipython3"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForTokenClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForTokenClassification.ipynb
index 89488034812dc1..89028457390477 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForTokenClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_CamemBertForTokenClassification.ipynb
@@ -2,9 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "metadata": {
- "id": "t609_kwpJbwZ"
- },
+ "metadata": {},
"source": [
"![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
"\n",
@@ -13,9 +11,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "zAKzkwH5Jbwf"
- },
+ "metadata": {},
"source": [
"## Import ONNX CamemBertForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
"\n",
@@ -30,34 +26,24 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "4Wd_fyDVJbwg"
- },
+ "metadata": {},
"source": [
"## Export and Save HuggingFace model"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "dyDoTKBbJbwg"
- },
+ "metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases\n",
"- CamembertTokenizer requires the `SentencePiece` library, so we install that as well"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "Mm0OMPsCJbwh",
- "outputId": "153cedd1-9f53-450f-d595-172914745937"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -126,14 +112,12 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "MvbxodR1Jbwi"
- },
+ "metadata": {},
"source": [
"- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
"- We'll use [Jean-Baptiste/camembert-ner](https://huggingface.co/Jean-Baptiste/camembert-ner) model from HuggingFace as an example\n",
@@ -142,72 +126,8 @@
},
{
"cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 281,
- "referenced_widgets": [
- "2ba8ff842c074fab8fab3bc525260a11",
- "5e9677b9d95f4543b87e3fb4e09e6bde",
- "0b62c365f7704f46a9ef959ba529a159",
- "b73eb7c03f064876b4380e82cb8e583c",
- "ccc1c9742b1c484b9ea4ad616b71a87d",
- "e41b4406b5b349259880f92990a334b5",
- "ed3989d2c8dc4986ba7bb8a014f49c1a",
- "f5c2ec307a9b4ee6a39e9748e19eb5d5",
- "c55bf3c58b8a46e7abee8d7fea7a1561",
- "409a90ec548e462c80a97891ec686aa3",
- "dcc62c0294ef4334b3508984c22b5a70",
- "2a29fc0fffc2482487f63ab522ec35db",
- "39048167e0ad4015be7f8f51419206d9",
- "c777b6fec9f143d281ebe9ba4103d283",
- "82c60427d78a4b639ee203f9ee788683",
- "ab7bf1c3000048ff87797d068f1367a8",
- "edd7b3df09a84b0488369090591f656d",
- "aa825fbcab524208a21f35e33d058290",
- "9ac67d31c7ac450a87a5d0222c42e35c",
- "14368fb00c55452fa76ba6497e6d942e",
- "05772272612a4848a196233f6cf75811",
- "9e8b12bafe0c4fcf98fccdc8dfb1c3ed",
- "3d9433bb5f0c407eb11e9790fdefd8df",
- "b0324bc4bda04fb48a5a7b4c56fb9026",
- "edfb6a656ec644b8b76148003cea9f94",
- "688d5fa38a8d42cebefbc07fa6a45f47",
- "7cfb6ecb38834276afdd3822ab258dc7",
- "b3c59848de5b4a9d8ef10590dac6db43",
- "f2da19a7e20b47a98fc36a82946ce2ff",
- "a181ef0216ba49a19fc8520e04eb905d",
- "6e83b3a5aa1a43a6b98b99cae4661936",
- "fecfdac50bd6472e93eef245b1194670",
- "806acc042ca143759e47d2814e229759",
- "16c1c0ff48d54d61a8f669d945e95c5d",
- "6d474cbdef9749e8a57a6d36b8f47646",
- "660aa338eb314cc0843e470b209a80e4",
- "b3b40e6fa37b476c9053a1abeb6f5161",
- "b6055e80cd98444d9ee1d62a49888abf",
- "22450b273b9b4d19ae09b082f7e1f88f",
- "0893cfe3bf4a41fc8ea582ab795f10fa",
- "734fb73ad2794b54bc9ca3c8cae64d13",
- "6aee6131b2404ee2aeea75798a46b8b8",
- "ea23116c7bfc48a6942c826feb08f58b",
- "697717fe14444024bc24b23e41e31dbb",
- "e2d79e49240d42e4981874f04fb81f79",
- "c46d3e59f2b34fc88bbb66058b24d7ed",
- "b26578c8ddeb4ae7acd8a29fa3c66092",
- "b170b6bf9cc648f9b42e04502afd830d",
- "3e5bff8b13c44da8923242dd57faf147",
- "e6d35d4877984babb780f46b7b747c5a",
- "115c9ab451834e76a911aa51b1bffd06",
- "304abae09efb4f13b6a9be85e2439311",
- "ffcadb4b3181404c8b9ce05faaca3a96",
- "c9cf94eccc5545dea21618e802725ebe",
- "21f40edbe45948e5add865e8dcdd2fa6"
- ]
- },
- "id": "69dqor6oJbwj",
- "outputId": "dde2fba0-185e-45d8-dfd0-76662ee03e16"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -313,23 +233,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "b_izs6-IJbwj"
- },
+ "metadata": {},
"source": [
"Let's have a look inside these two directories and see what we are dealing with:"
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "MlDEX0RPJbwk",
- "outputId": "2d6c2275-8c48-41ab-e46d-b019d9aa2ab4"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -351,10 +263,8 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "LlNnvSmEJbwl"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!mkdir {ONNX_MODEL}/assets"
@@ -362,9 +272,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "MMgh4l6YJbwl"
- },
+ "metadata": {},
"source": [
"- As you can see, we need to move `spiece.model` from the tokenizer to assets folder which Spark NLP will look for\n",
"- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
@@ -372,10 +280,8 @@
},
{
"cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "id": "o9Iwtm4yJbwm"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"# get label2id dictionary\n",
@@ -389,10 +295,8 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "id": "uXyBoTy8RGXs"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
@@ -400,23 +304,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "iVWGRQVeJbwm"
- },
+ "metadata": {},
"source": [
"Voila! We have our `sentencepiece.bpe.model` and `labels.txt` inside assets directory"
]
},
{
"cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "zS2XOFsgJbwm",
- "outputId": "093f2354-e749-4620-84cc-61470b61485d"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -444,18 +340,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "cDYX_LrGJbwm"
- },
+ "metadata": {},
"source": [
"## Import and Save CamemBertForTokenClassification in Spark NLP\n"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "lDxYBOKvJbwn"
- },
+ "metadata": {},
"source": [
"- Let's install and setup Spark NLP in Google Colab\n",
"- This part is pretty easy via our simple script"
@@ -463,14 +355,8 @@
},
{
"cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "0BRm0BAtJbwn",
- "outputId": "0e86c28b-403a-43d4-c53a-4f157f5c2b7a"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -492,23 +378,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "V_4on1mZJbwn"
- },
+ "metadata": {},
"source": [
"Let's start Spark with Spark NLP included via our simple `start()` function"
]
},
{
"cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "EEgIAj-1Jbwo",
- "outputId": "8dbd34fc-f958-4601-8748-45f5874b56f4"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -528,9 +406,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "ny4my0lWJbwo"
- },
+ "metadata": {},
"source": [
"- Let's use `loadSavedModel` functon in `CamemBertForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n",
"- Most params can be set later when you are loading this model in `CamemBertForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
@@ -540,10 +416,8 @@
},
{
"cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "id": "DfyIDo5YJbwo"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"from sparknlp.annotator import *\n",
@@ -561,19 +435,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "F1-FhOlmJbwo"
- },
+ "metadata": {},
"source": [
"- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
]
},
{
"cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "23ttd2FZJbwo"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"tokenClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
@@ -581,19 +451,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "P0DfbHYqJbwo"
- },
+ "metadata": {},
"source": [
"Let's clean up stuff we don't need anymore"
]
},
{
"cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "id": "fmAPpXxAJbwp"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"!rm -rf {ONNX_MODEL}"
@@ -601,9 +467,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "MWUOPc0ZJbwp"
- },
+ "metadata": {},
"source": [
"Awesome 😎 !\n",
"\n",
@@ -612,14 +476,8 @@
},
{
"cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "khdBEds6Jbwp",
- "outputId": "7305ca32-3b48-4900-ec0b-4166ee5d8a9c"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -639,19 +497,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "nY_AQ5wjJbwp"
- },
+ "metadata": {},
"source": [
"Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny CamemBertForTokenClassification model 😊"
]
},
{
"cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "id": "TLaswe_-Jbwp"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"tokenClassifier_loaded = CamemBertForTokenClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
@@ -661,23 +515,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "lLf8G5u-Jbwp"
- },
+ "metadata": {},
"source": [
"You can see what labels were used to train this model via `getClasses` function:"
]
},
{
"cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "ynmsq45nJbwq",
- "outputId": "68596e4a-ff57-46d9-8edf-95d849f2c9b6"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -685,7 +531,7 @@
"['I-ORG', 'I-MISC', 'I-LOC', 'I-PER', 'O']"
]
},
- "execution_count": 18,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -697,23 +543,15 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "iASxSDQXJbwq"
- },
+ "metadata": {},
"source": [
"This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
]
},
{
"cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "D8cNS4atJbwq",
- "outputId": "0fcfc224-dd1c-4bc2-c3bd-9a0ff84e4c4f"
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -757,9 +595,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "id": "DUPss2DdJbwq"
- },
+ "metadata": {},
"source": [
"That's it! You can now go wild and use hundreds of `CamemBertForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
]
@@ -785,8 +621,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "pygments_lexer": "ipython3"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBERTa.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBERTa.ipynb
index 64ea287fc9376f..bf087ea8342937 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBERTa.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBERTa.ipynb
@@ -28,7 +28,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
]
},
{
@@ -70,7 +70,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForQuestionAnswering.ipynb
index 6dca7dc5b6644f..bd218a3a41eece 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForQuestionAnswering.ipynb
@@ -1,3165 +1,2993 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForQuestionAnswering.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX DeBertaForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `DeBertaForQuestionAnswering` is only available since in `Spark NLP 5.2.1` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import DeBerta models trained/fine-tuned for question answering via `DeBertaForQuestionAnswering` or `TFDeBertaForQuestionAnswering`. These models are usually under `Question Answering` category and have `DeBerta` in their labels\n",
+ "- Reference: [TFDeBertaForQuestionAnswering](https://huggingface.co/docs/transformers/model_doc/deberta#transformers.TFDebertaForQuestionAnswering)\n",
+ "- Some [example models](https://huggingface.co/models?filter=deberta&pipeline_tag=question-answering)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- Albert uses SentencePiece, so we will have to install that as well"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "vizs6Bi9VdSl"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForQuestionAnswering.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m68.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m65.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m72.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m64.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m108.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m48.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m87.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m106.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m54.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m73.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m48.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m93.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m94.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m108.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m93.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow==2.11.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- - We'll use [nbroad/deberta-v3-xsmall-squad2](https://huggingface.co/nbroad/deberta-v3-xsmall-squad2) model from HuggingFace as an example and load it as a `ORTModelForQuestionAnswering`, representing an ONNX model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "mNs5zLPbVdSo"
- },
- "source": [
- "## Import ONNX DeBertaForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `DeBertaForQuestionAnswering` is only available since in `Spark NLP 5.2.1` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import DeBerta models trained/fine-tuned for question answering via `DeBertaForQuestionAnswering` or `TFDeBertaForQuestionAnswering`. These models are usually under `Question Answering` category and have `DeBerta` in their labels\n",
- "- Reference: [TFDeBertaForQuestionAnswering](https://huggingface.co/docs/transformers/model_doc/deberta#transformers.TFDebertaForQuestionAnswering)\n",
- "- Some [example models](https://huggingface.co/models?filter=deberta&pipeline_tag=question-answering)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:72: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "_pi-2aJlVdSo"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f9254e58721a48248f1730e695aded32",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/884 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "OruD9J3RVdSp"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
- "- Albert uses SentencePiece, so we will have to install that as well"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export to ONNX.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "iwEScelfVdSp",
- "outputId": "8bf611c6-0d21-4be1-e0a2-d1de597d051a",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "455962cabd95443da227911716b94d06",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m68.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m65.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m72.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m64.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m108.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m48.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m87.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m106.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m54.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m73.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m48.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m93.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m94.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m108.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m93.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow==2.11.0"
+ "text/plain": [
+ "pytorch_model.bin: 0%| | 0.00/283M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "Rpw6dThqVdSq"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9594a396614d45288ad21e75d4bbcfea",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- - We'll use [nbroad/deberta-v3-xsmall-squad2](https://huggingface.co/nbroad/deberta-v3-xsmall-squad2) model from HuggingFace as an example and load it as a `ORTModelForQuestionAnswering`, representing an ONNX model."
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/394 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "id": "_DQpE4a8VdSq",
- "outputId": "bb55b337-fdca-4df8-8344-f8bba05ce20e",
- "colab": {
- "referenced_widgets": [
- "f9254e58721a48248f1730e695aded32",
- "6ec34b182e974b129584c00f99c339ae",
- "0e324df4ba4343f0a4fcd53ba21d8a3b",
- "fabf92e8f40d43338ca4594330c990a3",
- "3c7c2efc95524a839950b0558bdbf226",
- "d924104033c34f938bc17afc91c022f5",
- "f2191759bb264d9e9d1fcc6b277bfe59",
- "e2b34a54d285421982a362f3e22035ee",
- "2c8e8004e95f4af6ac908c447baf6cb1",
- "0df5c5ed6d2f4881b5f73e0dc068f808",
- "eb1aaa375c83484e8b7b3c11b207d44b",
- "455962cabd95443da227911716b94d06",
- "57f30a4b9b534ef8a8b4000a9ec4ea8b",
- "0256b0f9dfd4414dba094da582949d3a",
- "229eb576df26428fbf5146f9eda58971",
- "980662b7d22f402a9c611b1ccdb6f32b",
- "25f7ff1545b04aaab894174f020ffc5d",
- "e46424a7c71f4405a254aab81b0d2b0a",
- "75023cd7db9b40c585ff011c30d50b91",
- "9319f7f503e841dea5a1e3ec0f7214ea",
- "14f5573dbc2c41459782bd44e58976ee",
- "b5604c1693884d589e41755b66e3c86f",
- "9594a396614d45288ad21e75d4bbcfea",
- "3a7808f8cbd347c79f84765458a08ce6",
- "d786b0ed9aab4e749802d3a2fa6f4959",
- "40f83b7123164f9aa3d0ca0ddf02d0bd",
- "8e4e4cfa531a48d2bf71ac9930c4a48d",
- "b31da4d620204a8ba024c48324764639",
- "47f15a2604f64bd4854cc95a0f76b49d",
- "ed2957c1ff7c40c58b618c384b363c6e",
- "c8398964efcd4d82b0f7bac2608e1f1d",
- "ac1e1ab366bc4d8b8dca9d71ca65a7f6",
- "13358ce6b33d47e890b43dd4236aa439",
- "a229d8535c4f4341bcc1c0a8506b5b17",
- "3d45d291cfda42f4a7c67516fa5488ca",
- "746d83433e484a0a8de93fcbb5ad29da",
- "431b86ed4ec643e1b1f01b804e8bec41",
- "93e87b82fc16440393f084a9c762f2ed",
- "d75e4b099fe84eb684a0e66b8a02982e",
- "8b358f86da884768a0b5926b3edd7a0c",
- "fb61a83b5a8c47c4981bd49c35af1a42",
- "f3aa9faf5c034a8d8d7ab245d2d34125",
- "40bc62cffee94aab884d27c6a05e5705",
- "428abffd70534dc783e9567fbe149d32",
- "6671b03b737b4a49a656296dd679b19c",
- "8112ad9f82a1414e9023e7a0512aa20f",
- "724d76da514e433a854564890ec4e4e3",
- "00d7cad57ef640258c6b0180d5292e99",
- "762621335410465cb10ae36de14014b2",
- "3a8a4455d47240e78f48b8eb624f6b78",
- "fef3940a3e7544559405d75149f6b4d2",
- "89b949f7aaa44d0eb14ae4a0ff1b83e0",
- "d49fb0de764a421f8bd6ec113ead4bc9",
- "5d6f3fc3823340edaa164a23046d0bed",
- "3ebb7187480b4b15bc7631bfafa83ba4",
- "b22946eb1eac4259b858b57987f30016",
- "e2fcf6ea02dc4196ad8c68b18bbc59ce",
- "6d6573a0406e4b69b94fa6d19fb5260c",
- "ecf203671bde4fa7b90f0170b4477970",
- "95753053c3cc4dd2995d65b9fbcc536d",
- "3a636c5987b94912ad4ef559afe0ca04",
- "36272b7e705f418383ccbfc3b0214781",
- "d5368a2371544347a97015061f463e04",
- "6bb13ca9c60e4f5f861b0cae87766cfc",
- "f9f880ea45b24a249ea82caa314685ac",
- "38895a270f7045d18b2bf6c867447868",
- "cb864906686547ee91cbb96dcfd30dc2",
- "c439f91b65524b0783fd7ee953083d69",
- "dc344a0dcff94d0cbf3b8f778ab59c8b",
- "f49e1f3a6a2a4ba491abc58a3505a2f0",
- "e515aaeab05e421282c16d6fa6c3b808",
- "8c6094de1e584aaebf405fbbb66bd19d",
- "518235d256644f1ab46d08f87a193d97",
- "62740de8ad3445549ef1bb1fb0c58994",
- "16bd0df4859849e1b62d6cf314ca46e9",
- "fd14fcc2bff94248a24df3316e79900c",
- "6e92901b488e4bb2adbc40a803f236fc"
- ],
- "base_uri": "https://localhost:8080/",
- "height": 816
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a229d8535c4f4341bcc1c0a8506b5b17",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:72: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/884 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "f9254e58721a48248f1730e695aded32"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "pytorch_model.bin: 0%| | 0.00/283M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "455962cabd95443da227911716b94d06"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/394 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "9594a396614d45288ad21e75d4bbcfea"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "spm.model: 0%| | 0.00/2.46M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "a229d8535c4f4341bcc1c0a8506b5b17"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/8.66M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "6671b03b737b4a49a656296dd679b19c"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "added_tokens.json: 0%| | 0.00/18.0 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "b22946eb1eac4259b858b57987f30016"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/156 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "cb864906686547ee91cbb96dcfd30dc2"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/transformers/convert_slow_tokenizer.py:454: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n",
- " warnings.warn(\n",
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:561: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " torch.tensor(mid - 1).type_as(relative_pos),\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:565: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " torch.ceil(torch.log(abs_pos / mid) / torch.log(torch.tensor((max_position - 1) / mid)) * (mid - 1)) + mid\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
- " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
- " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
- " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:816: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
- " if key_layer.size(-2) != query_layer.size(-2):\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:112: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " output = input.masked_fill(rmask, torch.tensor(torch.finfo(input.dtype).min))\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForQuestionAnswering\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'nbroad/deberta-v3-xsmall-squad2'\n",
- "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForQuestionAnswering.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(ONNX_MODEL)"
+ "text/plain": [
+ "spm.model: 0%| | 0.00/2.46M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "arQbeMqYVdSq"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6671b03b737b4a49a656296dd679b19c",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/8.66M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "id": "jSCM6ab8VdSr",
- "outputId": "5aff0e04-849b-4e4e-a1ca-eb5dc31816d1",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b22946eb1eac4259b858b57987f30016",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 287828\n",
- "-rw-r--r-- 1 root root 23 Jan 4 16:45 added_tokens.json\n",
- "-rw-r--r-- 1 root root 880 Jan 4 16:45 config.json\n",
- "-rw-r--r-- 1 root root 283599177 Jan 4 16:45 model.onnx\n",
- "-rw-r--r-- 1 root root 173 Jan 4 16:45 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 2464616 Jan 4 16:45 spm.model\n",
- "-rw-r--r-- 1 root root 412 Jan 4 16:45 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 8648791 Jan 4 16:45 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {ONNX_MODEL}"
+ "text/plain": [
+ "added_tokens.json: 0%| | 0.00/18.0 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "UWkwd7kdVdSr"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "cb864906686547ee91cbb96dcfd30dc2",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- As you can see, we need to move `spm.model` from the tokenizer to `assets` folder which Spark NLP will look for"
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/156 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "lOZcb89fVdSr"
- },
- "outputs": [],
- "source": [
- "!mkdir {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/transformers/convert_slow_tokenizer.py:454: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n",
+ " warnings.warn(\n",
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:561: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " torch.tensor(mid - 1).type_as(relative_pos),\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:565: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " torch.ceil(torch.log(abs_pos / mid) / torch.log(torch.tensor((max_position - 1) / mid)) * (mid - 1)) + mid\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:816: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+ " if key_layer.size(-2) != query_layer.size(-2):\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:112: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " output = input.masked_fill(rmask, torch.tensor(torch.finfo(input.dtype).min))\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForQuestionAnswering\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'nbroad/deberta-v3-xsmall-squad2'\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForQuestionAnswering.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(ONNX_MODEL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "azGDw57sVdSr"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/spm.model {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 287828\n",
+ "-rw-r--r-- 1 root root 23 Jan 4 16:45 added_tokens.json\n",
+ "-rw-r--r-- 1 root root 880 Jan 4 16:45 config.json\n",
+ "-rw-r--r-- 1 root root 283599177 Jan 4 16:45 model.onnx\n",
+ "-rw-r--r-- 1 root root 173 Jan 4 16:45 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 2464616 Jan 4 16:45 spm.model\n",
+ "-rw-r--r-- 1 root root 412 Jan 4 16:45 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 8648791 Jan 4 16:45 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `spm.model` from the tokenizer to `assets` folder which Spark NLP will look for"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/spm.model {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `spm.model` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "puzAFW6nVdSr"
- },
- "source": [
- "Voila! We have our `spm.model` inside assets directory"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/nbroad/deberta-v3-xsmall-squad2:\n",
+ "total 285424\n",
+ "-rw-r--r-- 1 root root 23 Jan 4 16:45 added_tokens.json\n",
+ "drwxr-xr-x 2 root root 4096 Jan 4 16:45 assets\n",
+ "-rw-r--r-- 1 root root 880 Jan 4 16:45 config.json\n",
+ "-rw-r--r-- 1 root root 283599177 Jan 4 16:45 model.onnx\n",
+ "-rw-r--r-- 1 root root 173 Jan 4 16:45 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 412 Jan 4 16:45 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 8648791 Jan 4 16:45 tokenizer.json\n",
+ "\n",
+ "onnx_models/nbroad/deberta-v3-xsmall-squad2/assets:\n",
+ "total 2408\n",
+ "-rw-r--r-- 1 root root 2464616 Jan 4 16:45 spm.model\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save DeBertaForQuestionAnswering in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "Y6IFRy54VdSr",
- "outputId": "e73ff6b5-4843-4567-f198-5adf8c4a64d9",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "onnx_models/nbroad/deberta-v3-xsmall-squad2:\n",
- "total 285424\n",
- "-rw-r--r-- 1 root root 23 Jan 4 16:45 added_tokens.json\n",
- "drwxr-xr-x 2 root root 4096 Jan 4 16:45 assets\n",
- "-rw-r--r-- 1 root root 880 Jan 4 16:45 config.json\n",
- "-rw-r--r-- 1 root root 283599177 Jan 4 16:45 model.onnx\n",
- "-rw-r--r-- 1 root root 173 Jan 4 16:45 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 412 Jan 4 16:45 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 8648791 Jan 4 16:45 tokenizer.json\n",
- "\n",
- "onnx_models/nbroad/deberta-v3-xsmall-squad2/assets:\n",
- "total 2408\n",
- "-rw-r--r-- 1 root root 2464616 Jan 4 16:45 spm.model\n"
- ]
- }
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.2.2\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.2.2\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.3/547.3 kB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `DeBertaForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `DeBertaForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "spanClassifier = DeBertaForQuestionAnswering.loadSavedModel(\n",
+ " ONNX_MODEL,\n",
+ " spark\n",
+ " )\\\n",
+ " .setInputCols([\"document_question\",'document_context'])\\\n",
+ " .setOutputCol(\"answer\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setMaxSentenceLength(512)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "spanClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your DeBertaForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 279412\n",
+ "-rw-r--r-- 1 root root 283642624 Jan 4 16:47 camembert_classification_onnx\n",
+ "-rw-r--r-- 1 root root 2464616 Jan 4 16:47 deberta_spp\n",
+ "drwxr-xr-x 2 root root 4096 Jan 4 16:47 metadata\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny AlbertForQuestionAnswering model in Spark NLP 🚀 pipeline!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+-------+\n",
+ "|result |\n",
+ "+-------+\n",
+ "|[Clara]|\n",
+ "+-------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "document_assembler = MultiDocumentAssembler() \\\n",
+ " .setInputCols([\"question\", \"context\"]) \\\n",
+ " .setOutputCols([\"document_question\", \"document_context\"])\n",
+ "\n",
+ "spanClassifier_loaded = DeBertaForQuestionAnswering.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
+ " .setInputCols([\"document_question\",'document_context'])\\\n",
+ " .setOutputCol(\"answer\")\n",
+ "\n",
+ "pipeline = Pipeline().setStages([\n",
+ " document_assembler,\n",
+ " spanClassifier_loaded\n",
+ "])\n",
+ "\n",
+ "example = spark.createDataFrame([[\"What's my name?\", \"My name is Clara and I live in Berkeley.\"]]).toDF(\"question\", \"context\")\n",
+ "result = pipeline.fit(example).transform(example)\n",
+ "\n",
+ "result.select(\"answer.result\").show(1, False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `DeBertaForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "00d7cad57ef640258c6b0180d5292e99": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5d6f3fc3823340edaa164a23046d0bed",
+ "placeholder": "",
+ "style": "IPY_MODEL_3ebb7187480b4b15bc7631bfafa83ba4",
+ "value": " 8.66M/8.66M [00:00<00:00, 18.6MB/s]"
+ }
+ },
+ "0256b0f9dfd4414dba094da582949d3a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_75023cd7db9b40c585ff011c30d50b91",
+ "max": 282817775,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_9319f7f503e841dea5a1e3ec0f7214ea",
+ "value": 282817775
+ }
+ },
+ "0df5c5ed6d2f4881b5f73e0dc068f808": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0e324df4ba4343f0a4fcd53ba21d8a3b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e2b34a54d285421982a362f3e22035ee",
+ "max": 884,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2c8e8004e95f4af6ac908c447baf6cb1",
+ "value": 884
+ }
+ },
+ "13358ce6b33d47e890b43dd4236aa439": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "14f5573dbc2c41459782bd44e58976ee": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "16bd0df4859849e1b62d6cf314ca46e9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "229eb576df26428fbf5146f9eda58971": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_14f5573dbc2c41459782bd44e58976ee",
+ "placeholder": "",
+ "style": "IPY_MODEL_b5604c1693884d589e41755b66e3c86f",
+ "value": " 283M/283M [00:05<00:00, 57.6MB/s]"
+ }
+ },
+ "25f7ff1545b04aaab894174f020ffc5d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2c8e8004e95f4af6ac908c447baf6cb1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "36272b7e705f418383ccbfc3b0214781": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "38895a270f7045d18b2bf6c867447868": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "3a636c5987b94912ad4ef559afe0ca04": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3a7808f8cbd347c79f84765458a08ce6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b31da4d620204a8ba024c48324764639",
+ "placeholder": "",
+ "style": "IPY_MODEL_47f15a2604f64bd4854cc95a0f76b49d",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "3a8a4455d47240e78f48b8eb624f6b78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3c7c2efc95524a839950b0558bdbf226": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3d45d291cfda42f4a7c67516fa5488ca": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d75e4b099fe84eb684a0e66b8a02982e",
+ "placeholder": "",
+ "style": "IPY_MODEL_8b358f86da884768a0b5926b3edd7a0c",
+ "value": "spm.model: 100%"
+ }
+ },
+ "3ebb7187480b4b15bc7631bfafa83ba4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "40bc62cffee94aab884d27c6a05e5705": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "40f83b7123164f9aa3d0ca0ddf02d0bd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ac1e1ab366bc4d8b8dca9d71ca65a7f6",
+ "placeholder": "",
+ "style": "IPY_MODEL_13358ce6b33d47e890b43dd4236aa439",
+ "value": " 394/394 [00:00<00:00, 21.0kB/s]"
+ }
+ },
+ "428abffd70534dc783e9567fbe149d32": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "431b86ed4ec643e1b1f01b804e8bec41": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_40bc62cffee94aab884d27c6a05e5705",
+ "placeholder": "",
+ "style": "IPY_MODEL_428abffd70534dc783e9567fbe149d32",
+ "value": " 2.46M/2.46M [00:00<00:00, 6.80MB/s]"
+ }
+ },
+ "455962cabd95443da227911716b94d06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_57f30a4b9b534ef8a8b4000a9ec4ea8b",
+ "IPY_MODEL_0256b0f9dfd4414dba094da582949d3a",
+ "IPY_MODEL_229eb576df26428fbf5146f9eda58971"
],
- "source": [
- "!ls -lR {ONNX_MODEL}"
- ]
+ "layout": "IPY_MODEL_980662b7d22f402a9c611b1ccdb6f32b"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "-qcd7uA-VdSr"
- },
- "source": [
- "## Import and Save DeBertaForQuestionAnswering in Spark NLP\n"
- ]
+ "47f15a2604f64bd4854cc95a0f76b49d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "RYwvyCiNVdSr"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "518235d256644f1ab46d08f87a193d97": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "XBhiA0jBVdSr",
- "outputId": "c4c40b34-2275-4e3d-f140-0ca62b65b599",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.2.2\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.2.2\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.3/547.3 kB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
+ "57f30a4b9b534ef8a8b4000a9ec4ea8b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_25f7ff1545b04aaab894174f020ffc5d",
+ "placeholder": "",
+ "style": "IPY_MODEL_e46424a7c71f4405a254aab81b0d2b0a",
+ "value": "pytorch_model.bin: 100%"
+ }
+ },
+ "5d6f3fc3823340edaa164a23046d0bed": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "62740de8ad3445549ef1bb1fb0c58994": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6671b03b737b4a49a656296dd679b19c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_8112ad9f82a1414e9023e7a0512aa20f",
+ "IPY_MODEL_724d76da514e433a854564890ec4e4e3",
+ "IPY_MODEL_00d7cad57ef640258c6b0180d5292e99"
],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_762621335410465cb10ae36de14014b2"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "XqMZOSd0VdSs"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "6bb13ca9c60e4f5f861b0cae87766cfc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "id": "fchkX6LzVdSs",
- "outputId": "0676732d-1fd7-4d27-b359-e597b4b2ad5b",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
+ "6d6573a0406e4b69b94fa6d19fb5260c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d5368a2371544347a97015061f463e04",
+ "max": 18,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6bb13ca9c60e4f5f861b0cae87766cfc",
+ "value": 18
+ }
+ },
+ "6e92901b488e4bb2adbc40a803f236fc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6ec34b182e974b129584c00f99c339ae": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d924104033c34f938bc17afc91c022f5",
+ "placeholder": "",
+ "style": "IPY_MODEL_f2191759bb264d9e9d1fcc6b277bfe59",
+ "value": "config.json: 100%"
+ }
+ },
+ "724d76da514e433a854564890ec4e4e3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_89b949f7aaa44d0eb14ae4a0ff1b83e0",
+ "max": 8656372,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d49fb0de764a421f8bd6ec113ead4bc9",
+ "value": 8656372
+ }
+ },
+ "746d83433e484a0a8de93fcbb5ad29da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb61a83b5a8c47c4981bd49c35af1a42",
+ "max": 2464616,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_f3aa9faf5c034a8d8d7ab245d2d34125",
+ "value": 2464616
+ }
+ },
+ "75023cd7db9b40c585ff011c30d50b91": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "762621335410465cb10ae36de14014b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8112ad9f82a1414e9023e7a0512aa20f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3a8a4455d47240e78f48b8eb624f6b78",
+ "placeholder": "",
+ "style": "IPY_MODEL_fef3940a3e7544559405d75149f6b4d2",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "89b949f7aaa44d0eb14ae4a0ff1b83e0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8b358f86da884768a0b5926b3edd7a0c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "8c6094de1e584aaebf405fbbb66bd19d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8e4e4cfa531a48d2bf71ac9930c4a48d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9319f7f503e841dea5a1e3ec0f7214ea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "93e87b82fc16440393f084a9c762f2ed": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95753053c3cc4dd2995d65b9fbcc536d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9594a396614d45288ad21e75d4bbcfea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_3a7808f8cbd347c79f84765458a08ce6",
+ "IPY_MODEL_d786b0ed9aab4e749802d3a2fa6f4959",
+ "IPY_MODEL_40f83b7123164f9aa3d0ca0ddf02d0bd"
],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "layout": "IPY_MODEL_8e4e4cfa531a48d2bf71ac9930c4a48d"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "zz4vLzrRVdSs"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `DeBertaForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `DeBertaForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n",
- "\n"
- ]
+ "980662b7d22f402a9c611b1ccdb6f32b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "id": "kjFw9387VdSs"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "spanClassifier = DeBertaForQuestionAnswering.loadSavedModel(\n",
- " ONNX_MODEL,\n",
- " spark\n",
- " )\\\n",
- " .setInputCols([\"document_question\",'document_context'])\\\n",
- " .setOutputCol(\"answer\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setMaxSentenceLength(512)"
- ]
+ "a229d8535c4f4341bcc1c0a8506b5b17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_3d45d291cfda42f4a7c67516fa5488ca",
+ "IPY_MODEL_746d83433e484a0a8de93fcbb5ad29da",
+ "IPY_MODEL_431b86ed4ec643e1b1f01b804e8bec41"
+ ],
+ "layout": "IPY_MODEL_93e87b82fc16440393f084a9c762f2ed"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "xI46qquQVdSs"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "ac1e1ab366bc4d8b8dca9d71ca65a7f6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "id": "rRyg0s0SVdSs"
- },
- "outputs": [],
- "source": [
- "spanClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
- ]
+ "b22946eb1eac4259b858b57987f30016": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e2fcf6ea02dc4196ad8c68b18bbc59ce",
+ "IPY_MODEL_6d6573a0406e4b69b94fa6d19fb5260c",
+ "IPY_MODEL_ecf203671bde4fa7b90f0170b4477970"
+ ],
+ "layout": "IPY_MODEL_95753053c3cc4dd2995d65b9fbcc536d"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "XFkIyOcRVdSs"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "b31da4d620204a8ba024c48324764639": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "id": "WlUpGL-SVdSs"
- },
- "outputs": [],
- "source": [
- "!rm -rf {ONNX_MODEL}"
- ]
+ "b5604c1693884d589e41755b66e3c86f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "NUr4tolsVdSs"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your DeBertaForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "c439f91b65524b0783fd7ee953083d69": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8c6094de1e584aaebf405fbbb66bd19d",
+ "placeholder": "",
+ "style": "IPY_MODEL_518235d256644f1ab46d08f87a193d97",
+ "value": "special_tokens_map.json: 100%"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "TAPtwhCJVdSs",
- "outputId": "75a3e6f1-249a-4b61-e8d2-a2d654ae65cc",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 279412\n",
- "-rw-r--r-- 1 root root 283642624 Jan 4 16:47 camembert_classification_onnx\n",
- "-rw-r--r-- 1 root root 2464616 Jan 4 16:47 deberta_spp\n",
- "drwxr-xr-x 2 root root 4096 Jan 4 16:47 metadata\n"
- ]
- }
+ "c8398964efcd4d82b0f7bac2608e1f1d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "cb864906686547ee91cbb96dcfd30dc2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_c439f91b65524b0783fd7ee953083d69",
+ "IPY_MODEL_dc344a0dcff94d0cbf3b8f778ab59c8b",
+ "IPY_MODEL_f49e1f3a6a2a4ba491abc58a3505a2f0"
],
- "source": [
- "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
- ]
+ "layout": "IPY_MODEL_e515aaeab05e421282c16d6fa6c3b808"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Ce4PcxTRVdSs"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny AlbertForQuestionAnswering model in Spark NLP 🚀 pipeline!"
- ]
+ "d49fb0de764a421f8bd6ec113ead4bc9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "id": "77kEyaKYVdSs",
- "outputId": "1ceb6bb2-0557-4b7b-f2f6-4d822f98ce1e",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "+-------+\n",
- "|result |\n",
- "+-------+\n",
- "|[Clara]|\n",
- "+-------+\n",
- "\n"
- ]
- }
+ "d5368a2371544347a97015061f463e04": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d75e4b099fe84eb684a0e66b8a02982e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d786b0ed9aab4e749802d3a2fa6f4959": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ed2957c1ff7c40c58b618c384b363c6e",
+ "max": 394,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_c8398964efcd4d82b0f7bac2608e1f1d",
+ "value": 394
+ }
+ },
+ "d924104033c34f938bc17afc91c022f5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "dc344a0dcff94d0cbf3b8f778ab59c8b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_62740de8ad3445549ef1bb1fb0c58994",
+ "max": 156,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_16bd0df4859849e1b62d6cf314ca46e9",
+ "value": 156
+ }
+ },
+ "e2b34a54d285421982a362f3e22035ee": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e2fcf6ea02dc4196ad8c68b18bbc59ce": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3a636c5987b94912ad4ef559afe0ca04",
+ "placeholder": "",
+ "style": "IPY_MODEL_36272b7e705f418383ccbfc3b0214781",
+ "value": "added_tokens.json: 100%"
+ }
+ },
+ "e46424a7c71f4405a254aab81b0d2b0a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e515aaeab05e421282c16d6fa6c3b808": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "eb1aaa375c83484e8b7b3c11b207d44b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ecf203671bde4fa7b90f0170b4477970": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f9f880ea45b24a249ea82caa314685ac",
+ "placeholder": "",
+ "style": "IPY_MODEL_38895a270f7045d18b2bf6c867447868",
+ "value": " 18.0/18.0 [00:00<00:00, 1.09kB/s]"
+ }
+ },
+ "ed2957c1ff7c40c58b618c384b363c6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f2191759bb264d9e9d1fcc6b277bfe59": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "f3aa9faf5c034a8d8d7ab245d2d34125": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "f49e1f3a6a2a4ba491abc58a3505a2f0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fd14fcc2bff94248a24df3316e79900c",
+ "placeholder": "",
+ "style": "IPY_MODEL_6e92901b488e4bb2adbc40a803f236fc",
+ "value": " 156/156 [00:00<00:00, 7.27kB/s]"
+ }
+ },
+ "f9254e58721a48248f1730e695aded32": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6ec34b182e974b129584c00f99c339ae",
+ "IPY_MODEL_0e324df4ba4343f0a4fcd53ba21d8a3b",
+ "IPY_MODEL_fabf92e8f40d43338ca4594330c990a3"
],
- "source": [
- "document_assembler = MultiDocumentAssembler() \\\n",
- " .setInputCols([\"question\", \"context\"]) \\\n",
- " .setOutputCols([\"document_question\", \"document_context\"])\n",
- "\n",
- "spanClassifier_loaded = DeBertaForQuestionAnswering.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
- " .setInputCols([\"document_question\",'document_context'])\\\n",
- " .setOutputCol(\"answer\")\n",
- "\n",
- "pipeline = Pipeline().setStages([\n",
- " document_assembler,\n",
- " spanClassifier_loaded\n",
- "])\n",
- "\n",
- "example = spark.createDataFrame([[\"What's my name?\", \"My name is Clara and I live in Berkeley.\"]]).toDF(\"question\", \"context\")\n",
- "result = pipeline.fit(example).transform(example)\n",
- "\n",
- "result.select(\"answer.result\").show(1, False)"
- ]
+ "layout": "IPY_MODEL_3c7c2efc95524a839950b0558bdbf226"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "hkCtdRD3VdSs"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `DeBertaForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
+ "f9f880ea45b24a249ea82caa314685ac": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fabf92e8f40d43338ca4594330c990a3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0df5c5ed6d2f4881b5f73e0dc068f808",
+ "placeholder": "",
+ "style": "IPY_MODEL_eb1aaa375c83484e8b7b3c11b207d44b",
+ "value": " 884/884 [00:00<00:00, 60.7kB/s]"
+ }
+ },
+ "fb61a83b5a8c47c4981bd49c35af1a42": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fd14fcc2bff94248a24df3316e79900c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fef3940a3e7544559405d75149f6b4d2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
}
- ],
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4"
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "f9254e58721a48248f1730e695aded32": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_6ec34b182e974b129584c00f99c339ae",
- "IPY_MODEL_0e324df4ba4343f0a4fcd53ba21d8a3b",
- "IPY_MODEL_fabf92e8f40d43338ca4594330c990a3"
- ],
- "layout": "IPY_MODEL_3c7c2efc95524a839950b0558bdbf226"
- }
- },
- "6ec34b182e974b129584c00f99c339ae": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d924104033c34f938bc17afc91c022f5",
- "placeholder": "",
- "style": "IPY_MODEL_f2191759bb264d9e9d1fcc6b277bfe59",
- "value": "config.json: 100%"
- }
- },
- "0e324df4ba4343f0a4fcd53ba21d8a3b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e2b34a54d285421982a362f3e22035ee",
- "max": 884,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_2c8e8004e95f4af6ac908c447baf6cb1",
- "value": 884
- }
- },
- "fabf92e8f40d43338ca4594330c990a3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0df5c5ed6d2f4881b5f73e0dc068f808",
- "placeholder": "",
- "style": "IPY_MODEL_eb1aaa375c83484e8b7b3c11b207d44b",
- "value": " 884/884 [00:00<00:00, 60.7kB/s]"
- }
- },
- "3c7c2efc95524a839950b0558bdbf226": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d924104033c34f938bc17afc91c022f5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f2191759bb264d9e9d1fcc6b277bfe59": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "e2b34a54d285421982a362f3e22035ee": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2c8e8004e95f4af6ac908c447baf6cb1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "0df5c5ed6d2f4881b5f73e0dc068f808": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "eb1aaa375c83484e8b7b3c11b207d44b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "455962cabd95443da227911716b94d06": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_57f30a4b9b534ef8a8b4000a9ec4ea8b",
- "IPY_MODEL_0256b0f9dfd4414dba094da582949d3a",
- "IPY_MODEL_229eb576df26428fbf5146f9eda58971"
- ],
- "layout": "IPY_MODEL_980662b7d22f402a9c611b1ccdb6f32b"
- }
- },
- "57f30a4b9b534ef8a8b4000a9ec4ea8b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_25f7ff1545b04aaab894174f020ffc5d",
- "placeholder": "",
- "style": "IPY_MODEL_e46424a7c71f4405a254aab81b0d2b0a",
- "value": "pytorch_model.bin: 100%"
- }
- },
- "0256b0f9dfd4414dba094da582949d3a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_75023cd7db9b40c585ff011c30d50b91",
- "max": 282817775,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_9319f7f503e841dea5a1e3ec0f7214ea",
- "value": 282817775
- }
- },
- "229eb576df26428fbf5146f9eda58971": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_14f5573dbc2c41459782bd44e58976ee",
- "placeholder": "",
- "style": "IPY_MODEL_b5604c1693884d589e41755b66e3c86f",
- "value": " 283M/283M [00:05<00:00, 57.6MB/s]"
- }
- },
- "980662b7d22f402a9c611b1ccdb6f32b": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "25f7ff1545b04aaab894174f020ffc5d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e46424a7c71f4405a254aab81b0d2b0a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "75023cd7db9b40c585ff011c30d50b91": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9319f7f503e841dea5a1e3ec0f7214ea": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "14f5573dbc2c41459782bd44e58976ee": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b5604c1693884d589e41755b66e3c86f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "9594a396614d45288ad21e75d4bbcfea": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_3a7808f8cbd347c79f84765458a08ce6",
- "IPY_MODEL_d786b0ed9aab4e749802d3a2fa6f4959",
- "IPY_MODEL_40f83b7123164f9aa3d0ca0ddf02d0bd"
- ],
- "layout": "IPY_MODEL_8e4e4cfa531a48d2bf71ac9930c4a48d"
- }
- },
- "3a7808f8cbd347c79f84765458a08ce6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b31da4d620204a8ba024c48324764639",
- "placeholder": "",
- "style": "IPY_MODEL_47f15a2604f64bd4854cc95a0f76b49d",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "d786b0ed9aab4e749802d3a2fa6f4959": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ed2957c1ff7c40c58b618c384b363c6e",
- "max": 394,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_c8398964efcd4d82b0f7bac2608e1f1d",
- "value": 394
- }
- },
- "40f83b7123164f9aa3d0ca0ddf02d0bd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ac1e1ab366bc4d8b8dca9d71ca65a7f6",
- "placeholder": "",
- "style": "IPY_MODEL_13358ce6b33d47e890b43dd4236aa439",
- "value": " 394/394 [00:00<00:00, 21.0kB/s]"
- }
- },
- "8e4e4cfa531a48d2bf71ac9930c4a48d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b31da4d620204a8ba024c48324764639": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "47f15a2604f64bd4854cc95a0f76b49d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ed2957c1ff7c40c58b618c384b363c6e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c8398964efcd4d82b0f7bac2608e1f1d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "ac1e1ab366bc4d8b8dca9d71ca65a7f6": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "13358ce6b33d47e890b43dd4236aa439": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "a229d8535c4f4341bcc1c0a8506b5b17": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_3d45d291cfda42f4a7c67516fa5488ca",
- "IPY_MODEL_746d83433e484a0a8de93fcbb5ad29da",
- "IPY_MODEL_431b86ed4ec643e1b1f01b804e8bec41"
- ],
- "layout": "IPY_MODEL_93e87b82fc16440393f084a9c762f2ed"
- }
- },
- "3d45d291cfda42f4a7c67516fa5488ca": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d75e4b099fe84eb684a0e66b8a02982e",
- "placeholder": "",
- "style": "IPY_MODEL_8b358f86da884768a0b5926b3edd7a0c",
- "value": "spm.model: 100%"
- }
- },
- "746d83433e484a0a8de93fcbb5ad29da": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fb61a83b5a8c47c4981bd49c35af1a42",
- "max": 2464616,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_f3aa9faf5c034a8d8d7ab245d2d34125",
- "value": 2464616
- }
- },
- "431b86ed4ec643e1b1f01b804e8bec41": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_40bc62cffee94aab884d27c6a05e5705",
- "placeholder": "",
- "style": "IPY_MODEL_428abffd70534dc783e9567fbe149d32",
- "value": " 2.46M/2.46M [00:00<00:00, 6.80MB/s]"
- }
- },
- "93e87b82fc16440393f084a9c762f2ed": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d75e4b099fe84eb684a0e66b8a02982e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8b358f86da884768a0b5926b3edd7a0c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "fb61a83b5a8c47c4981bd49c35af1a42": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f3aa9faf5c034a8d8d7ab245d2d34125": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "40bc62cffee94aab884d27c6a05e5705": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "428abffd70534dc783e9567fbe149d32": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "6671b03b737b4a49a656296dd679b19c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_8112ad9f82a1414e9023e7a0512aa20f",
- "IPY_MODEL_724d76da514e433a854564890ec4e4e3",
- "IPY_MODEL_00d7cad57ef640258c6b0180d5292e99"
- ],
- "layout": "IPY_MODEL_762621335410465cb10ae36de14014b2"
- }
- },
- "8112ad9f82a1414e9023e7a0512aa20f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3a8a4455d47240e78f48b8eb624f6b78",
- "placeholder": "",
- "style": "IPY_MODEL_fef3940a3e7544559405d75149f6b4d2",
- "value": "tokenizer.json: 100%"
- }
- },
- "724d76da514e433a854564890ec4e4e3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_89b949f7aaa44d0eb14ae4a0ff1b83e0",
- "max": 8656372,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_d49fb0de764a421f8bd6ec113ead4bc9",
- "value": 8656372
- }
- },
- "00d7cad57ef640258c6b0180d5292e99": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5d6f3fc3823340edaa164a23046d0bed",
- "placeholder": "",
- "style": "IPY_MODEL_3ebb7187480b4b15bc7631bfafa83ba4",
- "value": " 8.66M/8.66M [00:00<00:00, 18.6MB/s]"
- }
- },
- "762621335410465cb10ae36de14014b2": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3a8a4455d47240e78f48b8eb624f6b78": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "fef3940a3e7544559405d75149f6b4d2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "89b949f7aaa44d0eb14ae4a0ff1b83e0": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d49fb0de764a421f8bd6ec113ead4bc9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "5d6f3fc3823340edaa164a23046d0bed": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3ebb7187480b4b15bc7631bfafa83ba4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b22946eb1eac4259b858b57987f30016": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_e2fcf6ea02dc4196ad8c68b18bbc59ce",
- "IPY_MODEL_6d6573a0406e4b69b94fa6d19fb5260c",
- "IPY_MODEL_ecf203671bde4fa7b90f0170b4477970"
- ],
- "layout": "IPY_MODEL_95753053c3cc4dd2995d65b9fbcc536d"
- }
- },
- "e2fcf6ea02dc4196ad8c68b18bbc59ce": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3a636c5987b94912ad4ef559afe0ca04",
- "placeholder": "",
- "style": "IPY_MODEL_36272b7e705f418383ccbfc3b0214781",
- "value": "added_tokens.json: 100%"
- }
- },
- "6d6573a0406e4b69b94fa6d19fb5260c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d5368a2371544347a97015061f463e04",
- "max": 18,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_6bb13ca9c60e4f5f861b0cae87766cfc",
- "value": 18
- }
- },
- "ecf203671bde4fa7b90f0170b4477970": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f9f880ea45b24a249ea82caa314685ac",
- "placeholder": "",
- "style": "IPY_MODEL_38895a270f7045d18b2bf6c867447868",
- "value": " 18.0/18.0 [00:00<00:00, 1.09kB/s]"
- }
- },
- "95753053c3cc4dd2995d65b9fbcc536d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3a636c5987b94912ad4ef559afe0ca04": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "36272b7e705f418383ccbfc3b0214781": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "d5368a2371544347a97015061f463e04": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6bb13ca9c60e4f5f861b0cae87766cfc": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "f9f880ea45b24a249ea82caa314685ac": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "38895a270f7045d18b2bf6c867447868": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "cb864906686547ee91cbb96dcfd30dc2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_c439f91b65524b0783fd7ee953083d69",
- "IPY_MODEL_dc344a0dcff94d0cbf3b8f778ab59c8b",
- "IPY_MODEL_f49e1f3a6a2a4ba491abc58a3505a2f0"
- ],
- "layout": "IPY_MODEL_e515aaeab05e421282c16d6fa6c3b808"
- }
- },
- "c439f91b65524b0783fd7ee953083d69": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8c6094de1e584aaebf405fbbb66bd19d",
- "placeholder": "",
- "style": "IPY_MODEL_518235d256644f1ab46d08f87a193d97",
- "value": "special_tokens_map.json: 100%"
- }
- },
- "dc344a0dcff94d0cbf3b8f778ab59c8b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_62740de8ad3445549ef1bb1fb0c58994",
- "max": 156,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_16bd0df4859849e1b62d6cf314ca46e9",
- "value": 156
- }
- },
- "f49e1f3a6a2a4ba491abc58a3505a2f0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fd14fcc2bff94248a24df3316e79900c",
- "placeholder": "",
- "style": "IPY_MODEL_6e92901b488e4bb2adbc40a803f236fc",
- "value": " 156/156 [00:00<00:00, 7.27kB/s]"
- }
- },
- "e515aaeab05e421282c16d6fa6c3b808": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8c6094de1e584aaebf405fbbb66bd19d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "518235d256644f1ab46d08f87a193d97": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "62740de8ad3445549ef1bb1fb0c58994": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "16bd0df4859849e1b62d6cf314ca46e9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "fd14fcc2bff94248a24df3316e79900c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6e92901b488e4bb2adbc40a803f236fc": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
- },
- "accelerator": "GPU"
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForSequenceClassification.ipynb
index 8818fb48baadae..aaaf55ce572ab0 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForSequenceClassification.ipynb
@@ -44,7 +44,7 @@
},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
]
},
{
@@ -102,7 +102,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow==2.11.0"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow==2.11.0"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForTokenClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForTokenClassification.ipynb
index b5557cd4ee8677..053ca34cc7aa53 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForTokenClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForTokenClassification.ipynb
@@ -1,3276 +1,3090 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForTokenClassification.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX DeBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `DeBertaForTokenClassification` is only available since in `Spark NLP 5.1.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import DeBerta models trained/fine-tuned for token classification via `DeBertaForTokenClassification` or `TFDeBertaForTokenClassification`. These models are usually under `Token Classification` category and have `bert` in their labels\n",
+ "- Reference: [TFDeBertaForTokenClassification](https://huggingface.co/docs/transformers/model_doc/deberta#transformers.TFDebertaForTokenClassification)\n",
+ "- Some [example models](https://huggingface.co/models?filter=deberta&pipeline_tag=token-classification)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases\n",
+ "- Albert uses SentencePiece, so we will have to install that as well"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "HMDvz8L8edjT"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DeBertaForTokenClassification.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m31.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m62.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m42.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m41.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m80.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m41.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m48.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m37.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m104.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m68.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m22.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m50.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m53.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m54.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m95.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m104.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m103.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow==2.11.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [davanstrien/deberta-v3-base_fine_tuned_food_ner](https://huggingface.co/davanstrien/deberta-v3-base_fine_tuned_food_ner) model from HuggingFace as an example\n",
+ "- In addition to `TFDeBertaForTokenClassification` we also need to save the `DeBertaTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "rllRrPX5edjW"
- },
- "source": [
- "## Import ONNX DeBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `DeBertaForTokenClassification` is only available since in `Spark NLP 5.1.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import DeBerta models trained/fine-tuned for token classification via `DeBertaForTokenClassification` or `TFDeBertaForTokenClassification`. These models are usually under `Token Classification` category and have `bert` in their labels\n",
- "- Reference: [TFDeBertaForTokenClassification](https://huggingface.co/docs/transformers/model_doc/deberta#transformers.TFDebertaForTokenClassification)\n",
- "- Some [example models](https://huggingface.co/models?filter=deberta&pipeline_tag=token-classification)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:72: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "BxfHE_l9edjW"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "28f58f45348b490aa1aa15e42555927f",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/2.40k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "QailgffhedjX"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases\n",
- "- Albert uses SentencePiece, so we will have to install that as well"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export to ONNX.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "JXSYOIbeedjX",
- "outputId": "f3c4347a-8851-4500-8f9c-2e6a6a366178",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "016a392548494bb5afcc686153f07981",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m31.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m62.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m42.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m41.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m80.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m41.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m48.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m37.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m104.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m68.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m83.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m22.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m50.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m53.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m54.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m95.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m104.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m103.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow==2.11.0"
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/735M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "cK405Yo9edjY"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7b7c7492f8c7418886f884d85c8f9d7d",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [davanstrien/deberta-v3-base_fine_tuned_food_ner](https://huggingface.co/davanstrien/deberta-v3-base_fine_tuned_food_ner) model from HuggingFace as an example\n",
- "- In addition to `TFDeBertaForTokenClassification` we also need to save the `DeBertaTokenizer`. This is the same for every model, these are assets needed for tokenization inside Spark NLP."
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/399 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "11QN3u_WedjY",
- "outputId": "17636b6f-1e84-46f5-daef-a94c0f52f229",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 782,
- "referenced_widgets": [
- "28f58f45348b490aa1aa15e42555927f",
- "e932157e8a3c4190871f8e7f094cf455",
- "75901c3613144a5197097ecc977dc52c",
- "5ffe04b3274f46cdb1cd07a24150a94e",
- "edbffa1db81a4f53809097ecba9ec7a0",
- "0436eef144e941178929119487eb1d6a",
- "71e530f0a9c84f568678e5135974ab97",
- "1944e124a9c44536a25896c74f97a792",
- "ffb685c7bf4147ddb7691207d5aeb171",
- "39fe3520c2d94087bfa0a2c316c9c153",
- "3fbcca93c88b43de8de92fabd4bdaada",
- "016a392548494bb5afcc686153f07981",
- "349e3bfd7bd34a16939e0d427c8a039b",
- "fd125d554b364f2fb05ab3e06b6b6d48",
- "818a51ebfa3c4f698e748f6356ab275c",
- "9be87e2eeaca45c28174163186031aa4",
- "8845d01b94c544f8890f72e3d1189464",
- "051f7a5e5293438a86f448bcf3ab3628",
- "5a8123d27a2c45fc8d771b5e9d586590",
- "ed5cb867566e496482c61ec98e195ac3",
- "db688ccf3e104772ad402ba7745cdd1a",
- "e8fb168c53834a2992081a6f29c02a8f",
- "7b7c7492f8c7418886f884d85c8f9d7d",
- "ccb33de54b864163ad928ff7dbe25f55",
- "ff17312cb37f4481a9cee79d436f4a88",
- "41e2d87e3b964047bc9560446309e918",
- "884cc437587c40b18b93437b7e06d2ee",
- "2595e0c9f84940918a9ea7da5056b94f",
- "32fb58a8b61643598f3601881a6247fc",
- "248701fdac1149a48aaaa367efe88ed7",
- "11050a770a9e47138718b81150f2bea3",
- "851dc22f9dac4b0184387d0ca28ede77",
- "d210b59504a9405aad9458ca8f3f205a",
- "5986a4862656454282550f48aff8232b",
- "348e99e83a2b460f908fc75198839d2c",
- "5f96ce5f2a19474a83dafa3135fc13ea",
- "222d6f2198964b25a1727603efb244a8",
- "40580e5c537d48bd83a1c558ee9c2681",
- "4de2922eb9c44317b0095c13b053379c",
- "b6719791e8904793a489ab2bddf1d9e9",
- "72632f0fef104b10a632d5ecc179a2c2",
- "42c290ae973341f6bfc2a08937a17685",
- "d82005f661bb4f5182ea30a602532ecf",
- "14c1845f589d474d8117a9ad21982b3a",
- "e2c9fbe59d864951ba3cd1f4cfc96e49",
- "ac47d733b1c8432e95a721628881fc91",
- "1ead2b8c273848adbce31e9627dbda26",
- "6a97f2fa4d0f4a9cad417e64c32ea7a8",
- "41facf9b456c4304acd3c763221a15b1",
- "1c7b46f5656c4e06b35d4b898d1ee406",
- "0e6fa53ead5d4017b270d12bff278e97",
- "85e50550fc71471495e132d7f84ed0ab",
- "29022bc352014727a2567f1f7f97cfd9",
- "4b4abee52a83499cb3ade8e90bf150dc",
- "032f00fa51294589bb39278998176925",
- "ccf87863deac46178cfbb70bebe37f54",
- "db845fbcd1014d9cb4bd6e1b45d24032",
- "f8f8d4405b444d2699b38e70afec9350",
- "91a1588c6b2c4ddba09ce37b439ed680",
- "40591a7f0aa34a499d5f1d79d388f0bf",
- "c9946529acce4be5991ce6ddfc11e2a1",
- "6a70306424044419a88929531ad939af",
- "544e083e923d472b91f027abb1c5fb9c",
- "7cb72e07c7594512ba36e92178a48a71",
- "81376469b6934cb4b7c84881fd35b263",
- "98cbfcbc57394d4eac49a52a86fa8344",
- "b61be87ac3bb4533a7230f308471c9e6",
- "bb2297ca44594e6f94ff3ea9c8fa3d3f",
- "e8e740bbaf4e49c8b1c8da5bf1c14470",
- "12934ef1f8994658b6af18f11a58120f",
- "e3698564f53148a087e561a2eae6d737",
- "cd5281764b48462c9ed417c3b9a8d997",
- "7cfe733b2327414f9b20409ca6cea0f1",
- "5326823753034500b18451917546424b",
- "6fa78d78d4c043c49de4ec7aa76fa32d",
- "46815e9165f941da94ad23abb65d633a",
- "ac72abed72d440c5a8bdc99a34366a68"
- ]
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5986a4862656454282550f48aff8232b",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:72: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/2.40k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "28f58f45348b490aa1aa15e42555927f"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/735M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "016a392548494bb5afcc686153f07981"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/399 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "7b7c7492f8c7418886f884d85c8f9d7d"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "spm.model: 0%| | 0.00/2.46M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "5986a4862656454282550f48aff8232b"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/8.66M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "e2c9fbe59d864951ba3cd1f4cfc96e49"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "added_tokens.json: 0%| | 0.00/23.0 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "ccf87863deac46178cfbb70bebe37f54"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/173 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "b61be87ac3bb4533a7230f308471c9e6"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:561: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " torch.tensor(mid - 1).type_as(relative_pos),\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:565: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " torch.ceil(torch.log(abs_pos / mid) / torch.log(torch.tensor((max_position - 1) / mid)) * (mid - 1)) + mid\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
- " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
- " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
- " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:816: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
- " if key_layer.size(-2) != query_layer.size(-2):\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:112: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " output = input.masked_fill(rmask, torch.tensor(torch.finfo(input.dtype).min))\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForTokenClassification\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'davanstrien/deberta-v3-base_fine_tuned_food_ner'\n",
- "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForTokenClassification.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(ONNX_MODEL)"
+ "text/plain": [
+ "spm.model: 0%| | 0.00/2.46M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "awSrdgUtedjY"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e2c9fbe59d864951ba3cd1f4cfc96e49",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/8.66M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "id": "DbaWlsRQedjZ",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "15def99f-6a66-496d-af2b-97081b7738a5"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ccf87863deac46178cfbb70bebe37f54",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 729924\n",
- "-rw-r--r-- 1 root root 23 Jan 4 17:56 added_tokens.json\n",
- "-rw-r--r-- 1 root root 2423 Jan 4 17:56 config.json\n",
- "-rw-r--r-- 1 root root 736301299 Jan 4 17:56 model.onnx\n",
- "-rw-r--r-- 1 root root 173 Jan 4 17:56 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 2464616 Jan 4 17:56 spm.model\n",
- "-rw-r--r-- 1 root root 412 Jan 4 17:56 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 8648791 Jan 4 17:56 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {ONNX_MODEL}"
+ "text/plain": [
+ "added_tokens.json: 0%| | 0.00/23.0 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "id": "XE6E5QcVedjZ"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b61be87ac3bb4533a7230f308471c9e6",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [],
- "source": [
- "!mkdir {ONNX_MODEL}/assets"
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/173 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "5SvBrtdUedjZ"
- },
- "source": [
- "- As you can see, we need to move `spm.model` from the tokenizer to assets folder which Spark NLP will look for\n",
- "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:561: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " torch.tensor(mid - 1).type_as(relative_pos),\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:565: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " torch.ceil(torch.log(abs_pos / mid) / torch.log(torch.tensor((max_position - 1) / mid)) * (mid - 1)) + mid\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:724: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:803: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " scale = torch.sqrt(torch.tensor(pos_key_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:815: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " scale = torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:816: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+ " if key_layer.size(-2) != query_layer.size(-2):\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/deberta_v2/modeling_deberta_v2.py:112: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " output = input.masked_fill(rmask, torch.tensor(torch.finfo(input.dtype).min))\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForTokenClassification\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'davanstrien/deberta-v3-base_fine_tuned_food_ner'\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForTokenClassification.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(ONNX_MODEL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "LgBjd6tDedjZ"
- },
- "outputs": [],
- "source": [
- "# get label2id dictionary\n",
- "labels = ort_model.config.label2id\n",
- "# sort the dictionary based on the id\n",
- "labels = sorted(labels, key=labels.get)\n",
- "\n",
- "with open(ONNX_MODEL +'/assets/labels.txt', 'w') as f:\n",
- " f.write('\\n'.join(labels))"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 729924\n",
+ "-rw-r--r-- 1 root root 23 Jan 4 17:56 added_tokens.json\n",
+ "-rw-r--r-- 1 root root 2423 Jan 4 17:56 config.json\n",
+ "-rw-r--r-- 1 root root 736301299 Jan 4 17:56 model.onnx\n",
+ "-rw-r--r-- 1 root root 173 Jan 4 17:56 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 2464616 Jan 4 17:56 spm.model\n",
+ "-rw-r--r-- 1 root root 412 Jan 4 17:56 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 8648791 Jan 4 17:56 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `spm.model` from the tokenizer to assets folder which Spark NLP will look for\n",
+ "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get label2id dictionary\n",
+ "labels = ort_model.config.label2id\n",
+ "# sort the dictionary based on the id\n",
+ "labels = sorted(labels, key=labels.get)\n",
+ "\n",
+ "with open(ONNX_MODEL +'/assets/labels.txt', 'w') as f:\n",
+ " f.write('\\n'.join(labels))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/spm.model {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `spm.model` and `labels.txt` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "Sxe8im1nedjZ"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/spm.model {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/davanstrien/deberta-v3-base_fine_tuned_food_ner:\n",
+ "total 727520\n",
+ "-rw-r--r-- 1 root root 23 Jan 4 17:56 added_tokens.json\n",
+ "drwxr-xr-x 2 root root 4096 Jan 4 17:56 assets\n",
+ "-rw-r--r-- 1 root root 2423 Jan 4 17:56 config.json\n",
+ "-rw-r--r-- 1 root root 736301299 Jan 4 17:56 model.onnx\n",
+ "-rw-r--r-- 1 root root 173 Jan 4 17:56 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 412 Jan 4 17:56 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 8648791 Jan 4 17:56 tokenizer.json\n",
+ "\n",
+ "onnx_models/davanstrien/deberta-v3-base_fine_tuned_food_ner/assets:\n",
+ "total 2412\n",
+ "-rw-r--r-- 1 root root 331 Jan 4 17:56 labels.txt\n",
+ "-rw-r--r-- 1 root root 2464616 Jan 4 17:56 spm.model\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save DeBertaForTokenClassification in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "xQrrcEhsedjZ"
- },
- "source": [
- "Voila! We have our `spm.model` and `labels.txt` inside assets directory"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.2.2\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.2.2\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `DeBertaForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `DeBertaForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "tokenClassifier = DeBertaForTokenClassification.loadSavedModel(\n",
+ " ONNX_MODEL,\n",
+ " spark\n",
+ " )\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"ner\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setMaxSentenceLength(128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your DeBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 721576\n",
+ "-rw-r--r-- 1 root root 736413822 Jan 4 17:57 deberta_classification_onnx\n",
+ "-rw-r--r-- 1 root root 2464616 Jan 4 17:57 deberta_spp\n",
+ "drwxr-xr-x 3 root root 4096 Jan 4 17:57 fields\n",
+ "drwxr-xr-x 2 root root 4096 Jan 4 17:57 metadata\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny DeBertaForTokenClassification model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenClassifier_loaded = DeBertaForTokenClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"ner\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see what labels were used to train this model via `getClasses` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['I-UNIT',\n",
+ " 'U-PHYSICAL_QUALITY',\n",
+ " 'L-COLOR',\n",
+ " 'U-UNIT',\n",
+ " 'U-COLOR',\n",
+ " 'I-QUANTITY',\n",
+ " 'B-FOOD',\n",
+ " 'U-PART',\n",
+ " 'U-TASTE',\n",
+ " 'U-PROCESS',\n",
+ " 'L-PURPOSE',\n",
+ " 'I-PART',\n",
+ " 'I-PURPOSE',\n",
+ " 'B-PURPOSE',\n",
+ " 'L-PROCESS',\n",
+ " 'L-QUANTITY',\n",
+ " 'B-UNIT',\n",
+ " 'B-QUANTITY',\n",
+ " 'U-FOOD',\n",
+ " 'B-PART',\n",
+ " 'I-TASTE',\n",
+ " 'B-TASTE',\n",
+ " 'L-TASTE',\n",
+ " 'L-FOOD',\n",
+ " 'I-PROCESS',\n",
+ " 'L-PHYSICAL_QUALITY',\n",
+ " 'B-PHYSICAL_QUALITY',\n",
+ " 'I-PHYSICAL_QUALITY',\n",
+ " 'I-FOOD',\n",
+ " 'B-PROCESS',\n",
+ " 'B-COLOR',\n",
+ " 'O',\n",
+ " 'L-PART',\n",
+ " 'L-UNIT',\n",
+ " 'U-QUANTITY']"
]
- },
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# .getClasses was introduced in spark-nlp==3.4.0\n",
+ "tokenClassifier_loaded.getClasses()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "8ulfThXDedjZ",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "b0494611-722a-4e98-e19c-a1f6bfbf03df"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "onnx_models/davanstrien/deberta-v3-base_fine_tuned_food_ner:\n",
- "total 727520\n",
- "-rw-r--r-- 1 root root 23 Jan 4 17:56 added_tokens.json\n",
- "drwxr-xr-x 2 root root 4096 Jan 4 17:56 assets\n",
- "-rw-r--r-- 1 root root 2423 Jan 4 17:56 config.json\n",
- "-rw-r--r-- 1 root root 736301299 Jan 4 17:56 model.onnx\n",
- "-rw-r--r-- 1 root root 173 Jan 4 17:56 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 412 Jan 4 17:56 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 8648791 Jan 4 17:56 tokenizer.json\n",
- "\n",
- "onnx_models/davanstrien/deberta-v3-base_fine_tuned_food_ner/assets:\n",
- "total 2412\n",
- "-rw-r--r-- 1 root root 331 Jan 4 17:56 labels.txt\n",
- "-rw-r--r-- 1 root root 2464616 Jan 4 17:56 spm.model\n"
- ]
- }
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+-----------------+--------------------+\n",
+ "| text| result|\n",
+ "+-----------------+--------------------+\n",
+ "|I love spicy food|[O, O, U-TASTE, L...|\n",
+ "+-----------------+--------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "document_assembler = DocumentAssembler() \\\n",
+ " .setInputCol('text') \\\n",
+ " .setOutputCol('document')\n",
+ "\n",
+ "tokenizer = Tokenizer() \\\n",
+ " .setInputCols(['document']) \\\n",
+ " .setOutputCol('token')\n",
+ "\n",
+ "pipeline = Pipeline(stages=[\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " tokenClassifier_loaded\n",
+ "])\n",
+ "\n",
+ "# couple of simple examples\n",
+ "example = spark.createDataFrame([[\"I love spicy food\"]]).toDF(\"text\")\n",
+ "\n",
+ "result = pipeline.fit(example).transform(example)\n",
+ "\n",
+ "# result is a DataFrame\n",
+ "result.select(\"text\", \"ner.result\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `DeBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "016a392548494bb5afcc686153f07981": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_349e3bfd7bd34a16939e0d427c8a039b",
+ "IPY_MODEL_fd125d554b364f2fb05ab3e06b6b6d48",
+ "IPY_MODEL_818a51ebfa3c4f698e748f6356ab275c"
],
- "source": [
- "!ls -lR {ONNX_MODEL}"
- ]
+ "layout": "IPY_MODEL_9be87e2eeaca45c28174163186031aa4"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "4uh4zFfcedja"
- },
- "source": [
- "## Import and Save DeBertaForTokenClassification in Spark NLP\n"
- ]
+ "032f00fa51294589bb39278998176925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "_f6ZMFdEedja"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "0436eef144e941178929119487eb1d6a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "x9mEl8tfedja",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "b1461a13-1779-4647-8db9-d43d4b705725"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.2.2\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.2.2\n"
- ]
- }
+ "051f7a5e5293438a86f448bcf3ab3628": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0e6fa53ead5d4017b270d12bff278e97": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "11050a770a9e47138718b81150f2bea3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "12934ef1f8994658b6af18f11a58120f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_46815e9165f941da94ad23abb65d633a",
+ "placeholder": "",
+ "style": "IPY_MODEL_ac72abed72d440c5a8bdc99a34366a68",
+ "value": " 173/173 [00:00<00:00, 11.0kB/s]"
+ }
+ },
+ "14c1845f589d474d8117a9ad21982b3a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1944e124a9c44536a25896c74f97a792": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1c7b46f5656c4e06b35d4b898d1ee406": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1ead2b8c273848adbce31e9627dbda26": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_85e50550fc71471495e132d7f84ed0ab",
+ "max": 8656551,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_29022bc352014727a2567f1f7f97cfd9",
+ "value": 8656551
+ }
+ },
+ "222d6f2198964b25a1727603efb244a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d82005f661bb4f5182ea30a602532ecf",
+ "placeholder": "",
+ "style": "IPY_MODEL_14c1845f589d474d8117a9ad21982b3a",
+ "value": " 2.46M/2.46M [00:00<00:00, 6.60MB/s]"
+ }
+ },
+ "248701fdac1149a48aaaa367efe88ed7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2595e0c9f84940918a9ea7da5056b94f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "28f58f45348b490aa1aa15e42555927f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e932157e8a3c4190871f8e7f094cf455",
+ "IPY_MODEL_75901c3613144a5197097ecc977dc52c",
+ "IPY_MODEL_5ffe04b3274f46cdb1cd07a24150a94e"
],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_edbffa1db81a4f53809097ecba9ec7a0"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "cIpIKaFVedja"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "29022bc352014727a2567f1f7f97cfd9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "id": "tQf83Yqaedja",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "f930bab8-7a70-48da-b098-a4d971acb162"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
+ "32fb58a8b61643598f3601881a6247fc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "348e99e83a2b460f908fc75198839d2c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4de2922eb9c44317b0095c13b053379c",
+ "placeholder": "",
+ "style": "IPY_MODEL_b6719791e8904793a489ab2bddf1d9e9",
+ "value": "spm.model: 100%"
+ }
+ },
+ "349e3bfd7bd34a16939e0d427c8a039b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8845d01b94c544f8890f72e3d1189464",
+ "placeholder": "",
+ "style": "IPY_MODEL_051f7a5e5293438a86f448bcf3ab3628",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "39fe3520c2d94087bfa0a2c316c9c153": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3fbcca93c88b43de8de92fabd4bdaada": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "40580e5c537d48bd83a1c558ee9c2681": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "40591a7f0aa34a499d5f1d79d388f0bf": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "41e2d87e3b964047bc9560446309e918": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_851dc22f9dac4b0184387d0ca28ede77",
+ "placeholder": "",
+ "style": "IPY_MODEL_d210b59504a9405aad9458ca8f3f205a",
+ "value": " 399/399 [00:00<00:00, 30.9kB/s]"
+ }
+ },
+ "41facf9b456c4304acd3c763221a15b1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "42c290ae973341f6bfc2a08937a17685": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "46815e9165f941da94ad23abb65d633a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b4abee52a83499cb3ade8e90bf150dc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4de2922eb9c44317b0095c13b053379c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5326823753034500b18451917546424b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "544e083e923d472b91f027abb1c5fb9c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5986a4862656454282550f48aff8232b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_348e99e83a2b460f908fc75198839d2c",
+ "IPY_MODEL_5f96ce5f2a19474a83dafa3135fc13ea",
+ "IPY_MODEL_222d6f2198964b25a1727603efb244a8"
],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "layout": "IPY_MODEL_40580e5c537d48bd83a1c558ee9c2681"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "APhVtcuMedja"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `DeBertaForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `DeBertaForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n",
- "\n"
- ]
+ "5a8123d27a2c45fc8d771b5e9d586590": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "id": "DfnMEXnoedja"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "tokenClassifier = DeBertaForTokenClassification.loadSavedModel(\n",
- " ONNX_MODEL,\n",
- " spark\n",
- " )\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"ner\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setMaxSentenceLength(128)"
- ]
+ "5f96ce5f2a19474a83dafa3135fc13ea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_72632f0fef104b10a632d5ecc179a2c2",
+ "max": 2464616,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_42c290ae973341f6bfc2a08937a17685",
+ "value": 2464616
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "jDEIO_ogedja"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "5ffe04b3274f46cdb1cd07a24150a94e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_39fe3520c2d94087bfa0a2c316c9c153",
+ "placeholder": "",
+ "style": "IPY_MODEL_3fbcca93c88b43de8de92fabd4bdaada",
+ "value": " 2.40k/2.40k [00:00<00:00, 163kB/s]"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "id": "RD9apQL1edja"
- },
- "outputs": [],
- "source": [
- "tokenClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
- ]
+ "6a70306424044419a88929531ad939af": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "1Q13X5yXedja"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "6a97f2fa4d0f4a9cad417e64c32ea7a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b4abee52a83499cb3ade8e90bf150dc",
+ "placeholder": "",
+ "style": "IPY_MODEL_032f00fa51294589bb39278998176925",
+ "value": " 8.66M/8.66M [00:00<00:00, 25.5MB/s]"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "id": "YOTQAE82edja"
- },
- "outputs": [],
- "source": [
- "!rm -rf {ONNX_MODEL}"
- ]
+ "6fa78d78d4c043c49de4ec7aa76fa32d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "3Y06tv9pedjb"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your DeBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "71e530f0a9c84f568678e5135974ab97": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "mjwgkfu_edjb",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "5c1d8fb8-9690-4fd7-af33-925258aee467"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 721576\n",
- "-rw-r--r-- 1 root root 736413822 Jan 4 17:57 deberta_classification_onnx\n",
- "-rw-r--r-- 1 root root 2464616 Jan 4 17:57 deberta_spp\n",
- "drwxr-xr-x 3 root root 4096 Jan 4 17:57 fields\n",
- "drwxr-xr-x 2 root root 4096 Jan 4 17:57 metadata\n"
- ]
- }
+ "72632f0fef104b10a632d5ecc179a2c2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "75901c3613144a5197097ecc977dc52c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1944e124a9c44536a25896c74f97a792",
+ "max": 2401,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ffb685c7bf4147ddb7691207d5aeb171",
+ "value": 2401
+ }
+ },
+ "7b7c7492f8c7418886f884d85c8f9d7d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ccb33de54b864163ad928ff7dbe25f55",
+ "IPY_MODEL_ff17312cb37f4481a9cee79d436f4a88",
+ "IPY_MODEL_41e2d87e3b964047bc9560446309e918"
],
- "source": [
- "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
- ]
+ "layout": "IPY_MODEL_884cc437587c40b18b93437b7e06d2ee"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "XnyJFndHedjb"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny DeBertaForTokenClassification model 😊"
- ]
+ "7cb72e07c7594512ba36e92178a48a71": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "id": "bddFxDRgedjb"
- },
- "outputs": [],
- "source": [
- "tokenClassifier_loaded = DeBertaForTokenClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"ner\")"
- ]
+ "7cfe733b2327414f9b20409ca6cea0f1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "S9Z4h8zxedjb"
- },
- "source": [
- "You can see what labels were used to train this model via `getClasses` function:"
- ]
+ "81376469b6934cb4b7c84881fd35b263": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "X3yaUh2hedjb",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "f4ac228d-8152-467e-cf14-e9315e49c8c4"
- },
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "['I-UNIT',\n",
- " 'U-PHYSICAL_QUALITY',\n",
- " 'L-COLOR',\n",
- " 'U-UNIT',\n",
- " 'U-COLOR',\n",
- " 'I-QUANTITY',\n",
- " 'B-FOOD',\n",
- " 'U-PART',\n",
- " 'U-TASTE',\n",
- " 'U-PROCESS',\n",
- " 'L-PURPOSE',\n",
- " 'I-PART',\n",
- " 'I-PURPOSE',\n",
- " 'B-PURPOSE',\n",
- " 'L-PROCESS',\n",
- " 'L-QUANTITY',\n",
- " 'B-UNIT',\n",
- " 'B-QUANTITY',\n",
- " 'U-FOOD',\n",
- " 'B-PART',\n",
- " 'I-TASTE',\n",
- " 'B-TASTE',\n",
- " 'L-TASTE',\n",
- " 'L-FOOD',\n",
- " 'I-PROCESS',\n",
- " 'L-PHYSICAL_QUALITY',\n",
- " 'B-PHYSICAL_QUALITY',\n",
- " 'I-PHYSICAL_QUALITY',\n",
- " 'I-FOOD',\n",
- " 'B-PROCESS',\n",
- " 'B-COLOR',\n",
- " 'O',\n",
- " 'L-PART',\n",
- " 'L-UNIT',\n",
- " 'U-QUANTITY']"
- ]
- },
- "metadata": {},
- "execution_count": 14
- }
+ "818a51ebfa3c4f698e748f6356ab275c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db688ccf3e104772ad402ba7745cdd1a",
+ "placeholder": "",
+ "style": "IPY_MODEL_e8fb168c53834a2992081a6f29c02a8f",
+ "value": " 735M/735M [00:12<00:00, 68.3MB/s]"
+ }
+ },
+ "851dc22f9dac4b0184387d0ca28ede77": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "85e50550fc71471495e132d7f84ed0ab": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8845d01b94c544f8890f72e3d1189464": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "884cc437587c40b18b93437b7e06d2ee": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "91a1588c6b2c4ddba09ce37b439ed680": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_81376469b6934cb4b7c84881fd35b263",
+ "placeholder": "",
+ "style": "IPY_MODEL_98cbfcbc57394d4eac49a52a86fa8344",
+ "value": " 23.0/23.0 [00:00<00:00, 1.51kB/s]"
+ }
+ },
+ "98cbfcbc57394d4eac49a52a86fa8344": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9be87e2eeaca45c28174163186031aa4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ac47d733b1c8432e95a721628881fc91": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1c7b46f5656c4e06b35d4b898d1ee406",
+ "placeholder": "",
+ "style": "IPY_MODEL_0e6fa53ead5d4017b270d12bff278e97",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "ac72abed72d440c5a8bdc99a34366a68": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b61be87ac3bb4533a7230f308471c9e6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_bb2297ca44594e6f94ff3ea9c8fa3d3f",
+ "IPY_MODEL_e8e740bbaf4e49c8b1c8da5bf1c14470",
+ "IPY_MODEL_12934ef1f8994658b6af18f11a58120f"
],
- "source": [
- "# .getClasses was introduced in spark-nlp==3.4.0\n",
- "tokenClassifier_loaded.getClasses()"
- ]
+ "layout": "IPY_MODEL_e3698564f53148a087e561a2eae6d737"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "snpOu1wyedjb"
- },
- "source": [
- "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
- ]
+ "b6719791e8904793a489ab2bddf1d9e9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "id": "sGJzYyeledjb",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "e301be6b-f142-443b-9af5-0d2b91997fd6"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "+-----------------+--------------------+\n",
- "| text| result|\n",
- "+-----------------+--------------------+\n",
- "|I love spicy food|[O, O, U-TASTE, L...|\n",
- "+-----------------+--------------------+\n",
- "\n"
- ]
- }
+ "bb2297ca44594e6f94ff3ea9c8fa3d3f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cd5281764b48462c9ed417c3b9a8d997",
+ "placeholder": "",
+ "style": "IPY_MODEL_7cfe733b2327414f9b20409ca6cea0f1",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "c9946529acce4be5991ce6ddfc11e2a1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ccb33de54b864163ad928ff7dbe25f55": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2595e0c9f84940918a9ea7da5056b94f",
+ "placeholder": "",
+ "style": "IPY_MODEL_32fb58a8b61643598f3601881a6247fc",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "ccf87863deac46178cfbb70bebe37f54": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_db845fbcd1014d9cb4bd6e1b45d24032",
+ "IPY_MODEL_f8f8d4405b444d2699b38e70afec9350",
+ "IPY_MODEL_91a1588c6b2c4ddba09ce37b439ed680"
],
- "source": [
- "document_assembler = DocumentAssembler() \\\n",
- " .setInputCol('text') \\\n",
- " .setOutputCol('document')\n",
- "\n",
- "tokenizer = Tokenizer() \\\n",
- " .setInputCols(['document']) \\\n",
- " .setOutputCol('token')\n",
- "\n",
- "pipeline = Pipeline(stages=[\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " tokenClassifier_loaded\n",
- "])\n",
- "\n",
- "# couple of simple examples\n",
- "example = spark.createDataFrame([[\"I love spicy food\"]]).toDF(\"text\")\n",
- "\n",
- "result = pipeline.fit(example).transform(example)\n",
- "\n",
- "# result is a DataFrame\n",
- "result.select(\"text\", \"ner.result\").show()"
- ]
+ "layout": "IPY_MODEL_40591a7f0aa34a499d5f1d79d388f0bf"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "5pRDDJxqedjb"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `DeBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4"
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
- },
- "accelerator": "GPU",
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "28f58f45348b490aa1aa15e42555927f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_e932157e8a3c4190871f8e7f094cf455",
- "IPY_MODEL_75901c3613144a5197097ecc977dc52c",
- "IPY_MODEL_5ffe04b3274f46cdb1cd07a24150a94e"
- ],
- "layout": "IPY_MODEL_edbffa1db81a4f53809097ecba9ec7a0"
- }
- },
- "e932157e8a3c4190871f8e7f094cf455": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0436eef144e941178929119487eb1d6a",
- "placeholder": "",
- "style": "IPY_MODEL_71e530f0a9c84f568678e5135974ab97",
- "value": "config.json: 100%"
- }
- },
- "75901c3613144a5197097ecc977dc52c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1944e124a9c44536a25896c74f97a792",
- "max": 2401,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_ffb685c7bf4147ddb7691207d5aeb171",
- "value": 2401
- }
- },
- "5ffe04b3274f46cdb1cd07a24150a94e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_39fe3520c2d94087bfa0a2c316c9c153",
- "placeholder": "",
- "style": "IPY_MODEL_3fbcca93c88b43de8de92fabd4bdaada",
- "value": " 2.40k/2.40k [00:00<00:00, 163kB/s]"
- }
- },
- "edbffa1db81a4f53809097ecba9ec7a0": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0436eef144e941178929119487eb1d6a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "71e530f0a9c84f568678e5135974ab97": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "1944e124a9c44536a25896c74f97a792": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ffb685c7bf4147ddb7691207d5aeb171": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "39fe3520c2d94087bfa0a2c316c9c153": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3fbcca93c88b43de8de92fabd4bdaada": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "016a392548494bb5afcc686153f07981": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_349e3bfd7bd34a16939e0d427c8a039b",
- "IPY_MODEL_fd125d554b364f2fb05ab3e06b6b6d48",
- "IPY_MODEL_818a51ebfa3c4f698e748f6356ab275c"
- ],
- "layout": "IPY_MODEL_9be87e2eeaca45c28174163186031aa4"
- }
- },
- "349e3bfd7bd34a16939e0d427c8a039b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8845d01b94c544f8890f72e3d1189464",
- "placeholder": "",
- "style": "IPY_MODEL_051f7a5e5293438a86f448bcf3ab3628",
- "value": "model.safetensors: 100%"
- }
- },
- "fd125d554b364f2fb05ab3e06b6b6d48": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5a8123d27a2c45fc8d771b5e9d586590",
- "max": 735462440,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_ed5cb867566e496482c61ec98e195ac3",
- "value": 735462440
- }
- },
- "818a51ebfa3c4f698e748f6356ab275c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_db688ccf3e104772ad402ba7745cdd1a",
- "placeholder": "",
- "style": "IPY_MODEL_e8fb168c53834a2992081a6f29c02a8f",
- "value": " 735M/735M [00:12<00:00, 68.3MB/s]"
- }
- },
- "9be87e2eeaca45c28174163186031aa4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8845d01b94c544f8890f72e3d1189464": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "051f7a5e5293438a86f448bcf3ab3628": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "5a8123d27a2c45fc8d771b5e9d586590": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ed5cb867566e496482c61ec98e195ac3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "db688ccf3e104772ad402ba7745cdd1a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e8fb168c53834a2992081a6f29c02a8f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7b7c7492f8c7418886f884d85c8f9d7d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_ccb33de54b864163ad928ff7dbe25f55",
- "IPY_MODEL_ff17312cb37f4481a9cee79d436f4a88",
- "IPY_MODEL_41e2d87e3b964047bc9560446309e918"
- ],
- "layout": "IPY_MODEL_884cc437587c40b18b93437b7e06d2ee"
- }
- },
- "ccb33de54b864163ad928ff7dbe25f55": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2595e0c9f84940918a9ea7da5056b94f",
- "placeholder": "",
- "style": "IPY_MODEL_32fb58a8b61643598f3601881a6247fc",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "ff17312cb37f4481a9cee79d436f4a88": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_248701fdac1149a48aaaa367efe88ed7",
- "max": 399,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_11050a770a9e47138718b81150f2bea3",
- "value": 399
- }
- },
- "41e2d87e3b964047bc9560446309e918": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_851dc22f9dac4b0184387d0ca28ede77",
- "placeholder": "",
- "style": "IPY_MODEL_d210b59504a9405aad9458ca8f3f205a",
- "value": " 399/399 [00:00<00:00, 30.9kB/s]"
- }
- },
- "884cc437587c40b18b93437b7e06d2ee": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2595e0c9f84940918a9ea7da5056b94f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "32fb58a8b61643598f3601881a6247fc": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "248701fdac1149a48aaaa367efe88ed7": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "11050a770a9e47138718b81150f2bea3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "851dc22f9dac4b0184387d0ca28ede77": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d210b59504a9405aad9458ca8f3f205a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "5986a4862656454282550f48aff8232b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_348e99e83a2b460f908fc75198839d2c",
- "IPY_MODEL_5f96ce5f2a19474a83dafa3135fc13ea",
- "IPY_MODEL_222d6f2198964b25a1727603efb244a8"
- ],
- "layout": "IPY_MODEL_40580e5c537d48bd83a1c558ee9c2681"
- }
- },
- "348e99e83a2b460f908fc75198839d2c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_4de2922eb9c44317b0095c13b053379c",
- "placeholder": "",
- "style": "IPY_MODEL_b6719791e8904793a489ab2bddf1d9e9",
- "value": "spm.model: 100%"
- }
- },
- "5f96ce5f2a19474a83dafa3135fc13ea": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_72632f0fef104b10a632d5ecc179a2c2",
- "max": 2464616,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_42c290ae973341f6bfc2a08937a17685",
- "value": 2464616
- }
- },
- "222d6f2198964b25a1727603efb244a8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d82005f661bb4f5182ea30a602532ecf",
- "placeholder": "",
- "style": "IPY_MODEL_14c1845f589d474d8117a9ad21982b3a",
- "value": " 2.46M/2.46M [00:00<00:00, 6.60MB/s]"
- }
- },
- "40580e5c537d48bd83a1c558ee9c2681": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4de2922eb9c44317b0095c13b053379c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b6719791e8904793a489ab2bddf1d9e9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "72632f0fef104b10a632d5ecc179a2c2": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "42c290ae973341f6bfc2a08937a17685": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "d82005f661bb4f5182ea30a602532ecf": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "14c1845f589d474d8117a9ad21982b3a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "e2c9fbe59d864951ba3cd1f4cfc96e49": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_ac47d733b1c8432e95a721628881fc91",
- "IPY_MODEL_1ead2b8c273848adbce31e9627dbda26",
- "IPY_MODEL_6a97f2fa4d0f4a9cad417e64c32ea7a8"
- ],
- "layout": "IPY_MODEL_41facf9b456c4304acd3c763221a15b1"
- }
- },
- "ac47d733b1c8432e95a721628881fc91": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1c7b46f5656c4e06b35d4b898d1ee406",
- "placeholder": "",
- "style": "IPY_MODEL_0e6fa53ead5d4017b270d12bff278e97",
- "value": "tokenizer.json: 100%"
- }
- },
- "1ead2b8c273848adbce31e9627dbda26": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_85e50550fc71471495e132d7f84ed0ab",
- "max": 8656551,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_29022bc352014727a2567f1f7f97cfd9",
- "value": 8656551
- }
- },
- "6a97f2fa4d0f4a9cad417e64c32ea7a8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_4b4abee52a83499cb3ade8e90bf150dc",
- "placeholder": "",
- "style": "IPY_MODEL_032f00fa51294589bb39278998176925",
- "value": " 8.66M/8.66M [00:00<00:00, 25.5MB/s]"
- }
- },
- "41facf9b456c4304acd3c763221a15b1": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1c7b46f5656c4e06b35d4b898d1ee406": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0e6fa53ead5d4017b270d12bff278e97": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "85e50550fc71471495e132d7f84ed0ab": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "29022bc352014727a2567f1f7f97cfd9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "4b4abee52a83499cb3ade8e90bf150dc": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "032f00fa51294589bb39278998176925": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ccf87863deac46178cfbb70bebe37f54": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_db845fbcd1014d9cb4bd6e1b45d24032",
- "IPY_MODEL_f8f8d4405b444d2699b38e70afec9350",
- "IPY_MODEL_91a1588c6b2c4ddba09ce37b439ed680"
- ],
- "layout": "IPY_MODEL_40591a7f0aa34a499d5f1d79d388f0bf"
- }
- },
- "db845fbcd1014d9cb4bd6e1b45d24032": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c9946529acce4be5991ce6ddfc11e2a1",
- "placeholder": "",
- "style": "IPY_MODEL_6a70306424044419a88929531ad939af",
- "value": "added_tokens.json: 100%"
- }
- },
- "f8f8d4405b444d2699b38e70afec9350": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_544e083e923d472b91f027abb1c5fb9c",
- "max": 23,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_7cb72e07c7594512ba36e92178a48a71",
- "value": 23
- }
- },
- "91a1588c6b2c4ddba09ce37b439ed680": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_81376469b6934cb4b7c84881fd35b263",
- "placeholder": "",
- "style": "IPY_MODEL_98cbfcbc57394d4eac49a52a86fa8344",
- "value": " 23.0/23.0 [00:00<00:00, 1.51kB/s]"
- }
- },
- "40591a7f0aa34a499d5f1d79d388f0bf": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c9946529acce4be5991ce6ddfc11e2a1": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6a70306424044419a88929531ad939af": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "544e083e923d472b91f027abb1c5fb9c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7cb72e07c7594512ba36e92178a48a71": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "81376469b6934cb4b7c84881fd35b263": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "98cbfcbc57394d4eac49a52a86fa8344": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b61be87ac3bb4533a7230f308471c9e6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_bb2297ca44594e6f94ff3ea9c8fa3d3f",
- "IPY_MODEL_e8e740bbaf4e49c8b1c8da5bf1c14470",
- "IPY_MODEL_12934ef1f8994658b6af18f11a58120f"
- ],
- "layout": "IPY_MODEL_e3698564f53148a087e561a2eae6d737"
- }
- },
- "bb2297ca44594e6f94ff3ea9c8fa3d3f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_cd5281764b48462c9ed417c3b9a8d997",
- "placeholder": "",
- "style": "IPY_MODEL_7cfe733b2327414f9b20409ca6cea0f1",
- "value": "special_tokens_map.json: 100%"
- }
- },
- "e8e740bbaf4e49c8b1c8da5bf1c14470": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5326823753034500b18451917546424b",
- "max": 173,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_6fa78d78d4c043c49de4ec7aa76fa32d",
- "value": 173
- }
- },
- "12934ef1f8994658b6af18f11a58120f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_46815e9165f941da94ad23abb65d633a",
- "placeholder": "",
- "style": "IPY_MODEL_ac72abed72d440c5a8bdc99a34366a68",
- "value": " 173/173 [00:00<00:00, 11.0kB/s]"
- }
- },
- "e3698564f53148a087e561a2eae6d737": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "cd5281764b48462c9ed417c3b9a8d997": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7cfe733b2327414f9b20409ca6cea0f1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "5326823753034500b18451917546424b": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6fa78d78d4c043c49de4ec7aa76fa32d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "46815e9165f941da94ad23abb65d633a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ac72abed72d440c5a8bdc99a34366a68": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "cd5281764b48462c9ed417c3b9a8d997": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d210b59504a9405aad9458ca8f3f205a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d82005f661bb4f5182ea30a602532ecf": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "db688ccf3e104772ad402ba7745cdd1a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "db845fbcd1014d9cb4bd6e1b45d24032": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c9946529acce4be5991ce6ddfc11e2a1",
+ "placeholder": "",
+ "style": "IPY_MODEL_6a70306424044419a88929531ad939af",
+ "value": "added_tokens.json: 100%"
+ }
+ },
+ "e2c9fbe59d864951ba3cd1f4cfc96e49": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ac47d733b1c8432e95a721628881fc91",
+ "IPY_MODEL_1ead2b8c273848adbce31e9627dbda26",
+ "IPY_MODEL_6a97f2fa4d0f4a9cad417e64c32ea7a8"
+ ],
+ "layout": "IPY_MODEL_41facf9b456c4304acd3c763221a15b1"
+ }
+ },
+ "e3698564f53148a087e561a2eae6d737": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e8e740bbaf4e49c8b1c8da5bf1c14470": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5326823753034500b18451917546424b",
+ "max": 173,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6fa78d78d4c043c49de4ec7aa76fa32d",
+ "value": 173
+ }
+ },
+ "e8fb168c53834a2992081a6f29c02a8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e932157e8a3c4190871f8e7f094cf455": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0436eef144e941178929119487eb1d6a",
+ "placeholder": "",
+ "style": "IPY_MODEL_71e530f0a9c84f568678e5135974ab97",
+ "value": "config.json: 100%"
+ }
+ },
+ "ed5cb867566e496482c61ec98e195ac3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "edbffa1db81a4f53809097ecba9ec7a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f8f8d4405b444d2699b38e70afec9350": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_544e083e923d472b91f027abb1c5fb9c",
+ "max": 23,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_7cb72e07c7594512ba36e92178a48a71",
+ "value": 23
+ }
+ },
+ "fd125d554b364f2fb05ab3e06b6b6d48": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5a8123d27a2c45fc8d771b5e9d586590",
+ "max": 735462440,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ed5cb867566e496482c61ec98e195ac3",
+ "value": 735462440
+ }
+ },
+ "ff17312cb37f4481a9cee79d436f4a88": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_248701fdac1149a48aaaa367efe88ed7",
+ "max": 399,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_11050a770a9e47138718b81150f2bea3",
+ "value": 399
+ }
+ },
+ "ffb685c7bf4147ddb7691207d5aeb171": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBERT.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBERT.ipynb
index 95d8c86216561d..3ff1f88979172e 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBERT.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBERT.ipynb
@@ -28,7 +28,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
]
},
{
@@ -70,7 +70,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
]
},
{
@@ -341,7 +341,7 @@
"distilbert = DistilBertEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
" .setInputCols([\"document\",'token'])\\\n",
" .setOutputCol(\"distilbert\")\\\n",
- " .setCaseSensitive(True)\n",
+ " .setCaseSensitive(True)\\\n",
" .setStorageRef('distilbert_base_cased')"
]
},
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForQuestionAnswering.ipynb
index f3a8e8c0e775c7..63d773b068b6d9 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForQuestionAnswering.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -92,7 +92,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow==2.11.0"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow==2.11.0"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForSequenceClassification.ipynb
index 327b4499a8a73e..83137eeccffa56 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForSequenceClassification.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
]
},
{
@@ -91,7 +91,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow==2.11.0"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow==2.11.0"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForTokenClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForTokenClassification.ipynb
index 2a1fd8c7387a47..32b3bc550fe70b 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForTokenClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_DistilBertForTokenClassification.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -92,7 +92,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow==2.11.0"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow==2.11.0"
]
},
{
@@ -213,7 +213,7 @@
"import tensorflow as tf\n",
"\n",
"MODEL_NAME = 'elastic/distilbert-base-cased-finetuned-conll03-english'\n",
- "ONNX_MODEL = f\"onnx_models/{ONNX_MODEL}\"\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
"\n",
"ort_model = ORTModelForTokenClassification.from_pretrained(MODEL_NAME, export=True)\n",
"\n",
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_E5.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_E5.ipynb
index e63c891e83a887..fabe92351cb934 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_E5.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_E5.ipynb
@@ -1,397 +1,543 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_E5.ipynb)\n",
- "\n",
- "# Import ONNX E5 models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support for this annotator was introduced in `Spark NLP 5.1.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
- "- You can import models for E5 from HuggingFace and they have to be in `Sentence Similarity` category. Meaning, you cannot use E5 models trained/fine-tuned on a specific task such as token/sequence classification."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Export and Save HuggingFace model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cells": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m380.6/380.6 kB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m37.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m54.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m39.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m59.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m37.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [intfloat/e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "inxS_UuG7k1K"
+ },
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_E5.ipynb)\n",
+ "\n",
+ "# Import ONNX E5 models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support for this annotator was introduced in `Spark NLP 5.1.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
+ "- You can import models for E5 from HuggingFace and they have to be in `Sentence Similarity` category. Meaning, you cannot use E5 models trained/fine-tuned on a specific task such as token/sequence classification."
+ ]
+ },
{
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n",
- "Using framework PyTorch: 2.0.1+cu118\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rES2GQnL7k1L"
+ },
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n",
- "verbose: False, log level: Level.ERROR\n",
- "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
- "\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
- "\n",
- "MODEL_NAME = \"intfloat/e5-small-v2\"\n",
- "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(EXPORT_PATH)\n",
- "\n",
- "# Create directory for assets and move the tokenizer files.\n",
- "# A separate folder is needed for Spark NLP.\n",
- "!mkdir {EXPORT_PATH}/assets\n",
- "!mv {EXPORT_PATH}/vocab.txt {EXPORT_PATH}/assets/"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "_J-t5wW37k1M"
+ },
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 130692\n",
- "drwxr-xr-x 2 root root 4096 Sep 5 09:03 assets\n",
- "-rw-r--r-- 1 root root 626 Sep 5 09:03 config.json\n",
- "-rw-r--r-- 1 root root 133093467 Sep 5 09:03 model.onnx\n",
- "-rw-r--r-- 1 root root 125 Sep 5 09:03 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 314 Sep 5 09:03 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 711396 Sep 5 09:03 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "0OBtmDTB7k1M",
+ "outputId": "9d07d7c3-c9c9-4666-b620-3524d7daa9de",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.0/410.0 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m589.8/589.8 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m54.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m32.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m59.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.3/5.3 MB\u001b[0m \u001b[31m68.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m50.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m31.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m45.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m63.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m74.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m61.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m76.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m80.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m75.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m83.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m76.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m80.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m74.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m52.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m68.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m77.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m65.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m73.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m84.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m69.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m77.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m76.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m34.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m70.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m86.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m70.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m61.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m82.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m35.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m64.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m90.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m82.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m57.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m80.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m78.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m69.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m63.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m57.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m81.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m33.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m80.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m20.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m37.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m39.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m38.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m38.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m66.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m59.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m59.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tf-keras 2.15.1 requires tensorflow<2.16,>=2.15, but you have tensorflow 2.11.1 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 228\n",
- "-rw-r--r-- 1 root root 231508 Sep 5 09:03 vocab.txt\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}/assets"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Import and Save E5 in Spark NLP\n",
- "\n",
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M6DKNctk7k1N"
+ },
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [intfloat/e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n"
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.1.0\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.0\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.2/531.2 kB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
- ],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Let's use `loadSavedModel` functon in `E5Embeddings` which allows us to load the ONNX model\n",
- "- Most params will be set automatically. They can also be set later after loading the model in `E5Embeddings` during runtime, so don't worry about setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "\n",
- "# All these params should be identical to the original ONNX model\n",
- "E5 = E5Embeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
- " .setInputCols([\"document\"])\\\n",
- " .setOutputCol(\"E5\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "E5.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "!rm -rf {EXPORT_PATH}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your ONNX E5 model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "Nrcm1-V-7k1N",
+ "outputId": "d0ba8939-127f-4600-c074-5514a3fb033f",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n",
+ "Framework not specified. Using pt to export the model.\n",
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.2.1+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
+ "\n",
+ "MODEL_NAME = \"intfloat/e5-small-v2\"\n",
+ "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(EXPORT_PATH)\n",
+ "\n",
+ "# Create directory for assets and move the tokenizer files.\n",
+ "# A separate folder is needed for Spark NLP.\n",
+ "!mkdir {EXPORT_PATH}/assets\n",
+ "!mv {EXPORT_PATH}/vocab.txt {EXPORT_PATH}/assets/"
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 130008\n",
- "-rw-r--r-- 1 root root 133113905 Sep 5 08:57 e5_onnx\n",
- "drwxr-xr-x 3 root root 4096 Sep 5 08:57 fields\n",
- "drwxr-xr-x 2 root root 4096 Sep 5 08:57 metadata\n"
- ]
- }
- ],
- "source": [
- "! ls -l {MODEL_NAME}_spark_nlp"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny E5 model 😊"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "\n",
- "from sparknlp.base import *\n",
- "from sparknlp.annotator import *\n",
- "\n",
- "document_assembler = DocumentAssembler()\\\n",
- " .setInputCol(\"text\")\\\n",
- " .setOutputCol(\"document\")\n",
- "\n",
- "E5_loaded = E5Embeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
- " .setInputCols([\"document\"])\\\n",
- " .setOutputCol(\"E5\")\\\n",
- "\n",
- "pipeline = Pipeline(\n",
- " stages = [\n",
- " document_assembler,\n",
- " E5_loaded\n",
- " ])\n",
- "\n",
- "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
- "model = pipeline.fit(data)\n",
- "result = model.transform(data)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "EbR3_SbG7k1O"
+ },
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "id": "cosFNNCM7k1P",
+ "outputId": "f5d098d7-aa31-4dc1-999f-5a218d26d90f",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 130692\n",
+ "drwxr-xr-x 2 root root 4096 Apr 12 11:02 assets\n",
+ "-rw-r--r-- 1 root root 626 Apr 12 11:02 config.json\n",
+ "-rw-r--r-- 1 root root 133093468 Apr 12 11:02 model.onnx\n",
+ "-rw-r--r-- 1 root root 125 Apr 12 11:02 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 314 Apr 12 11:02 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 711396 Apr 12 11:02 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}"
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+--------------------+\n",
- "| embeddings|\n",
- "+--------------------+\n",
- "|[-0.35357836, 0.3...|\n",
- "+--------------------+\n",
- "\n"
- ]
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "FmE_V8JI7k1P",
+ "outputId": "85de6e65-e485-45e7-b0a0-eae6212bf6af",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 228\n",
+ "-rw-r--r-- 1 root root 231508 Apr 12 11:02 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jFxCu5FZ7k1R"
+ },
+ "source": [
+ "## Import and Save E5 in Spark NLP\n",
+ "\n",
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script\n",
+ "- However, we need to upgrade Spark to a more recent version to use this annotator."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "IND1Fqp07k1R",
+ "outputId": "cf139caf-875c-4998-a047-fb842a7adc65",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m568.4/568.4 kB\u001b[0m \u001b[31m44.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n",
+ "! pip install -U pyspark==3.4.1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "2hAdyHc_7k1R"
+ },
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "ViVKUlqj7k1S"
+ },
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fazfE9lb7k1S"
+ },
+ "source": [
+ "- Let's use `loadSavedModel` functon in `E5Embeddings` which allows us to load the ONNX model\n",
+ "- Most params will be set automatically. They can also be set later after loading the model in `E5Embeddings` during runtime, so don't worry about setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "id": "-soDJIu-7k1S"
+ },
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "MODEL_NAME = \"intfloat/e5-small-v2\"\n",
+ "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "# All these params should be identical to the original ONNX model\n",
+ "E5 = E5Embeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
+ " .setInputCols([\"document\"])\\\n",
+ " .setOutputCol(\"E5\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0BU5PjTc7k1S"
+ },
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "id": "jpUjHNk67k1T"
+ },
+ "outputs": [],
+ "source": [
+ "E5.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AbK6dSr37k1T"
+ },
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "id": "A-RW-l_87k1T"
+ },
+ "outputs": [],
+ "source": [
+ "!rm -rf {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hjuhoNym7k1U"
+ },
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your ONNX E5 model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "id": "t8WxTSsS7k1U",
+ "outputId": "2c8417c5-5622-4aeb-fc1a-d3e50453d901",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 130008\n",
+ "-rw-r--r-- 1 root root 133113906 Apr 12 11:05 e5_onnx\n",
+ "drwxr-xr-x 3 root root 4096 Apr 12 11:04 fields\n",
+ "drwxr-xr-x 2 root root 4096 Apr 12 11:04 metadata\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {MODEL_NAME}_spark_nlp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qMb-ejgJ7k1U"
+ },
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny E5 model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "id": "YXy8sgat7k1U"
+ },
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "\n",
+ "from sparknlp.base import *\n",
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "document_assembler = DocumentAssembler()\\\n",
+ " .setInputCol(\"text\")\\\n",
+ " .setOutputCol(\"document\")\n",
+ "\n",
+ "E5_loaded = E5Embeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
+ " .setInputCols([\"document\"])\\\n",
+ " .setOutputCol(\"E5\")\\\n",
+ "\n",
+ "pipeline = Pipeline(\n",
+ " stages = [\n",
+ " document_assembler,\n",
+ " E5_loaded\n",
+ " ])\n",
+ "\n",
+ "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
+ "model = pipeline.fit(data)\n",
+ "result = model.transform(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "id": "G_tyqtE77k1U",
+ "outputId": "9ea755c6-0686-43af-91b1-f9a19006d070",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "+--------------------+\n",
+ "| embeddings|\n",
+ "+--------------------+\n",
+ "|[-0.042928364, 0....|\n",
+ "+--------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "result.selectExpr(\"explode(E5.embeddings) as embeddings\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sJqE6o8m7k1Z"
+ },
+ "source": [
+ "That's it! You can now go wild and use hundreds of E5 models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
}
- ],
- "source": [
- "result.selectExpr(\"explode(E5.embeddings) as embeddings\").show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "That's it! You can now go wild and use hundreds of E5 models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNet.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNet.ipynb
index af5cebcfe3e9ba..9b97fabd7756d0 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNet.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNet.ipynb
@@ -40,27 +40,30 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m380.6/380.6 kB\u001b[0m \u001b[31m33.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m54.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m44.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m79.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m27.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m44.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m86.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m73.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m71.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m25.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.0/410.0 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m589.8/589.8 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m42.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m31.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m37.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.3/5.3 MB\u001b[0m \u001b[31m48.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m47.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m28.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n",
+ "tf-keras 2.15.1 requires tensorflow<2.16,>=2.15, but you have tensorflow 2.16.1 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0m"
]
}
@@ -82,15 +85,31 @@
"execution_count": null,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " torch.utils._pytree._register_pytree_node(\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n",
+ " torch.utils._pytree._register_pytree_node(\n",
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
+ },
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "aaa606f9a3ff42a79f352eab50bafe2e",
+ "model_id": "6ba49f9ffa4147d2bb4dfbd6bc76b61c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Downloading (…)lve/main/config.json: 0%| | 0.00/571 [00:00, ?B/s]"
+ "config.json: 0%| | 0.00/571 [00:00, ?B/s]"
]
},
"metadata": {},
@@ -100,18 +119,18 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Framework not specified. Using pt to export to ONNX.\n"
+ "Framework not specified. Using pt to export the model.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "4ab501e2862c46b090137d7bbbd49966",
+ "model_id": "390522e52ce043a2a62240efe6888d7b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Downloading pytorch_model.bin: 0%| | 0.00/438M [00:00, ?B/s]"
+ "model.safetensors: 0%| | 0.00/438M [00:00, ?B/s]"
]
},
"metadata": {},
@@ -120,12 +139,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "2de039ff8dc94eca8426b86904c921cf",
+ "model_id": "59e2441f282e406cbd19b74193eab6b4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Downloading (…)okenizer_config.json: 0%| | 0.00/363 [00:00, ?B/s]"
+ "tokenizer_config.json: 0%| | 0.00/363 [00:00, ?B/s]"
]
},
"metadata": {},
@@ -134,12 +153,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "ff7e180b5a35431389ee4821437c79de",
+ "model_id": "5d8616c9ce0b4217aabca7a958c6f398",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Downloading (…)solve/main/vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
+ "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
]
},
"metadata": {},
@@ -148,12 +167,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f3f99620407d43f48ed8aba637c9814d",
+ "model_id": "2450f75e5a654ebc81fbd4bf33aa2764",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Downloading (…)/main/tokenizer.json: 0%| | 0.00/466k [00:00, ?B/s]"
+ "tokenizer.json: 0%| | 0.00/466k [00:00, ?B/s]"
]
},
"metadata": {},
@@ -162,12 +181,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "0d0a620f0c8740799c988821e7a3e988",
+ "model_id": "7099901d2c14479a8d51dcc2fadbe531",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Downloading (…)cial_tokens_map.json: 0%| | 0.00/239 [00:00, ?B/s]"
+ "special_tokens_map.json: 0%| | 0.00/239 [00:00, ?B/s]"
]
},
"metadata": {},
@@ -177,17 +196,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Using framework PyTorch: 2.0.1+cu118\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n",
- "verbose: False, log level: Level.ERROR\n",
- "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
- "\n"
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.2.1+cu121\n"
]
}
],
@@ -225,12 +236,12 @@
"output_type": "stream",
"text": [
"total 426324\n",
- "drwxr-xr-x 2 root root 4096 Sep 5 09:57 assets\n",
- "-rw-r--r-- 1 root root 591 Sep 5 09:57 config.json\n",
- "-rw-r--r-- 1 root root 435826547 Sep 5 09:57 model.onnx\n",
- "-rw-r--r-- 1 root root 280 Sep 5 09:57 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 357 Sep 5 09:57 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 710944 Sep 5 09:57 tokenizer.json\n"
+ "drwxr-xr-x 2 root root 4096 Apr 12 11:16 assets\n",
+ "-rw-r--r-- 1 root root 591 Apr 12 11:16 config.json\n",
+ "-rw-r--r-- 1 root root 435826548 Apr 12 11:16 model.onnx\n",
+ "-rw-r--r-- 1 root root 280 Apr 12 11:16 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 548 Apr 12 11:16 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 710944 Apr 12 11:16 tokenizer.json\n"
]
}
],
@@ -248,7 +259,7 @@
"output_type": "stream",
"text": [
"total 228\n",
- "-rw-r--r-- 1 root root 231536 Sep 5 09:57 vocab.txt\n"
+ "-rw-r--r-- 1 root root 231536 Apr 12 11:16 vocab.txt\n"
]
}
],
@@ -263,7 +274,8 @@
"## Import and Save MPNet in Spark NLP\n",
"\n",
"- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
+ "- This part is pretty easy via our simple script\n",
+ "- However, we need to upgrade Spark to a more recent version to use this annotator."
]
},
{
@@ -275,18 +287,41 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.1.0\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.0\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "Installing PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.2/531.2 kB\u001b[0m \u001b[31m41.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m568.4/568.4 kB\u001b[0m \u001b[31m39.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting pyspark==3.4.1\n",
+ " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting py4j==0.10.9.7 (from pyspark==3.4.1)\n",
+ " Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.5/200.5 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hBuilding wheels for collected packages: pyspark\n",
+ " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285388 sha256=5704d3b33140d4bbcbe211cbfaac13876a307a698a83744d11aa3195db0f1aef\n",
+ " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n",
+ "Successfully built pyspark\n",
+ "Installing collected packages: py4j, pyspark\n",
+ " Attempting uninstall: py4j\n",
+ " Found existing installation: py4j 0.10.9.5\n",
+ " Uninstalling py4j-0.10.9.5:\n",
+ " Successfully uninstalled py4j-0.10.9.5\n",
+ " Attempting uninstall: pyspark\n",
+ " Found existing installation: pyspark 3.2.3\n",
+ " Uninstalling pyspark-3.2.3:\n",
+ " Successfully uninstalled pyspark-3.2.3\n",
+ "Successfully installed py4j-0.10.9.7 pyspark-3.4.1\n"
]
}
],
"source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n",
+ "! pip install -U pyspark==3.4.1"
]
},
{
@@ -300,7 +335,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/lib/python3.10/subprocess.py:1796: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n",
+ " self.pid = _posixsubprocess.fork_exec(\n"
+ ]
+ }
+ ],
"source": [
"import sparknlp\n",
"# let's start Spark with Spark NLP\n",
@@ -382,9 +426,9 @@
"output_type": "stream",
"text": [
"total 425688\n",
- "drwxr-xr-x 3 root root 4096 Sep 5 10:02 fields\n",
- "drwxr-xr-x 2 root root 4096 Sep 5 10:02 metadata\n",
- "-rw-r--r-- 1 root root 435893186 Sep 5 10:02 mpnet_onnx\n"
+ "drwxr-xr-x 3 root root 4096 Apr 12 11:19 fields\n",
+ "drwxr-xr-x 2 root root 4096 Apr 12 11:19 metadata\n",
+ "-rw-r--r-- 1 root root 435893187 Apr 12 11:19 mpnet_onnx\n"
]
}
],
@@ -433,20 +477,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+--------------------+\n",
- "| embeddings|\n",
- "+--------------------+\n",
- "|[-0.10415352, 0.3...|\n",
- "+--------------------+\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"result.selectExpr(\"explode(MPNet.embeddings) as embeddings\").show()"
]
@@ -478,12 +509,26 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "pygments_lexer": "ipython3"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
- "01fdb22338944766bb3df3eef7e370cf": {
+ "064371eadfd34fe4aa825735a503755a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0781a4e08cba4004bff187a20f574554": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
@@ -498,7 +543,7 @@
"description_width": ""
}
},
- "05e41a7177ab4b369297fab6d5ef6f7d": {
+ "080a0292414545639cdb7937f62e8765": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -550,50 +595,7 @@
"width": null
}
},
- "0a8bfc6031944afebf4b3c79865f8ea6": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e5ee5a6862434d3fafaa47a00837cf84",
- "placeholder": "",
- "style": "IPY_MODEL_11359ce9598e4d3884eaa5b42d0a1ef2",
- "value": " 438M/438M [00:01<00:00, 233MB/s]"
- }
- },
- "0d0a620f0c8740799c988821e7a3e988": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_3cda7f5248b3402b91959a318619386f",
- "IPY_MODEL_da077b0bfb544ea1a3d461aec23412f5",
- "IPY_MODEL_36be256eb4b04d3c9efb5139ee0c876d"
- ],
- "layout": "IPY_MODEL_81feaad74bbf44e6a5f7156a2c5cbb2b"
- }
- },
- "0d358a124a1c4dc2b542b5339d8be8af": {
+ "0c73e4f7741b44a4a06c54eb4183adf3": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -645,22 +647,73 @@
"width": null
}
},
- "10e75ddbc8894944baf46753f48b13fd": {
+ "0d57206a52804aa0a05ad04ea3b1b5f4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9de40ddd3ac486c85724cee8c0cb9b8",
+ "placeholder": "",
+ "style": "IPY_MODEL_f4244c857db94380820fdb4d596fd171",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "1376988fabb94ef1ad38b1868f5ece1a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c73e4f7741b44a4a06c54eb4183adf3",
+ "placeholder": "",
+ "style": "IPY_MODEL_0781a4e08cba4004bff187a20f574554",
+ "value": " 438M/438M [00:04<00:00, 163MB/s]"
+ }
+ },
+ "1a67d8af03144d97b0eebd1409b435e5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ae1ed422450f411db21b9813d90abffb",
+ "max": 239,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4605f52d9af245749cbbd3ea96a80a59",
+ "value": 239
}
},
- "11359ce9598e4d3884eaa5b42d0a1ef2": {
+ "1dda605f2f364a19824d5ab03666e696": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
@@ -675,59 +728,98 @@
"description_width": ""
}
},
- "13416ebdcd574213b8ee883ea90094e5": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
+ "227e255c08344055b1753f608e4c71d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9d1a966bb7934c55966aedd3915eeacd",
+ "placeholder": "",
+ "style": "IPY_MODEL_6a1b22ad3a864c7eab918f6e7d1991fa",
+ "value": " 466k/466k [00:00<00:00, 15.6MB/s]"
}
},
- "1533275340c8404fad14801d1f1c29c9": {
+ "2450f75e5a654ebc81fbd4bf33aa2764": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_0d57206a52804aa0a05ad04ea3b1b5f4",
+ "IPY_MODEL_78f831ce76e344bb98fe2163188da84f",
+ "IPY_MODEL_227e255c08344055b1753f608e4c71d4"
+ ],
+ "layout": "IPY_MODEL_e48bccfc40294cbdaa5a94304b0f784e"
+ }
+ },
+ "2470fff5bf084972bf1337536d6e2749": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4be542659a4847788385b63cb09cd241",
+ "max": 363,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4486a9b71d7a4b9791e84343b064d5d5",
+ "value": 363
+ }
+ },
+ "274816b915864cb4a3f97c0b3384491e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_53fe6e1de58e4fff92cf59ec127d8dee",
+ "max": 571,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_a827a5ab249c40d9ad6ed0a6a4b6d752",
+ "value": 571
+ }
+ },
+ "28073ac8dc7f42bba74c4054871c4070": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -779,105 +871,23 @@
"width": null
}
},
- "282283d3c9dc40a79c52a90da0e78463": {
+ "34f0b66643d742298005d461f94a1b0f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "29a8fd357de84bb885ff8cb74d975e23": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2adbf509a8564612a0634ecfd5ce2d05": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "2c8301b596004c75ae1c831e68623c22": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "2de039ff8dc94eca8426b86904c921cf": {
+ "390522e52ce043a2a62240efe6888d7b": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
@@ -892,35 +902,30 @@
"_view_name": "HBoxView",
"box_style": "",
"children": [
- "IPY_MODEL_86b539a7775b488f9e241696cfa50b86",
- "IPY_MODEL_7021a3c6c7e54ec69fb555fce3185009",
- "IPY_MODEL_f8c2905e891b444baecfcd02187692a6"
+ "IPY_MODEL_a55bf43d7c0b4096bc90ad4bdcfea0a6",
+ "IPY_MODEL_97aa2385a25e4f5f9b791b826ffcd3f9",
+ "IPY_MODEL_1376988fabb94ef1ad38b1868f5ece1a"
],
- "layout": "IPY_MODEL_765a65ab2b774ef7bbaa14e8b1d468b8"
+ "layout": "IPY_MODEL_bc50955a524c4280aaaac5f3e6c3ab62"
}
},
- "2ed3308ee5bc46e681a1ac89b34b5b07": {
+ "4486a9b71d7a4b9791e84343b064d5d5": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
- "model_name": "HTMLModel",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_cda15c545f12488ea3857545d9010d81",
- "placeholder": "",
- "style": "IPY_MODEL_282283d3c9dc40a79c52a90da0e78463",
- "value": " 571/571 [00:00<00:00, 8.29kB/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
}
},
- "2f78bde2f43444fe87b0945b3547e6b5": {
+ "4605f52d9af245749cbbd3ea96a80a59": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
@@ -936,7 +941,7 @@
"description_width": ""
}
},
- "2fbf0d67a1304bf7a87f1679b72f57f6": {
+ "49173f95470041ebbd5cc5c991889ae7": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
@@ -952,73 +957,67 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_bc930127d58d42ac865e9d5ce68c9940",
- "max": 438011953,
+ "layout": "IPY_MODEL_d9b8d7ddadf1436e97e595c8eaced813",
+ "max": 231536,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_5e3e8e5c5e8b4256ae6a74417d1209ae",
- "value": 438011953
+ "style": "IPY_MODEL_fdb96f0441b14a0f9c96c625c2301382",
+ "value": 231536
}
},
- "32cf0f5a194f42549a538acccbfc5301": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
+ "4be542659a4847788385b63cb09cd241": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "36be256eb4b04d3c9efb5139ee0c876d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f5df8d84dc824e9fa758b4c54546baf9",
- "placeholder": "",
- "style": "IPY_MODEL_fd928b696b904aa38eedd9c32e3facad",
- "value": " 239/239 [00:00<00:00, 7.52kB/s]"
- }
- },
- "3cda7f5248b3402b91959a318619386f": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_bb1dfd55b9e74a0b80e7750870593ecc",
- "placeholder": "",
- "style": "IPY_MODEL_82ecfe71d45a4517a465a9947efd2b4a",
- "value": "Downloading (…)cial_tokens_map.json: 100%"
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "493258070c9a4f768e1c7dfae946d829": {
+ "4c1a8d80c49949aeb929429db8dbf837": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1070,53 +1069,59 @@
"width": null
}
},
- "4ab501e2862c46b090137d7bbbd49966": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_86c3f3d5cd434502bebb3215877b851d",
- "IPY_MODEL_2fbf0d67a1304bf7a87f1679b72f57f6",
- "IPY_MODEL_0a8bfc6031944afebf4b3c79865f8ea6"
- ],
- "layout": "IPY_MODEL_95450001ffef4f67a5e522fa8116cc19"
- }
- },
- "4e9236e0306f49a09a90ee8c81c89c47": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
+ "53fe6e1de58e4fff92cf59ec127d8dee": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_559ca203fd414192a708e91b7f0fde49",
- "max": 231536,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_2c8301b596004c75ae1c831e68623c22",
- "value": 231536
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "559ca203fd414192a708e91b7f0fde49": {
+ "550a4cfbc3a44c1abbe1b0e18d1d9580": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1168,7 +1173,7 @@
"width": null
}
},
- "5e3e8e5c5e8b4256ae6a74417d1209ae": {
+ "56daf28acf074e7ebfa6c98abdbd6d96": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
@@ -1184,7 +1189,7 @@
"description_width": ""
}
},
- "650e6cc47615480ca4afe1511ba95994": {
+ "5815ad5149c4437db0ea987d4c318d64": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1236,7 +1241,152 @@
"width": null
}
},
- "68e8c235eeef4493aa44284e4bbfbb24": {
+ "5907a006a9a64d4a8cdd52240d06b746": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f295fbfff10d41e8af0e50c138d145b5",
+ "placeholder": "",
+ "style": "IPY_MODEL_785f315c695a42098bb4d39b2c3a2706",
+ "value": " 571/571 [00:00<00:00, 36.6kB/s]"
+ }
+ },
+ "59e2441f282e406cbd19b74193eab6b4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_dfc60d5dca2548d0ada81f5446856093",
+ "IPY_MODEL_2470fff5bf084972bf1337536d6e2749",
+ "IPY_MODEL_d7ec1afd1b6d45388022b0f6d1cbae3d"
+ ],
+ "layout": "IPY_MODEL_e9fac6fbae534569836c8ab59a1f24bc"
+ }
+ },
+ "5d8616c9ce0b4217aabca7a958c6f398": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_eae3f7e58c8d461da61aa6ccf4f8df1e",
+ "IPY_MODEL_49173f95470041ebbd5cc5c991889ae7",
+ "IPY_MODEL_605f525e69384e968b72542c817715ef"
+ ],
+ "layout": "IPY_MODEL_aea46180031d481195078ae5ff40f70c"
+ }
+ },
+ "605f525e69384e968b72542c817715ef": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7f0667abf777437bbe43339d4686aba9",
+ "placeholder": "",
+ "style": "IPY_MODEL_064371eadfd34fe4aa825735a503755a",
+ "value": " 232k/232k [00:00<00:00, 3.81MB/s]"
+ }
+ },
+ "6a1b22ad3a864c7eab918f6e7d1991fa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6ba49f9ffa4147d2bb4dfbd6bc76b61c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_fec1115da3f248128efe3af6860ab9cf",
+ "IPY_MODEL_274816b915864cb4a3f97c0b3384491e",
+ "IPY_MODEL_5907a006a9a64d4a8cdd52240d06b746"
+ ],
+ "layout": "IPY_MODEL_5815ad5149c4437db0ea987d4c318d64"
+ }
+ },
+ "7099901d2c14479a8d51dcc2fadbe531": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b2f121b60ff74c55bd288793eef4b099",
+ "IPY_MODEL_1a67d8af03144d97b0eebd1409b435e5",
+ "IPY_MODEL_78851933b1ea4c8da74a94ef72d9fa08"
+ ],
+ "layout": "IPY_MODEL_080a0292414545639cdb7937f62e8765"
+ }
+ },
+ "75730e54bc994dd282b415a89624d418": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1288,23 +1438,43 @@
"width": null
}
},
- "6a33b58985d243c595e2a30eac0eed86": {
+ "785f315c695a42098bb4d39b2c3a2706": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "7021a3c6c7e54ec69fb555fce3185009": {
+ "78851933b1ea4c8da74a94ef72d9fa08": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9432ab47d9ca4a3e9d865a220e28ec5a",
+ "placeholder": "",
+ "style": "IPY_MODEL_85ffa60f6d364eae8f2c68d52b4cdaf2",
+ "value": " 239/239 [00:00<00:00, 9.55kB/s]"
+ }
+ },
+ "78f831ce76e344bb98fe2163188da84f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
@@ -1320,15 +1490,15 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_f8dd258624d047f2b7cd39246904f702",
- "max": 363,
+ "layout": "IPY_MODEL_9957516192f64605ae0d58dbff40d524",
+ "max": 466021,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_32cf0f5a194f42549a538acccbfc5301",
- "value": 363
+ "style": "IPY_MODEL_56daf28acf074e7ebfa6c98abdbd6d96",
+ "value": 466021
}
},
- "71d5b0df6d714ed7a3fd73538ec4c99f": {
+ "7f0667abf777437bbe43339d4686aba9": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1380,7 +1550,37 @@
"width": null
}
},
- "765a65ab2b774ef7bbaa14e8b1d468b8": {
+ "85ffa60f6d364eae8f2c68d52b4cdaf2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "8e20abf7be4c4a06b9f677b97c232ea9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9432ab47d9ca4a3e9d865a220e28ec5a": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1432,7 +1632,31 @@
"width": null
}
},
- "7ebefdd01bca467a80ab2bbba09e1811": {
+ "97aa2385a25e4f5f9b791b826ffcd3f9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d102a3d3eb2e4946b79fb79981fdafcd",
+ "max": 437971872,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_34f0b66643d742298005d461f94a1b0f",
+ "value": 437971872
+ }
+ },
+ "9957516192f64605ae0d58dbff40d524": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1484,7 +1708,7 @@
"width": null
}
},
- "7edd6dc27eab4d6d962fee6344499527": {
+ "9d1a966bb7934c55966aedd3915eeacd": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1536,7 +1760,59 @@
"width": null
}
},
- "812c93a90d0b44ffa92e1d5e08a96b79": {
+ "a55bf43d7c0b4096bc90ad4bdcfea0a6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_550a4cfbc3a44c1abbe1b0e18d1d9580",
+ "placeholder": "",
+ "style": "IPY_MODEL_e6e0efd678b5425aaa224848aa80ae85",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "a827a5ab249c40d9ad6ed0a6a4b6d752": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "adf569f318764d0fa64e33d12feb3b5d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ae1ed422450f411db21b9813d90abffb": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1588,7 +1864,7 @@
"width": null
}
},
- "81feaad74bbf44e6a5f7156a2c5cbb2b": {
+ "aea46180031d481195078ae5ff40f70c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1640,7 +1916,7 @@
"width": null
}
},
- "82ecfe71d45a4517a465a9947efd2b4a": {
+ "afd9a7bcee5d4abe8bb5ee25eec38f19": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
@@ -1655,7 +1931,7 @@
"description_width": ""
}
},
- "86b539a7775b488f9e241696cfa50b86": {
+ "b2f121b60ff74c55bd288793eef4b099": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
@@ -1670,34 +1946,28 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_493258070c9a4f768e1c7dfae946d829",
+ "layout": "IPY_MODEL_4c1a8d80c49949aeb929429db8dbf837",
"placeholder": "",
- "style": "IPY_MODEL_01fdb22338944766bb3df3eef7e370cf",
- "value": "Downloading (…)okenizer_config.json: 100%"
+ "style": "IPY_MODEL_afd9a7bcee5d4abe8bb5ee25eec38f19",
+ "value": "special_tokens_map.json: 100%"
}
},
- "86c3f3d5cd434502bebb3215877b851d": {
+ "b6fdc38b73024a9eac973d0e54670dd9": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
- "model_name": "HTMLModel",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7ebefdd01bca467a80ab2bbba09e1811",
- "placeholder": "",
- "style": "IPY_MODEL_2adbf509a8564612a0634ecfd5ce2d05",
- "value": "Downloading pytorch_model.bin: 100%"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "8fe2572fa06940d18f4edad77b7bf067": {
+ "b9de40ddd3ac486c85724cee8c0cb9b8": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1749,7 +2019,7 @@
"width": null
}
},
- "95450001ffef4f67a5e522fa8116cc19": {
+ "bc50955a524c4280aaaac5f3e6c3ab62": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1801,80 +2071,7 @@
"width": null
}
},
- "aaa606f9a3ff42a79f352eab50bafe2e": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_ade3101516c44f94a5034c721d6c407b",
- "IPY_MODEL_c4f5e4643b2d4ca2b87cf1d00d6342c0",
- "IPY_MODEL_2ed3308ee5bc46e681a1ac89b34b5b07"
- ],
- "layout": "IPY_MODEL_7edd6dc27eab4d6d962fee6344499527"
- }
- },
- "ade3101516c44f94a5034c721d6c407b": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_13416ebdcd574213b8ee883ea90094e5",
- "placeholder": "",
- "style": "IPY_MODEL_e21eada10ca348079ac3ff312a5f5551",
- "value": "Downloading (…)lve/main/config.json: 100%"
- }
- },
- "ae53ab53fcae48fba131bf23bd1e4377": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b99096aa190146cc8bc134e7eb060b31": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "bb1dfd55b9e74a0b80e7750870593ecc": {
+ "ca910c385e5047d3a25e3e1dbc4089c7": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1926,7 +2123,7 @@
"width": null
}
},
- "bc930127d58d42ac865e9d5ce68c9940": {
+ "d102a3d3eb2e4946b79fb79981fdafcd": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -1978,61 +2175,28 @@
"width": null
}
},
- "c1cc36f640044a4c90f2a71420496ce0": {
+ "d7ec1afd1b6d45388022b0f6d1cbae3d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "c329a450456e43d0b7b869f2de6808e5": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "c4f5e4643b2d4ca2b87cf1d00d6342c0": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
+ "_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_8fe2572fa06940d18f4edad77b7bf067",
- "max": 571,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_f65b11c3b20846ea96c67ea48e24be29",
- "value": 571
+ "layout": "IPY_MODEL_28073ac8dc7f42bba74c4054871c4070",
+ "placeholder": "",
+ "style": "IPY_MODEL_1dda605f2f364a19824d5ab03666e696",
+ "value": " 363/363 [00:00<00:00, 15.9kB/s]"
}
},
- "cda15c545f12488ea3857545d9010d81": {
+ "d9b8d7ddadf1436e97e595c8eaced813": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -2084,52 +2248,7 @@
"width": null
}
},
- "da077b0bfb544ea1a3d461aec23412f5": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e218858572104c5abd65a4db4fa2cdb6",
- "max": 239,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_2f78bde2f43444fe87b0945b3547e6b5",
- "value": 239
- }
- },
- "de8c6720874d4e3687ed1c8ae44a8bc4": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1533275340c8404fad14801d1f1c29c9",
- "placeholder": "",
- "style": "IPY_MODEL_b99096aa190146cc8bc134e7eb060b31",
- "value": "Downloading (…)solve/main/vocab.txt: 100%"
- }
- },
- "e065f715e2e84666b408691b369fcd31": {
+ "dfc60d5dca2548d0ada81f5446856093": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
@@ -2144,13 +2263,13 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_29a8fd357de84bb885ff8cb74d975e23",
+ "layout": "IPY_MODEL_75730e54bc994dd282b415a89624d418",
"placeholder": "",
- "style": "IPY_MODEL_10e75ddbc8894944baf46753f48b13fd",
- "value": " 466k/466k [00:00<00:00, 6.74MB/s]"
+ "style": "IPY_MODEL_b6fdc38b73024a9eac973d0e54670dd9",
+ "value": "tokenizer_config.json: 100%"
}
},
- "e218858572104c5abd65a4db4fa2cdb6": {
+ "e48bccfc40294cbdaa5a94304b0f784e": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -2202,7 +2321,7 @@
"width": null
}
},
- "e21eada10ca348079ac3ff312a5f5551": {
+ "e6e0efd678b5425aaa224848aa80ae85": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
@@ -2217,28 +2336,7 @@
"description_width": ""
}
},
- "e45fa861bced4ec69361b6e2ff35cb87": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_812c93a90d0b44ffa92e1d5e08a96b79",
- "placeholder": "",
- "style": "IPY_MODEL_c1cc36f640044a4c90f2a71420496ce0",
- "value": " 232k/232k [00:00<00:00, 3.25MB/s]"
- }
- },
- "e5ee5a6862434d3fafaa47a00837cf84": {
+ "e9fac6fbae534569836c8ab59a1f24bc": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -2290,7 +2388,7 @@
"width": null
}
},
- "f0dafb9a2817491ea947fd1e09702df7": {
+ "eae3f7e58c8d461da61aa6ccf4f8df1e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
@@ -2305,35 +2403,13 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_71d5b0df6d714ed7a3fd73538ec4c99f",
+ "layout": "IPY_MODEL_ca910c385e5047d3a25e3e1dbc4089c7",
"placeholder": "",
- "style": "IPY_MODEL_c329a450456e43d0b7b869f2de6808e5",
- "value": "Downloading (…)/main/tokenizer.json: 100%"
- }
- },
- "f3f99620407d43f48ed8aba637c9814d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_f0dafb9a2817491ea947fd1e09702df7",
- "IPY_MODEL_f700417773ff4fa59ff2695bd750790c",
- "IPY_MODEL_e065f715e2e84666b408691b369fcd31"
- ],
- "layout": "IPY_MODEL_05e41a7177ab4b369297fab6d5ef6f7d"
+ "style": "IPY_MODEL_adf569f318764d0fa64e33d12feb3b5d",
+ "value": "vocab.txt: 100%"
}
},
- "f5df8d84dc824e9fa758b4c54546baf9": {
+ "ee403f65dc994a0eb948056265d9c645": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -2385,68 +2461,7 @@
"width": null
}
},
- "f65b11c3b20846ea96c67ea48e24be29": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "f700417773ff4fa59ff2695bd750790c": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_68e8c235eeef4493aa44284e4bbfbb24",
- "max": 466021,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_6a33b58985d243c595e2a30eac0eed86",
- "value": 466021
- }
- },
- "f8c2905e891b444baecfcd02187692a6": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0d358a124a1c4dc2b542b5339d8be8af",
- "placeholder": "",
- "style": "IPY_MODEL_ae53ab53fcae48fba131bf23bd1e4377",
- "value": " 363/363 [00:00<00:00, 20.7kB/s]"
- }
- },
- "f8dd258624d047f2b7cd39246904f702": {
+ "f295fbfff10d41e8af0e50c138d145b5": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
@@ -2498,7 +2513,7 @@
"width": null
}
},
- "fd928b696b904aa38eedd9c32e3facad": {
+ "f4244c857db94380820fdb4d596fd171": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
@@ -2513,31 +2528,46 @@
"description_width": ""
}
},
- "ff7e180b5a35431389ee4821437c79de": {
+ "fdb96f0441b14a0f9c96c625c2301382": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
- "model_name": "HBoxModel",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "fec1115da3f248128efe3af6860ab9cf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_de8c6720874d4e3687ed1c8ae44a8bc4",
- "IPY_MODEL_4e9236e0306f49a09a90ee8c81c89c47",
- "IPY_MODEL_e45fa861bced4ec69361b6e2ff35cb87"
- ],
- "layout": "IPY_MODEL_650e6cc47615480ca4afe1511ba95994"
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ee403f65dc994a0eb948056265d9c645",
+ "placeholder": "",
+ "style": "IPY_MODEL_8e20abf7be4c4a06b9f677b97c232ea9",
+ "value": "config.json: 100%"
}
}
}
}
},
"nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 0
}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForQuestionAnswering.ipynb
index 74d3014a49b2ab..cd4835de6d3325 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForQuestionAnswering.ipynb
@@ -6,7 +6,7 @@
"source": [
"![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
"\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNet.ipynb)\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForQuestionAnswering.ipynb)\n",
"\n",
"# Import ONNX MPNet models from HuggingFace 🤗 into Spark NLP 🚀\n",
"\n",
@@ -378,11 +378,10 @@
"colab": {
"provenance": []
},
- "kernelspec": {
- "display_name": "Python [conda env:sparknlp_dev]",
- "language": "python",
- "name": "conda-env-sparknlp_dev-py"
- },
+ "kernelspec": ,{
+ "display_name": "Python 3",
+ "name": "python3"
+ }
"language_info": {
"codemirror_mode": {
"name": "ipython",
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForSequenceClassification.ipynb
index f12c4869dd6829..7375b29fb00dee 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_MPNetForSequenceClassification.ipynb
@@ -35,7 +35,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Additionally, we need to install `setfit` to load the model components."
]
},
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Marian.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Marian.ipynb
index 44fbdde06405b7..8797a0fffaed1a 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Marian.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Marian.ipynb
@@ -66,7 +66,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.35.2 optimum sentencepiece"
+ "!pip install -q --upgrade transformers[onnx]==4.35.2 optimum sentencepiece onnx==1.14.0"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBERTa.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBERTa.ipynb
index b6bc00e7864ab2..0961409a1765e6 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBERTa.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBERTa.ipynb
@@ -28,7 +28,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
]
},
{
@@ -70,7 +70,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForQuestionAnswering.ipynb
index f3f25a96b53ef3..defac82160ee99 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForQuestionAnswering.ipynb
@@ -36,7 +36,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -118,7 +118,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForSequenceClassification.ipynb
index 5bf5ca4b7a29ac..54d26f2be294fb 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForSequenceClassification.ipynb
@@ -1,2926 +1,2774 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForSequenceClassification.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX RoBertaForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `RoBertaForSequenceClassification` is only available since in `Spark NLP 5.1.4` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import RoBERTa models trained/fine-tuned for sequence classification via `RobertaForSequenceClassification` or `TFRobertaForSequenceClassification`. These models are usually under `Text Classification` category and have `roberta` in their labels\n",
+ "- Reference: [TFRobertaForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/roberta#transformers.TFRobertaForSequenceClassification)\n",
+ "- Some [example models](https://huggingface.co/models?filter=roberta&pipeline_tag=text-classification)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "9t3PKzM5c7ly"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForSequenceClassification.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.0/301.0 kB\u001b[0m \u001b[31m37.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m78.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m43.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m118.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m24.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m53.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m76.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m124.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m45.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m87.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m121.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m125.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m63.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m116.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m113.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m89.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m40.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m63.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m81.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m86.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m87.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m73.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m88.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m113.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m48.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m49.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m106.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m62.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m30.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for optimum (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "tensorflow-datasets 4.9.3 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow numpy==1.23.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [arpanghoshal/EmoRoBERTa](https://huggingface.co/arpanghoshal/EmoRoBERTa) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "JRK1uzQWc7l4"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5fbfd91779024dd98573a8251b72791d",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Import ONNX RoBertaForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `RoBertaForSequenceClassification` is only available since in `Spark NLP 5.1.4` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import RoBERTa models trained/fine-tuned for sequence classification via `RobertaForSequenceClassification` or `TFRobertaForSequenceClassification`. These models are usually under `Text Classification` category and have `roberta` in their labels\n",
- "- Reference: [TFRobertaForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/roberta#transformers.TFRobertaForSequenceClassification)\n",
- "- Some [example models](https://huggingface.co/models?filter=roberta&pipeline_tag=text-classification)"
+ "text/plain": [
+ "(…)shal/EmoRoBERTa/resolve/main/config.json: 0%| | 0.00/1.72k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "OmQvX4u1c7l7"
- },
- "source": [
- "## Export and Save HuggingFace model"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using tf to export to ONNX.\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "rJIOJ03Cc7mE"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2ae0331621f44a7695e5d9d0a020fe92",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ "text/plain": [
+ "tf_model.h5: 0%| | 0.00/501M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4Mi0hDUWc7mF",
- "outputId": "50cbec2e-17f1-483f-ce06-7b0a096b74c7"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.0/301.0 kB\u001b[0m \u001b[31m37.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
- " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
- " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m78.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m43.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m118.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m24.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m53.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m76.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m124.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m45.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m87.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m121.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m125.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m63.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m116.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m113.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m89.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m40.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m63.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m81.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m86.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m87.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m73.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m88.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m113.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m48.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m49.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m106.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m62.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m30.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for optimum (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "tensorflow-datasets 4.9.3 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
+ "\n",
+ "All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at arpanghoshal/EmoRoBERTa.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "qyUn2L2gc7mF"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8bcfb10ceba3482b8a36cbe49b9ee981",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [arpanghoshal/EmoRoBERTa](https://huggingface.co/arpanghoshal/EmoRoBERTa) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ "text/plain": [
+ "(…)BERTa/resolve/main/tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "via4qVqNc7mG",
- "outputId": "9f88ca2f-f4a9-4a1e-df0d-897394879bb4",
- "colab": {
- "referenced_widgets": [
- "5fbfd91779024dd98573a8251b72791d",
- "5c69b2b921364ed689b86c7df266b9ac",
- "16009d1ead7b429b850233aa837a7b2d",
- "7eaf31b5e29d443faa7a51b7db827591",
- "3dcf304e43d548398c3a1ec31e35d175",
- "a357207cf2364537b6b7af4bcc3a023d",
- "8c686da42706418393303a5a20877092",
- "e9869c9db5eb49b097057715ce38aa81",
- "1db6eb8573fe496083a9dc7eecf04423",
- "3e10082ca6d84864b50d9af3732ab3e0",
- "6c4a17cdb4ef4b9fa1149b0974abea15",
- "2ae0331621f44a7695e5d9d0a020fe92",
- "015f0d45838e4af9af076781b7aa972d",
- "d6bb4c9501d440c888eb46111c838879",
- "6db4fe3b814945f0a6a5cdc0e4b51f6b",
- "55431ee7275b421494d58326adc2fc6b",
- "d49da31cb9ae404c9ec91b149664869a",
- "a9c84da30f4f427a96aa2c19abf76e68",
- "5157e3971ff5473f97f7e8289664740a",
- "99be2ddc8fdd4b0ca92ac3c404f165fc",
- "d0bffccc8dd241bd9c3f9aea84f9df91",
- "2379cb61017b4b489b1afe1cbee69271",
- "8bcfb10ceba3482b8a36cbe49b9ee981",
- "9b641237a71c4bdf859cb4156e0d1bf3",
- "9677f089e0d2429180c9da716359a330",
- "a7f11a68b922413baf9b08f31404bf99",
- "7ae2064f5300443bb2fd19479fb27153",
- "f9159108ec0248f7ad72963860d8a225",
- "8c4562067daf4971920ddb36672e6c9c",
- "0053473f98634c6db3fdc1a98375395e",
- "1a84293ac3ed46299b7eea091fdd974d",
- "fcc3121c28b446429a3dada8413b26d4",
- "093cdd054a864e9a874b446d8a08804d",
- "952294f9ffbf48648b3f1cdd961e3aed",
- "5fa90b3e28204414b4f90b9bd60f74f7",
- "391db5e30ad6496992ea0cb6d3b9987a",
- "c9ad96305414428cbf85fe47a9190e43",
- "5bc6df5269f046d999597dbd19603b71",
- "525dfac737ee45d79926005b93c32651",
- "b9c29e2ddc7e45798222aab437cc478d",
- "c997ceb059a146fcb0c703351c1761dc",
- "8096ba7c617848bd8d55da036098e1f1",
- "41873434547e4ab3b18ca625d92a5b7e",
- "ad20fa821a3c45369ca41b78c34d7410",
- "32e846056bf14e16a5b232a73a947c01",
- "9ca99a56ef5a4bcfad453e03db0da9c3",
- "52b4033e2cec4d3eb4988fd1974782ab",
- "da76feb9e0e842e7b00fcdde6bf8f06b",
- "3042f6cff3bd471cbd98f56175051895",
- "3669d7ae2362449da4a0a0780d5f63c5",
- "39a48467ad544aa2a87051d2f20a40b8",
- "3b447409a2be4ae885e660e1b4466f98",
- "2499b948f0ed4e228983136bcf5edb4a",
- "4bf72dff2ada442297768dc3cdf5a128",
- "3ba005b695274184a587bc747e1b1f2f",
- "0adaff4deffd49e5afd1ce940c7d39bb",
- "5eb8c09ffde046fa9dc04747e68ce62a",
- "d52070540deb459fac543ed8f25235ac",
- "51ed8cb8b4e5479fab1dc8e6a68e6e51",
- "f4a5589dd1fa4a969c0d1b7fc8e48899",
- "f053be36c9bd4219819805bdd7c2d889",
- "d5c7d6bb07f446c580cc828278d96ddc",
- "1d42f739e22740dd9a6a48b2ea9a842b",
- "c0cbff305800422e81169bbf6148a06e",
- "3028097af9f44d4c90fa052606381fb5",
- "f95544b3034e4a8c913d7214847b5ee4"
- ]
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "952294f9ffbf48648b3f1cdd961e3aed",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5fbfd91779024dd98573a8251b72791d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)shal/EmoRoBERTa/resolve/main/config.json: 0%| | 0.00/1.72k [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Framework not specified. Using tf to export to ONNX.\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2ae0331621f44a7695e5d9d0a020fe92",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "tf_model.h5: 0%| | 0.00/501M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.\n",
- "\n",
- "All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at arpanghoshal/EmoRoBERTa.\n",
- "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "8bcfb10ceba3482b8a36cbe49b9ee981",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)BERTa/resolve/main/tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "952294f9ffbf48648b3f1cdd961e3aed",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)oshal/EmoRoBERTa/resolve/main/vocab.json: 0%| | 0.00/798k [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "32e846056bf14e16a5b232a73a947c01",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)oshal/EmoRoBERTa/resolve/main/merges.txt: 0%| | 0.00/456k [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0adaff4deffd49e5afd1ce940c7d39bb",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)RTa/resolve/main/special_tokens_map.json: 0%| | 0.00/239 [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Using the export variant default. Available variants are:\n",
- "\t- default: The default ONNX variant.\n",
- "`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.\n",
- "Using framework TensorFlow: 2.11.1\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForSequenceClassification\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'arpanghoshal/EmoRoBERTa'\n",
- "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForSequenceClassification.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(ONNX_MODEL)"
+ "text/plain": [
+ "(…)oshal/EmoRoBERTa/resolve/main/vocab.json: 0%| | 0.00/798k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "rCOZMBBOc7mG"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "32e846056bf14e16a5b232a73a947c01",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [],
- "source": [
- "import json\n",
- "\n",
- "# Read the vocab JSON file\n",
- "with open('{}/vocab.json'.format(ONNX_MODEL), 'r') as json_file:\n",
- " tokenizer = json.load(json_file)\n",
- "\n",
- "# let's save the vocab as txt file\n",
- "with open('{}/vocab.txt'.format(ONNX_MODEL), 'w') as keys_file:\n",
- " for item in tokenizer.keys():\n",
- " keys_file.write(\"%s\\n\" % item)"
+ "text/plain": [
+ "(…)oshal/EmoRoBERTa/resolve/main/merges.txt: 0%| | 0.00/456k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "qAWiSFhgc7mH"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0adaff4deffd49e5afd1ce940c7d39bb",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "(…)RTa/resolve/main/special_tokens_map.json: 0%| | 0.00/239 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4j6J_ckRc7mH",
- "outputId": "eb942b5b-761a-43a8-edac-8cef55aa060a"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 491140\n",
- "-rw-r--r-- 1 root root 1894 Oct 16 21:06 config.json\n",
- "-rw-r--r-- 1 root root 456318 Oct 16 21:06 merges.txt\n",
- "-rw-r--r-- 1 root root 499132924 Oct 16 21:06 model.onnx\n",
- "-rw-r--r-- 1 root root 280 Oct 16 21:06 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 1337 Oct 16 21:06 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 2108619 Oct 16 21:06 tokenizer.json\n",
- "-rw-r--r-- 1 root root 798293 Oct 16 21:06 vocab.json\n",
- "-rw-r--r-- 1 root root 407065 Oct 16 21:07 vocab.txt\n"
- ]
- }
- ],
- "source": [
- "!ls -l {ONNX_MODEL}"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ "\t- default: The default ONNX variant.\n",
+ "`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.\n",
+ "Using framework TensorFlow: 2.11.1\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForSequenceClassification\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'arpanghoshal/EmoRoBERTa'\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForSequenceClassification.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(ONNX_MODEL)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "# Read the vocab JSON file\n",
+ "with open('{}/vocab.json'.format(ONNX_MODEL), 'r') as json_file:\n",
+ " tokenizer = json.load(json_file)\n",
+ "\n",
+ "# let's save the vocab as txt file\n",
+ "with open('{}/vocab.txt'.format(ONNX_MODEL), 'w') as keys_file:\n",
+ " for item in tokenizer.keys():\n",
+ " keys_file.write(\"%s\\n\" % item)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "ag3hROoTc7mI"
- },
- "source": [
- "- As you can see, we need to move `vocab.txt` and `merges.txt` from the tokenizer to `assets` folder which Spark NLP will look for\n",
- "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 491140\n",
+ "-rw-r--r-- 1 root root 1894 Oct 16 21:06 config.json\n",
+ "-rw-r--r-- 1 root root 456318 Oct 16 21:06 merges.txt\n",
+ "-rw-r--r-- 1 root root 499132924 Oct 16 21:06 model.onnx\n",
+ "-rw-r--r-- 1 root root 280 Oct 16 21:06 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 1337 Oct 16 21:06 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 2108619 Oct 16 21:06 tokenizer.json\n",
+ "-rw-r--r-- 1 root root 798293 Oct 16 21:06 vocab.json\n",
+ "-rw-r--r-- 1 root root 407065 Oct 16 21:07 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `vocab.txt` and `merges.txt` from the tokenizer to `assets` folder which Spark NLP will look for\n",
+ "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get label2id dictionary\n",
+ "labels = ort_model.config.id2label\n",
+ "# sort the dictionary based on the id\n",
+ "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
+ "\n",
+ "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
+ " f.write('\\n'.join(labels))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/vocab.txt {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/merges.txt {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `vocab.txt`, `merges.txt` and `labels.txt` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1lNEPm_Ic7mI"
- },
- "outputs": [],
- "source": [
- "!mkdir {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/arpanghoshal/EmoRoBERTa:\n",
+ "total 490296\n",
+ "drwxr-xr-x 2 root root 4096 Oct 16 21:08 assets\n",
+ "-rw-r--r-- 1 root root 1894 Oct 16 21:06 config.json\n",
+ "-rw-r--r-- 1 root root 499132924 Oct 16 21:06 model.onnx\n",
+ "-rw-r--r-- 1 root root 280 Oct 16 21:06 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 1337 Oct 16 21:06 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 2108619 Oct 16 21:06 tokenizer.json\n",
+ "-rw-r--r-- 1 root root 798293 Oct 16 21:06 vocab.json\n",
+ "\n",
+ "onnx_models/arpanghoshal/EmoRoBERTa/assets:\n",
+ "total 852\n",
+ "-rw-r--r-- 1 root root 248 Oct 16 21:08 labels.txt\n",
+ "-rw-r--r-- 1 root root 456318 Oct 16 21:06 merges.txt\n",
+ "-rw-r--r-- 1 root root 407065 Oct 16 21:07 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save RoBertaForSequenceClassification in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1eD9itghc7mJ"
- },
- "outputs": [],
- "source": [
- "# get label2id dictionary\n",
- "labels = ort_model.config.id2label\n",
- "# sort the dictionary based on the id\n",
- "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
- "\n",
- "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
- " f.write('\\n'.join(labels))"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--2023-10-16 21:08:22-- http://setup.johnsnowlabs.com/colab.sh\n",
+ "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n",
+ "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n",
+ "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
+ "Location: https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n",
+ "--2023-10-16 21:08:23-- https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n",
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1191 (1.2K) [text/plain]\n",
+ "Saving to: ‘STDOUT’\n",
+ "\n",
+ "- 100%[===================>] 1.16K --.-KB/s in 0s \n",
+ "\n",
+ "2023-10-16 21:08:23 (93.8 MB/s) - written to stdout [1191/1191]\n",
+ "\n",
+ "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "NoAl9qo3c7mJ"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/vocab.txt {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `RoBertaForSequenceClassification` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `RoBertaForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "sequenceClassifier = RoBertaForSequenceClassification.loadSavedModel(\n",
+ " ONNX_MODEL,\n",
+ " spark\n",
+ " )\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"class\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setMaxSentenceLength(128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your RoBertaForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Iy-uQsGBc7mJ"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/merges.txt {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 487524\n",
+ "drwxr-xr-x 5 root root 4096 Oct 16 21:15 fields\n",
+ "drwxr-xr-x 2 root root 4096 Oct 16 21:15 metadata\n",
+ "-rw-r--r-- 1 root root 499209257 Oct 16 21:16 roberta_classification_onnx\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForSequenceClassification model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sequenceClassifier_loaded = RoBertaForSequenceClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"class\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see what labels were used to train this model via `getClasses` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "AcIBtJgYc7mK"
- },
- "source": [
- "Voila! We have our `vocab.txt`, `merges.txt` and `labels.txt` inside assets directory"
+ "data": {
+ "text/plain": [
+ "['disgust',\n",
+ " 'optimism',\n",
+ " 'embarrassment',\n",
+ " 'amusement',\n",
+ " 'realization',\n",
+ " 'surprise',\n",
+ " 'grief',\n",
+ " 'caring',\n",
+ " 'disapproval',\n",
+ " 'disappointment',\n",
+ " 'joy',\n",
+ " 'confusion',\n",
+ " 'excitement',\n",
+ " 'approval',\n",
+ " 'curiosity',\n",
+ " 'anger',\n",
+ " 'love',\n",
+ " 'admiration',\n",
+ " 'gratitude',\n",
+ " 'annoyance',\n",
+ " 'remorse',\n",
+ " 'nervousness',\n",
+ " 'neutral',\n",
+ " 'pride',\n",
+ " 'fear',\n",
+ " 'sadness',\n",
+ " 'desire',\n",
+ " 'relief']"
]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# .getClasses was introduced in spark-nlp==3.4.0\n",
+ "sequenceClassifier_loaded.getClasses()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "document_assembler = DocumentAssembler() \\\n",
+ " .setInputCol('text') \\\n",
+ " .setOutputCol('document')\n",
+ "\n",
+ "tokenizer = Tokenizer() \\\n",
+ " .setInputCols(['document']) \\\n",
+ " .setOutputCol('token')\n",
+ "\n",
+ "pipeline = Pipeline(stages=[\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " sequenceClassifier_loaded\n",
+ "])\n",
+ "\n",
+ "# couple of simple examples\n",
+ "example = spark.createDataFrame([[\"I love you!\"], ['I feel lucky to be here.']]).toDF(\"text\")\n",
+ "\n",
+ "result = pipeline.fit(example).transform(example)\n",
+ "\n",
+ "# result is a DataFrame\n",
+ "result.select(\"text\", \"class.result\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `RoBertaForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "0053473f98634c6db3fdc1a98375395e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "3C62iosEc7mK",
- "outputId": "640df9da-a3ed-4548-a1e9-5004b765545e"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "onnx_models/arpanghoshal/EmoRoBERTa:\n",
- "total 490296\n",
- "drwxr-xr-x 2 root root 4096 Oct 16 21:08 assets\n",
- "-rw-r--r-- 1 root root 1894 Oct 16 21:06 config.json\n",
- "-rw-r--r-- 1 root root 499132924 Oct 16 21:06 model.onnx\n",
- "-rw-r--r-- 1 root root 280 Oct 16 21:06 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 1337 Oct 16 21:06 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 2108619 Oct 16 21:06 tokenizer.json\n",
- "-rw-r--r-- 1 root root 798293 Oct 16 21:06 vocab.json\n",
- "\n",
- "onnx_models/arpanghoshal/EmoRoBERTa/assets:\n",
- "total 852\n",
- "-rw-r--r-- 1 root root 248 Oct 16 21:08 labels.txt\n",
- "-rw-r--r-- 1 root root 456318 Oct 16 21:06 merges.txt\n",
- "-rw-r--r-- 1 root root 407065 Oct 16 21:07 vocab.txt\n"
- ]
- }
+ "015f0d45838e4af9af076781b7aa972d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d49da31cb9ae404c9ec91b149664869a",
+ "placeholder": "",
+ "style": "IPY_MODEL_a9c84da30f4f427a96aa2c19abf76e68",
+ "value": "tf_model.h5: 100%"
+ }
+ },
+ "093cdd054a864e9a874b446d8a08804d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0adaff4deffd49e5afd1ce940c7d39bb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5eb8c09ffde046fa9dc04747e68ce62a",
+ "IPY_MODEL_d52070540deb459fac543ed8f25235ac",
+ "IPY_MODEL_51ed8cb8b4e5479fab1dc8e6a68e6e51"
],
- "source": [
- "!ls -lR {ONNX_MODEL}"
- ]
+ "layout": "IPY_MODEL_f4a5589dd1fa4a969c0d1b7fc8e48899"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "zK6xJduGc7mK"
- },
- "source": [
- "## Import and Save RoBertaForSequenceClassification in Spark NLP\n"
- ]
+ "16009d1ead7b429b850233aa837a7b2d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9869c9db5eb49b097057715ce38aa81",
+ "max": 1720,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1db6eb8573fe496083a9dc7eecf04423",
+ "value": 1720
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Tx7PXwVdc7mL"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "1a84293ac3ed46299b7eea091fdd974d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "PDf1ieS-c7mL",
- "outputId": "eadcbbe7-fde3-410e-e8b6-e35d5c2704d1"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "--2023-10-16 21:08:22-- http://setup.johnsnowlabs.com/colab.sh\n",
- "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n",
- "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n",
- "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
- "Location: https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n",
- "--2023-10-16 21:08:23-- https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n",
- "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
- "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
- "HTTP request sent, awaiting response... 200 OK\n",
- "Length: 1191 (1.2K) [text/plain]\n",
- "Saving to: ‘STDOUT’\n",
- "\n",
- "- 100%[===================>] 1.16K --.-KB/s in 0s \n",
- "\n",
- "2023-10-16 21:08:23 (93.8 MB/s) - written to stdout [1191/1191]\n",
- "\n",
- "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
+ "1d42f739e22740dd9a6a48b2ea9a842b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1db6eb8573fe496083a9dc7eecf04423": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2379cb61017b4b489b1afe1cbee69271": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2499b948f0ed4e228983136bcf5edb4a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2ae0331621f44a7695e5d9d0a020fe92": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_015f0d45838e4af9af076781b7aa972d",
+ "IPY_MODEL_d6bb4c9501d440c888eb46111c838879",
+ "IPY_MODEL_6db4fe3b814945f0a6a5cdc0e4b51f6b"
],
- "source": [
- "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_55431ee7275b421494d58326adc2fc6b"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "q0BWnXsac7mL"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "3028097af9f44d4c90fa052606381fb5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "QrJGDaJmc7mL",
- "outputId": "deab4121-a931-40de-9f57-1bb336a6900b"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
+ "3042f6cff3bd471cbd98f56175051895": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "32e846056bf14e16a5b232a73a947c01": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9ca99a56ef5a4bcfad453e03db0da9c3",
+ "IPY_MODEL_52b4033e2cec4d3eb4988fd1974782ab",
+ "IPY_MODEL_da76feb9e0e842e7b00fcdde6bf8f06b"
],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "layout": "IPY_MODEL_3042f6cff3bd471cbd98f56175051895"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "OJwHAwCBc7mM"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `RoBertaForSequenceClassification` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `RoBertaForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
- ]
+ "3669d7ae2362449da4a0a0780d5f63c5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "GTuQL16tc7mM"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "sequenceClassifier = RoBertaForSequenceClassification.loadSavedModel(\n",
- " ONNX_MODEL,\n",
- " spark\n",
- " )\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"class\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setMaxSentenceLength(128)"
- ]
+ "391db5e30ad6496992ea0cb6d3b9987a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c997ceb059a146fcb0c703351c1761dc",
+ "max": 798293,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_8096ba7c617848bd8d55da036098e1f1",
+ "value": 798293
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "onEFpv7Tc7mM"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "39a48467ad544aa2a87051d2f20a40b8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "UJaNm35nc7mM"
- },
- "outputs": [],
- "source": [
- "sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
- ]
+ "3b447409a2be4ae885e660e1b4466f98": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "790iTH95c7mN"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "3ba005b695274184a587bc747e1b1f2f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "KD-CLAnPc7mN"
- },
- "outputs": [],
- "source": [
- "!rm -rf {ONNX_MODEL}"
- ]
+ "3dcf304e43d548398c3a1ec31e35d175": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "92VPKttJc7mN"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your RoBertaForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "3e10082ca6d84864b50d9af3732ab3e0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "N_nIp5DOc7mN",
- "outputId": "f10a34d1-9792-418c-9571-a9f3ebaa371e"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 487524\n",
- "drwxr-xr-x 5 root root 4096 Oct 16 21:15 fields\n",
- "drwxr-xr-x 2 root root 4096 Oct 16 21:15 metadata\n",
- "-rw-r--r-- 1 root root 499209257 Oct 16 21:16 roberta_classification_onnx\n"
- ]
- }
+ "41873434547e4ab3b18ca625d92a5b7e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4bf72dff2ada442297768dc3cdf5a128": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5157e3971ff5473f97f7e8289664740a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "51ed8cb8b4e5479fab1dc8e6a68e6e51": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3028097af9f44d4c90fa052606381fb5",
+ "placeholder": "",
+ "style": "IPY_MODEL_f95544b3034e4a8c913d7214847b5ee4",
+ "value": " 239/239 [00:00<00:00, 9.82kB/s]"
+ }
+ },
+ "525dfac737ee45d79926005b93c32651": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "52b4033e2cec4d3eb4988fd1974782ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3b447409a2be4ae885e660e1b4466f98",
+ "max": 456356,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2499b948f0ed4e228983136bcf5edb4a",
+ "value": 456356
+ }
+ },
+ "55431ee7275b421494d58326adc2fc6b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5bc6df5269f046d999597dbd19603b71": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c69b2b921364ed689b86c7df266b9ac": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a357207cf2364537b6b7af4bcc3a023d",
+ "placeholder": "",
+ "style": "IPY_MODEL_8c686da42706418393303a5a20877092",
+ "value": "(…)shal/EmoRoBERTa/resolve/main/config.json: 100%"
+ }
+ },
+ "5eb8c09ffde046fa9dc04747e68ce62a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f053be36c9bd4219819805bdd7c2d889",
+ "placeholder": "",
+ "style": "IPY_MODEL_d5c7d6bb07f446c580cc828278d96ddc",
+ "value": "(…)RTa/resolve/main/special_tokens_map.json: 100%"
+ }
+ },
+ "5fa90b3e28204414b4f90b9bd60f74f7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_525dfac737ee45d79926005b93c32651",
+ "placeholder": "",
+ "style": "IPY_MODEL_b9c29e2ddc7e45798222aab437cc478d",
+ "value": "(…)oshal/EmoRoBERTa/resolve/main/vocab.json: 100%"
+ }
+ },
+ "5fbfd91779024dd98573a8251b72791d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5c69b2b921364ed689b86c7df266b9ac",
+ "IPY_MODEL_16009d1ead7b429b850233aa837a7b2d",
+ "IPY_MODEL_7eaf31b5e29d443faa7a51b7db827591"
],
- "source": [
- "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
- ]
+ "layout": "IPY_MODEL_3dcf304e43d548398c3a1ec31e35d175"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "urmb3Gjuc7mN"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForSequenceClassification model 😊"
- ]
+ "6c4a17cdb4ef4b9fa1149b0974abea15": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "mvLEGwPSc7mO"
- },
- "outputs": [],
- "source": [
- "sequenceClassifier_loaded = RoBertaForSequenceClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"class\")"
- ]
+ "6db4fe3b814945f0a6a5cdc0e4b51f6b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d0bffccc8dd241bd9c3f9aea84f9df91",
+ "placeholder": "",
+ "style": "IPY_MODEL_2379cb61017b4b489b1afe1cbee69271",
+ "value": " 501M/501M [00:01<00:00, 212MB/s]"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "exdim7FZc7mO"
- },
- "source": [
- "You can see what labels were used to train this model via `getClasses` function:"
- ]
+ "7ae2064f5300443bb2fd19479fb27153": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "umjbpKeGc7mO",
- "outputId": "1b989ddf-cbb4-4870-bfe0-39f38c420926"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['disgust',\n",
- " 'optimism',\n",
- " 'embarrassment',\n",
- " 'amusement',\n",
- " 'realization',\n",
- " 'surprise',\n",
- " 'grief',\n",
- " 'caring',\n",
- " 'disapproval',\n",
- " 'disappointment',\n",
- " 'joy',\n",
- " 'confusion',\n",
- " 'excitement',\n",
- " 'approval',\n",
- " 'curiosity',\n",
- " 'anger',\n",
- " 'love',\n",
- " 'admiration',\n",
- " 'gratitude',\n",
- " 'annoyance',\n",
- " 'remorse',\n",
- " 'nervousness',\n",
- " 'neutral',\n",
- " 'pride',\n",
- " 'fear',\n",
- " 'sadness',\n",
- " 'desire',\n",
- " 'relief']"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "7eaf31b5e29d443faa7a51b7db827591": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3e10082ca6d84864b50d9af3732ab3e0",
+ "placeholder": "",
+ "style": "IPY_MODEL_6c4a17cdb4ef4b9fa1149b0974abea15",
+ "value": " 1.72k/1.72k [00:00<00:00, 35.4kB/s]"
+ }
+ },
+ "8096ba7c617848bd8d55da036098e1f1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "8bcfb10ceba3482b8a36cbe49b9ee981": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9b641237a71c4bdf859cb4156e0d1bf3",
+ "IPY_MODEL_9677f089e0d2429180c9da716359a330",
+ "IPY_MODEL_a7f11a68b922413baf9b08f31404bf99"
],
- "source": [
- "# .getClasses was introduced in spark-nlp==3.4.0\n",
- "sequenceClassifier_loaded.getClasses()"
- ]
+ "layout": "IPY_MODEL_7ae2064f5300443bb2fd19479fb27153"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "lV66JB3oc7mP"
- },
- "source": [
- "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
- ]
+ "8c4562067daf4971920ddb36672e6c9c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "lSAqXURrc7mP"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "document_assembler = DocumentAssembler() \\\n",
- " .setInputCol('text') \\\n",
- " .setOutputCol('document')\n",
- "\n",
- "tokenizer = Tokenizer() \\\n",
- " .setInputCols(['document']) \\\n",
- " .setOutputCol('token')\n",
- "\n",
- "pipeline = Pipeline(stages=[\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " sequenceClassifier_loaded\n",
- "])\n",
- "\n",
- "# couple of simple examples\n",
- "example = spark.createDataFrame([[\"I love you!\"], ['I feel lucky to be here.']]).toDF(\"text\")\n",
- "\n",
- "result = pipeline.fit(example).transform(example)\n",
- "\n",
- "# result is a DataFrame\n",
- "result.select(\"text\", \"class.result\").show()"
- ]
+ "8c686da42706418393303a5a20877092": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "HK07UoXQc7mP"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `RoBertaForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "0053473f98634c6db3fdc1a98375395e": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "015f0d45838e4af9af076781b7aa972d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d49da31cb9ae404c9ec91b149664869a",
- "placeholder": "",
- "style": "IPY_MODEL_a9c84da30f4f427a96aa2c19abf76e68",
- "value": "tf_model.h5: 100%"
- }
- },
- "093cdd054a864e9a874b446d8a08804d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "0adaff4deffd49e5afd1ce940c7d39bb": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_5eb8c09ffde046fa9dc04747e68ce62a",
- "IPY_MODEL_d52070540deb459fac543ed8f25235ac",
- "IPY_MODEL_51ed8cb8b4e5479fab1dc8e6a68e6e51"
- ],
- "layout": "IPY_MODEL_f4a5589dd1fa4a969c0d1b7fc8e48899"
- }
- },
- "16009d1ead7b429b850233aa837a7b2d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e9869c9db5eb49b097057715ce38aa81",
- "max": 1720,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_1db6eb8573fe496083a9dc7eecf04423",
- "value": 1720
- }
- },
- "1a84293ac3ed46299b7eea091fdd974d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "1d42f739e22740dd9a6a48b2ea9a842b": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1db6eb8573fe496083a9dc7eecf04423": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "2379cb61017b4b489b1afe1cbee69271": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "2499b948f0ed4e228983136bcf5edb4a": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "2ae0331621f44a7695e5d9d0a020fe92": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_015f0d45838e4af9af076781b7aa972d",
- "IPY_MODEL_d6bb4c9501d440c888eb46111c838879",
- "IPY_MODEL_6db4fe3b814945f0a6a5cdc0e4b51f6b"
- ],
- "layout": "IPY_MODEL_55431ee7275b421494d58326adc2fc6b"
- }
- },
- "3028097af9f44d4c90fa052606381fb5": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3042f6cff3bd471cbd98f56175051895": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "32e846056bf14e16a5b232a73a947c01": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_9ca99a56ef5a4bcfad453e03db0da9c3",
- "IPY_MODEL_52b4033e2cec4d3eb4988fd1974782ab",
- "IPY_MODEL_da76feb9e0e842e7b00fcdde6bf8f06b"
- ],
- "layout": "IPY_MODEL_3042f6cff3bd471cbd98f56175051895"
- }
- },
- "3669d7ae2362449da4a0a0780d5f63c5": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "391db5e30ad6496992ea0cb6d3b9987a": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c997ceb059a146fcb0c703351c1761dc",
- "max": 798293,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_8096ba7c617848bd8d55da036098e1f1",
- "value": 798293
- }
- },
- "39a48467ad544aa2a87051d2f20a40b8": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3b447409a2be4ae885e660e1b4466f98": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3ba005b695274184a587bc747e1b1f2f": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3dcf304e43d548398c3a1ec31e35d175": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3e10082ca6d84864b50d9af3732ab3e0": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "41873434547e4ab3b18ca625d92a5b7e": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4bf72dff2ada442297768dc3cdf5a128": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "5157e3971ff5473f97f7e8289664740a": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "51ed8cb8b4e5479fab1dc8e6a68e6e51": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3028097af9f44d4c90fa052606381fb5",
- "placeholder": "",
- "style": "IPY_MODEL_f95544b3034e4a8c913d7214847b5ee4",
- "value": " 239/239 [00:00<00:00, 9.82kB/s]"
- }
- },
- "525dfac737ee45d79926005b93c32651": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "52b4033e2cec4d3eb4988fd1974782ab": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3b447409a2be4ae885e660e1b4466f98",
- "max": 456356,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_2499b948f0ed4e228983136bcf5edb4a",
- "value": 456356
- }
- },
- "55431ee7275b421494d58326adc2fc6b": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "5bc6df5269f046d999597dbd19603b71": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "5c69b2b921364ed689b86c7df266b9ac": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_a357207cf2364537b6b7af4bcc3a023d",
- "placeholder": "",
- "style": "IPY_MODEL_8c686da42706418393303a5a20877092",
- "value": "(…)shal/EmoRoBERTa/resolve/main/config.json: 100%"
- }
- },
- "5eb8c09ffde046fa9dc04747e68ce62a": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f053be36c9bd4219819805bdd7c2d889",
- "placeholder": "",
- "style": "IPY_MODEL_d5c7d6bb07f446c580cc828278d96ddc",
- "value": "(…)RTa/resolve/main/special_tokens_map.json: 100%"
- }
- },
- "5fa90b3e28204414b4f90b9bd60f74f7": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_525dfac737ee45d79926005b93c32651",
- "placeholder": "",
- "style": "IPY_MODEL_b9c29e2ddc7e45798222aab437cc478d",
- "value": "(…)oshal/EmoRoBERTa/resolve/main/vocab.json: 100%"
- }
- },
- "5fbfd91779024dd98573a8251b72791d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_5c69b2b921364ed689b86c7df266b9ac",
- "IPY_MODEL_16009d1ead7b429b850233aa837a7b2d",
- "IPY_MODEL_7eaf31b5e29d443faa7a51b7db827591"
- ],
- "layout": "IPY_MODEL_3dcf304e43d548398c3a1ec31e35d175"
- }
- },
- "6c4a17cdb4ef4b9fa1149b0974abea15": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "6db4fe3b814945f0a6a5cdc0e4b51f6b": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_d0bffccc8dd241bd9c3f9aea84f9df91",
- "placeholder": "",
- "style": "IPY_MODEL_2379cb61017b4b489b1afe1cbee69271",
- "value": " 501M/501M [00:01<00:00, 212MB/s]"
- }
- },
- "7ae2064f5300443bb2fd19479fb27153": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7eaf31b5e29d443faa7a51b7db827591": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3e10082ca6d84864b50d9af3732ab3e0",
- "placeholder": "",
- "style": "IPY_MODEL_6c4a17cdb4ef4b9fa1149b0974abea15",
- "value": " 1.72k/1.72k [00:00<00:00, 35.4kB/s]"
- }
- },
- "8096ba7c617848bd8d55da036098e1f1": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "8bcfb10ceba3482b8a36cbe49b9ee981": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_9b641237a71c4bdf859cb4156e0d1bf3",
- "IPY_MODEL_9677f089e0d2429180c9da716359a330",
- "IPY_MODEL_a7f11a68b922413baf9b08f31404bf99"
- ],
- "layout": "IPY_MODEL_7ae2064f5300443bb2fd19479fb27153"
- }
- },
- "8c4562067daf4971920ddb36672e6c9c": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "8c686da42706418393303a5a20877092": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "952294f9ffbf48648b3f1cdd961e3aed": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_5fa90b3e28204414b4f90b9bd60f74f7",
- "IPY_MODEL_391db5e30ad6496992ea0cb6d3b9987a",
- "IPY_MODEL_c9ad96305414428cbf85fe47a9190e43"
- ],
- "layout": "IPY_MODEL_5bc6df5269f046d999597dbd19603b71"
- }
- },
- "9677f089e0d2429180c9da716359a330": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0053473f98634c6db3fdc1a98375395e",
- "max": 25,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_1a84293ac3ed46299b7eea091fdd974d",
- "value": 25
- }
- },
- "99be2ddc8fdd4b0ca92ac3c404f165fc": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "9b641237a71c4bdf859cb4156e0d1bf3": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f9159108ec0248f7ad72963860d8a225",
- "placeholder": "",
- "style": "IPY_MODEL_8c4562067daf4971920ddb36672e6c9c",
- "value": "(…)BERTa/resolve/main/tokenizer_config.json: 100%"
- }
- },
- "9ca99a56ef5a4bcfad453e03db0da9c3": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3669d7ae2362449da4a0a0780d5f63c5",
- "placeholder": "",
- "style": "IPY_MODEL_39a48467ad544aa2a87051d2f20a40b8",
- "value": "(…)oshal/EmoRoBERTa/resolve/main/merges.txt: 100%"
- }
- },
- "a357207cf2364537b6b7af4bcc3a023d": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a7f11a68b922413baf9b08f31404bf99": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fcc3121c28b446429a3dada8413b26d4",
- "placeholder": "",
- "style": "IPY_MODEL_093cdd054a864e9a874b446d8a08804d",
- "value": " 25.0/25.0 [00:00<00:00, 1.64kB/s]"
- }
- },
- "a9c84da30f4f427a96aa2c19abf76e68": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ad20fa821a3c45369ca41b78c34d7410": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b9c29e2ddc7e45798222aab437cc478d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "c0cbff305800422e81169bbf6148a06e": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "c997ceb059a146fcb0c703351c1761dc": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c9ad96305414428cbf85fe47a9190e43": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_41873434547e4ab3b18ca625d92a5b7e",
- "placeholder": "",
- "style": "IPY_MODEL_ad20fa821a3c45369ca41b78c34d7410",
- "value": " 798k/798k [00:00<00:00, 3.23MB/s]"
- }
- },
- "d0bffccc8dd241bd9c3f9aea84f9df91": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d49da31cb9ae404c9ec91b149664869a": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d52070540deb459fac543ed8f25235ac": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1d42f739e22740dd9a6a48b2ea9a842b",
- "max": 239,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_c0cbff305800422e81169bbf6148a06e",
- "value": 239
- }
- },
- "d5c7d6bb07f446c580cc828278d96ddc": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "d6bb4c9501d440c888eb46111c838879": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5157e3971ff5473f97f7e8289664740a",
- "max": 501322656,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_99be2ddc8fdd4b0ca92ac3c404f165fc",
- "value": 501322656
- }
- },
- "da76feb9e0e842e7b00fcdde6bf8f06b": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_4bf72dff2ada442297768dc3cdf5a128",
- "placeholder": "",
- "style": "IPY_MODEL_3ba005b695274184a587bc747e1b1f2f",
- "value": " 456k/456k [00:00<00:00, 937kB/s]"
- }
- },
- "e9869c9db5eb49b097057715ce38aa81": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f053be36c9bd4219819805bdd7c2d889": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f4a5589dd1fa4a969c0d1b7fc8e48899": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f9159108ec0248f7ad72963860d8a225": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f95544b3034e4a8c913d7214847b5ee4": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "fcc3121c28b446429a3dada8413b26d4": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- }
- }
+ "952294f9ffbf48648b3f1cdd961e3aed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5fa90b3e28204414b4f90b9bd60f74f7",
+ "IPY_MODEL_391db5e30ad6496992ea0cb6d3b9987a",
+ "IPY_MODEL_c9ad96305414428cbf85fe47a9190e43"
+ ],
+ "layout": "IPY_MODEL_5bc6df5269f046d999597dbd19603b71"
+ }
+ },
+ "9677f089e0d2429180c9da716359a330": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0053473f98634c6db3fdc1a98375395e",
+ "max": 25,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1a84293ac3ed46299b7eea091fdd974d",
+ "value": 25
+ }
+ },
+ "99be2ddc8fdd4b0ca92ac3c404f165fc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "9b641237a71c4bdf859cb4156e0d1bf3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f9159108ec0248f7ad72963860d8a225",
+ "placeholder": "",
+ "style": "IPY_MODEL_8c4562067daf4971920ddb36672e6c9c",
+ "value": "(…)BERTa/resolve/main/tokenizer_config.json: 100%"
+ }
+ },
+ "9ca99a56ef5a4bcfad453e03db0da9c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3669d7ae2362449da4a0a0780d5f63c5",
+ "placeholder": "",
+ "style": "IPY_MODEL_39a48467ad544aa2a87051d2f20a40b8",
+ "value": "(…)oshal/EmoRoBERTa/resolve/main/merges.txt: 100%"
+ }
+ },
+ "a357207cf2364537b6b7af4bcc3a023d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a7f11a68b922413baf9b08f31404bf99": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fcc3121c28b446429a3dada8413b26d4",
+ "placeholder": "",
+ "style": "IPY_MODEL_093cdd054a864e9a874b446d8a08804d",
+ "value": " 25.0/25.0 [00:00<00:00, 1.64kB/s]"
+ }
+ },
+ "a9c84da30f4f427a96aa2c19abf76e68": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ad20fa821a3c45369ca41b78c34d7410": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b9c29e2ddc7e45798222aab437cc478d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c0cbff305800422e81169bbf6148a06e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "c997ceb059a146fcb0c703351c1761dc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c9ad96305414428cbf85fe47a9190e43": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_41873434547e4ab3b18ca625d92a5b7e",
+ "placeholder": "",
+ "style": "IPY_MODEL_ad20fa821a3c45369ca41b78c34d7410",
+ "value": " 798k/798k [00:00<00:00, 3.23MB/s]"
+ }
+ },
+ "d0bffccc8dd241bd9c3f9aea84f9df91": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d49da31cb9ae404c9ec91b149664869a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d52070540deb459fac543ed8f25235ac": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1d42f739e22740dd9a6a48b2ea9a842b",
+ "max": 239,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_c0cbff305800422e81169bbf6148a06e",
+ "value": 239
+ }
+ },
+ "d5c7d6bb07f446c580cc828278d96ddc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d6bb4c9501d440c888eb46111c838879": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5157e3971ff5473f97f7e8289664740a",
+ "max": 501322656,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_99be2ddc8fdd4b0ca92ac3c404f165fc",
+ "value": 501322656
+ }
+ },
+ "da76feb9e0e842e7b00fcdde6bf8f06b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4bf72dff2ada442297768dc3cdf5a128",
+ "placeholder": "",
+ "style": "IPY_MODEL_3ba005b695274184a587bc747e1b1f2f",
+ "value": " 456k/456k [00:00<00:00, 937kB/s]"
+ }
+ },
+ "e9869c9db5eb49b097057715ce38aa81": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f053be36c9bd4219819805bdd7c2d889": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f4a5589dd1fa4a969c0d1b7fc8e48899": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f9159108ec0248f7ad72963860d8a225": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f95544b3034e4a8c913d7214847b5ee4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fcc3121c28b446429a3dada8413b26d4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForTokenClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForTokenClassification.ipynb
index 042a46f3bfe27c..8b32924f9d79fe 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForTokenClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForTokenClassification.ipynb
@@ -1,3261 +1,3097 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForTokenClassification.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX RoBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `RoBertaForTokenClassification` is only available since in `Spark NLP 5.1.4` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import RoBERTa models trained/fine-tuned for token classification via `RobertaForTokenClassification` or `TFRobertaForTokenClassification`. These models are usually under `Token Classification` category and have `roberta` in their labels\n",
+ "- Reference: [TFRobertaForTokenClassification](https://huggingface.co/transformers/model_doc/roberta.html#tfrobertafortokenclassification)\n",
+ "- Some [example models](https://huggingface.co/models?filter=roberta&pipeline_tag=token-classification)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases\n",
+ "- Albert uses SentencePiece, so we will have to install that as well"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "vctEEFUYk8Nu"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_RoBertaForTokenClassification.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m53.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.0/301.0 kB\u001b[0m \u001b[31m28.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m32.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m79.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m81.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m24.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m37.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m56.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m68.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m69.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m70.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m60.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m112.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m109.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m115.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m110.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m113.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m89.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m119.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m123.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m86.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m112.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m113.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m61.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m83.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m119.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m91.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m112.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m112.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m48.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m48.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m96.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m103.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m101.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m100.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for optimum (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "tensorflow-datasets 4.9.3 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [philschmid/distilroberta-base-ner-wikiann-conll2003-3-class](https://huggingface.co/philschmid/distilroberta-base-ner-wikiann-conll2003-3-class) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "O3-MQYmLk8Nx"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "19403261179149178f0b54c0a125f198",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Import ONNX RoBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `RoBertaForTokenClassification` is only available since in `Spark NLP 5.1.4` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import RoBERTa models trained/fine-tuned for token classification via `RobertaForTokenClassification` or `TFRobertaForTokenClassification`. These models are usually under `Token Classification` category and have `roberta` in their labels\n",
- "- Reference: [TFRobertaForTokenClassification](https://huggingface.co/transformers/model_doc/roberta.html#tfrobertafortokenclassification)\n",
- "- Some [example models](https://huggingface.co/models?filter=roberta&pipeline_tag=token-classification)"
+ "text/plain": [
+ "(…)nll2003-3-class/resolve/main/config.json: 0%| | 0.00/962 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "ivFy0MSnk8N4"
- },
- "source": [
- "## Export and Save HuggingFace model"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export to ONNX.\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "r984bEv3k8N4"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ef98c16f3cf84728b593cc1be081c9b2",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases\n",
- "- Albert uses SentencePiece, so we will have to install that as well"
+ "text/plain": [
+ "pytorch_model.bin: 0%| | 0.00/326M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "WS-qrmd1k8N4",
- "outputId": "bfe19085-721a-431d-e097-50a33fcde327"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a4fc8b5fe2a643e384545787ac7f0f98",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m53.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.0/301.0 kB\u001b[0m \u001b[31m28.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
- " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
- " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m32.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m79.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m81.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m24.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m37.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m56.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m68.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m69.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m70.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m60.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m112.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m109.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m115.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m110.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m113.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m89.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m119.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m123.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m86.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m112.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m113.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m61.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m83.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m119.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m91.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m112.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m112.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m48.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m48.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m96.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m103.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m101.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m100.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for optimum (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "tensorflow-datasets 4.9.3 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "text/plain": [
+ "(…)class/resolve/main/tokenizer_config.json: 0%| | 0.00/293 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "DfkYcjS3k8N5"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2f5282ba4afc45d9b43b18ccd50cd984",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [philschmid/distilroberta-base-ner-wikiann-conll2003-3-class](https://huggingface.co/philschmid/distilroberta-base-ner-wikiann-conll2003-3-class) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ "text/plain": [
+ "(…)onll2003-3-class/resolve/main/vocab.json: 0%| | 0.00/798k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Wwxrj4lYk8N5",
- "outputId": "d54d22af-eadf-4106-c0b7-6b3ef12a8f88",
- "colab": {
- "referenced_widgets": [
- "19403261179149178f0b54c0a125f198",
- "c1ac31ed4ded444586913047df105d63",
- "1ccb91d2654d47d7aa883c016a8b4e49",
- "191f55fc572b4f5a9b41e0c0dbd20414",
- "595ee009a3604de7a1d1c12e127b8f85",
- "dcd04fa06ba744b697e172fa8365b009",
- "a4d075edad9243b5af25945e727e011f",
- "0b89fef36cfa4301a27a58e6a1dec354",
- "380ca1c370174ba58ec9b669a4e4a2ff",
- "2db07d4ad6ff49b5b5ce76ea60c655fe",
- "528de7c76ae84ccfb4614faddf133cfb",
- "ef98c16f3cf84728b593cc1be081c9b2",
- "985ccb56f8df409d97836c83c7f57e44",
- "050dbc230ffa47e1a8b293f622b4ea57",
- "1898befd7f36447ea5194e2c68d00c31",
- "a56652b29f014151ad02630853888abe",
- "0993a78aca3348468b8615d096466b80",
- "33f7e429f91b4e359fd05bbef32e5a46",
- "fb53f3bf55664c4e9aa685809d9b550f",
- "7d587ac5d3ee4a89a99bc5c0b8044669",
- "da56089370b6403fa52b9787b84ad86d",
- "b8ed253331fe4d4e9b7a10dd282ea172",
- "a4fc8b5fe2a643e384545787ac7f0f98",
- "e542527c11944d088846505d08c52806",
- "9b31a5e52daf472b8900efa4b0c396b1",
- "e0a9517ffb70428cba4d8b8749603444",
- "af0ef342d5b14b86b18bfe3dba1c6b9f",
- "8caea9c1009646e9839e9e410f1006b8",
- "afe9cc266e03429b84d094ab1cb29a97",
- "b006c4b472c341ab9fe9781a79c78c0a",
- "12eee2449390429192df0e0394598062",
- "ca78d9dbdc854ad7b45d8e0de3cf2d9f",
- "ec26975de7f3493795c3cdf5a471a59d",
- "2f5282ba4afc45d9b43b18ccd50cd984",
- "389699b0b0404174be6e092dae71a8e8",
- "3112e06bfd6d41408438c93cddcd306a",
- "3b4f2c964e5d42428b9ce9db06be885c",
- "624dde2b14de4f87b969772e7792666c",
- "59b60b985b14488f817d2693785d966f",
- "f582ccbb662e4a9bbf3699c7f69a56d5",
- "2f3625a6b69e4b28a0180d769ef3eafb",
- "84fd9d1f1821408f9adbbba4d3b1ff04",
- "f86b586715894e38abc7196a303f54e7",
- "025eda03fbad4dd18d7dae72aedd0106",
- "00d4770b7983470192967410038d0068",
- "c33367067b5c41529e4cb8301bb4631b",
- "f56039a6fb3f4dc7913ea06536e476c3",
- "f4f066292c894698a145d97645ef0852",
- "74cda4b89a124b009c187cb98a04899d",
- "9ba0de8569964cb6a092f0359711d28e",
- "4dd732d7405d4c8bac2b1d4297ec8088",
- "b0491ab858894635bcaae3b307db5d71",
- "885a765e32834db28e6a6aa47a853a8f",
- "c3a63933131d4d1788b3a552a3c16a8e",
- "74aa18fd09fb43f099973ef2c77f1fea",
- "166671c87f7d48feafb05bb58c739600",
- "1383a4cde8674b039c59c15a63901461",
- "3de9ee6582f1423598931cea294c532c",
- "ac0bec7637084a0e8e51231de626f69e",
- "d72f34d844b542b0a4e1ec0264880cab",
- "0fc0a55a8d234a17a7d725a93c45fd50",
- "b7201dc4f9584e1c97488425a766c4c6",
- "f6daf5c19e5e48e193af8b70e61bc1a3",
- "1689463b2a3d4b39bb427733c160287a",
- "2b8a0ac51adf4cd9b94deb879084696f",
- "7d9fe46aa745480e9947350f443be964",
- "f3774c5dc43b44d69c256f259cb22a8c",
- "ff652925a7bd40bc9939be17cacd818e",
- "c56e4e6111074a75820abdce355adb39",
- "4c7a6185b4e549ae82bbd69c5951b89d",
- "ff0eedc4f66f4fc9894f22a83245c55f",
- "82cac82421da40a4bb21aacad13aef90",
- "2094ab4f61fc4dbbb2a45b8cb10d696f",
- "8572225a06a14ca8965a8039b2298070",
- "29e72b30d40644b4afdd19b137f8952d",
- "ded495ef523c49779063258bcec83c4e",
- "9daae647890b4527bf66caccc15afff8"
- ]
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "00d4770b7983470192967410038d0068",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "19403261179149178f0b54c0a125f198",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)nll2003-3-class/resolve/main/config.json: 0%| | 0.00/962 [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ef98c16f3cf84728b593cc1be081c9b2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "pytorch_model.bin: 0%| | 0.00/326M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a4fc8b5fe2a643e384545787ac7f0f98",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)class/resolve/main/tokenizer_config.json: 0%| | 0.00/293 [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2f5282ba4afc45d9b43b18ccd50cd984",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)onll2003-3-class/resolve/main/vocab.json: 0%| | 0.00/798k [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "00d4770b7983470192967410038d0068",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)onll2003-3-class/resolve/main/merges.txt: 0%| | 0.00/456k [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "166671c87f7d48feafb05bb58c739600",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)2003-3-class/resolve/main/tokenizer.json: 0%| | 0.00/1.36M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f3774c5dc43b44d69c256f259cb22a8c",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "(…)ass/resolve/main/special_tokens_map.json: 0%| | 0.00/239 [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Using the export variant default. Available variants are:\n",
- "\t- default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.0.1+cu118\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n",
- "verbose: False, log level: Level.ERROR\n",
- "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
- "\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForTokenClassification\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'philschmid/distilroberta-base-ner-wikiann-conll2003-3-class'\n",
- "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForTokenClassification.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(ONNX_MODEL)"
+ "text/plain": [
+ "(…)onll2003-3-class/resolve/main/merges.txt: 0%| | 0.00/456k [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "YgpzkzZpk8N5"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "166671c87f7d48feafb05bb58c739600",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [],
- "source": [
- "import json\n",
- "\n",
- "# Read the vocab JSON file\n",
- "with open('{}/vocab.json'.format(ONNX_MODEL), 'r') as json_file:\n",
- " tokenizer = json.load(json_file)\n",
- "\n",
- "# let's save the vocab as txt file\n",
- "with open('{}/vocab.txt'.format(ONNX_MODEL), 'w') as keys_file:\n",
- " for item in tokenizer.keys():\n",
- " keys_file.write(\"%s\\n\" % item)"
+ "text/plain": [
+ "(…)2003-3-class/resolve/main/tokenizer.json: 0%| | 0.00/1.36M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "8k9xqg-sk8N6"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f3774c5dc43b44d69c256f259cb22a8c",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "(…)ass/resolve/main/special_tokens_map.json: 0%| | 0.00/239 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "mh74NCVMk8N6",
- "outputId": "a3f534a5-2e80-4ba4-d3f0-846defda6932"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 321892\n",
- "drwxr-xr-x 2 root root 4096 Oct 16 22:13 assets\n",
- "-rw-r--r-- 1 root root 1034 Oct 16 22:10 config.json\n",
- "-rw-r--r-- 1 root root 326278966 Oct 16 22:10 model.onnx\n",
- "-rw-r--r-- 1 root root 280 Oct 16 22:10 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 350 Oct 16 22:10 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 2108715 Oct 16 22:10 tokenizer.json\n",
- "-rw-r--r-- 1 root root 798293 Oct 16 22:10 vocab.json\n",
- "-rw-r--r-- 1 root root 407065 Oct 16 22:18 vocab.txt\n"
- ]
- }
- ],
- "source": [
- "!ls -l {ONNX_MODEL}"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ "\t- default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.0.1+cu118\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "i24WdH62k8N7"
- },
- "outputs": [],
- "source": [
- "!mkdir {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n",
+ "verbose: False, log level: Level.ERROR\n",
+ "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForTokenClassification\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'philschmid/distilroberta-base-ner-wikiann-conll2003-3-class'\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForTokenClassification.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(ONNX_MODEL)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "# Read the vocab JSON file\n",
+ "with open('{}/vocab.json'.format(ONNX_MODEL), 'r') as json_file:\n",
+ " tokenizer = json.load(json_file)\n",
+ "\n",
+ "# let's save the vocab as txt file\n",
+ "with open('{}/vocab.txt'.format(ONNX_MODEL), 'w') as keys_file:\n",
+ " for item in tokenizer.keys():\n",
+ " keys_file.write(\"%s\\n\" % item)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "R7clJnf-k8N7"
- },
- "source": [
- "- As you can see, we need to move `vocab.txt` and `merges.txt` from the tokenizer to `assets` folder which Spark NLP will look for\n",
- "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 321892\n",
+ "drwxr-xr-x 2 root root 4096 Oct 16 22:13 assets\n",
+ "-rw-r--r-- 1 root root 1034 Oct 16 22:10 config.json\n",
+ "-rw-r--r-- 1 root root 326278966 Oct 16 22:10 model.onnx\n",
+ "-rw-r--r-- 1 root root 280 Oct 16 22:10 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 350 Oct 16 22:10 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 2108715 Oct 16 22:10 tokenizer.json\n",
+ "-rw-r--r-- 1 root root 798293 Oct 16 22:10 vocab.json\n",
+ "-rw-r--r-- 1 root root 407065 Oct 16 22:18 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `vocab.txt` and `merges.txt` from the tokenizer to `assets` folder which Spark NLP will look for\n",
+ "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get label2id dictionary\n",
+ "labels = ort_model.config.id2label\n",
+ "# sort the dictionary based on the id\n",
+ "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
+ "\n",
+ "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
+ " f.write('\\n'.join(labels))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/vocab.txt {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/merges.txt {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `vocab.txt` and `merges.txt` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Acu1x8BQk8N7"
- },
- "outputs": [],
- "source": [
- "# get label2id dictionary\n",
- "labels = ort_model.config.id2label\n",
- "# sort the dictionary based on the id\n",
- "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
- "\n",
- "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
- " f.write('\\n'.join(labels))"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/philschmid/distilroberta-base-ner-wikiann-conll2003-3-class:\n",
+ "total 321892\n",
+ "drwxr-xr-x 2 root root 4096 Oct 16 22:18 assets\n",
+ "-rw-r--r-- 1 root root 1034 Oct 16 22:10 config.json\n",
+ "-rw-r--r-- 1 root root 326278966 Oct 16 22:10 model.onnx\n",
+ "-rw-r--r-- 1 root root 280 Oct 16 22:10 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 350 Oct 16 22:10 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 2108715 Oct 16 22:10 tokenizer.json\n",
+ "-rw-r--r-- 1 root root 798293 Oct 16 22:10 vocab.json\n",
+ "-rw-r--r-- 1 root root 407065 Oct 16 22:18 vocab.txt\n",
+ "\n",
+ "onnx_models/philschmid/distilroberta-base-ner-wikiann-conll2003-3-class/assets:\n",
+ "total 852\n",
+ "-rw-r--r-- 1 root root 37 Oct 16 22:18 labels.txt\n",
+ "-rw-r--r-- 1 root root 456318 Oct 16 22:10 merges.txt\n",
+ "-rw-r--r-- 1 root root 407065 Oct 16 22:12 vocab.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save RoBertaForTokenClassification in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "7Jm6IFTSk8N8"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/vocab.txt {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "PfDg1SyDk8N8"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/merges.txt {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `RoBertaForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `RoBertaForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "tokenClassifier = RoBertaForTokenClassification\\\n",
+ " .loadSavedModel(ONNX_MODEL, spark)\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"ner\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setMaxSentenceLength(128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your RoBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "m4cXGPOEk8N8"
- },
- "source": [
- "Voila! We have our `vocab.txt` and `merges.txt` inside assets directory"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 318696\n",
+ "drwxr-xr-x 5 root root 4096 Oct 16 22:21 fields\n",
+ "drwxr-xr-x 2 root root 4096 Oct 16 22:21 metadata\n",
+ "-rw-r--r-- 1 root root 326328924 Oct 16 22:21 roberta_classification_onnx\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForTokenClassification model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenClassifier_loaded = RoBertaForTokenClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"ner\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see what labels were used to train this model via `getClasses` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['B-LOC', 'I-ORG', 'I-LOC', 'I-PER', 'B-ORG', 'O', 'B-PER']"
]
- },
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# .getClasses was introduced in spark-nlp==3.4.0\n",
+ "tokenClassifier_loaded.getClasses()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "dWb6ZEtXk8N8",
- "outputId": "7f110ef4-8cb5-48e1-925d-338ae57c5046"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "onnx_models/philschmid/distilroberta-base-ner-wikiann-conll2003-3-class:\n",
- "total 321892\n",
- "drwxr-xr-x 2 root root 4096 Oct 16 22:18 assets\n",
- "-rw-r--r-- 1 root root 1034 Oct 16 22:10 config.json\n",
- "-rw-r--r-- 1 root root 326278966 Oct 16 22:10 model.onnx\n",
- "-rw-r--r-- 1 root root 280 Oct 16 22:10 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 350 Oct 16 22:10 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 2108715 Oct 16 22:10 tokenizer.json\n",
- "-rw-r--r-- 1 root root 798293 Oct 16 22:10 vocab.json\n",
- "-rw-r--r-- 1 root root 407065 Oct 16 22:18 vocab.txt\n",
- "\n",
- "onnx_models/philschmid/distilroberta-base-ner-wikiann-conll2003-3-class/assets:\n",
- "total 852\n",
- "-rw-r--r-- 1 root root 37 Oct 16 22:18 labels.txt\n",
- "-rw-r--r-- 1 root root 456318 Oct 16 22:10 merges.txt\n",
- "-rw-r--r-- 1 root root 407065 Oct 16 22:12 vocab.txt\n"
- ]
- }
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+--------------------+\n",
+ "| text| result|\n",
+ "+--------------------+--------------------+\n",
+ "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
+ "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
+ "+--------------------+--------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "document_assembler = DocumentAssembler() \\\n",
+ " .setInputCol('text') \\\n",
+ " .setOutputCol('document')\n",
+ "\n",
+ "tokenizer = Tokenizer() \\\n",
+ " .setInputCols(['document']) \\\n",
+ " .setOutputCol('token')\n",
+ "\n",
+ "pipeline = Pipeline(stages=[\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " tokenClassifier_loaded\n",
+ "])\n",
+ "\n",
+ "# couple of simple examples\n",
+ "example = spark.createDataFrame([[\"My name is Clara and I live in Berkeley, California.\"], ['My name is Clara and I live in Berkeley, California.']]).toDF(\"text\")\n",
+ "\n",
+ "result = pipeline.fit(example).transform(example)\n",
+ "\n",
+ "# result is a DataFrame\n",
+ "result.select(\"text\", \"ner.result\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `RoBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "00d4770b7983470192967410038d0068": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_c33367067b5c41529e4cb8301bb4631b",
+ "IPY_MODEL_f56039a6fb3f4dc7913ea06536e476c3",
+ "IPY_MODEL_f4f066292c894698a145d97645ef0852"
],
- "source": [
- "!ls -lR {ONNX_MODEL}"
- ]
+ "layout": "IPY_MODEL_74cda4b89a124b009c187cb98a04899d"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "W9-Fowe_k8N9"
- },
- "source": [
- "## Import and Save RoBertaForTokenClassification in Spark NLP\n"
- ]
+ "025eda03fbad4dd18d7dae72aedd0106": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "h0II8wYvk8N9"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "050dbc230ffa47e1a8b293f622b4ea57": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb53f3bf55664c4e9aa685809d9b550f",
+ "max": 326181207,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_7d587ac5d3ee4a89a99bc5c0b8044669",
+ "value": 326181207
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "gsYqnf_gk8N9",
- "outputId": "9dfa476f-0c5c-48ae-daf6-0b7fb3ef4bcb"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
- ],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "0993a78aca3348468b8615d096466b80": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Q1MWCjcLk8N9"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "0b89fef36cfa4301a27a58e6a1dec354": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "ugpRusXwk8N9",
- "outputId": "4f1f2146-f7ea-46ee-becb-c7d45b224b89"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
+ "0fc0a55a8d234a17a7d725a93c45fd50": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "12eee2449390429192df0e0394598062": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "1383a4cde8674b039c59c15a63901461": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0fc0a55a8d234a17a7d725a93c45fd50",
+ "placeholder": "",
+ "style": "IPY_MODEL_b7201dc4f9584e1c97488425a766c4c6",
+ "value": "(…)2003-3-class/resolve/main/tokenizer.json: 100%"
+ }
+ },
+ "166671c87f7d48feafb05bb58c739600": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1383a4cde8674b039c59c15a63901461",
+ "IPY_MODEL_3de9ee6582f1423598931cea294c532c",
+ "IPY_MODEL_ac0bec7637084a0e8e51231de626f69e"
],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "layout": "IPY_MODEL_d72f34d844b542b0a4e1ec0264880cab"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "pj76mzEuk8N-"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `RoBertaForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `RoBertaForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
- ]
+ "1689463b2a3d4b39bb427733c160287a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "wX3vfOybk8N-"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "tokenClassifier = RoBertaForTokenClassification\\\n",
- " .loadSavedModel(ONNX_MODEL, spark)\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"ner\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setMaxSentenceLength(128)"
- ]
+ "1898befd7f36447ea5194e2c68d00c31": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_da56089370b6403fa52b9787b84ad86d",
+ "placeholder": "",
+ "style": "IPY_MODEL_b8ed253331fe4d4e9b7a10dd282ea172",
+ "value": " 326M/326M [00:06<00:00, 37.1MB/s]"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "RMulAxFNk8N-"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "191f55fc572b4f5a9b41e0c0dbd20414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2db07d4ad6ff49b5b5ce76ea60c655fe",
+ "placeholder": "",
+ "style": "IPY_MODEL_528de7c76ae84ccfb4614faddf133cfb",
+ "value": " 962/962 [00:00<00:00, 26.2kB/s]"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "dJjyUZOjk8N-"
- },
- "outputs": [],
- "source": [
- "tokenClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
- ]
+ "19403261179149178f0b54c0a125f198": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_c1ac31ed4ded444586913047df105d63",
+ "IPY_MODEL_1ccb91d2654d47d7aa883c016a8b4e49",
+ "IPY_MODEL_191f55fc572b4f5a9b41e0c0dbd20414"
+ ],
+ "layout": "IPY_MODEL_595ee009a3604de7a1d1c12e127b8f85"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "y9Uez8OFk8N-"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "1ccb91d2654d47d7aa883c016a8b4e49": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0b89fef36cfa4301a27a58e6a1dec354",
+ "max": 962,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_380ca1c370174ba58ec9b669a4e4a2ff",
+ "value": 962
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "uA5C0YHqk8N_"
- },
- "outputs": [],
- "source": [
- "!rm -rf {ONNX_MODEL}"
- ]
+ "2094ab4f61fc4dbbb2a45b8cb10d696f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "fpI56D69k8N_"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your RoBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "29e72b30d40644b4afdd19b137f8952d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Eh1srgXGk8N_",
- "outputId": "edf7b117-637b-4031-a831-4b0c254dd223"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 318696\n",
- "drwxr-xr-x 5 root root 4096 Oct 16 22:21 fields\n",
- "drwxr-xr-x 2 root root 4096 Oct 16 22:21 metadata\n",
- "-rw-r--r-- 1 root root 326328924 Oct 16 22:21 roberta_classification_onnx\n"
- ]
- }
+ "2b8a0ac51adf4cd9b94deb879084696f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2db07d4ad6ff49b5b5ce76ea60c655fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2f3625a6b69e4b28a0180d769ef3eafb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2f5282ba4afc45d9b43b18ccd50cd984": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_389699b0b0404174be6e092dae71a8e8",
+ "IPY_MODEL_3112e06bfd6d41408438c93cddcd306a",
+ "IPY_MODEL_3b4f2c964e5d42428b9ce9db06be885c"
],
- "source": [
- "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
- ]
+ "layout": "IPY_MODEL_624dde2b14de4f87b969772e7792666c"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "v4phtEi-k8N_"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForTokenClassification model 😊"
- ]
+ "3112e06bfd6d41408438c93cddcd306a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2f3625a6b69e4b28a0180d769ef3eafb",
+ "max": 798293,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_84fd9d1f1821408f9adbbba4d3b1ff04",
+ "value": 798293
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "5n0pmqzfk8N_"
- },
- "outputs": [],
- "source": [
- "tokenClassifier_loaded = RoBertaForTokenClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"ner\")"
- ]
+ "33f7e429f91b4e359fd05bbef32e5a46": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "02FdQlx_k8OA"
- },
- "source": [
- "You can see what labels were used to train this model via `getClasses` function:"
- ]
+ "380ca1c370174ba58ec9b669a4e4a2ff": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "yLWujMlek8OA",
- "outputId": "ce590db8-19a7-4882-cf86-fbd028e6eeab"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['B-LOC', 'I-ORG', 'I-LOC', 'I-PER', 'B-ORG', 'O', 'B-PER']"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "389699b0b0404174be6e092dae71a8e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_59b60b985b14488f817d2693785d966f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f582ccbb662e4a9bbf3699c7f69a56d5",
+ "value": "(…)onll2003-3-class/resolve/main/vocab.json: 100%"
+ }
+ },
+ "3b4f2c964e5d42428b9ce9db06be885c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f86b586715894e38abc7196a303f54e7",
+ "placeholder": "",
+ "style": "IPY_MODEL_025eda03fbad4dd18d7dae72aedd0106",
+ "value": " 798k/798k [00:00<00:00, 15.4MB/s]"
+ }
+ },
+ "3de9ee6582f1423598931cea294c532c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f6daf5c19e5e48e193af8b70e61bc1a3",
+ "max": 1355931,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1689463b2a3d4b39bb427733c160287a",
+ "value": 1355931
+ }
+ },
+ "4c7a6185b4e549ae82bbd69c5951b89d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ded495ef523c49779063258bcec83c4e",
+ "placeholder": "",
+ "style": "IPY_MODEL_9daae647890b4527bf66caccc15afff8",
+ "value": " 239/239 [00:00<00:00, 11.6kB/s]"
+ }
+ },
+ "4dd732d7405d4c8bac2b1d4297ec8088": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "528de7c76ae84ccfb4614faddf133cfb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "595ee009a3604de7a1d1c12e127b8f85": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "59b60b985b14488f817d2693785d966f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "624dde2b14de4f87b969772e7792666c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "74aa18fd09fb43f099973ef2c77f1fea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "74cda4b89a124b009c187cb98a04899d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7d587ac5d3ee4a89a99bc5c0b8044669": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "7d9fe46aa745480e9947350f443be964": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "82cac82421da40a4bb21aacad13aef90": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "84fd9d1f1821408f9adbbba4d3b1ff04": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "8572225a06a14ca8965a8039b2298070": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "885a765e32834db28e6a6aa47a853a8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "8caea9c1009646e9839e9e410f1006b8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "985ccb56f8df409d97836c83c7f57e44": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0993a78aca3348468b8615d096466b80",
+ "placeholder": "",
+ "style": "IPY_MODEL_33f7e429f91b4e359fd05bbef32e5a46",
+ "value": "pytorch_model.bin: 100%"
+ }
+ },
+ "9b31a5e52daf472b8900efa4b0c396b1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b006c4b472c341ab9fe9781a79c78c0a",
+ "max": 293,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_12eee2449390429192df0e0394598062",
+ "value": 293
+ }
+ },
+ "9ba0de8569964cb6a092f0359711d28e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9daae647890b4527bf66caccc15afff8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a4d075edad9243b5af25945e727e011f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a4fc8b5fe2a643e384545787ac7f0f98": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e542527c11944d088846505d08c52806",
+ "IPY_MODEL_9b31a5e52daf472b8900efa4b0c396b1",
+ "IPY_MODEL_e0a9517ffb70428cba4d8b8749603444"
],
- "source": [
- "# .getClasses was introduced in spark-nlp==3.4.0\n",
- "tokenClassifier_loaded.getClasses()"
- ]
+ "layout": "IPY_MODEL_af0ef342d5b14b86b18bfe3dba1c6b9f"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "zV9cWCX3k8OA"
- },
- "source": [
- "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
- ]
+ "a56652b29f014151ad02630853888abe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "y1-iMx67k8OA",
- "outputId": "e1bcf11b-91f3-416d-fe08-6ba3d96ba9a5"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+--------------------+--------------------+\n",
- "| text| result|\n",
- "+--------------------+--------------------+\n",
- "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
- "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
- "+--------------------+--------------------+\n",
- "\n"
- ]
- }
+ "ac0bec7637084a0e8e51231de626f69e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2b8a0ac51adf4cd9b94deb879084696f",
+ "placeholder": "",
+ "style": "IPY_MODEL_7d9fe46aa745480e9947350f443be964",
+ "value": " 1.36M/1.36M [00:00<00:00, 26.4MB/s]"
+ }
+ },
+ "af0ef342d5b14b86b18bfe3dba1c6b9f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "afe9cc266e03429b84d094ab1cb29a97": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b006c4b472c341ab9fe9781a79c78c0a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0491ab858894635bcaae3b307db5d71": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b7201dc4f9584e1c97488425a766c4c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b8ed253331fe4d4e9b7a10dd282ea172": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c1ac31ed4ded444586913047df105d63": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dcd04fa06ba744b697e172fa8365b009",
+ "placeholder": "",
+ "style": "IPY_MODEL_a4d075edad9243b5af25945e727e011f",
+ "value": "(…)nll2003-3-class/resolve/main/config.json: 100%"
+ }
+ },
+ "c33367067b5c41529e4cb8301bb4631b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9ba0de8569964cb6a092f0359711d28e",
+ "placeholder": "",
+ "style": "IPY_MODEL_4dd732d7405d4c8bac2b1d4297ec8088",
+ "value": "(…)onll2003-3-class/resolve/main/merges.txt: 100%"
+ }
+ },
+ "c3a63933131d4d1788b3a552a3c16a8e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c56e4e6111074a75820abdce355adb39": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8572225a06a14ca8965a8039b2298070",
+ "max": 239,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_29e72b30d40644b4afdd19b137f8952d",
+ "value": 239
+ }
+ },
+ "ca78d9dbdc854ad7b45d8e0de3cf2d9f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d72f34d844b542b0a4e1ec0264880cab": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "da56089370b6403fa52b9787b84ad86d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "dcd04fa06ba744b697e172fa8365b009": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ded495ef523c49779063258bcec83c4e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e0a9517ffb70428cba4d8b8749603444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ca78d9dbdc854ad7b45d8e0de3cf2d9f",
+ "placeholder": "",
+ "style": "IPY_MODEL_ec26975de7f3493795c3cdf5a471a59d",
+ "value": " 293/293 [00:00<00:00, 15.7kB/s]"
+ }
+ },
+ "e542527c11944d088846505d08c52806": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8caea9c1009646e9839e9e410f1006b8",
+ "placeholder": "",
+ "style": "IPY_MODEL_afe9cc266e03429b84d094ab1cb29a97",
+ "value": "(…)class/resolve/main/tokenizer_config.json: 100%"
+ }
+ },
+ "ec26975de7f3493795c3cdf5a471a59d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ef98c16f3cf84728b593cc1be081c9b2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_985ccb56f8df409d97836c83c7f57e44",
+ "IPY_MODEL_050dbc230ffa47e1a8b293f622b4ea57",
+ "IPY_MODEL_1898befd7f36447ea5194e2c68d00c31"
],
- "source": [
- "document_assembler = DocumentAssembler() \\\n",
- " .setInputCol('text') \\\n",
- " .setOutputCol('document')\n",
- "\n",
- "tokenizer = Tokenizer() \\\n",
- " .setInputCols(['document']) \\\n",
- " .setOutputCol('token')\n",
- "\n",
- "pipeline = Pipeline(stages=[\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " tokenClassifier_loaded\n",
- "])\n",
- "\n",
- "# couple of simple examples\n",
- "example = spark.createDataFrame([[\"My name is Clara and I live in Berkeley, California.\"], ['My name is Clara and I live in Berkeley, California.']]).toDF(\"text\")\n",
- "\n",
- "result = pipeline.fit(example).transform(example)\n",
- "\n",
- "# result is a DataFrame\n",
- "result.select(\"text\", \"ner.result\").show()"
- ]
+ "layout": "IPY_MODEL_a56652b29f014151ad02630853888abe"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "U_RooKYek8OB"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `RoBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "00d4770b7983470192967410038d0068": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_c33367067b5c41529e4cb8301bb4631b",
- "IPY_MODEL_f56039a6fb3f4dc7913ea06536e476c3",
- "IPY_MODEL_f4f066292c894698a145d97645ef0852"
- ],
- "layout": "IPY_MODEL_74cda4b89a124b009c187cb98a04899d"
- }
- },
- "025eda03fbad4dd18d7dae72aedd0106": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "050dbc230ffa47e1a8b293f622b4ea57": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fb53f3bf55664c4e9aa685809d9b550f",
- "max": 326181207,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_7d587ac5d3ee4a89a99bc5c0b8044669",
- "value": 326181207
- }
- },
- "0993a78aca3348468b8615d096466b80": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0b89fef36cfa4301a27a58e6a1dec354": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0fc0a55a8d234a17a7d725a93c45fd50": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "12eee2449390429192df0e0394598062": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "1383a4cde8674b039c59c15a63901461": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0fc0a55a8d234a17a7d725a93c45fd50",
- "placeholder": "",
- "style": "IPY_MODEL_b7201dc4f9584e1c97488425a766c4c6",
- "value": "(…)2003-3-class/resolve/main/tokenizer.json: 100%"
- }
- },
- "166671c87f7d48feafb05bb58c739600": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_1383a4cde8674b039c59c15a63901461",
- "IPY_MODEL_3de9ee6582f1423598931cea294c532c",
- "IPY_MODEL_ac0bec7637084a0e8e51231de626f69e"
- ],
- "layout": "IPY_MODEL_d72f34d844b542b0a4e1ec0264880cab"
- }
- },
- "1689463b2a3d4b39bb427733c160287a": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "1898befd7f36447ea5194e2c68d00c31": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_da56089370b6403fa52b9787b84ad86d",
- "placeholder": "",
- "style": "IPY_MODEL_b8ed253331fe4d4e9b7a10dd282ea172",
- "value": " 326M/326M [00:06<00:00, 37.1MB/s]"
- }
- },
- "191f55fc572b4f5a9b41e0c0dbd20414": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2db07d4ad6ff49b5b5ce76ea60c655fe",
- "placeholder": "",
- "style": "IPY_MODEL_528de7c76ae84ccfb4614faddf133cfb",
- "value": " 962/962 [00:00<00:00, 26.2kB/s]"
- }
- },
- "19403261179149178f0b54c0a125f198": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_c1ac31ed4ded444586913047df105d63",
- "IPY_MODEL_1ccb91d2654d47d7aa883c016a8b4e49",
- "IPY_MODEL_191f55fc572b4f5a9b41e0c0dbd20414"
- ],
- "layout": "IPY_MODEL_595ee009a3604de7a1d1c12e127b8f85"
- }
- },
- "1ccb91d2654d47d7aa883c016a8b4e49": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0b89fef36cfa4301a27a58e6a1dec354",
- "max": 962,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_380ca1c370174ba58ec9b669a4e4a2ff",
- "value": 962
- }
- },
- "2094ab4f61fc4dbbb2a45b8cb10d696f": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "29e72b30d40644b4afdd19b137f8952d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "2b8a0ac51adf4cd9b94deb879084696f": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2db07d4ad6ff49b5b5ce76ea60c655fe": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2f3625a6b69e4b28a0180d769ef3eafb": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2f5282ba4afc45d9b43b18ccd50cd984": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_389699b0b0404174be6e092dae71a8e8",
- "IPY_MODEL_3112e06bfd6d41408438c93cddcd306a",
- "IPY_MODEL_3b4f2c964e5d42428b9ce9db06be885c"
- ],
- "layout": "IPY_MODEL_624dde2b14de4f87b969772e7792666c"
- }
- },
- "3112e06bfd6d41408438c93cddcd306a": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2f3625a6b69e4b28a0180d769ef3eafb",
- "max": 798293,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_84fd9d1f1821408f9adbbba4d3b1ff04",
- "value": 798293
- }
- },
- "33f7e429f91b4e359fd05bbef32e5a46": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "380ca1c370174ba58ec9b669a4e4a2ff": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "389699b0b0404174be6e092dae71a8e8": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_59b60b985b14488f817d2693785d966f",
- "placeholder": "",
- "style": "IPY_MODEL_f582ccbb662e4a9bbf3699c7f69a56d5",
- "value": "(…)onll2003-3-class/resolve/main/vocab.json: 100%"
- }
- },
- "3b4f2c964e5d42428b9ce9db06be885c": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f86b586715894e38abc7196a303f54e7",
- "placeholder": "",
- "style": "IPY_MODEL_025eda03fbad4dd18d7dae72aedd0106",
- "value": " 798k/798k [00:00<00:00, 15.4MB/s]"
- }
- },
- "3de9ee6582f1423598931cea294c532c": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f6daf5c19e5e48e193af8b70e61bc1a3",
- "max": 1355931,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_1689463b2a3d4b39bb427733c160287a",
- "value": 1355931
- }
- },
- "4c7a6185b4e549ae82bbd69c5951b89d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ded495ef523c49779063258bcec83c4e",
- "placeholder": "",
- "style": "IPY_MODEL_9daae647890b4527bf66caccc15afff8",
- "value": " 239/239 [00:00<00:00, 11.6kB/s]"
- }
- },
- "4dd732d7405d4c8bac2b1d4297ec8088": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "528de7c76ae84ccfb4614faddf133cfb": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "595ee009a3604de7a1d1c12e127b8f85": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "59b60b985b14488f817d2693785d966f": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "624dde2b14de4f87b969772e7792666c": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "74aa18fd09fb43f099973ef2c77f1fea": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "74cda4b89a124b009c187cb98a04899d": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7d587ac5d3ee4a89a99bc5c0b8044669": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "7d9fe46aa745480e9947350f443be964": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "82cac82421da40a4bb21aacad13aef90": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "84fd9d1f1821408f9adbbba4d3b1ff04": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "8572225a06a14ca8965a8039b2298070": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "885a765e32834db28e6a6aa47a853a8f": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "ProgressStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "8caea9c1009646e9839e9e410f1006b8": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "985ccb56f8df409d97836c83c7f57e44": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0993a78aca3348468b8615d096466b80",
- "placeholder": "",
- "style": "IPY_MODEL_33f7e429f91b4e359fd05bbef32e5a46",
- "value": "pytorch_model.bin: 100%"
- }
- },
- "9b31a5e52daf472b8900efa4b0c396b1": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b006c4b472c341ab9fe9781a79c78c0a",
- "max": 293,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_12eee2449390429192df0e0394598062",
- "value": 293
- }
- },
- "9ba0de8569964cb6a092f0359711d28e": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9daae647890b4527bf66caccc15afff8": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "a4d075edad9243b5af25945e727e011f": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "a4fc8b5fe2a643e384545787ac7f0f98": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_e542527c11944d088846505d08c52806",
- "IPY_MODEL_9b31a5e52daf472b8900efa4b0c396b1",
- "IPY_MODEL_e0a9517ffb70428cba4d8b8749603444"
- ],
- "layout": "IPY_MODEL_af0ef342d5b14b86b18bfe3dba1c6b9f"
- }
- },
- "a56652b29f014151ad02630853888abe": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ac0bec7637084a0e8e51231de626f69e": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2b8a0ac51adf4cd9b94deb879084696f",
- "placeholder": "",
- "style": "IPY_MODEL_7d9fe46aa745480e9947350f443be964",
- "value": " 1.36M/1.36M [00:00<00:00, 26.4MB/s]"
- }
- },
- "af0ef342d5b14b86b18bfe3dba1c6b9f": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "afe9cc266e03429b84d094ab1cb29a97": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b006c4b472c341ab9fe9781a79c78c0a": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b0491ab858894635bcaae3b307db5d71": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b7201dc4f9584e1c97488425a766c4c6": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b8ed253331fe4d4e9b7a10dd282ea172": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "c1ac31ed4ded444586913047df105d63": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_dcd04fa06ba744b697e172fa8365b009",
- "placeholder": "",
- "style": "IPY_MODEL_a4d075edad9243b5af25945e727e011f",
- "value": "(…)nll2003-3-class/resolve/main/config.json: 100%"
- }
- },
- "c33367067b5c41529e4cb8301bb4631b": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9ba0de8569964cb6a092f0359711d28e",
- "placeholder": "",
- "style": "IPY_MODEL_4dd732d7405d4c8bac2b1d4297ec8088",
- "value": "(…)onll2003-3-class/resolve/main/merges.txt: 100%"
- }
- },
- "c3a63933131d4d1788b3a552a3c16a8e": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c56e4e6111074a75820abdce355adb39": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8572225a06a14ca8965a8039b2298070",
- "max": 239,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_29e72b30d40644b4afdd19b137f8952d",
- "value": 239
- }
- },
- "ca78d9dbdc854ad7b45d8e0de3cf2d9f": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d72f34d844b542b0a4e1ec0264880cab": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "da56089370b6403fa52b9787b84ad86d": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "dcd04fa06ba744b697e172fa8365b009": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ded495ef523c49779063258bcec83c4e": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e0a9517ffb70428cba4d8b8749603444": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ca78d9dbdc854ad7b45d8e0de3cf2d9f",
- "placeholder": "",
- "style": "IPY_MODEL_ec26975de7f3493795c3cdf5a471a59d",
- "value": " 293/293 [00:00<00:00, 15.7kB/s]"
- }
- },
- "e542527c11944d088846505d08c52806": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8caea9c1009646e9839e9e410f1006b8",
- "placeholder": "",
- "style": "IPY_MODEL_afe9cc266e03429b84d094ab1cb29a97",
- "value": "(…)class/resolve/main/tokenizer_config.json: 100%"
- }
- },
- "ec26975de7f3493795c3cdf5a471a59d": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ef98c16f3cf84728b593cc1be081c9b2": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_985ccb56f8df409d97836c83c7f57e44",
- "IPY_MODEL_050dbc230ffa47e1a8b293f622b4ea57",
- "IPY_MODEL_1898befd7f36447ea5194e2c68d00c31"
- ],
- "layout": "IPY_MODEL_a56652b29f014151ad02630853888abe"
- }
- },
- "f3774c5dc43b44d69c256f259cb22a8c": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HBoxModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_ff652925a7bd40bc9939be17cacd818e",
- "IPY_MODEL_c56e4e6111074a75820abdce355adb39",
- "IPY_MODEL_4c7a6185b4e549ae82bbd69c5951b89d"
- ],
- "layout": "IPY_MODEL_ff0eedc4f66f4fc9894f22a83245c55f"
- }
- },
- "f4f066292c894698a145d97645ef0852": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c3a63933131d4d1788b3a552a3c16a8e",
- "placeholder": "",
- "style": "IPY_MODEL_74aa18fd09fb43f099973ef2c77f1fea",
- "value": " 456k/456k [00:00<00:00, 6.49MB/s]"
- }
- },
- "f56039a6fb3f4dc7913ea06536e476c3": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "FloatProgressModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b0491ab858894635bcaae3b307db5d71",
- "max": 456356,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_885a765e32834db28e6a6aa47a853a8f",
- "value": 456356
- }
- },
- "f582ccbb662e4a9bbf3699c7f69a56d5": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "DescriptionStyleModel",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "f6daf5c19e5e48e193af8b70e61bc1a3": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f86b586715894e38abc7196a303f54e7": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "fb53f3bf55664c4e9aa685809d9b550f": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ff0eedc4f66f4fc9894f22a83245c55f": {
- "model_module": "@jupyter-widgets/base",
- "model_module_version": "1.2.0",
- "model_name": "LayoutModel",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ff652925a7bd40bc9939be17cacd818e": {
- "model_module": "@jupyter-widgets/controls",
- "model_module_version": "1.5.0",
- "model_name": "HTMLModel",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_82cac82421da40a4bb21aacad13aef90",
- "placeholder": "",
- "style": "IPY_MODEL_2094ab4f61fc4dbbb2a45b8cb10d696f",
- "value": "(…)ass/resolve/main/special_tokens_map.json: 100%"
- }
- }
- }
+ "f3774c5dc43b44d69c256f259cb22a8c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ff652925a7bd40bc9939be17cacd818e",
+ "IPY_MODEL_c56e4e6111074a75820abdce355adb39",
+ "IPY_MODEL_4c7a6185b4e549ae82bbd69c5951b89d"
+ ],
+ "layout": "IPY_MODEL_ff0eedc4f66f4fc9894f22a83245c55f"
+ }
+ },
+ "f4f066292c894698a145d97645ef0852": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c3a63933131d4d1788b3a552a3c16a8e",
+ "placeholder": "",
+ "style": "IPY_MODEL_74aa18fd09fb43f099973ef2c77f1fea",
+ "value": " 456k/456k [00:00<00:00, 6.49MB/s]"
+ }
+ },
+ "f56039a6fb3f4dc7913ea06536e476c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0491ab858894635bcaae3b307db5d71",
+ "max": 456356,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_885a765e32834db28e6a6aa47a853a8f",
+ "value": 456356
+ }
+ },
+ "f582ccbb662e4a9bbf3699c7f69a56d5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "f6daf5c19e5e48e193af8b70e61bc1a3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f86b586715894e38abc7196a303f54e7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fb53f3bf55664c4e9aa685809d9b550f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ff0eedc4f66f4fc9894f22a83245c55f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ff652925a7bd40bc9939be17cacd818e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_82cac82421da40a4bb21aacad13aef90",
+ "placeholder": "",
+ "style": "IPY_MODEL_2094ab4f61fc4dbbb2a45b8cb10d696f",
+ "value": "(…)ass/resolve/main/special_tokens_map.json: 100%"
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_T5.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_T5.ipynb
index 8a7518f56631df..f79307a780625f 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_T5.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_T5.ipynb
@@ -67,7 +67,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.35.2 optimum sentencepiece"
+ "!pip install -q --upgrade transformers[onnx]==4.35.2 optimum sentencepiece onnx==1.14.0"
]
},
{
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Whisper.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Whisper.ipynb
index 262cad47cf6a91..e1d5f5596c944d 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Whisper.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Whisper.ipynb
@@ -1,553 +1,710 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Whisper.ipynb)\n",
- "\n",
- "# Import ONNX Whisper models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
- "- The Whisper model was introduced in `Spark NLP 5.1.0 and requires Spark version 3.4.1 and up.`\n",
- "- Official models are supported, but not all custom models may work."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Export and Save HuggingFace model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.31.0`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cells": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m18.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m364.2/364.2 kB\u001b[0m \u001b[31m25.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m32.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m33.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m28.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m21.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m39.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m32.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m16.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade \"transformers[onnx]==4.31.0\" optimum tensorflow"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use the [whisper-tiny](https://huggingface.co/openai/whisper-tiny) model from HuggingFace as an example and export it with the `optimum-cli`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "MODEL_NAME = \"openai/whisper-tiny\"\n",
- "EXPORT_PATH = f\"export_onnx/{MODEL_NAME}\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hEdJynTH3L0x"
+ },
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_Whisper.ipynb)\n",
+ "\n",
+ "# Import ONNX Whisper models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
+ "- The Whisper model was introduced in `Spark NLP 5.1.0 and requires Spark version 3.4.1 and up.`\n",
+ "- Official models are supported, but not all custom models may work."
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2023-08-14 13:53:19.500633: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
- "Framework not specified. Using pt to export to ONNX.\n",
- "Downloading (…)lve/main/config.json: 100% 1.98k/1.98k [00:00<00:00, 3.50MB/s]\n",
- "Downloading model.safetensors: 100% 151M/151M [00:05<00:00, 28.6MB/s]\n",
- "Downloading (…)neration_config.json: 100% 3.72k/3.72k [00:00<00:00, 17.7MB/s]\n",
- "Automatic task detection to automatic-speech-recognition-with-past (possible synonyms are: speech2seq-lm-with-past).\n",
- "Downloading (…)okenizer_config.json: 100% 841/841 [00:00<00:00, 2.66MB/s]\n",
- "Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 10.8MB/s]\n",
- "Downloading (…)/main/tokenizer.json: 100% 2.20M/2.20M [00:00<00:00, 25.4MB/s]\n",
- "Downloading (…)olve/main/merges.txt: 100% 494k/494k [00:00<00:00, 18.1MB/s]\n",
- "Downloading (…)main/normalizer.json: 100% 52.7k/52.7k [00:00<00:00, 73.6MB/s]\n",
- "Downloading (…)in/added_tokens.json: 100% 2.08k/2.08k [00:00<00:00, 7.43MB/s]\n",
- "Downloading (…)cial_tokens_map.json: 100% 2.08k/2.08k [00:00<00:00, 7.93MB/s]\n",
- "Downloading (…)rocessor_config.json: 100% 185k/185k [00:00<00:00, 7.36MB/s]\n",
- "Using framework PyTorch: 2.0.1+cu118\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:410: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
- " if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:449: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
- " if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):\n",
- "============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n",
- "verbose: False, log level: Level.ERROR\n",
- "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
- "\n",
- "Using framework PyTorch: 2.0.1+cu118\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> True\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:1004: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
- " if input_shape[-1] > 1:\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:417: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
- " if attention_mask.size() != (bsz, 1, tgt_len, src_len):\n",
- "============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n",
- "verbose: False, log level: Level.ERROR\n",
- "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
- "\n",
- "Using framework PyTorch: 2.0.1+cu118\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> True\n",
- "Asked a sequence length of 16, but a sequence length of 1 will be used with use_past == True for `decoder_input_ids`.\n",
- "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:372: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
- " and past_key_value[0].shape[2] == key_value_states.shape[1]\n",
- "============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n",
- "verbose: False, log level: Level.ERROR\n",
- "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
- "\n",
- "Post-processing the exported models...\n",
- "The two models proto have different outputs (17 and 9 outputs). Constant outputs will be added to unify the two models outputs.\n",
- "Addind a constant output for present.0.encoder.key of shape [0, 6, 1, 64] in model2.\n",
- "Addind a constant output for present.0.encoder.value of shape [0, 6, 1, 64] in model2.\n",
- "Addind a constant output for present.1.encoder.key of shape [0, 6, 1, 64] in model2.\n",
- "Addind a constant output for present.1.encoder.value of shape [0, 6, 1, 64] in model2.\n",
- "Addind a constant output for present.2.encoder.key of shape [0, 6, 1, 64] in model2.\n",
- "Addind a constant output for present.2.encoder.value of shape [0, 6, 1, 64] in model2.\n",
- "Addind a constant output for present.3.encoder.key of shape [0, 6, 1, 64] in model2.\n",
- "Addind a constant output for present.3.encoder.value of shape [0, 6, 1, 64] in model2.\n",
- "Validating models in subprocesses...\n",
- "2023-08-14 13:53:53.825862: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
- "Validating ONNX model export_onnx/openai/whisper-tiny/encoder_model.onnx...\n",
- "\t-[✓] ONNX model output names match reference model (last_hidden_state)\n",
- "\t- Validating ONNX Model output \"last_hidden_state\":\n",
- "\t\t-[✓] (2, 1500, 384) matches (2, 1500, 384)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "2023-08-14 13:54:09.277640: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
- "Validating ONNX model export_onnx/openai/whisper-tiny/decoder_model_merged.onnx...\n",
- "2023-08-14 13:54:11.873629973 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Shape_4_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.873793574 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_16_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.892860734 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.892952656 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_1_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.893552082 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_10_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.893587646 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_2_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.893601631 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_12_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.893631102 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_9_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:11.893696170 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_11_output_0'. It is not used by any node and should be removed from the model.\n",
- "\t-[✓] ONNX model output names match reference model (present.1.decoder.key, present.2.encoder.value, present.3.encoder.key, present.0.decoder.value, present.1.decoder.value, present.0.encoder.key, present.2.decoder.key, present.3.decoder.value, present.2.encoder.key, present.2.decoder.value, present.1.encoder.key, present.3.encoder.value, present.1.encoder.value, present.0.decoder.key, present.3.decoder.key, logits, present.0.encoder.value)\n",
- "\t- Validating ONNX Model output \"logits\":\n",
- "\t\t-[✓] (2, 16, 51865) matches (2, 16, 51865)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.0.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.0.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.0.encoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.0.encoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.1.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.1.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.1.encoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.1.encoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.2.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.2.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.2.encoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.2.encoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.3.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.3.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.3.encoder.key\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.3.encoder.value\":\n",
- "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "2023-08-14 13:54:20.179734: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
- "Validating ONNX model export_onnx/openai/whisper-tiny/decoder_model_merged.onnx...\n",
- "Asked a sequence length of 16, but a sequence length of 1 will be used with use_past == True for `decoder_input_ids`.\n",
- "2023-08-14 13:54:23.118265457 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Shape_4_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.118402025 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_16_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.134562875 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.134629569 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_1_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.135051085 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_10_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.135074933 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_2_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.135088187 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_12_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.135109430 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_9_output_0'. It is not used by any node and should be removed from the model.\n",
- "2023-08-14 13:54:23.135158578 [W:onnxruntime:, graph.cc:3543 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_11_output_0'. It is not used by any node and should be removed from the model.\n",
- "\t-[✓] ONNX model output names match reference model (present.0.decoder.key, present.3.decoder.key, present.1.decoder.key, present.1.decoder.value, logits, present.3.decoder.value, present.2.decoder.value, present.2.decoder.key, present.0.decoder.value)\n",
- "\t- Validating ONNX Model output \"logits\":\n",
- "\t\t-[✓] (2, 1, 51865) matches (2, 1, 51865)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.0.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.0.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.1.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.1.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.2.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.2.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.3.decoder.key\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "\t- Validating ONNX Model output \"present.3.decoder.value\":\n",
- "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
- "\t\t-[✓] all values close (atol: 0.001)\n",
- "The ONNX export succeeded and the exported model was saved at: export_onnx/openai/whisper-tiny\n"
- ]
- }
- ],
- "source": [
- "! optimum-cli export onnx --model {MODEL_NAME} {EXPORT_PATH}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We have to move additional model assets into a seperate folder, so that Spark NLP can load it properly."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "! mkdir -p {EXPORT_PATH}/assets\n",
- "! mv -t {EXPORT_PATH}/assets {EXPORT_PATH}/*.json {EXPORT_PATH}/*.txt"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DfiBPTV83L0y"
+ },
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 607868\n",
- "drwxr-xr-x 2 root root 4096 Aug 14 13:55 assets\n",
- "-rw-r--r-- 1 root root 198197526 Aug 14 13:53 decoder_model_merged.onnx\n",
- "-rw-r--r-- 1 root root 198049530 Aug 14 13:53 decoder_model.onnx\n",
- "-rw-r--r-- 1 root root 193295315 Aug 14 13:53 decoder_with_past_model.onnx\n",
- "-rw-r--r-- 1 root root 32900723 Aug 14 13:53 encoder_model.onnx\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "IhUUhv8h3L0z"
+ },
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.31.0`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 3728\n",
- "-rw-r--r-- 1 root root 2082 Aug 14 13:53 added_tokens.json\n",
- "-rw-r--r-- 1 root root 2243 Aug 14 13:53 config.json\n",
- "-rw-r--r-- 1 root root 3711 Aug 14 13:53 generation_config.json\n",
- "-rw-r--r-- 1 root root 493864 Aug 14 13:53 merges.txt\n",
- "-rw-r--r-- 1 root root 52666 Aug 14 13:53 normalizer.json\n",
- "-rw-r--r-- 1 root root 339 Aug 14 13:53 preprocessor_config.json\n",
- "-rw-r--r-- 1 root root 2077 Aug 14 13:53 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 835 Aug 14 13:53 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 2203267 Aug 14 13:53 tokenizer.json\n",
- "-rw-r--r-- 1 root root 1036584 Aug 14 13:53 vocab.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}/assets"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Import and Save Whisper in Spark NLP\n",
- "\n",
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script\n",
- "- Additionally, we need to upgrade Spark to version 3.4.1."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n",
- "! pip install -U pyspark==3.4.1"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Let's use `loadSavedModel` functon in `WhisperForCTC` which allows us to load the ONNX model\n",
- "- Most params will be set automatically. They can also be set later after loading the model in `WhisperForCTC` during runtime, so don't worry about setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "\n",
- "# All these params should be identical to the original ONNX model\n",
- "whisper = (\n",
- " WhisperForCTC.loadSavedModel(f\"{EXPORT_PATH}\", spark)\n",
- " .setInputCols(\"audio_assembler\")\n",
- " .setOutputCol(\"text\")\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "whisper.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "!rm -rf {EXPORT_PATH}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your ONNX Whisper model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "id": "sUIG8ym_3ZdY"
+ }
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 414388\n",
- "-rw-r--r-- 1 root root 198079914 Aug 14 14:03 decoder_model_whisper_ctc\n",
- "-rw-r--r-- 1 root root 193324994 Aug 14 14:03 decoder_with_past_model_whisper_ctc\n",
- "-rw-r--r-- 1 root root 32905912 Aug 14 14:03 encoder_model_whisper_ctc\n",
- "drwxr-xr-x 6 root root 4096 Aug 14 14:03 fields\n",
- "drwxr-xr-x 2 root root 4096 Aug 14 14:03 metadata\n"
- ]
- }
- ],
- "source": [
- "! ls -l {MODEL_NAME}_spark_nlp"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny Whisper model 😊"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "yy9Ig4tY3L0z",
+ "outputId": "648a6286-3ad1-4656-f8fa-06d156549d41",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/7.4 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.3/7.4 MB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━\u001b[0m \u001b[32m5.2/7.4 MB\u001b[0m \u001b[31m75.2 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m86.5 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.0/410.0 kB\u001b[0m \u001b[31m35.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m589.8/589.8 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m55.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m91.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m37.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m105.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m24.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m45.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.3/5.3 MB\u001b[0m \u001b[31m78.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m79.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m98.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m66.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "tf-keras 2.15.1 requires tensorflow<2.16,>=2.15, but you have tensorflow 2.16.1 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade \"transformers[onnx]==4.31.0\" optimum tensorflow onnx==1.14.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "l_WSgW9w3L00"
+ },
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use the [whisper-tiny](https://huggingface.co/openai/whisper-tiny) model from HuggingFace as an example and export it with the `optimum-cli`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "id": "Ar3GeeF43L00"
+ },
+ "outputs": [],
+ "source": [
+ "MODEL_NAME = \"openai/whisper-tiny\"\n",
+ "EXPORT_PATH = f\"export_onnx/{MODEL_NAME}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "1F7dqTBe3L01",
+ "outputId": "9a9d903b-a829-4d9d-d2ed-48948e476e73",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "2024-04-12 10:35:18.194732: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
+ "Framework not specified. Using pt to export the model.\n",
+ "config.json: 100% 1.98k/1.98k [00:00<00:00, 7.28MB/s]\n",
+ "model.safetensors: 100% 151M/151M [00:00<00:00, 311MB/s]\n",
+ "generation_config.json: 100% 3.75k/3.75k [00:00<00:00, 17.5MB/s]\n",
+ "Automatic task detection to automatic-speech-recognition-with-past (possible synonyms are: speech2seq-lm-with-past).\n",
+ "tokenizer_config.json: 100% 283k/283k [00:00<00:00, 30.7MB/s]\n",
+ "vocab.json: 100% 836k/836k [00:00<00:00, 20.0MB/s]\n",
+ "tokenizer.json: 100% 2.48M/2.48M [00:00<00:00, 5.49MB/s]\n",
+ "merges.txt: 100% 494k/494k [00:00<00:00, 22.1MB/s]\n",
+ "normalizer.json: 100% 52.7k/52.7k [00:00<00:00, 115MB/s]\n",
+ "added_tokens.json: 100% 34.6k/34.6k [00:00<00:00, 79.4MB/s]\n",
+ "special_tokens_map.json: 100% 2.19k/2.19k [00:00<00:00, 8.36MB/s]\n",
+ "preprocessor_config.json: 100% 185k/185k [00:00<00:00, 41.6MB/s]\n",
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.2.1+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:410: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+ " if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:449: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+ " if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):\n",
+ "Using framework PyTorch: 2.2.1+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> True\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:1004: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+ " if input_shape[-1] > 1:\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:417: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+ " if attention_mask.size() != (bsz, 1, tgt_len, src_len):\n",
+ "Using framework PyTorch: 2.2.1+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> True\n",
+ "/usr/local/lib/python3.10/dist-packages/transformers/models/whisper/modeling_whisper.py:372: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+ " and past_key_value[0].shape[2] == key_value_states.shape[1]\n",
+ "Post-processing the exported models...\n",
+ "Weight deduplication check in the ONNX export requires accelerate. Please install accelerate to run it.\n",
+ "The two models proto have different outputs (17 and 9 outputs). Constant outputs will be added to unify the two models outputs.\n",
+ "Adding a constant output for present.0.encoder.key of shape [0, 6, 1, 64] in model2.\n",
+ "Adding a constant output for present.0.encoder.value of shape [0, 6, 1, 64] in model2.\n",
+ "Adding a constant output for present.1.encoder.key of shape [0, 6, 1, 64] in model2.\n",
+ "Adding a constant output for present.1.encoder.value of shape [0, 6, 1, 64] in model2.\n",
+ "Adding a constant output for present.2.encoder.key of shape [0, 6, 1, 64] in model2.\n",
+ "Adding a constant output for present.2.encoder.value of shape [0, 6, 1, 64] in model2.\n",
+ "Adding a constant output for present.3.encoder.key of shape [0, 6, 1, 64] in model2.\n",
+ "Adding a constant output for present.3.encoder.value of shape [0, 6, 1, 64] in model2.\n",
+ "Validating ONNX model export_onnx/openai/whisper-tiny/encoder_model.onnx...\n",
+ "\t-[✓] ONNX model output names match reference model (last_hidden_state)\n",
+ "\t- Validating ONNX Model output \"last_hidden_state\":\n",
+ "\t\t-[✓] (2, 1500, 384) matches (2, 1500, 384)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "Validating ONNX model export_onnx/openai/whisper-tiny/decoder_model_merged.onnx...\n",
+ "\u001b[0;93m2024-04-12 10:35:46.461625726 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Shape_4_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.461718257 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_23_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478143231 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_1_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478201698 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478215371 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer 'onnx::Unsqueeze_192'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478645008 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_11_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478663150 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer 'onnx::Unsqueeze_211'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478686496 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_12_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478693914 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_14_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478705872 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_2_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478714456 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_13_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478728444 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer 'onnx::Unsqueeze_180'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:46.478740137 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_10_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\t-[✓] ONNX model output names match reference model (present.2.decoder.key, present.3.encoder.key, present.0.encoder.key, present.1.decoder.key, present.2.decoder.value, present.1.encoder.key, present.3.decoder.key, present.0.decoder.key, logits, present.2.encoder.key, present.3.decoder.value, present.2.encoder.value, present.1.encoder.value, present.0.decoder.value, present.1.decoder.value, present.3.encoder.value, present.0.encoder.value)\n",
+ "\t- Validating ONNX Model output \"logits\":\n",
+ "\t\t-[✓] (2, 16, 51865) matches (2, 16, 51865)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.0.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.0.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.0.encoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.0.encoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.1.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.1.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.1.encoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.1.encoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.2.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.2.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.2.encoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.2.encoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.3.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.3.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.3.encoder.key\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.3.encoder.value\":\n",
+ "\t\t-[✓] (2, 6, 16, 64) matches (2, 6, 16, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "Validating ONNX model export_onnx/openai/whisper-tiny/decoder_model_merged.onnx...\n",
+ "\u001b[0;93m2024-04-12 10:35:47.393181620 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Shape_4_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.393302485 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_23_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.411905010 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_1_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.411952338 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.411978486 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer 'onnx::Unsqueeze_192'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412527152 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_11_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412551044 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer 'onnx::Unsqueeze_211'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412584655 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_12_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412597581 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_14_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412615576 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_2_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412631568 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_13_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412656271 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer 'onnx::Unsqueeze_180'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\u001b[0;93m2024-04-12 10:35:47.412676295 [W:onnxruntime:, graph.cc:3593 CleanUnusedInitializersAndNodeArgs] Removing initializer '/model/decoder/Constant_10_output_0'. It is not used by any node and should be removed from the model.\u001b[m\n",
+ "\t-[✓] ONNX model output names match reference model (present.2.decoder.key, present.1.decoder.key, present.2.decoder.value, present.3.decoder.key, present.3.decoder.value, present.0.decoder.key, logits, present.0.decoder.value, present.1.decoder.value)\n",
+ "\t- Validating ONNX Model output \"logits\":\n",
+ "\t\t-[✓] (2, 1, 51865) matches (2, 1, 51865)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.0.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.0.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.1.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.1.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.2.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.2.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.3.decoder.key\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "\t- Validating ONNX Model output \"present.3.decoder.value\":\n",
+ "\t\t-[✓] (2, 6, 17, 64) matches (2, 6, 17, 64)\n",
+ "\t\t-[✓] all values close (atol: 0.001)\n",
+ "The ONNX export succeeded and the exported model was saved at: export_onnx/openai/whisper-tiny\n"
+ ]
+ }
+ ],
+ "source": [
+ "! optimum-cli export onnx --model {MODEL_NAME} {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "_jrTPqhE3L01"
+ },
+ "source": [
+ "We have to move additional model assets into a seperate folder, so that Spark NLP can load it properly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "id": "CyHyF5Pr3L02"
+ },
+ "outputs": [],
+ "source": [
+ "! mkdir -p {EXPORT_PATH}/assets\n",
+ "! mv -t {EXPORT_PATH}/assets {EXPORT_PATH}/*.json {EXPORT_PATH}/*.txt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vqXo5KCK3L02"
+ },
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "qFXX_acJ3L03",
+ "outputId": "e82587f4-e280-4288-de3a-087d8c1f7aaa",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 607916\n",
+ "drwxr-xr-x 2 root root 4096 Apr 12 10:35 assets\n",
+ "-rw-r--r-- 1 root root 198217996 Apr 12 10:35 decoder_model_merged.onnx\n",
+ "-rw-r--r-- 1 root root 198061779 Apr 12 10:35 decoder_model.onnx\n",
+ "-rw-r--r-- 1 root root 193303540 Apr 12 10:35 decoder_with_past_model.onnx\n",
+ "-rw-r--r-- 1 root root 32904958 Apr 12 10:35 encoder_model.onnx\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "id": "-lbCcSP13L03",
+ "outputId": "9b37ce06-a335-4c82-8899-55fb9197815a",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 4308\n",
+ "-rw-r--r-- 1 root root 34604 Apr 12 10:35 added_tokens.json\n",
+ "-rw-r--r-- 1 root root 2243 Apr 12 10:35 config.json\n",
+ "-rw-r--r-- 1 root root 3742 Apr 12 10:35 generation_config.json\n",
+ "-rw-r--r-- 1 root root 493869 Apr 12 10:35 merges.txt\n",
+ "-rw-r--r-- 1 root root 52666 Apr 12 10:35 normalizer.json\n",
+ "-rw-r--r-- 1 root root 339 Apr 12 10:35 preprocessor_config.json\n",
+ "-rw-r--r-- 1 root root 2194 Apr 12 10:35 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 283277 Apr 12 10:35 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 2480466 Apr 12 10:35 tokenizer.json\n",
+ "-rw-r--r-- 1 root root 1036584 Apr 12 10:35 vocab.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Q63ic12h3L04"
+ },
+ "source": [
+ "## Import and Save Whisper in Spark NLP\n",
+ "\n",
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script\n",
+ "- Additionally, we need to upgrade Spark to version 3.4.1."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "id": "ZKZ_tizZ3L04",
+ "outputId": "2d7801ea-99fd-44ac-a963-e98f8feb0c06",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m568.4/568.4 kB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting pyspark==3.4.1\n",
+ " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting py4j==0.10.9.7 (from pyspark==3.4.1)\n",
+ " Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.5/200.5 kB\u001b[0m \u001b[31m23.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hBuilding wheels for collected packages: pyspark\n",
+ " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285388 sha256=35520bb723dd6a52ac228a8c249191033e27475dc70be0af064dde9b1b780d3c\n",
+ " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n",
+ "Successfully built pyspark\n",
+ "Installing collected packages: py4j, pyspark\n",
+ " Attempting uninstall: py4j\n",
+ " Found existing installation: py4j 0.10.9.5\n",
+ " Uninstalling py4j-0.10.9.5:\n",
+ " Successfully uninstalled py4j-0.10.9.5\n",
+ " Attempting uninstall: pyspark\n",
+ " Found existing installation: pyspark 3.2.3\n",
+ " Uninstalling pyspark-3.2.3:\n",
+ " Successfully uninstalled pyspark-3.2.3\n",
+ "Successfully installed py4j-0.10.9.7 pyspark-3.4.1\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n",
+ "! pip install -U pyspark==3.4.1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4EiV3v3D3L05"
+ },
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "id": "HKzEZfQn3L05"
+ },
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()"
+ ]
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+------------------------------------------------------------------------------------------+\n",
- "|result |\n",
- "+------------------------------------------------------------------------------------------+\n",
- "|[ Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.]|\n",
- "+------------------------------------------------------------------------------------------+\n",
- "\n"
- ]
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8UCXtwOd3L05"
+ },
+ "source": [
+ "- Let's use `loadSavedModel` functon in `WhisperForCTC` which allows us to load the ONNX model\n",
+ "- Most params will be set automatically. They can also be set later after loading the model in `WhisperForCTC` during runtime, so don't worry about setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "id": "fZNPXuQP3L05"
+ },
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "# All these params should be identical to the original ONNX model\n",
+ "whisper = (\n",
+ " WhisperForCTC.loadSavedModel(f\"{EXPORT_PATH}\", spark)\n",
+ " .setInputCols(\"audio_assembler\")\n",
+ " .setOutputCol(\"text\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5tSlzbOR3L06"
+ },
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "id": "nkP_gWrt3L06"
+ },
+ "outputs": [],
+ "source": [
+ "whisper.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yKqHPm903L06"
+ },
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "id": "6Dfa7zDK3L06"
+ },
+ "outputs": [],
+ "source": [
+ "!rm -rf {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5ecbVmq73L06"
+ },
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your ONNX Whisper model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "id": "WKxyiCOi3L07",
+ "outputId": "2eae5016-aa01-4da2-f6f9-574b8f4136fb",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 414404\n",
+ "-rw-r--r-- 1 root root 198092144 Apr 12 10:38 decoder_model\n",
+ "-rw-r--r-- 1 root root 193333200 Apr 12 10:38 decoder_with_past_model\n",
+ "-rw-r--r-- 1 root root 32910123 Apr 12 10:38 encoder_model\n",
+ "drwxr-xr-x 6 root root 4096 Apr 12 10:38 fields\n",
+ "drwxr-xr-x 2 root root 4096 Apr 12 10:38 metadata\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {MODEL_NAME}_spark_nlp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0VEQV_Cv3L07"
+ },
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny Whisper model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "! wget https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/src/test/resources/audio/txt/librispeech_asr_0.txt"
+ ],
+ "metadata": {
+ "id": "KzAIXRki4kRQ",
+ "outputId": "c926a754-3bb1-4790-b3cf-ec10a93a0ebc",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "--2024-04-12 10:39:27-- https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/src/test/resources/audio/txt/librispeech_asr_0.txt\n",
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.110.133, ...\n",
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 2199992 (2.1M) [text/plain]\n",
+ "Saving to: ‘librispeech_asr_0.txt’\n",
+ "\n",
+ "\rlibrispeech_asr_0.t 0%[ ] 0 --.-KB/s \rlibrispeech_asr_0.t 100%[===================>] 2.10M --.-KB/s in 0.01s \n",
+ "\n",
+ "2024-04-12 10:39:27 (143 MB/s) - ‘librispeech_asr_0.txt’ saved [2199992/2199992]\n",
+ "\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "id": "L9hjHeKs3L07",
+ "outputId": "65c2a3cb-675f-4873-e786-3a644ffe0b88",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "+------------------------------------------------------------------------------------------+\n",
+ "|result |\n",
+ "+------------------------------------------------------------------------------------------+\n",
+ "|[ Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.]|\n",
+ "+------------------------------------------------------------------------------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "from sparknlp.base import *\n",
+ "from sparknlp.annotator import *\n",
+ "from pyspark.ml import Pipeline\n",
+ "\n",
+ "audioAssembler = AudioAssembler() \\\n",
+ " .setInputCol(\"audio_content\") \\\n",
+ " .setOutputCol(\"audio_assembler\")\n",
+ "\n",
+ "speechToText = WhisperForCTC.load(f\"{MODEL_NAME}_spark_nlp\")\n",
+ "\n",
+ "pipeline = Pipeline().setStages([audioAssembler, speechToText])\n",
+ "\n",
+ "audio_path = \"librispeech_asr_0.txt\"\n",
+ "with open(audio_path) as file:\n",
+ " raw_floats = [float(data) for data in file.read().strip().split(\"\\n\")]\n",
+ "\n",
+ "processedAudioFloats = spark.createDataFrame([[raw_floats]]).toDF(\"audio_content\")\n",
+ "\n",
+ "result = pipeline.fit(processedAudioFloats).transform(processedAudioFloats)\n",
+ "result.select(\"text.result\").show(truncate = False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "s_uVMnSS3L07"
+ },
+ "source": [
+ "That's it! You can now go wild and use hundreds of Whisper models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
}
- ],
- "source": [
- "import sparknlp\n",
- "from sparknlp.base import *\n",
- "from sparknlp.annotator import *\n",
- "from pyspark.ml import Pipeline\n",
- "\n",
- "audioAssembler = AudioAssembler() \\\n",
- " .setInputCol(\"audio_content\") \\\n",
- " .setOutputCol(\"audio_assembler\")\n",
- "\n",
- "speechToText = WhisperForCTC.load(f\"{MODEL_NAME}_spark_nlp\")\n",
- "\n",
- "pipeline = Pipeline().setStages([audioAssembler, speechToText])\n",
- "\n",
- "audio_path = \"../../../../src/test/resources/audio/txt/librispeech_asr_0.txt\"\n",
- "with open(audio_path) as file:\n",
- " raw_floats = [float(data) for data in file.read().strip().split(\"\\n\")]\n",
- "\n",
- "processedAudioFloats = spark.createDataFrame([[raw_floats]]).toDF(\"audio_content\")\n",
- "\n",
- "result = pipeline.fit(processedAudioFloats).transform(processedAudioFloats)\n",
- "result.select(\"text.result\").show(truncate = False)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "That's it! You can now go wild and use hundreds of Whisper models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat": 4,
+ "nbformat_minor": 0
}
\ No newline at end of file
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XLM_RoBERTa.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XLM_RoBERTa.ipynb
index d7c228a6d55336..4f87291c05ae41 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XLM_RoBERTa.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XLM_RoBERTa.ipynb
@@ -1,2421 +1,2284 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XLM_RoBERTa.ipynb)\n",
+ "\n",
+ "# Import ONNX XLM-RoBERTa models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
+ "- You can import models for XLM-RoBERTa from HuggingFace and they have to be in `Fill Mask` category. Meaning, you cannot use XLM-RoBERTa models trained/fine-tuned on a specific task such as token/sequence classification."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "tGk3flXBkgA1"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XLM-RoBERTa.ipynb)\n",
- "\n",
- "# Import ONNX XLM-RoBERTa models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n",
- "- You can import models for XLM-RoBERTa from HuggingFace and they have to be in `Fill Mask` category. Meaning, you cannot use XLM-RoBERTa models trained/fine-tuned on a specific task such as token/sequence classification."
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m407.1/407.1 kB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m42.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m91.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.7/536.7 kB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m71.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m88.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m77.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m92.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m64.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m93.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m85.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m89.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m86.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m87.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m88.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m77.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m89.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m61.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m34.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m50.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m75.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m66.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m82.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m39.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m81.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m50.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m88.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m86.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m64.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m91.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m37.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m83.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m40.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m40.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m36.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m78.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m82.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m74.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m78.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n",
+ "- In addition to the XLM-RoBERTa model, we also need to save the `XLMRobertaTokenizer`. This is the same for every model, these are assets (saved in `/assets`) needed for tokenization inside Spark NLP."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "xwUb8_YgkgA3"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ec1a9b2d5a994caea613dc851046eddf",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/615 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "1btkdcgDkgA4"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export the model.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "Ob-7qVD7kgA4",
- "outputId": "c802c6bd-8a0a-4d07-c43a-7a6ded7ece0f",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "298e83eae1d748aa8049f6adba534ac8",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m407.1/407.1 kB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m42.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m91.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.7/536.7 kB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m71.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m88.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m77.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m92.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m64.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m93.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m85.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m89.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m86.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m87.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m88.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m77.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m89.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m61.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m34.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m50.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m75.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m66.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m82.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m39.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m81.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m50.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m88.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m86.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m64.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m91.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m37.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m83.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m40.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m40.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m36.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m78.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m82.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m74.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m78.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/1.12G [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "B4_lpbejkgA6"
- },
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n",
- "- In addition to the XLM-RoBERTa model, we also need to save the `XLMRobertaTokenizer`. This is the same for every model, these are assets (saved in `/assets`) needed for tokenization inside Spark NLP."
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']\n",
+ "- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "id": "Hq1EcGX4kgA6",
- "outputId": "5ca4f13e-89f8-4b9e-e38f-6e32f7ef007c",
- "colab": {
- "referenced_widgets": [
- "ec1a9b2d5a994caea613dc851046eddf",
- "4331cf9d02474f9294b1615bd00b9edd",
- "f617219731114a22a24ee6a477696004",
- "1837891dac1a4f56aa35a984af00a197",
- "3bd8d3b7d2e242108d04cb724c7ad89f",
- "264dd92cdff642c68d84d9ea6309b187",
- "e78dcd3395b94f33b945c5f4d4a214e0",
- "4b3d82f11ee243a7b399a671a06b0b72",
- "e631d560fb66440891770005fc492e90",
- "3be731e51a324504ae4e17c21d552d57",
- "2993ed18ddf2429e885b32fb941f754d",
- "298e83eae1d748aa8049f6adba534ac8",
- "5eddacbffc8740b7bc3a9c1b179ac4dd",
- "d52133a3ba584a24bf06c5cf1dbf42bc",
- "b5815c68e9754f1ea7161db566c52d8c",
- "8598b6cace044c34ac4d8b74f926da66",
- "79dfd5ceaa3a49ee9eb1414ea4744580",
- "d63f203cc55b41028203064c88e490c2",
- "13f83e1a9a9747d596ca20aeb6c344db",
- "a5dd185eef4844fa847808b2b91aa26b",
- "b30358df084f451090cde47debff0345",
- "871a297967cc4fe49c5463f4472a7b2f",
- "3609e88700234d4887116cffdb8a31cb",
- "e5d2ac11228d4d07ab7370748e7fdab0",
- "0c82e8ffd4b74655b03da94aef80ec04",
- "b491f6db46ab493fb264bb2ff3c21b11",
- "4495fecb11d647f69f1d1bcc8da2c8ff",
- "f04fd1d8dae449c8a4860127f9a14a24",
- "056d3ff42ddd43b2b20f5ec3cebaf38b",
- "3db2b1532b4f47da9f7fa17bf38a7004",
- "57bd7a45c349469c88051275e5fb4087",
- "38432651440344299dd9d6d462d681b4",
- "37c520248bb842818112488b6f915b8e",
- "21316bfe791349378b4dd59008d7ef94",
- "55635d82f5624418a147cb9b19da84c1",
- "2c193771cf3a4ae2b2e2830733bbf0c1",
- "c57eceb2ca494a02946d9d8e6dc78506",
- "bcf1015593684c5b8f71ea707a408f7d",
- "dd1d5e184948402894199ebaf7defc9b",
- "721dd749d6444b46b0293a227b5ebcb6",
- "add4d5c9cbfa4a16bbc125986e9a56d1",
- "1a305c881d794ebcb9567083faa633c2",
- "5c22226c130b45269da0c68652a52c7e",
- "9a99c367c8ce4ab5ac4987b0f3ca878d",
- "6a7a2098b1a74a6da302eacba871daa7",
- "f2d8a8b7a58c45bbaa08defddd1f228a",
- "622af6562a5a44348aaa91c54f116d66",
- "2093e4b98ee847b892caf6aeb5e6a733",
- "4ea300be5e9546eda7189e1d1f39c22c",
- "b600282be466458089b1a620acb2d28c",
- "54cedeb3ae624df0ab7bbf2f6e2cda62",
- "ef2da6ecc4564ec9a5f72145e5fced87",
- "73dc20453159438daebc82a61350d9c8",
- "2045c150cb67451cb3dc40564147b21a",
- "b8d9d288ab7f466eb6f48758e215828e"
- ],
- "base_uri": "https://localhost:8080/",
- "height": 457
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3609e88700234d4887116cffdb8a31cb",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/615 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "ec1a9b2d5a994caea613dc851046eddf"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export the model.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/1.12G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "298e83eae1d748aa8049f6adba534ac8"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']\n",
- "- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "3609e88700234d4887116cffdb8a31cb"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "21316bfe791349378b4dd59008d7ef94"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/9.10M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "6a7a2098b1a74a6da302eacba871daa7"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
- "\n",
- "MODEL_NAME = \"xlm-roberta-base\"\n",
- "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(EXPORT_PATH)\n",
- "\n",
- "# Create directory for assets and move the tokenizer files.\n",
- "# A separate folder is needed for Spark NLP.\n",
- "!mkdir {EXPORT_PATH}/assets\n",
- "!mv {EXPORT_PATH}/sentencepiece.bpe.model {EXPORT_PATH}/assets/"
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/25.0 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "jVx8Z19ikgA7"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "21316bfe791349378b4dd59008d7ef94",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside these two directories and see what we are dealing with:"
+ "text/plain": [
+ "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "id": "23WVUAczkgA7",
- "outputId": "2d1ec37f-bec4-4636-8f58-7fb6238efa3d",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6a7a2098b1a74a6da302eacba871daa7",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 1100748\n",
- "drwxr-xr-x 2 root root 4096 Mar 1 02:25 assets\n",
- "-rw-r--r-- 1 root root 679 Mar 1 02:25 config.json\n",
- "-rw-r--r-- 1 root root 1110059085 Mar 1 02:25 model.onnx\n",
- "-rw-r--r-- 1 root root 280 Mar 1 02:25 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 418 Mar 1 02:25 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 17082660 Mar 1 02:25 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {EXPORT_PATH}"
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/9.10M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "4G0_E6kqkgA8",
- "outputId": "1fd4adda-04a7-41dd-b764-8a667456d206",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 4952\n",
- "-rw-r--r-- 1 root root 5069051 Mar 1 02:25 sentencepiece.bpe.model\n"
- ]
- }
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForFeatureExtraction\n",
+ "\n",
+ "MODEL_NAME = \"xlm-roberta-base\"\n",
+ "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(EXPORT_PATH)\n",
+ "\n",
+ "# Create directory for assets and move the tokenizer files.\n",
+ "# A separate folder is needed for Spark NLP.\n",
+ "!mkdir {EXPORT_PATH}/assets\n",
+ "!mv {EXPORT_PATH}/sentencepiece.bpe.model {EXPORT_PATH}/assets/"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside these two directories and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 1100748\n",
+ "drwxr-xr-x 2 root root 4096 Mar 1 02:25 assets\n",
+ "-rw-r--r-- 1 root root 679 Mar 1 02:25 config.json\n",
+ "-rw-r--r-- 1 root root 1110059085 Mar 1 02:25 model.onnx\n",
+ "-rw-r--r-- 1 root root 280 Mar 1 02:25 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 418 Mar 1 02:25 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 17082660 Mar 1 02:25 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 4952\n",
+ "-rw-r--r-- 1 root root 5069051 Mar 1 02:25 sentencepiece.bpe.model\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {EXPORT_PATH}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save XLM-RoBERTa in Spark NLP\n",
+ "\n",
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.3.0\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.0\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m564.8/564.8 kB\u001b[0m \u001b[31m39.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `XlmRoBertaEmbeddings` which allows us to load the ONNX model\n",
+ "- Most params will be set automatically. They can also be set later after loading the model in `XlmRoBertaEmbeddings` during runtime, so don't worry about setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bound the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n",
+ "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want!\n",
+ "- The `dimension` param is is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "# All these params should be identical to the original ONNX model\n",
+ "xlm_roberta = XlmRoBertaEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"xlm_roberta\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setDimension(768)\\\n",
+ " .setStorageRef('xlm_roberta_base')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "xlm_roberta.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {EXPORT_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your ONNX XLM-RoBERTa model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 1089168\n",
+ "drwxr-xr-x 2 root root 4096 Mar 1 02:28 metadata\n",
+ "-rw-r--r-- 1 root root 1110228614 Mar 1 02:30 xlmroberta_onnx\n",
+ "-rw-r--r-- 1 root root 5069051 Mar 1 02:30 xlmroberta_spp\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {MODEL_NAME}_spark_nlp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XLM-RoBERTa model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sparknlp\n",
+ "\n",
+ "from sparknlp.base import *\n",
+ "from sparknlp.annotator import *\n",
+ "\n",
+ "document_assembler = DocumentAssembler()\\\n",
+ " .setInputCol(\"text\")\\\n",
+ " .setOutputCol(\"document\")\n",
+ "\n",
+ "tokenizer = Tokenizer()\\\n",
+ " .setInputCols([\"document\"])\\\n",
+ " .setOutputCol(\"token\")\n",
+ "\n",
+ "xlm_roberta_loaded = XlmRoBertaEmbeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"xlm_roberta\")\\\n",
+ "\n",
+ "pipeline = Pipeline(\n",
+ " stages = [\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " xlm_roberta_loaded\n",
+ " ])\n",
+ "\n",
+ "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
+ "model = pipeline.fit(data)\n",
+ "result = model.transform(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+\n",
+ "| embeddings|\n",
+ "+--------------------+\n",
+ "|[0.01781074, 0.16...|\n",
+ "|[-0.005121246, 0....|\n",
+ "|[0.00517074, 0.11...|\n",
+ "|[0.0065734405, 0....|\n",
+ "|[-0.028697606, 0....|\n",
+ "|[-0.0055652205, 0...|\n",
+ "|[-0.017623652, 0....|\n",
+ "|[-0.11884157, 0.0...|\n",
+ "|[-0.08074703, 0.1...|\n",
+ "|[-0.034696702, 0....|\n",
+ "|[-0.06809586, 0.1...|\n",
+ "|[-0.0508499, 0.07...|\n",
+ "|[-0.0065260027, 0...|\n",
+ "|[-0.029709894, 0....|\n",
+ "|[0.011362225, 0.2...|\n",
+ "|[0.044628896, 0.5...|\n",
+ "|[0.022999618, 0.2...|\n",
+ "|[0.017432231, 0.2...|\n",
+ "|[-0.024950821, 0....|\n",
+ "|[-0.031514782, 0....|\n",
+ "+--------------------+\n",
+ "only showing top 20 rows\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "result.selectExpr(\"explode(xlm_roberta.embeddings) as embeddings\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of XLM-RoBERTa models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "056d3ff42ddd43b2b20f5ec3cebaf38b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c82e8ffd4b74655b03da94aef80ec04": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3db2b1532b4f47da9f7fa17bf38a7004",
+ "max": 25,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_57bd7a45c349469c88051275e5fb4087",
+ "value": 25
+ }
+ },
+ "13f83e1a9a9747d596ca20aeb6c344db": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1837891dac1a4f56aa35a984af00a197": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3be731e51a324504ae4e17c21d552d57",
+ "placeholder": "",
+ "style": "IPY_MODEL_2993ed18ddf2429e885b32fb941f754d",
+ "value": " 615/615 [00:00<00:00, 32.3kB/s]"
+ }
+ },
+ "1a305c881d794ebcb9567083faa633c2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2045c150cb67451cb3dc40564147b21a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2093e4b98ee847b892caf6aeb5e6a733": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2045c150cb67451cb3dc40564147b21a",
+ "placeholder": "",
+ "style": "IPY_MODEL_b8d9d288ab7f466eb6f48758e215828e",
+ "value": " 9.10M/9.10M [00:00<00:00, 28.9MB/s]"
+ }
+ },
+ "21316bfe791349378b4dd59008d7ef94": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_55635d82f5624418a147cb9b19da84c1",
+ "IPY_MODEL_2c193771cf3a4ae2b2e2830733bbf0c1",
+ "IPY_MODEL_c57eceb2ca494a02946d9d8e6dc78506"
],
- "source": [
- "!ls -l {EXPORT_PATH}/assets"
- ]
+ "layout": "IPY_MODEL_bcf1015593684c5b8f71ea707a408f7d"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "zRN-_0wpkgA8"
- },
- "source": [
- "## Import and Save XLM-RoBERTa in Spark NLP\n",
- "\n",
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "264dd92cdff642c68d84d9ea6309b187": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "D0yO3TmwkgA8",
- "outputId": "8aaaaf9c-50ae-480b-b384-2ef72fdb6506",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.3.0\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.3.0\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m564.8/564.8 kB\u001b[0m \u001b[31m39.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
+ "298e83eae1d748aa8049f6adba534ac8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5eddacbffc8740b7bc3a9c1b179ac4dd",
+ "IPY_MODEL_d52133a3ba584a24bf06c5cf1dbf42bc",
+ "IPY_MODEL_b5815c68e9754f1ea7161db566c52d8c"
],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_8598b6cace044c34ac4d8b74f926da66"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "7slGi8nrkgA9"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "2993ed18ddf2429e885b32fb941f754d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "Z_Pnd_W8kgA9"
- },
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()"
- ]
+ "2c193771cf3a4ae2b2e2830733bbf0c1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_add4d5c9cbfa4a16bbc125986e9a56d1",
+ "max": 5069051,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1a305c881d794ebcb9567083faa633c2",
+ "value": 5069051
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "qfwqBV67kgA9"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `XlmRoBertaEmbeddings` which allows us to load the ONNX model\n",
- "- Most params will be set automatically. They can also be set later after loading the model in `XlmRoBertaEmbeddings` during runtime, so don't worry about setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bound the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n",
- "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want!\n",
- "- The `dimension` param is is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n"
- ]
+ "3609e88700234d4887116cffdb8a31cb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e5d2ac11228d4d07ab7370748e7fdab0",
+ "IPY_MODEL_0c82e8ffd4b74655b03da94aef80ec04",
+ "IPY_MODEL_b491f6db46ab493fb264bb2ff3c21b11"
+ ],
+ "layout": "IPY_MODEL_4495fecb11d647f69f1d1bcc8da2c8ff"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "8CqvC6sJkgA9"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "\n",
- "# All these params should be identical to the original ONNX model\n",
- "xlm_roberta = XlmRoBertaEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"xlm_roberta\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setDimension(768)\\\n",
- " .setStorageRef('xlm_roberta_base')"
- ]
+ "37c520248bb842818112488b6f915b8e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "8GWj_urkkgA9"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "38432651440344299dd9d6d462d681b4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "id": "ZmiaXQXKkgA-"
- },
- "outputs": [],
- "source": [
- "xlm_roberta.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
- ]
+ "3bd8d3b7d2e242108d04cb724c7ad89f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Z0aGPBmVkgA-"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "3be731e51a324504ae4e17c21d552d57": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "id": "k2yCZptPkgA-"
- },
- "outputs": [],
- "source": [
- "!rm -rf {EXPORT_PATH}"
- ]
+ "3db2b1532b4f47da9f7fa17bf38a7004": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "XPsHMXy4kgA-"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your ONNX XLM-RoBERTa model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "4331cf9d02474f9294b1615bd00b9edd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_264dd92cdff642c68d84d9ea6309b187",
+ "placeholder": "",
+ "style": "IPY_MODEL_e78dcd3395b94f33b945c5f4d4a214e0",
+ "value": "config.json: 100%"
+ }
},
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "id": "Iajnc2gekgA-",
- "outputId": "347d60b9-153e-4632-cec7-4fcaa7b66585",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 1089168\n",
- "drwxr-xr-x 2 root root 4096 Mar 1 02:28 metadata\n",
- "-rw-r--r-- 1 root root 1110228614 Mar 1 02:30 xlmroberta_onnx\n",
- "-rw-r--r-- 1 root root 5069051 Mar 1 02:30 xlmroberta_spp\n"
- ]
- }
+ "4495fecb11d647f69f1d1bcc8da2c8ff": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b3d82f11ee243a7b399a671a06b0b72": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ea300be5e9546eda7189e1d1f39c22c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "54cedeb3ae624df0ab7bbf2f6e2cda62": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "55635d82f5624418a147cb9b19da84c1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dd1d5e184948402894199ebaf7defc9b",
+ "placeholder": "",
+ "style": "IPY_MODEL_721dd749d6444b46b0293a227b5ebcb6",
+ "value": "sentencepiece.bpe.model: 100%"
+ }
+ },
+ "57bd7a45c349469c88051275e5fb4087": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "5c22226c130b45269da0c68652a52c7e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5eddacbffc8740b7bc3a9c1b179ac4dd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_79dfd5ceaa3a49ee9eb1414ea4744580",
+ "placeholder": "",
+ "style": "IPY_MODEL_d63f203cc55b41028203064c88e490c2",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "622af6562a5a44348aaa91c54f116d66": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ef2da6ecc4564ec9a5f72145e5fced87",
+ "max": 9096718,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_73dc20453159438daebc82a61350d9c8",
+ "value": 9096718
+ }
+ },
+ "6a7a2098b1a74a6da302eacba871daa7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_f2d8a8b7a58c45bbaa08defddd1f228a",
+ "IPY_MODEL_622af6562a5a44348aaa91c54f116d66",
+ "IPY_MODEL_2093e4b98ee847b892caf6aeb5e6a733"
],
- "source": [
- "! ls -l {MODEL_NAME}_spark_nlp"
- ]
+ "layout": "IPY_MODEL_4ea300be5e9546eda7189e1d1f39c22c"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "o71ap_SXkgA-"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XLM-RoBERTa model 😊"
- ]
+ "721dd749d6444b46b0293a227b5ebcb6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "id": "eIlcw7nnkgA-"
- },
- "outputs": [],
- "source": [
- "import sparknlp\n",
- "\n",
- "from sparknlp.base import *\n",
- "from sparknlp.annotator import *\n",
- "\n",
- "document_assembler = DocumentAssembler()\\\n",
- " .setInputCol(\"text\")\\\n",
- " .setOutputCol(\"document\")\n",
- "\n",
- "tokenizer = Tokenizer()\\\n",
- " .setInputCols([\"document\"])\\\n",
- " .setOutputCol(\"token\")\n",
- "\n",
- "xlm_roberta_loaded = XlmRoBertaEmbeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"xlm_roberta\")\\\n",
- "\n",
- "pipeline = Pipeline(\n",
- " stages = [\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " xlm_roberta_loaded\n",
- " ])\n",
- "\n",
- "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n",
- "model = pipeline.fit(data)\n",
- "result = model.transform(data)"
- ]
+ "73dc20453159438daebc82a61350d9c8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "37gi-KXfkgA_",
- "outputId": "7b86c51f-45b8-463d-b199-c5e2c7c8f847",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "+--------------------+\n",
- "| embeddings|\n",
- "+--------------------+\n",
- "|[0.01781074, 0.16...|\n",
- "|[-0.005121246, 0....|\n",
- "|[0.00517074, 0.11...|\n",
- "|[0.0065734405, 0....|\n",
- "|[-0.028697606, 0....|\n",
- "|[-0.0055652205, 0...|\n",
- "|[-0.017623652, 0....|\n",
- "|[-0.11884157, 0.0...|\n",
- "|[-0.08074703, 0.1...|\n",
- "|[-0.034696702, 0....|\n",
- "|[-0.06809586, 0.1...|\n",
- "|[-0.0508499, 0.07...|\n",
- "|[-0.0065260027, 0...|\n",
- "|[-0.029709894, 0....|\n",
- "|[0.011362225, 0.2...|\n",
- "|[0.044628896, 0.5...|\n",
- "|[0.022999618, 0.2...|\n",
- "|[0.017432231, 0.2...|\n",
- "|[-0.024950821, 0....|\n",
- "|[-0.031514782, 0....|\n",
- "+--------------------+\n",
- "only showing top 20 rows\n",
- "\n"
- ]
- }
+ "79dfd5ceaa3a49ee9eb1414ea4744580": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8598b6cace044c34ac4d8b74f926da66": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "871a297967cc4fe49c5463f4472a7b2f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9a99c367c8ce4ab5ac4987b0f3ca878d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a5dd185eef4844fa847808b2b91aa26b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "add4d5c9cbfa4a16bbc125986e9a56d1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b30358df084f451090cde47debff0345": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b491f6db46ab493fb264bb2ff3c21b11": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_38432651440344299dd9d6d462d681b4",
+ "placeholder": "",
+ "style": "IPY_MODEL_37c520248bb842818112488b6f915b8e",
+ "value": " 25.0/25.0 [00:00<00:00, 97.1B/s]"
+ }
+ },
+ "b5815c68e9754f1ea7161db566c52d8c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b30358df084f451090cde47debff0345",
+ "placeholder": "",
+ "style": "IPY_MODEL_871a297967cc4fe49c5463f4472a7b2f",
+ "value": " 1.12G/1.12G [00:10<00:00, 146MB/s]"
+ }
+ },
+ "b600282be466458089b1a620acb2d28c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b8d9d288ab7f466eb6f48758e215828e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bcf1015593684c5b8f71ea707a408f7d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c57eceb2ca494a02946d9d8e6dc78506": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5c22226c130b45269da0c68652a52c7e",
+ "placeholder": "",
+ "style": "IPY_MODEL_9a99c367c8ce4ab5ac4987b0f3ca878d",
+ "value": " 5.07M/5.07M [00:00<00:00, 19.0MB/s]"
+ }
+ },
+ "d52133a3ba584a24bf06c5cf1dbf42bc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_13f83e1a9a9747d596ca20aeb6c344db",
+ "max": 1115567652,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_a5dd185eef4844fa847808b2b91aa26b",
+ "value": 1115567652
+ }
+ },
+ "d63f203cc55b41028203064c88e490c2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dd1d5e184948402894199ebaf7defc9b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e5d2ac11228d4d07ab7370748e7fdab0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f04fd1d8dae449c8a4860127f9a14a24",
+ "placeholder": "",
+ "style": "IPY_MODEL_056d3ff42ddd43b2b20f5ec3cebaf38b",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "e631d560fb66440891770005fc492e90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "e78dcd3395b94f33b945c5f4d4a214e0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ec1a9b2d5a994caea613dc851046eddf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4331cf9d02474f9294b1615bd00b9edd",
+ "IPY_MODEL_f617219731114a22a24ee6a477696004",
+ "IPY_MODEL_1837891dac1a4f56aa35a984af00a197"
],
- "source": [
- "result.selectExpr(\"explode(xlm_roberta.embeddings) as embeddings\").show()"
- ]
+ "layout": "IPY_MODEL_3bd8d3b7d2e242108d04cb724c7ad89f"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "SMP8yiKhkgA_"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of XLM-RoBERTa models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": []
+ "ef2da6ecc4564ec9a5f72145e5fced87": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
+ "f04fd1d8dae449c8a4860127f9a14a24": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "f2d8a8b7a58c45bbaa08defddd1f228a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b600282be466458089b1a620acb2d28c",
+ "placeholder": "",
+ "style": "IPY_MODEL_54cedeb3ae624df0ab7bbf2f6e2cda62",
+ "value": "tokenizer.json: 100%"
+ }
},
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "ec1a9b2d5a994caea613dc851046eddf": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_4331cf9d02474f9294b1615bd00b9edd",
- "IPY_MODEL_f617219731114a22a24ee6a477696004",
- "IPY_MODEL_1837891dac1a4f56aa35a984af00a197"
- ],
- "layout": "IPY_MODEL_3bd8d3b7d2e242108d04cb724c7ad89f"
- }
- },
- "4331cf9d02474f9294b1615bd00b9edd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_264dd92cdff642c68d84d9ea6309b187",
- "placeholder": "",
- "style": "IPY_MODEL_e78dcd3395b94f33b945c5f4d4a214e0",
- "value": "config.json: 100%"
- }
- },
- "f617219731114a22a24ee6a477696004": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_4b3d82f11ee243a7b399a671a06b0b72",
- "max": 615,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_e631d560fb66440891770005fc492e90",
- "value": 615
- }
- },
- "1837891dac1a4f56aa35a984af00a197": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3be731e51a324504ae4e17c21d552d57",
- "placeholder": "",
- "style": "IPY_MODEL_2993ed18ddf2429e885b32fb941f754d",
- "value": " 615/615 [00:00<00:00, 32.3kB/s]"
- }
- },
- "3bd8d3b7d2e242108d04cb724c7ad89f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "264dd92cdff642c68d84d9ea6309b187": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e78dcd3395b94f33b945c5f4d4a214e0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "4b3d82f11ee243a7b399a671a06b0b72": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e631d560fb66440891770005fc492e90": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "3be731e51a324504ae4e17c21d552d57": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2993ed18ddf2429e885b32fb941f754d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "298e83eae1d748aa8049f6adba534ac8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_5eddacbffc8740b7bc3a9c1b179ac4dd",
- "IPY_MODEL_d52133a3ba584a24bf06c5cf1dbf42bc",
- "IPY_MODEL_b5815c68e9754f1ea7161db566c52d8c"
- ],
- "layout": "IPY_MODEL_8598b6cace044c34ac4d8b74f926da66"
- }
- },
- "5eddacbffc8740b7bc3a9c1b179ac4dd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_79dfd5ceaa3a49ee9eb1414ea4744580",
- "placeholder": "",
- "style": "IPY_MODEL_d63f203cc55b41028203064c88e490c2",
- "value": "model.safetensors: 100%"
- }
- },
- "d52133a3ba584a24bf06c5cf1dbf42bc": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_13f83e1a9a9747d596ca20aeb6c344db",
- "max": 1115567652,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_a5dd185eef4844fa847808b2b91aa26b",
- "value": 1115567652
- }
- },
- "b5815c68e9754f1ea7161db566c52d8c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b30358df084f451090cde47debff0345",
- "placeholder": "",
- "style": "IPY_MODEL_871a297967cc4fe49c5463f4472a7b2f",
- "value": " 1.12G/1.12G [00:10<00:00, 146MB/s]"
- }
- },
- "8598b6cace044c34ac4d8b74f926da66": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "79dfd5ceaa3a49ee9eb1414ea4744580": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d63f203cc55b41028203064c88e490c2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "13f83e1a9a9747d596ca20aeb6c344db": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a5dd185eef4844fa847808b2b91aa26b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "b30358df084f451090cde47debff0345": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "871a297967cc4fe49c5463f4472a7b2f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3609e88700234d4887116cffdb8a31cb": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_e5d2ac11228d4d07ab7370748e7fdab0",
- "IPY_MODEL_0c82e8ffd4b74655b03da94aef80ec04",
- "IPY_MODEL_b491f6db46ab493fb264bb2ff3c21b11"
- ],
- "layout": "IPY_MODEL_4495fecb11d647f69f1d1bcc8da2c8ff"
- }
- },
- "e5d2ac11228d4d07ab7370748e7fdab0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f04fd1d8dae449c8a4860127f9a14a24",
- "placeholder": "",
- "style": "IPY_MODEL_056d3ff42ddd43b2b20f5ec3cebaf38b",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "0c82e8ffd4b74655b03da94aef80ec04": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3db2b1532b4f47da9f7fa17bf38a7004",
- "max": 25,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_57bd7a45c349469c88051275e5fb4087",
- "value": 25
- }
- },
- "b491f6db46ab493fb264bb2ff3c21b11": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_38432651440344299dd9d6d462d681b4",
- "placeholder": "",
- "style": "IPY_MODEL_37c520248bb842818112488b6f915b8e",
- "value": " 25.0/25.0 [00:00<00:00, 97.1B/s]"
- }
- },
- "4495fecb11d647f69f1d1bcc8da2c8ff": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f04fd1d8dae449c8a4860127f9a14a24": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "056d3ff42ddd43b2b20f5ec3cebaf38b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3db2b1532b4f47da9f7fa17bf38a7004": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "57bd7a45c349469c88051275e5fb4087": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "38432651440344299dd9d6d462d681b4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "37c520248bb842818112488b6f915b8e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "21316bfe791349378b4dd59008d7ef94": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_55635d82f5624418a147cb9b19da84c1",
- "IPY_MODEL_2c193771cf3a4ae2b2e2830733bbf0c1",
- "IPY_MODEL_c57eceb2ca494a02946d9d8e6dc78506"
- ],
- "layout": "IPY_MODEL_bcf1015593684c5b8f71ea707a408f7d"
- }
- },
- "55635d82f5624418a147cb9b19da84c1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_dd1d5e184948402894199ebaf7defc9b",
- "placeholder": "",
- "style": "IPY_MODEL_721dd749d6444b46b0293a227b5ebcb6",
- "value": "sentencepiece.bpe.model: 100%"
- }
- },
- "2c193771cf3a4ae2b2e2830733bbf0c1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_add4d5c9cbfa4a16bbc125986e9a56d1",
- "max": 5069051,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_1a305c881d794ebcb9567083faa633c2",
- "value": 5069051
- }
- },
- "c57eceb2ca494a02946d9d8e6dc78506": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5c22226c130b45269da0c68652a52c7e",
- "placeholder": "",
- "style": "IPY_MODEL_9a99c367c8ce4ab5ac4987b0f3ca878d",
- "value": " 5.07M/5.07M [00:00<00:00, 19.0MB/s]"
- }
- },
- "bcf1015593684c5b8f71ea707a408f7d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "dd1d5e184948402894199ebaf7defc9b": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "721dd749d6444b46b0293a227b5ebcb6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "add4d5c9cbfa4a16bbc125986e9a56d1": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1a305c881d794ebcb9567083faa633c2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "5c22226c130b45269da0c68652a52c7e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9a99c367c8ce4ab5ac4987b0f3ca878d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "6a7a2098b1a74a6da302eacba871daa7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_f2d8a8b7a58c45bbaa08defddd1f228a",
- "IPY_MODEL_622af6562a5a44348aaa91c54f116d66",
- "IPY_MODEL_2093e4b98ee847b892caf6aeb5e6a733"
- ],
- "layout": "IPY_MODEL_4ea300be5e9546eda7189e1d1f39c22c"
- }
- },
- "f2d8a8b7a58c45bbaa08defddd1f228a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b600282be466458089b1a620acb2d28c",
- "placeholder": "",
- "style": "IPY_MODEL_54cedeb3ae624df0ab7bbf2f6e2cda62",
- "value": "tokenizer.json: 100%"
- }
- },
- "622af6562a5a44348aaa91c54f116d66": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ef2da6ecc4564ec9a5f72145e5fced87",
- "max": 9096718,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_73dc20453159438daebc82a61350d9c8",
- "value": 9096718
- }
- },
- "2093e4b98ee847b892caf6aeb5e6a733": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2045c150cb67451cb3dc40564147b21a",
- "placeholder": "",
- "style": "IPY_MODEL_b8d9d288ab7f466eb6f48758e215828e",
- "value": " 9.10M/9.10M [00:00<00:00, 28.9MB/s]"
- }
- },
- "4ea300be5e9546eda7189e1d1f39c22c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b600282be466458089b1a620acb2d28c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "54cedeb3ae624df0ab7bbf2f6e2cda62": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ef2da6ecc4564ec9a5f72145e5fced87": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "73dc20453159438daebc82a61350d9c8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "2045c150cb67451cb3dc40564147b21a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b8d9d288ab7f466eb6f48758e215828e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "f617219731114a22a24ee6a477696004": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b3d82f11ee243a7b399a671a06b0b72",
+ "max": 615,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_e631d560fb66440891770005fc492e90",
+ "value": 615
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForQuestionAnswering.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForQuestionAnswering.ipynb
index 5f3a6e2d16d03d..34520072d358ab 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForQuestionAnswering.ipynb
@@ -1,2433 +1,2295 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForQuestionAnswering.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX XlmRoBertaForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `XlmRoBertaForQuestionAnswering` is only available since in `Spark NLP 5.2.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import XLM-RoBERTa models trained/fine-tuned for question answering via `XlmRoBertaForQuestionAnswering` or `TFXlmRoBertaForQuestionAnswering`. These models are usually under `Question Answering` category and have `xlm-roberta` in their labels\n",
+ "- Reference: [TFXlmRoBertaForQuestionAnswering](https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.TFXLMRobertaForQuestionAnswering)\n",
+ "- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=question-answering)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- Albert uses SentencePiece, so we will have to install that as well"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "_opj2ZzntbDk"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForQuestionAnswering.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m29.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m43.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m38.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m63.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m43.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m70.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m69.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m87.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m87.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m93.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m77.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m81.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m42.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m76.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m58.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m83.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m68.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m94.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m68.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m87.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m38.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m74.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m85.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m75.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m51.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m84.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m82.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m74.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m56.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m90.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m32.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m89.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m38.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m79.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m77.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m68.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m75.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use ['deepset/xlm-roberta-base-squad2'](https://huggingface.co/'deepset/xlm-roberta-base-squad2') model from HuggingFace as an example as an example and load it as a `ORTModelForQuestionAnswering`, representing an ONNX model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "u1i6TpsutbDl"
- },
- "source": [
- "## Import ONNX XlmRoBertaForQuestionAnswering models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `XlmRoBertaForQuestionAnswering` is only available since in `Spark NLP 5.2.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import XLM-RoBERTa models trained/fine-tuned for question answering via `XlmRoBertaForQuestionAnswering` or `TFXlmRoBertaForQuestionAnswering`. These models are usually under `Question Answering` category and have `xlm-roberta` in their labels\n",
- "- Reference: [TFXlmRoBertaForQuestionAnswering](https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.TFXLMRobertaForQuestionAnswering)\n",
- "- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=question-answering)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "tikYI59NtbDl"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b8d926231122407f95b4483350bc4e8e",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/605 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "fHfLHo2CtbDl"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
- "- Albert uses SentencePiece, so we will have to install that as well"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export to ONNX.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "BtaSwj8mtbDl",
- "outputId": "42f0e775-573e-4260-a696-48a25bedc212",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7f808235cbfd4ec28e05215cbd27e3f8",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m29.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m43.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m38.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m63.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m43.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m70.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m69.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m87.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m87.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m93.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m77.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m81.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m42.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m76.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m58.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m83.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m40.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m68.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m94.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m68.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m87.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m38.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m74.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m85.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m75.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m51.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m84.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m82.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m74.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m56.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m90.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m32.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m89.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m38.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m79.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m77.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m68.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m75.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/1.11G [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "oOUudG_-tbDm"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "54210209428948189c549c1d2cd939ca",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use ['deepset/xlm-roberta-base-squad2'](https://huggingface.co/'deepset/xlm-roberta-base-squad2') model from HuggingFace as an example as an example and load it as a `ORTModelForQuestionAnswering`, representing an ONNX model."
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/79.0 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "id": "0MbeB8jutbDm",
- "outputId": "f0f57344-6fc4-4d0b-a019-f05ae56b368c",
- "colab": {
- "referenced_widgets": [
- "b8d926231122407f95b4483350bc4e8e",
- "4154781b221948aab4258b4fa6799996",
- "2d9727f78a41430890ec77cce4fe0ce5",
- "17ca1b81af0f408e9ab164456872cd49",
- "6885319026334ff99533e70c8670baea",
- "6566341fd0c04880a7ab5ff1409e4448",
- "4119ae23b5bc4a0c9148e60eb9dfcb53",
- "88166d6e4ee04f11a79c2a1a532c7300",
- "130c539288884b3aa341f9be6c62d29a",
- "032d809a9bb64c9b863f6f4b7b115133",
- "c9bb2353da02443c94afd06685b8cde8",
- "7f808235cbfd4ec28e05215cbd27e3f8",
- "10e1aa374ebd43a9b083d9b7aab95b23",
- "a5844989c8f648df82ee6414eeec9a21",
- "d02f4f39378b4e568f0f9382efd6a9d1",
- "76f48ff667754dd682ade56eaaca7049",
- "f251ac2cd1ed45d4a7d25d6b906a84bc",
- "824bc6d195ba465497fe34898880a7f8",
- "05d85f70379b486db128698df001384e",
- "7c620665c5964164adaf3539a0cb5ff5",
- "f708d4e670c54cfe93d9ba82818590dc",
- "0be61c031fad4f31877ad29119b6a77e",
- "54210209428948189c549c1d2cd939ca",
- "e77456b2ef2a46ef99572a02e1b70817",
- "3fbcd89672f842bf9692ebffd72d26c7",
- "f87349ba371f41d788f802a64f9312f9",
- "c217c65b12264609862aeaeb1c25adc0",
- "2bef4b3dc3cd457fa4ccb2e796d771f7",
- "240bdca5cc794ae8b1319310e42aec2c",
- "389d3448663f4389afa71734e6cc1434",
- "937847a9252247979ed76e17dc60f7b6",
- "cb0069caad574477866382fa085508d5",
- "1f3d6a7de82f4710a79d814b6af57679",
- "5473ab4a957b414aa344f37f921d89a2",
- "423cf810d1254518af9e4fc79f97e54e",
- "83c2346cea5c4928a23f4fa38d2ffe40",
- "e2fde33a78ff4172b806b1530bda4e9a",
- "d1b3fb0032ce400c80922d128940333c",
- "96b57a70103f4ac9b014b375e1aee824",
- "74b28f16de69498b9915e501863b4d73",
- "8338b89888584db5944d0ad5baa9118f",
- "8baaa8a868ec4e0b98388e565d61a3db",
- "9c1d4b12b247412295fe57637f7d2e60",
- "ac6ad6631b054107b3287180cf3b9e68",
- "8c36c9eda216474d86fe73c161690a6d",
- "14d951a930d5478b9f4e77c430c2bb6f",
- "f6ae4d642a9f41cd829dde04347e50a3",
- "969548659ca74527afa6a0f92e3a98e7",
- "7f90a349f8ce4525aa33649d70c09a33",
- "80fb8200d6d74cbca19ce946d73d9efa",
- "67688fa958004ccfbacf0288f4075dea",
- "fa8d4ffdee2d454cb14941089ff881d6",
- "2577bab4d4a64010addd7c2b5370a6c8",
- "01f9a013ab38435191872a8bce64dd69",
- "0d91f57bfa25489eb70a23f2bd834cdf"
- ],
- "base_uri": "https://localhost:8080/",
- "height": 405
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5473ab4a957b414aa344f37f921d89a2",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/605 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "b8d926231122407f95b4483350bc4e8e"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/1.11G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "7f808235cbfd4ec28e05215cbd27e3f8"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/79.0 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "54210209428948189c549c1d2cd939ca"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "5473ab4a957b414aa344f37f921d89a2"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/150 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "8c36c9eda216474d86fe73c161690a6d"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForQuestionAnswering\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'deepset/xlm-roberta-base-squad2'\n",
- "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForQuestionAnswering.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(ONNX_MODEL)"
+ "text/plain": [
+ "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "CKGIfc8ltbDn"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8c36c9eda216474d86fe73c161690a6d",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "Let's have a look inside this directory and see what we are dealing with:"
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/150 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "eoyOGvXftbDn",
- "outputId": "3d061001-3161-4594-f681-e88211f4e796",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 1105736\n",
- "-rw-r--r-- 1 root root 787 Jan 9 19:44 config.json\n",
- "-rw-r--r-- 1 root root 1110100056 Jan 9 19:44 model.onnx\n",
- "-rw-r--r-- 1 root root 5069051 Jan 9 19:44 sentencepiece.bpe.model\n",
- "-rw-r--r-- 1 root root 167 Jan 9 19:44 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 500 Jan 9 19:44 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 17082730 Jan 9 19:44 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {ONNX_MODEL}"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForQuestionAnswering\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'deepset/xlm-roberta-base-squad2'\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForQuestionAnswering.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(ONNX_MODEL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside this directory and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "blR5qjXwtbDn"
- },
- "source": [
- "- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to `assets` folder which Spark NLP will look for"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 1105736\n",
+ "-rw-r--r-- 1 root root 787 Jan 9 19:44 config.json\n",
+ "-rw-r--r-- 1 root root 1110100056 Jan 9 19:44 model.onnx\n",
+ "-rw-r--r-- 1 root root 5069051 Jan 9 19:44 sentencepiece.bpe.model\n",
+ "-rw-r--r-- 1 root root 167 Jan 9 19:44 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 500 Jan 9 19:44 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 17082730 Jan 9 19:44 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to `assets` folder which Spark NLP will look for"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `sentencepiece.bpe.model` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "QEOomAeKtbDn"
- },
- "outputs": [],
- "source": [
- "!mkdir {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/deepset/xlm-roberta-base-squad2:\n",
+ "total 1100788\n",
+ "drwxr-xr-x 2 root root 4096 Jan 9 19:44 assets\n",
+ "-rw-r--r-- 1 root root 787 Jan 9 19:44 config.json\n",
+ "-rw-r--r-- 1 root root 1110100056 Jan 9 19:44 model.onnx\n",
+ "-rw-r--r-- 1 root root 167 Jan 9 19:44 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 500 Jan 9 19:44 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 17082730 Jan 9 19:44 tokenizer.json\n",
+ "\n",
+ "onnx_models/deepset/xlm-roberta-base-squad2/assets:\n",
+ "total 4952\n",
+ "-rw-r--r-- 1 root root 5069051 Jan 9 19:44 sentencepiece.bpe.model\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save XlmRoBertaForQuestionAnswering in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "5wjZ8w19tbDn"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m40.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m23.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "ehhwZp5ntbDn"
- },
- "source": [
- "Voila! We have our `sentencepiece.bpe.model` inside assets directory"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `RoBertaForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `RoBertaForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "spanClassifier = XlmRoBertaForQuestionAnswering.loadSavedModel(\n",
+ " ONNX_MODEL,\n",
+ " spark\n",
+ " )\\\n",
+ " .setInputCols([\"document_question\",'document_context'])\\\n",
+ " .setOutputCol(\"answer\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setMaxSentenceLength(512)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "spanClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your XlmRoBertaForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "s7B5nkQ7tbDn",
- "outputId": "d5a9f508-f04c-4281-b99e-a74ce6c8c153",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "onnx_models/deepset/xlm-roberta-base-squad2:\n",
- "total 1100788\n",
- "drwxr-xr-x 2 root root 4096 Jan 9 19:44 assets\n",
- "-rw-r--r-- 1 root root 787 Jan 9 19:44 config.json\n",
- "-rw-r--r-- 1 root root 1110100056 Jan 9 19:44 model.onnx\n",
- "-rw-r--r-- 1 root root 167 Jan 9 19:44 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 500 Jan 9 19:44 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 17082730 Jan 9 19:44 tokenizer.json\n",
- "\n",
- "onnx_models/deepset/xlm-roberta-base-squad2/assets:\n",
- "total 4952\n",
- "-rw-r--r-- 1 root root 5069051 Jan 9 19:44 sentencepiece.bpe.model\n"
- ]
- }
- ],
- "source": [
- "!ls -lR {ONNX_MODEL}"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 484956\n",
+ "drwxr-xr-x 4 root root 4096 Oct 17 16:49 fields\n",
+ "drwxr-xr-x 2 root root 4096 Oct 17 16:49 metadata\n",
+ "-rw-r--r-- 1 root root 496583922 Oct 17 16:49 roberta_classification_onnx\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XlmRoBertaForQuestionAnswering model in Spark NLP 🚀 pipeline!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "bVKi9X6ftbDn"
- },
- "source": [
- "## Import and Save RoBertaForQuestionAnswering in Spark NLP\n"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------------------------+\n",
+ "|result |\n",
+ "+---------------------------+\n",
+ "|[as Amazonia or the Amazon]|\n",
+ "+---------------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "document_assembler = MultiDocumentAssembler() \\\n",
+ " .setInputCols([\"question\", \"context\"]) \\\n",
+ " .setOutputCols([\"document_question\", \"document_context\"])\n",
+ "\n",
+ "spanClassifier_loaded = XlmRoBertaForQuestionAnswering.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
+ " .setInputCols([\"document_question\",'document_context'])\\\n",
+ " .setOutputCol(\"answer\")\n",
+ "\n",
+ "pipeline = Pipeline().setStages([\n",
+ " document_assembler,\n",
+ " spanClassifier_loaded\n",
+ "])\n",
+ "\n",
+ "context = \"\"\"The Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain \"Amazonas\" in their names. The Amazon represents over half of the planet's remaining rainforests, and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species.\"\"\"\n",
+ "question = \"Which name is also used to describe the Amazon rainforest in English?\"\n",
+ "example = spark.createDataFrame([[question, context]]).toDF(\"question\", \"context\")\n",
+ "result = pipeline.fit(example).transform(example)\n",
+ "\n",
+ "result.select(\"answer.result\").show(1, False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `RoBertaForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "01f9a013ab38435191872a8bce64dd69": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "be7jTIVAtbDo"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "032d809a9bb64c9b863f6f4b7b115133": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "R_YAIBS_tbDo",
- "outputId": "7506fbe6-aa72-4697-ae19-5ab7ae404f18"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m40.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m23.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
+ "05d85f70379b486db128698df001384e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0be61c031fad4f31877ad29119b6a77e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0d91f57bfa25489eb70a23f2bd834cdf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "10e1aa374ebd43a9b083d9b7aab95b23": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f251ac2cd1ed45d4a7d25d6b906a84bc",
+ "placeholder": "",
+ "style": "IPY_MODEL_824bc6d195ba465497fe34898880a7f8",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "130c539288884b3aa341f9be6c62d29a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "14d951a930d5478b9f4e77c430c2bb6f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_80fb8200d6d74cbca19ce946d73d9efa",
+ "placeholder": "",
+ "style": "IPY_MODEL_67688fa958004ccfbacf0288f4075dea",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "17ca1b81af0f408e9ab164456872cd49": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_032d809a9bb64c9b863f6f4b7b115133",
+ "placeholder": "",
+ "style": "IPY_MODEL_c9bb2353da02443c94afd06685b8cde8",
+ "value": " 605/605 [00:00<00:00, 24.9kB/s]"
+ }
+ },
+ "1f3d6a7de82f4710a79d814b6af57679": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "240bdca5cc794ae8b1319310e42aec2c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2577bab4d4a64010addd7c2b5370a6c8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2bef4b3dc3cd457fa4ccb2e796d771f7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d9727f78a41430890ec77cce4fe0ce5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_88166d6e4ee04f11a79c2a1a532c7300",
+ "max": 605,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_130c539288884b3aa341f9be6c62d29a",
+ "value": 605
+ }
+ },
+ "389d3448663f4389afa71734e6cc1434": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3fbcd89672f842bf9692ebffd72d26c7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_389d3448663f4389afa71734e6cc1434",
+ "max": 79,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_937847a9252247979ed76e17dc60f7b6",
+ "value": 79
+ }
+ },
+ "4119ae23b5bc4a0c9148e60eb9dfcb53": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4154781b221948aab4258b4fa6799996": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6566341fd0c04880a7ab5ff1409e4448",
+ "placeholder": "",
+ "style": "IPY_MODEL_4119ae23b5bc4a0c9148e60eb9dfcb53",
+ "value": "config.json: 100%"
+ }
+ },
+ "423cf810d1254518af9e4fc79f97e54e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_96b57a70103f4ac9b014b375e1aee824",
+ "placeholder": "",
+ "style": "IPY_MODEL_74b28f16de69498b9915e501863b4d73",
+ "value": "sentencepiece.bpe.model: 100%"
+ }
+ },
+ "54210209428948189c549c1d2cd939ca": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e77456b2ef2a46ef99572a02e1b70817",
+ "IPY_MODEL_3fbcd89672f842bf9692ebffd72d26c7",
+ "IPY_MODEL_f87349ba371f41d788f802a64f9312f9"
],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_c217c65b12264609862aeaeb1c25adc0"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "5MD6ogjatbDo"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "5473ab4a957b414aa344f37f921d89a2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_423cf810d1254518af9e4fc79f97e54e",
+ "IPY_MODEL_83c2346cea5c4928a23f4fa38d2ffe40",
+ "IPY_MODEL_e2fde33a78ff4172b806b1530bda4e9a"
+ ],
+ "layout": "IPY_MODEL_d1b3fb0032ce400c80922d128940333c"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "kcTBCppJtbDo",
- "outputId": "379c7e82-9918-4294-b20b-d0c45215febf"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
+ "6566341fd0c04880a7ab5ff1409e4448": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "67688fa958004ccfbacf0288f4075dea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6885319026334ff99533e70c8670baea": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "74b28f16de69498b9915e501863b4d73": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "76f48ff667754dd682ade56eaaca7049": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7c620665c5964164adaf3539a0cb5ff5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "7f808235cbfd4ec28e05215cbd27e3f8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_10e1aa374ebd43a9b083d9b7aab95b23",
+ "IPY_MODEL_a5844989c8f648df82ee6414eeec9a21",
+ "IPY_MODEL_d02f4f39378b4e568f0f9382efd6a9d1"
],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "layout": "IPY_MODEL_76f48ff667754dd682ade56eaaca7049"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "k3S-0O9btbDo"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `RoBertaForQuestionAnswering` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `RoBertaForQuestionAnswering` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.\n",
- "\n"
- ]
+ "7f90a349f8ce4525aa33649d70c09a33": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Gsnk6JQ7tbDo"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "spanClassifier = RoBertaForQuestionAnswering.loadSavedModel(\n",
- " ONNX_MODEL,\n",
- " spark\n",
- " )\\\n",
- " .setInputCols([\"document_question\",'document_context'])\\\n",
- " .setOutputCol(\"answer\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setMaxSentenceLength(512)"
- ]
+ "80fb8200d6d74cbca19ce946d73d9efa": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "3ed2WScitbDo"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "824bc6d195ba465497fe34898880a7f8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Gy7YzF0htbDo"
- },
- "outputs": [],
- "source": [
- "spanClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
- ]
+ "8338b89888584db5944d0ad5baa9118f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "1p0HFM4atbDo"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "83c2346cea5c4928a23f4fa38d2ffe40": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8338b89888584db5944d0ad5baa9118f",
+ "max": 5069051,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_8baaa8a868ec4e0b98388e565d61a3db",
+ "value": 5069051
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "RvkyiLHotbDo"
- },
- "outputs": [],
- "source": [
- "!rm -rf {ONNX_MODEL}"
- ]
+ "88166d6e4ee04f11a79c2a1a532c7300": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "xiNxN0tdtbDo"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your RoBertaForQuestionAnswering model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "8baaa8a868ec4e0b98388e565d61a3db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Kzym6Y90tbDo",
- "outputId": "b3f2deb2-be48-4eac-e747-472ec58d6873"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 484956\n",
- "drwxr-xr-x 4 root root 4096 Oct 17 16:49 fields\n",
- "drwxr-xr-x 2 root root 4096 Oct 17 16:49 metadata\n",
- "-rw-r--r-- 1 root root 496583922 Oct 17 16:49 roberta_classification_onnx\n"
- ]
- }
+ "8c36c9eda216474d86fe73c161690a6d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_14d951a930d5478b9f4e77c430c2bb6f",
+ "IPY_MODEL_f6ae4d642a9f41cd829dde04347e50a3",
+ "IPY_MODEL_969548659ca74527afa6a0f92e3a98e7"
],
- "source": [
- "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
- ]
+ "layout": "IPY_MODEL_7f90a349f8ce4525aa33649d70c09a33"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "m2NiO3hytbDo"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForQuestionAnswering model in Spark NLP 🚀 pipeline!"
- ]
+ "937847a9252247979ed76e17dc60f7b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "BmIIrsGctbDp",
- "outputId": "44f84743-6908-4143-87b6-244aae258115"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+---------------------------+\n",
- "|result |\n",
- "+---------------------------+\n",
- "|[as Amazonia or the Amazon]|\n",
- "+---------------------------+\n",
- "\n"
- ]
- }
+ "969548659ca74527afa6a0f92e3a98e7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_01f9a013ab38435191872a8bce64dd69",
+ "placeholder": "",
+ "style": "IPY_MODEL_0d91f57bfa25489eb70a23f2bd834cdf",
+ "value": " 150/150 [00:00<00:00, 10.5kB/s]"
+ }
+ },
+ "96b57a70103f4ac9b014b375e1aee824": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9c1d4b12b247412295fe57637f7d2e60": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a5844989c8f648df82ee6414eeec9a21": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_05d85f70379b486db128698df001384e",
+ "max": 1109846632,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_7c620665c5964164adaf3539a0cb5ff5",
+ "value": 1109846632
+ }
+ },
+ "ac6ad6631b054107b3287180cf3b9e68": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b8d926231122407f95b4483350bc4e8e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4154781b221948aab4258b4fa6799996",
+ "IPY_MODEL_2d9727f78a41430890ec77cce4fe0ce5",
+ "IPY_MODEL_17ca1b81af0f408e9ab164456872cd49"
],
- "source": [
- "document_assembler = MultiDocumentAssembler() \\\n",
- " .setInputCols([\"question\", \"context\"]) \\\n",
- " .setOutputCols([\"document_question\", \"document_context\"])\n",
- "\n",
- "spanClassifier_loaded = RoBertaForQuestionAnswering.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
- " .setInputCols([\"document_question\",'document_context'])\\\n",
- " .setOutputCol(\"answer\")\n",
- "\n",
- "pipeline = Pipeline().setStages([\n",
- " document_assembler,\n",
- " spanClassifier_loaded\n",
- "])\n",
- "\n",
- "context = \"\"\"The Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain \"Amazonas\" in their names. The Amazon represents over half of the planet's remaining rainforests, and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species.\"\"\"\n",
- "question = \"Which name is also used to describe the Amazon rainforest in English?\"\n",
- "example = spark.createDataFrame([[question, context]]).toDF(\"question\", \"context\")\n",
- "result = pipeline.fit(example).transform(example)\n",
- "\n",
- "result.select(\"answer.result\").show(1, False)"
- ]
+ "layout": "IPY_MODEL_6885319026334ff99533e70c8670baea"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "M5L0cHZptbDp"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `RoBertaForQuestionAnswering` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": []
+ "c217c65b12264609862aeaeb1c25adc0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
+ "c9bb2353da02443c94afd06685b8cde8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
+ "cb0069caad574477866382fa085508d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "b8d926231122407f95b4483350bc4e8e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_4154781b221948aab4258b4fa6799996",
- "IPY_MODEL_2d9727f78a41430890ec77cce4fe0ce5",
- "IPY_MODEL_17ca1b81af0f408e9ab164456872cd49"
- ],
- "layout": "IPY_MODEL_6885319026334ff99533e70c8670baea"
- }
- },
- "4154781b221948aab4258b4fa6799996": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_6566341fd0c04880a7ab5ff1409e4448",
- "placeholder": "",
- "style": "IPY_MODEL_4119ae23b5bc4a0c9148e60eb9dfcb53",
- "value": "config.json: 100%"
- }
- },
- "2d9727f78a41430890ec77cce4fe0ce5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_88166d6e4ee04f11a79c2a1a532c7300",
- "max": 605,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_130c539288884b3aa341f9be6c62d29a",
- "value": 605
- }
- },
- "17ca1b81af0f408e9ab164456872cd49": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_032d809a9bb64c9b863f6f4b7b115133",
- "placeholder": "",
- "style": "IPY_MODEL_c9bb2353da02443c94afd06685b8cde8",
- "value": " 605/605 [00:00<00:00, 24.9kB/s]"
- }
- },
- "6885319026334ff99533e70c8670baea": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6566341fd0c04880a7ab5ff1409e4448": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4119ae23b5bc4a0c9148e60eb9dfcb53": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "88166d6e4ee04f11a79c2a1a532c7300": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "130c539288884b3aa341f9be6c62d29a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "032d809a9bb64c9b863f6f4b7b115133": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c9bb2353da02443c94afd06685b8cde8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7f808235cbfd4ec28e05215cbd27e3f8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_10e1aa374ebd43a9b083d9b7aab95b23",
- "IPY_MODEL_a5844989c8f648df82ee6414eeec9a21",
- "IPY_MODEL_d02f4f39378b4e568f0f9382efd6a9d1"
- ],
- "layout": "IPY_MODEL_76f48ff667754dd682ade56eaaca7049"
- }
- },
- "10e1aa374ebd43a9b083d9b7aab95b23": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f251ac2cd1ed45d4a7d25d6b906a84bc",
- "placeholder": "",
- "style": "IPY_MODEL_824bc6d195ba465497fe34898880a7f8",
- "value": "model.safetensors: 100%"
- }
- },
- "a5844989c8f648df82ee6414eeec9a21": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_05d85f70379b486db128698df001384e",
- "max": 1109846632,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_7c620665c5964164adaf3539a0cb5ff5",
- "value": 1109846632
- }
- },
- "d02f4f39378b4e568f0f9382efd6a9d1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f708d4e670c54cfe93d9ba82818590dc",
- "placeholder": "",
- "style": "IPY_MODEL_0be61c031fad4f31877ad29119b6a77e",
- "value": " 1.11G/1.11G [00:22<00:00, 52.5MB/s]"
- }
- },
- "76f48ff667754dd682ade56eaaca7049": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f251ac2cd1ed45d4a7d25d6b906a84bc": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "824bc6d195ba465497fe34898880a7f8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "05d85f70379b486db128698df001384e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7c620665c5964164adaf3539a0cb5ff5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "f708d4e670c54cfe93d9ba82818590dc": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0be61c031fad4f31877ad29119b6a77e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "54210209428948189c549c1d2cd939ca": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_e77456b2ef2a46ef99572a02e1b70817",
- "IPY_MODEL_3fbcd89672f842bf9692ebffd72d26c7",
- "IPY_MODEL_f87349ba371f41d788f802a64f9312f9"
- ],
- "layout": "IPY_MODEL_c217c65b12264609862aeaeb1c25adc0"
- }
- },
- "e77456b2ef2a46ef99572a02e1b70817": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2bef4b3dc3cd457fa4ccb2e796d771f7",
- "placeholder": "",
- "style": "IPY_MODEL_240bdca5cc794ae8b1319310e42aec2c",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "3fbcd89672f842bf9692ebffd72d26c7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_389d3448663f4389afa71734e6cc1434",
- "max": 79,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_937847a9252247979ed76e17dc60f7b6",
- "value": 79
- }
- },
- "f87349ba371f41d788f802a64f9312f9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_cb0069caad574477866382fa085508d5",
- "placeholder": "",
- "style": "IPY_MODEL_1f3d6a7de82f4710a79d814b6af57679",
- "value": " 79.0/79.0 [00:00<00:00, 3.95kB/s]"
- }
- },
- "c217c65b12264609862aeaeb1c25adc0": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2bef4b3dc3cd457fa4ccb2e796d771f7": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "240bdca5cc794ae8b1319310e42aec2c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "389d3448663f4389afa71734e6cc1434": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "937847a9252247979ed76e17dc60f7b6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "cb0069caad574477866382fa085508d5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1f3d6a7de82f4710a79d814b6af57679": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "5473ab4a957b414aa344f37f921d89a2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_423cf810d1254518af9e4fc79f97e54e",
- "IPY_MODEL_83c2346cea5c4928a23f4fa38d2ffe40",
- "IPY_MODEL_e2fde33a78ff4172b806b1530bda4e9a"
- ],
- "layout": "IPY_MODEL_d1b3fb0032ce400c80922d128940333c"
- }
- },
- "423cf810d1254518af9e4fc79f97e54e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_96b57a70103f4ac9b014b375e1aee824",
- "placeholder": "",
- "style": "IPY_MODEL_74b28f16de69498b9915e501863b4d73",
- "value": "sentencepiece.bpe.model: 100%"
- }
- },
- "83c2346cea5c4928a23f4fa38d2ffe40": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8338b89888584db5944d0ad5baa9118f",
- "max": 5069051,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_8baaa8a868ec4e0b98388e565d61a3db",
- "value": 5069051
- }
- },
- "e2fde33a78ff4172b806b1530bda4e9a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9c1d4b12b247412295fe57637f7d2e60",
- "placeholder": "",
- "style": "IPY_MODEL_ac6ad6631b054107b3287180cf3b9e68",
- "value": " 5.07M/5.07M [00:00<00:00, 83.1MB/s]"
- }
- },
- "d1b3fb0032ce400c80922d128940333c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "96b57a70103f4ac9b014b375e1aee824": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "74b28f16de69498b9915e501863b4d73": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "8338b89888584db5944d0ad5baa9118f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8baaa8a868ec4e0b98388e565d61a3db": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "9c1d4b12b247412295fe57637f7d2e60": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ac6ad6631b054107b3287180cf3b9e68": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "8c36c9eda216474d86fe73c161690a6d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_14d951a930d5478b9f4e77c430c2bb6f",
- "IPY_MODEL_f6ae4d642a9f41cd829dde04347e50a3",
- "IPY_MODEL_969548659ca74527afa6a0f92e3a98e7"
- ],
- "layout": "IPY_MODEL_7f90a349f8ce4525aa33649d70c09a33"
- }
- },
- "14d951a930d5478b9f4e77c430c2bb6f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_80fb8200d6d74cbca19ce946d73d9efa",
- "placeholder": "",
- "style": "IPY_MODEL_67688fa958004ccfbacf0288f4075dea",
- "value": "special_tokens_map.json: 100%"
- }
- },
- "f6ae4d642a9f41cd829dde04347e50a3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fa8d4ffdee2d454cb14941089ff881d6",
- "max": 150,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_2577bab4d4a64010addd7c2b5370a6c8",
- "value": 150
- }
- },
- "969548659ca74527afa6a0f92e3a98e7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_01f9a013ab38435191872a8bce64dd69",
- "placeholder": "",
- "style": "IPY_MODEL_0d91f57bfa25489eb70a23f2bd834cdf",
- "value": " 150/150 [00:00<00:00, 10.5kB/s]"
- }
- },
- "7f90a349f8ce4525aa33649d70c09a33": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "80fb8200d6d74cbca19ce946d73d9efa": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "67688fa958004ccfbacf0288f4075dea": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "fa8d4ffdee2d454cb14941089ff881d6": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2577bab4d4a64010addd7c2b5370a6c8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "01f9a013ab38435191872a8bce64dd69": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0d91f57bfa25489eb70a23f2bd834cdf": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "d02f4f39378b4e568f0f9382efd6a9d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f708d4e670c54cfe93d9ba82818590dc",
+ "placeholder": "",
+ "style": "IPY_MODEL_0be61c031fad4f31877ad29119b6a77e",
+ "value": " 1.11G/1.11G [00:22<00:00, 52.5MB/s]"
+ }
+ },
+ "d1b3fb0032ce400c80922d128940333c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e2fde33a78ff4172b806b1530bda4e9a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9c1d4b12b247412295fe57637f7d2e60",
+ "placeholder": "",
+ "style": "IPY_MODEL_ac6ad6631b054107b3287180cf3b9e68",
+ "value": " 5.07M/5.07M [00:00<00:00, 83.1MB/s]"
+ }
+ },
+ "e77456b2ef2a46ef99572a02e1b70817": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2bef4b3dc3cd457fa4ccb2e796d771f7",
+ "placeholder": "",
+ "style": "IPY_MODEL_240bdca5cc794ae8b1319310e42aec2c",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "f251ac2cd1ed45d4a7d25d6b906a84bc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f6ae4d642a9f41cd829dde04347e50a3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fa8d4ffdee2d454cb14941089ff881d6",
+ "max": 150,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2577bab4d4a64010addd7c2b5370a6c8",
+ "value": 150
+ }
+ },
+ "f708d4e670c54cfe93d9ba82818590dc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f87349ba371f41d788f802a64f9312f9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cb0069caad574477866382fa085508d5",
+ "placeholder": "",
+ "style": "IPY_MODEL_1f3d6a7de82f4710a79d814b6af57679",
+ "value": " 79.0/79.0 [00:00<00:00, 3.95kB/s]"
+ }
+ },
+ "fa8d4ffdee2d454cb14941089ff881d6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForSequenceClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForSequenceClassification.ipynb
index 4a1a54cef9cc6b..86eb57bf47cb98 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForSequenceClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForSequenceClassification.ipynb
@@ -1,2173 +1,2036 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForSequenceClassification.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX XlmRoBertaForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `XlmRoBertaForSequenceClassification` is only available since in `Spark NLP 5.2.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import XLM-RoBERTa models trained/fine-tuned for sequence classification via `XlmRoBertaForSequenceClassification` or `TFXlmRoBertaForSequenceClassification`. These models are usually under `Text Classification` category and have `xlm-roberta` in their labels\n",
+ "- Reference: [TFXlmRoBertaForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.TFXLMRobertaForSequenceClassification)\n",
+ "- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=text-classification)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "VjZY8Zs2nOZy"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForSequenceClassification.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m67.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m29.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m65.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m26.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m59.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m58.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m57.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m62.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m60.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m61.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m61.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [cardiffnlp/twitter-xlm-roberta-base-sentiment](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "_7VIYuX3nOZ1"
- },
- "source": [
- "## Import ONNX XlmRoBertaForSequenceClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `XlmRoBertaForSequenceClassification` is only available since in `Spark NLP 5.2.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import XLM-RoBERTa models trained/fine-tuned for sequence classification via `XlmRoBertaForSequenceClassification` or `TFXlmRoBertaForSequenceClassification`. These models are usually under `Text Classification` category and have `xlm-roberta` in their labels\n",
- "- Reference: [TFXlmRoBertaForSequenceClassification](https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.TFXLMRobertaForSequenceClassification)\n",
- "- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=text-classification)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "HZGLjeyxnOZ1"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0f95c2f60eca422a8b484df66cd738e7",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/841 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "za0o-flhnOZ3"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export to ONNX.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "VgbvC400nOZ3",
- "outputId": "8ee99c2f-2a8b-4db9-d5f1-84092eff5f65",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d39f1f408a7241b8bb14dc1fd4ef9df6",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m67.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m29.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m65.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m26.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m59.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m58.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m57.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m62.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m60.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m61.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m61.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "text/plain": [
+ "pytorch_model.bin: 0%| | 0.00/1.11G [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "oCS-FetznOZ4"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e0ed0155dd944f5ebbcc905509f01b20",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [cardiffnlp/twitter-xlm-roberta-base-sentiment](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment) model from HuggingFace as an example and load it as a `ORTModelForSequenceClassification`, representing an ONNX model."
+ "text/plain": [
+ "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "VREupl3enOZ4",
- "outputId": "f5d0f00f-081b-4f29-ad6f-eadd38e94d4d",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 373,
- "referenced_widgets": [
- "0f95c2f60eca422a8b484df66cd738e7",
- "251aee8a46f84ddbb3dc6a092f041911",
- "d01d4a2dcdbb4a88a9ec3df55e949f51",
- "39b3eeec515b46109ebd01a3c81cf839",
- "81386512a80d4c7986543ae245e2b128",
- "1aa9c4c2da4e499c8eb25eff14729039",
- "fb4a285fffd9454da534302c6fc17e7d",
- "861430374a7843d78a5e2499252f4e78",
- "4e476a3c03044b95a5d951a36643dfe5",
- "e38b9de9a6da4058956892c87ab4a29a",
- "33887ca8a56f44b8ab7b78dcdb604e5e",
- "d39f1f408a7241b8bb14dc1fd4ef9df6",
- "29a548e9b0484038b62d5f57175e3d58",
- "cbe05ca4a37745908898d47bb56c9774",
- "eb413258aa00462195161ba8b0047bca",
- "b6795898eeb24ed284b2bbd31040697a",
- "905c59c8b7bd42a9a305ecb52d93f875",
- "1a3c1b1b2d4d45fb96186d0d4b87d746",
- "3dcbba3ef4524613848833f7eddc7bf3",
- "28315f4670194975a577d772b73fa439",
- "f83361dcbeaa4a8b9ddbbe47dc28e3f1",
- "9f664ab32d0d43f5bddfe20417c10131",
- "e0ed0155dd944f5ebbcc905509f01b20",
- "7ec33b66132449a1804b9dd655dba44e",
- "1a177346302c400097ede29b9ddcdde3",
- "a5ecd770b12843a7af3fad2dae5be8e3",
- "c501c8ec56b340d49e6c6698d325398d",
- "54d98bec988246afa575f514b5fb538e",
- "e33aaf9937d346619cfafb14e2f2257e",
- "98c0bef2c2004631a1eea7dbeeb474ef",
- "4bcdfb214d224488bd647edf528a6474",
- "ffc6bd7026344b7aad522b9a2337bb6f",
- "3c088ffa5f1045a59667dfd0b5024db6",
- "2a6ea6ad829149abbb37ec45d0721651",
- "5c90d0f385f446a1a933231fd82f7c5c",
- "06bdcebdd8634c08be2a5c7edba7f20f",
- "5e85cd1b82374b5b9c7124a99c13784e",
- "6615c07fdecb4049b19f7a2178c3879d",
- "db3f53eaa65c44f987b410a60e0c04de",
- "a153a09450f8403998482ec3cf5e5424",
- "2209890ded0b42b2b11a53aaadc1dfc3",
- "ecf37d50caa6458fbaf4dc9300961c83",
- "babd560899554c3ab5eb12bbab99938b",
- "26aded6abf1242e8910a8051ee80f609"
- ]
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2a6ea6ad829149abbb37ec45d0721651",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/841 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "0f95c2f60eca422a8b484df66cd738e7"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "pytorch_model.bin: 0%| | 0.00/1.11G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "d39f1f408a7241b8bb14dc1fd4ef9df6"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "e0ed0155dd944f5ebbcc905509f01b20"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/150 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "2a6ea6ad829149abbb37ec45d0721651"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForSequenceClassification\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'\n",
- "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForSequenceClassification.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(ONNX_MODEL)"
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/150 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "OtCJ9qBvnOZ5"
- },
- "source": [
- "Let's have a look inside this and see what we are dealing with:"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForSequenceClassification\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForSequenceClassification.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(ONNX_MODEL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside this and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "qIBR7cAqnOZ5",
- "outputId": "49d6906d-a710-4d12-e547-3c7638ec1ab4",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 1108048\n",
- "-rw-r--r-- 1 root root 915 Jan 9 19:15 config.json\n",
- "-rw-r--r-- 1 root root 1112465741 Jan 9 19:15 model.onnx\n",
- "-rw-r--r-- 1 root root 5069051 Jan 9 19:15 sentencepiece.bpe.model\n",
- "-rw-r--r-- 1 root root 167 Jan 9 19:15 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 471 Jan 9 19:15 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 17082730 Jan 9 19:15 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {ONNX_MODEL}"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 1108048\n",
+ "-rw-r--r-- 1 root root 915 Jan 9 19:15 config.json\n",
+ "-rw-r--r-- 1 root root 1112465741 Jan 9 19:15 model.onnx\n",
+ "-rw-r--r-- 1 root root 5069051 Jan 9 19:15 sentencepiece.bpe.model\n",
+ "-rw-r--r-- 1 root root 167 Jan 9 19:15 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 471 Jan 9 19:15 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 17082730 Jan 9 19:15 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to `assets` folder which Spark NLP will look for\n",
+ "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get label2id dictionary\n",
+ "labels = ort_model.config.id2label\n",
+ "# sort the dictionary based on the id\n",
+ "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
+ "\n",
+ "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
+ " f.write('\\n'.join(labels))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `sentencepiece.bpe.model` and `labels.txt` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "-Sp8nJu7nOZ5"
- },
- "source": [
- "- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to `assets` folder which Spark NLP will look for\n",
- "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/cardiffnlp/twitter-xlm-roberta-base-sentiment:\n",
+ "total 1103100\n",
+ "drwxr-xr-x 2 root root 4096 Jan 9 19:17 assets\n",
+ "-rw-r--r-- 1 root root 915 Jan 9 19:15 config.json\n",
+ "-rw-r--r-- 1 root root 1112465741 Jan 9 19:15 model.onnx\n",
+ "-rw-r--r-- 1 root root 167 Jan 9 19:15 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 471 Jan 9 19:15 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 17082730 Jan 9 19:15 tokenizer.json\n",
+ "\n",
+ "onnx_models/cardiffnlp/twitter-xlm-roberta-base-sentiment/assets:\n",
+ "total 4956\n",
+ "-rw-r--r-- 1 root root 25 Jan 9 19:16 labels.txt\n",
+ "-rw-r--r-- 1 root root 5069051 Jan 9 19:15 sentencepiece.bpe.model\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save RoBertaForSequenceClassification in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "2KsG42cmnOZ5"
- },
- "outputs": [],
- "source": [
- "!mkdir {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--2023-10-16 21:08:22-- http://setup.johnsnowlabs.com/colab.sh\n",
+ "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n",
+ "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n",
+ "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
+ "Location: https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n",
+ "--2023-10-16 21:08:23-- https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n",
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1191 (1.2K) [text/plain]\n",
+ "Saving to: ‘STDOUT’\n",
+ "\n",
+ "- 100%[===================>] 1.16K --.-KB/s in 0s \n",
+ "\n",
+ "2023-10-16 21:08:23 (93.8 MB/s) - written to stdout [1191/1191]\n",
+ "\n",
+ "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "fKElx2rtnOZ5"
- },
- "outputs": [],
- "source": [
- "# get label2id dictionary\n",
- "labels = ort_model.config.id2label\n",
- "# sort the dictionary based on the id\n",
- "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
- "\n",
- "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
- " f.write('\\n'.join(labels))"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `XlmRoBertaForSequenceClassification` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `XlmRoBertaForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "sequenceClassifier = XlmRoBertaForSequenceClassification.loadSavedModel(\n",
+ " ONNX_MODEL,\n",
+ " spark\n",
+ " )\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"class\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setMaxSentenceLength(128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your XlmRoBertaForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "JrxxMgNpnOZ5"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 487524\n",
+ "drwxr-xr-x 5 root root 4096 Oct 16 21:15 fields\n",
+ "drwxr-xr-x 2 root root 4096 Oct 16 21:15 metadata\n",
+ "-rw-r--r-- 1 root root 499209257 Oct 16 21:16 roberta_classification_onnx\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForSequenceClassification model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sequenceClassifier_loaded = XlmRoBertaForSequenceClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"class\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see what labels were used to train this model via `getClasses` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "WaxO1clenOZ6"
- },
- "source": [
- "Voila! We have our `sentencepiece.bpe.model` and `labels.txt` inside assets directory"
+ "data": {
+ "text/plain": [
+ "['disgust',\n",
+ " 'optimism',\n",
+ " 'embarrassment',\n",
+ " 'amusement',\n",
+ " 'realization',\n",
+ " 'surprise',\n",
+ " 'grief',\n",
+ " 'caring',\n",
+ " 'disapproval',\n",
+ " 'disappointment',\n",
+ " 'joy',\n",
+ " 'confusion',\n",
+ " 'excitement',\n",
+ " 'approval',\n",
+ " 'curiosity',\n",
+ " 'anger',\n",
+ " 'love',\n",
+ " 'admiration',\n",
+ " 'gratitude',\n",
+ " 'annoyance',\n",
+ " 'remorse',\n",
+ " 'nervousness',\n",
+ " 'neutral',\n",
+ " 'pride',\n",
+ " 'fear',\n",
+ " 'sadness',\n",
+ " 'desire',\n",
+ " 'relief']"
]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# .getClasses was introduced in spark-nlp==3.4.0\n",
+ "sequenceClassifier_loaded.getClasses()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "document_assembler = DocumentAssembler() \\\n",
+ " .setInputCol('text') \\\n",
+ " .setOutputCol('document')\n",
+ "\n",
+ "tokenizer = Tokenizer() \\\n",
+ " .setInputCols(['document']) \\\n",
+ " .setOutputCol('token')\n",
+ "\n",
+ "pipeline = Pipeline(stages=[\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " sequenceClassifier_loaded\n",
+ "])\n",
+ "\n",
+ "# couple of simple examples\n",
+ "example = spark.createDataFrame([[\"I love you!\"], ['I feel lucky to be here.']]).toDF(\"text\")\n",
+ "\n",
+ "result = pipeline.fit(example).transform(example)\n",
+ "\n",
+ "# result is a DataFrame\n",
+ "result.select(\"text\", \"class.result\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `XlmRoBertaForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "06bdcebdd8634c08be2a5c7edba7f20f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2209890ded0b42b2b11a53aaadc1dfc3",
+ "max": 150,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ecf37d50caa6458fbaf4dc9300961c83",
+ "value": 150
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "C_sD6vcDnOZ6",
- "outputId": "ee31714d-f3ff-4e7c-874f-d9f3a2358700",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "onnx_models/cardiffnlp/twitter-xlm-roberta-base-sentiment:\n",
- "total 1103100\n",
- "drwxr-xr-x 2 root root 4096 Jan 9 19:17 assets\n",
- "-rw-r--r-- 1 root root 915 Jan 9 19:15 config.json\n",
- "-rw-r--r-- 1 root root 1112465741 Jan 9 19:15 model.onnx\n",
- "-rw-r--r-- 1 root root 167 Jan 9 19:15 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 471 Jan 9 19:15 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 17082730 Jan 9 19:15 tokenizer.json\n",
- "\n",
- "onnx_models/cardiffnlp/twitter-xlm-roberta-base-sentiment/assets:\n",
- "total 4956\n",
- "-rw-r--r-- 1 root root 25 Jan 9 19:16 labels.txt\n",
- "-rw-r--r-- 1 root root 5069051 Jan 9 19:15 sentencepiece.bpe.model\n"
- ]
- }
+ "0f95c2f60eca422a8b484df66cd738e7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_251aee8a46f84ddbb3dc6a092f041911",
+ "IPY_MODEL_d01d4a2dcdbb4a88a9ec3df55e949f51",
+ "IPY_MODEL_39b3eeec515b46109ebd01a3c81cf839"
],
- "source": [
- "!ls -lR {ONNX_MODEL}"
- ]
+ "layout": "IPY_MODEL_81386512a80d4c7986543ae245e2b128"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "WoRaIuTgnOZ6"
- },
- "source": [
- "## Import and Save RoBertaForSequenceClassification in Spark NLP\n"
- ]
+ "1a177346302c400097ede29b9ddcdde3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_98c0bef2c2004631a1eea7dbeeb474ef",
+ "max": 5069051,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4bcdfb214d224488bd647edf528a6474",
+ "value": 5069051
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "rmyTRnmTnOZ6"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "1a3c1b1b2d4d45fb96186d0d4b87d746": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "VjgCKRjxnOZ6",
- "outputId": "f8d62151-4ae3-4212-d2e6-be61f24cfcc8"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "--2023-10-16 21:08:22-- http://setup.johnsnowlabs.com/colab.sh\n",
- "Resolving setup.johnsnowlabs.com (setup.johnsnowlabs.com)... 51.158.130.125\n",
- "Connecting to setup.johnsnowlabs.com (setup.johnsnowlabs.com)|51.158.130.125|:80... connected.\n",
- "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
- "Location: https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh [following]\n",
- "--2023-10-16 21:08:23-- https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/scripts/colab_setup.sh\n",
- "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
- "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
- "HTTP request sent, awaiting response... 200 OK\n",
- "Length: 1191 (1.2K) [text/plain]\n",
- "Saving to: ‘STDOUT’\n",
- "\n",
- "- 100%[===================>] 1.16K --.-KB/s in 0s \n",
- "\n",
- "2023-10-16 21:08:23 (93.8 MB/s) - written to stdout [1191/1191]\n",
- "\n",
- "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
- ],
- "source": [
- "! wget http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "1aa9c4c2da4e499c8eb25eff14729039": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "-QbKgNWUnOZ6"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "2209890ded0b42b2b11a53aaadc1dfc3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "t8nE1WMKnOZ6",
- "outputId": "58c3086e-cb83-4472-f0a0-07d87ee70371"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
+ "251aee8a46f84ddbb3dc6a092f041911": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1aa9c4c2da4e499c8eb25eff14729039",
+ "placeholder": "",
+ "style": "IPY_MODEL_fb4a285fffd9454da534302c6fc17e7d",
+ "value": "config.json: 100%"
+ }
+ },
+ "26aded6abf1242e8910a8051ee80f609": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "28315f4670194975a577d772b73fa439": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "29a548e9b0484038b62d5f57175e3d58": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_905c59c8b7bd42a9a305ecb52d93f875",
+ "placeholder": "",
+ "style": "IPY_MODEL_1a3c1b1b2d4d45fb96186d0d4b87d746",
+ "value": "pytorch_model.bin: 100%"
+ }
+ },
+ "2a6ea6ad829149abbb37ec45d0721651": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5c90d0f385f446a1a933231fd82f7c5c",
+ "IPY_MODEL_06bdcebdd8634c08be2a5c7edba7f20f",
+ "IPY_MODEL_5e85cd1b82374b5b9c7124a99c13784e"
],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "layout": "IPY_MODEL_6615c07fdecb4049b19f7a2178c3879d"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yCoPZcMmnOZ6"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `XlmRoBertaForSequenceClassification` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `XlmRoBertaForSequenceClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
- ]
+ "33887ca8a56f44b8ab7b78dcdb604e5e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "hwPVKZyinOZ6"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "sequenceClassifier = XlmRoBertaForSequenceClassification.loadSavedModel(\n",
- " ONNX_MODEL,\n",
- " spark\n",
- " )\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"class\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setMaxSentenceLength(128)"
- ]
+ "39b3eeec515b46109ebd01a3c81cf839": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e38b9de9a6da4058956892c87ab4a29a",
+ "placeholder": "",
+ "style": "IPY_MODEL_33887ca8a56f44b8ab7b78dcdb604e5e",
+ "value": " 841/841 [00:00<00:00, 52.8kB/s]"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sBaSiegrnOZ6"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "3c088ffa5f1045a59667dfd0b5024db6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "wJM6A2ZMnOZ6"
- },
- "outputs": [],
- "source": [
- "sequenceClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
- ]
+ "3dcbba3ef4524613848833f7eddc7bf3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "BSseNI1ZnOZ6"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "4bcdfb214d224488bd647edf528a6474": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "-t_ST7fznOZ6"
- },
- "outputs": [],
- "source": [
- "!rm -rf {ONNX_MODEL}"
- ]
+ "4e476a3c03044b95a5d951a36643dfe5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "HrIRyrwJnOZ7"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your XlmRoBertaForSequenceClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "54d98bec988246afa575f514b5fb538e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "x18SNGz5nOZ7",
- "outputId": "b58ae4c0-385a-49f7-a989-9f43c1654648"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 487524\n",
- "drwxr-xr-x 5 root root 4096 Oct 16 21:15 fields\n",
- "drwxr-xr-x 2 root root 4096 Oct 16 21:15 metadata\n",
- "-rw-r--r-- 1 root root 499209257 Oct 16 21:16 roberta_classification_onnx\n"
- ]
- }
- ],
- "source": [
- "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
- ]
+ "5c90d0f385f446a1a933231fd82f7c5c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db3f53eaa65c44f987b410a60e0c04de",
+ "placeholder": "",
+ "style": "IPY_MODEL_a153a09450f8403998482ec3cf5e5424",
+ "value": "special_tokens_map.json: 100%"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "CNG-mf3nnOZ7"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForSequenceClassification model 😊"
- ]
+ "5e85cd1b82374b5b9c7124a99c13784e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_babd560899554c3ab5eb12bbab99938b",
+ "placeholder": "",
+ "style": "IPY_MODEL_26aded6abf1242e8910a8051ee80f609",
+ "value": " 150/150 [00:00<00:00, 7.91kB/s]"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "E-YVoU8xnOZ7"
- },
- "outputs": [],
- "source": [
- "sequenceClassifier_loaded = XlmRoBertaForSequenceClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"class\")"
- ]
+ "6615c07fdecb4049b19f7a2178c3879d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "VpFTaC7GnOZ7"
- },
- "source": [
- "You can see what labels were used to train this model via `getClasses` function:"
- ]
+ "7ec33b66132449a1804b9dd655dba44e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_54d98bec988246afa575f514b5fb538e",
+ "placeholder": "",
+ "style": "IPY_MODEL_e33aaf9937d346619cfafb14e2f2257e",
+ "value": "sentencepiece.bpe.model: 100%"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "OkxqlnoBnOZ7",
- "outputId": "2dd0576e-8abe-4d8b-8e2c-598783ba116a"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['disgust',\n",
- " 'optimism',\n",
- " 'embarrassment',\n",
- " 'amusement',\n",
- " 'realization',\n",
- " 'surprise',\n",
- " 'grief',\n",
- " 'caring',\n",
- " 'disapproval',\n",
- " 'disappointment',\n",
- " 'joy',\n",
- " 'confusion',\n",
- " 'excitement',\n",
- " 'approval',\n",
- " 'curiosity',\n",
- " 'anger',\n",
- " 'love',\n",
- " 'admiration',\n",
- " 'gratitude',\n",
- " 'annoyance',\n",
- " 'remorse',\n",
- " 'nervousness',\n",
- " 'neutral',\n",
- " 'pride',\n",
- " 'fear',\n",
- " 'sadness',\n",
- " 'desire',\n",
- " 'relief']"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
+ "81386512a80d4c7986543ae245e2b128": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "861430374a7843d78a5e2499252f4e78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "905c59c8b7bd42a9a305ecb52d93f875": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "98c0bef2c2004631a1eea7dbeeb474ef": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9f664ab32d0d43f5bddfe20417c10131": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a153a09450f8403998482ec3cf5e5424": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a5ecd770b12843a7af3fad2dae5be8e3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ffc6bd7026344b7aad522b9a2337bb6f",
+ "placeholder": "",
+ "style": "IPY_MODEL_3c088ffa5f1045a59667dfd0b5024db6",
+ "value": " 5.07M/5.07M [00:00<00:00, 124MB/s]"
+ }
+ },
+ "b6795898eeb24ed284b2bbd31040697a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "babd560899554c3ab5eb12bbab99938b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c501c8ec56b340d49e6c6698d325398d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cbe05ca4a37745908898d47bb56c9774": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3dcbba3ef4524613848833f7eddc7bf3",
+ "max": 1112271561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_28315f4670194975a577d772b73fa439",
+ "value": 1112271561
+ }
+ },
+ "d01d4a2dcdbb4a88a9ec3df55e949f51": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_861430374a7843d78a5e2499252f4e78",
+ "max": 841,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4e476a3c03044b95a5d951a36643dfe5",
+ "value": 841
+ }
+ },
+ "d39f1f408a7241b8bb14dc1fd4ef9df6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_29a548e9b0484038b62d5f57175e3d58",
+ "IPY_MODEL_cbe05ca4a37745908898d47bb56c9774",
+ "IPY_MODEL_eb413258aa00462195161ba8b0047bca"
],
- "source": [
- "# .getClasses was introduced in spark-nlp==3.4.0\n",
- "sequenceClassifier_loaded.getClasses()"
- ]
+ "layout": "IPY_MODEL_b6795898eeb24ed284b2bbd31040697a"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "c62SdOTdnOZ7"
- },
- "source": [
- "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
- ]
+ "db3f53eaa65c44f987b410a60e0c04de": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1Di5xRn1nOZ7"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "document_assembler = DocumentAssembler() \\\n",
- " .setInputCol('text') \\\n",
- " .setOutputCol('document')\n",
- "\n",
- "tokenizer = Tokenizer() \\\n",
- " .setInputCols(['document']) \\\n",
- " .setOutputCol('token')\n",
- "\n",
- "pipeline = Pipeline(stages=[\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " sequenceClassifier_loaded\n",
- "])\n",
- "\n",
- "# couple of simple examples\n",
- "example = spark.createDataFrame([[\"I love you!\"], ['I feel lucky to be here.']]).toDF(\"text\")\n",
- "\n",
- "result = pipeline.fit(example).transform(example)\n",
- "\n",
- "# result is a DataFrame\n",
- "result.select(\"text\", \"class.result\").show()"
- ]
+ "e0ed0155dd944f5ebbcc905509f01b20": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_7ec33b66132449a1804b9dd655dba44e",
+ "IPY_MODEL_1a177346302c400097ede29b9ddcdde3",
+ "IPY_MODEL_a5ecd770b12843a7af3fad2dae5be8e3"
+ ],
+ "layout": "IPY_MODEL_c501c8ec56b340d49e6c6698d325398d"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "_ka-wmU-nOZ7"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `XlmRoBertaForSequenceClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": []
+ "e33aaf9937d346619cfafb14e2f2257e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
+ "e38b9de9a6da4058956892c87ab4a29a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
+ "eb413258aa00462195161ba8b0047bca": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f83361dcbeaa4a8b9ddbbe47dc28e3f1",
+ "placeholder": "",
+ "style": "IPY_MODEL_9f664ab32d0d43f5bddfe20417c10131",
+ "value": " 1.11G/1.11G [00:07<00:00, 180MB/s]"
+ }
+ },
+ "ecf37d50caa6458fbaf4dc9300961c83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "f83361dcbeaa4a8b9ddbbe47dc28e3f1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "0f95c2f60eca422a8b484df66cd738e7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_251aee8a46f84ddbb3dc6a092f041911",
- "IPY_MODEL_d01d4a2dcdbb4a88a9ec3df55e949f51",
- "IPY_MODEL_39b3eeec515b46109ebd01a3c81cf839"
- ],
- "layout": "IPY_MODEL_81386512a80d4c7986543ae245e2b128"
- }
- },
- "251aee8a46f84ddbb3dc6a092f041911": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1aa9c4c2da4e499c8eb25eff14729039",
- "placeholder": "",
- "style": "IPY_MODEL_fb4a285fffd9454da534302c6fc17e7d",
- "value": "config.json: 100%"
- }
- },
- "d01d4a2dcdbb4a88a9ec3df55e949f51": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_861430374a7843d78a5e2499252f4e78",
- "max": 841,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_4e476a3c03044b95a5d951a36643dfe5",
- "value": 841
- }
- },
- "39b3eeec515b46109ebd01a3c81cf839": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e38b9de9a6da4058956892c87ab4a29a",
- "placeholder": "",
- "style": "IPY_MODEL_33887ca8a56f44b8ab7b78dcdb604e5e",
- "value": " 841/841 [00:00<00:00, 52.8kB/s]"
- }
- },
- "81386512a80d4c7986543ae245e2b128": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1aa9c4c2da4e499c8eb25eff14729039": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "fb4a285fffd9454da534302c6fc17e7d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "861430374a7843d78a5e2499252f4e78": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4e476a3c03044b95a5d951a36643dfe5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "e38b9de9a6da4058956892c87ab4a29a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "33887ca8a56f44b8ab7b78dcdb604e5e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "d39f1f408a7241b8bb14dc1fd4ef9df6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_29a548e9b0484038b62d5f57175e3d58",
- "IPY_MODEL_cbe05ca4a37745908898d47bb56c9774",
- "IPY_MODEL_eb413258aa00462195161ba8b0047bca"
- ],
- "layout": "IPY_MODEL_b6795898eeb24ed284b2bbd31040697a"
- }
- },
- "29a548e9b0484038b62d5f57175e3d58": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_905c59c8b7bd42a9a305ecb52d93f875",
- "placeholder": "",
- "style": "IPY_MODEL_1a3c1b1b2d4d45fb96186d0d4b87d746",
- "value": "pytorch_model.bin: 100%"
- }
- },
- "cbe05ca4a37745908898d47bb56c9774": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3dcbba3ef4524613848833f7eddc7bf3",
- "max": 1112271561,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_28315f4670194975a577d772b73fa439",
- "value": 1112271561
- }
- },
- "eb413258aa00462195161ba8b0047bca": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_f83361dcbeaa4a8b9ddbbe47dc28e3f1",
- "placeholder": "",
- "style": "IPY_MODEL_9f664ab32d0d43f5bddfe20417c10131",
- "value": " 1.11G/1.11G [00:07<00:00, 180MB/s]"
- }
- },
- "b6795898eeb24ed284b2bbd31040697a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "905c59c8b7bd42a9a305ecb52d93f875": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1a3c1b1b2d4d45fb96186d0d4b87d746": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3dcbba3ef4524613848833f7eddc7bf3": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "28315f4670194975a577d772b73fa439": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "f83361dcbeaa4a8b9ddbbe47dc28e3f1": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9f664ab32d0d43f5bddfe20417c10131": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "e0ed0155dd944f5ebbcc905509f01b20": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_7ec33b66132449a1804b9dd655dba44e",
- "IPY_MODEL_1a177346302c400097ede29b9ddcdde3",
- "IPY_MODEL_a5ecd770b12843a7af3fad2dae5be8e3"
- ],
- "layout": "IPY_MODEL_c501c8ec56b340d49e6c6698d325398d"
- }
- },
- "7ec33b66132449a1804b9dd655dba44e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_54d98bec988246afa575f514b5fb538e",
- "placeholder": "",
- "style": "IPY_MODEL_e33aaf9937d346619cfafb14e2f2257e",
- "value": "sentencepiece.bpe.model: 100%"
- }
- },
- "1a177346302c400097ede29b9ddcdde3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_98c0bef2c2004631a1eea7dbeeb474ef",
- "max": 5069051,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_4bcdfb214d224488bd647edf528a6474",
- "value": 5069051
- }
- },
- "a5ecd770b12843a7af3fad2dae5be8e3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ffc6bd7026344b7aad522b9a2337bb6f",
- "placeholder": "",
- "style": "IPY_MODEL_3c088ffa5f1045a59667dfd0b5024db6",
- "value": " 5.07M/5.07M [00:00<00:00, 124MB/s]"
- }
- },
- "c501c8ec56b340d49e6c6698d325398d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "54d98bec988246afa575f514b5fb538e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e33aaf9937d346619cfafb14e2f2257e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "98c0bef2c2004631a1eea7dbeeb474ef": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4bcdfb214d224488bd647edf528a6474": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "ffc6bd7026344b7aad522b9a2337bb6f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "3c088ffa5f1045a59667dfd0b5024db6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "2a6ea6ad829149abbb37ec45d0721651": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_5c90d0f385f446a1a933231fd82f7c5c",
- "IPY_MODEL_06bdcebdd8634c08be2a5c7edba7f20f",
- "IPY_MODEL_5e85cd1b82374b5b9c7124a99c13784e"
- ],
- "layout": "IPY_MODEL_6615c07fdecb4049b19f7a2178c3879d"
- }
- },
- "5c90d0f385f446a1a933231fd82f7c5c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_db3f53eaa65c44f987b410a60e0c04de",
- "placeholder": "",
- "style": "IPY_MODEL_a153a09450f8403998482ec3cf5e5424",
- "value": "special_tokens_map.json: 100%"
- }
- },
- "06bdcebdd8634c08be2a5c7edba7f20f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2209890ded0b42b2b11a53aaadc1dfc3",
- "max": 150,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_ecf37d50caa6458fbaf4dc9300961c83",
- "value": 150
- }
- },
- "5e85cd1b82374b5b9c7124a99c13784e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_babd560899554c3ab5eb12bbab99938b",
- "placeholder": "",
- "style": "IPY_MODEL_26aded6abf1242e8910a8051ee80f609",
- "value": " 150/150 [00:00<00:00, 7.91kB/s]"
- }
- },
- "6615c07fdecb4049b19f7a2178c3879d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "db3f53eaa65c44f987b410a60e0c04de": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a153a09450f8403998482ec3cf5e5424": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "2209890ded0b42b2b11a53aaadc1dfc3": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ecf37d50caa6458fbaf4dc9300961c83": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "babd560899554c3ab5eb12bbab99938b": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "26aded6abf1242e8910a8051ee80f609": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "fb4a285fffd9454da534302c6fc17e7d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ffc6bd7026344b7aad522b9a2337bb6f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForTokenClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForTokenClassification.ipynb
index 0cc16cf9245d9b..d601203cc11078 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForTokenClassification.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForTokenClassification.ipynb
@@ -1,2144 +1,2006 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForTokenClassification.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import ONNX XlmRoBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
+ "\n",
+ "Let's keep in mind a few things before we start 😊\n",
+ "\n",
+ "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
+ "- `XlmRoBertaForTokenClassification` is only available since in `Spark NLP 5.2.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
+ "- You can import XLM-RoBERTa models trained/fine-tuned for token classification via `XlmRoBertaForTokenClassification` or `TFXlmRoBertaForTokenClassification`. These models are usually under `Token Classification` category and have `roberta` in their labels\n",
+ "- Reference: [TFXlmRoBertaForTokenClassification](https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.TFXLMRobertaForTokenClassification)\n",
+ "- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=token-classification)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Export and Save HuggingFace model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases\n",
+ "- Albert uses SentencePiece, so we will have to install that as well"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "PT2s_38mqpqS"
- },
- "source": [
- "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n",
- "\n",
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaForTokenClassification.ipynb)"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m67.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m29.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m65.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m70.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m97.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m111.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m102.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m100.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m109.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m96.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m100.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m81.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m44.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m100.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m67.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m64.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m67.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m84.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m104.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m45.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m81.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m100.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m76.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m62.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m98.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m102.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m86.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m69.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m105.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m98.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m43.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m42.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m83.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m93.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m28.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
+ "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
+ "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum tensorflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
+ "- We'll use [xlm-roberta-large-finetuned-conll03-english](https://huggingface.co/xlm-roberta-large-finetuned-conll03-english) model from HuggingFace as an example and load it as a `ORTModelForTokenClassification`, representing an ONNX model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "iss2RqRIqpqV"
- },
- "source": [
- "## Import ONNX XlmRoBertaForTokenClassification models from HuggingFace 🤗 into Spark NLP 🚀\n",
- "\n",
- "Let's keep in mind a few things before we start 😊\n",
- "\n",
- "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models.\n",
- "- `XlmRoBertaForTokenClassification` is only available since in `Spark NLP 5.2.3` and after. So please make sure you have upgraded to the latest Spark NLP release\n",
- "- You can import XLM-RoBERTa models trained/fine-tuned for token classification via `XlmRoBertaForTokenClassification` or `TFXlmRoBertaForTokenClassification`. These models are usually under `Token Classification` category and have `roberta` in their labels\n",
- "- Reference: [TFXlmRoBertaForTokenClassification](https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.TFXLMRobertaForTokenClassification)\n",
- "- Some [example models](https://huggingface.co/models?filter=xlm-roberta&pipeline_tag=token-classification)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "yhZZmLjgqpqX"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1544c9cc98bc469b98ae804569204420",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "## Export and Save HuggingFace model"
+ "text/plain": [
+ "config.json: 0%| | 0.00/852 [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "rUErF-PMqpqX"
- },
- "source": [
- "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases\n",
- "- Albert uses SentencePiece, so we will have to install that as well"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Framework not specified. Using pt to export to ONNX.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "IairJqDCqpqY",
- "outputId": "b93fb73a-4bb0-442b-f6da-7228362ef1ee",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e53d77092ea646b5bff9e5c4051f0709",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m403.3/403.3 kB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m67.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.7/455.7 kB\u001b[0m \u001b[31m29.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m65.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m70.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m97.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m111.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m102.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m100.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m109.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m96.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m100.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m81.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m44.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m100.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.2/475.2 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.9/489.9 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m67.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m64.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m67.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m84.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m104.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m45.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.1/524.1 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m81.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m100.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m76.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.3/781.3 kB\u001b[0m \u001b[31m62.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m98.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m102.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m588.3/588.3 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m86.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m69.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m105.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.2/439.2 kB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m98.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.8/83.8 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m43.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m451.2/451.2 kB\u001b[0m \u001b[31m42.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m83.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m79.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m93.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m28.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "pandas-gbq 0.19.2 requires google-auth-oauthlib>=0.7.0, but you have google-auth-oauthlib 0.4.6 which is incompatible.\n",
- "tensorflow-datasets 4.9.4 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible.\n",
- "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum tensorflow"
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/2.24G [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "jjV1pA-nqpqY"
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ee4965a2b5ad435c8c82b377006aa73e",
+ "version_major": 2,
+ "version_minor": 0
},
- "source": [
- "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n",
- "- We'll use [xlm-roberta-large-finetuned-conll03-english](https://huggingface.co/xlm-roberta-large-finetuned-conll03-english) model from HuggingFace as an example and load it as a `ORTModelForTokenClassification`, representing an ONNX model."
+ "text/plain": [
+ "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "id": "WATHU1YWqpqY",
- "outputId": "9c48ca4b-a967-418b-b350-a7a5827be281",
- "colab": {
- "referenced_widgets": [
- "1544c9cc98bc469b98ae804569204420",
- "94da2b3a4e7743a2b7dd362292c5496f",
- "6494c0f839ff4418a93df6e88c012d07",
- "df8afa14db524e2992b832b30bc0f692",
- "78b0860d0ed643d785ef00633d9e17e8",
- "451d9aec16b8417fb3b9565e5a73cb52",
- "e7df5ab266744d59b29e8c11106dcb65",
- "547e218edc3d47cab11d876641955409",
- "86abb923170a4927a99c0289540ecdf4",
- "dff16b39b66a4422a407096064fa182e",
- "119f12a1e8204bf7b7bbf1b4d7cca247",
- "e53d77092ea646b5bff9e5c4051f0709",
- "7538f72687754318ac6657cd98f1f5ae",
- "b80a55635ff342eaa98451556f4908d3",
- "b3ee928046b94c9194b5b5e30c61becb",
- "a2f65e14834a47e69687d32ee896d31d",
- "e0e53ea997404498850c7dff6f80a5fb",
- "68e4ba7bf6c5483abcae494fcdd46c6a",
- "349c635d1e8c47d6ab1d0fe3819dd837",
- "eded9c7da1eb4f6c9e713c8b23b4327a",
- "59a857c998734b0a95af2b96252aa130",
- "ace71ecb2fae4e4196f5c75b86a522f8",
- "ee4965a2b5ad435c8c82b377006aa73e",
- "9c3ec5377e884b3faebd24fb815b6a85",
- "9ca46de0beaa4c5d9ec526820d7aa94f",
- "238132625e604ddf85bdbf4931889d51",
- "61335821e4c94fcba501cb1c94541d07",
- "883c34b77a4c4558b74d5dde797e22ab",
- "a85eea6afaf0480eac17817e4844539a",
- "7277b2423de14e3aada27e5191f096e5",
- "0faedd1c4d4148fa965bcec52325bd08",
- "c2ab7313174c4231825be28c4a3181b8",
- "45123f4cdc0a4aa8ac90aa29d357240e",
- "0bd10f7cb5244da29d0a7da73ae52335",
- "825857db473849d2bb498ffb5fcfb962",
- "a88c300d3a81439fb3da9d46a023dc47",
- "636fccb3b002475a90c888f987b36400",
- "89c7d83dc8e640cbb93ccfe2bb3030f0",
- "87671904fcdc4af993d4ba61f3a5f9e9",
- "2510ee2600af466a851566f4634e7fe9",
- "44e1f77b21e04d32ad83a405ab62ca38",
- "106c462bc57243018162577b103db007",
- "1826afdb3fd94748941c71d8621682e3",
- "21ca8729098a4bd498b29de51a92e8bd"
- ],
- "base_uri": "https://localhost:8080/",
- "height": 391
- }
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0bd10f7cb5244da29d0a7da73ae52335",
+ "version_major": 2,
+ "version_minor": 0
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0%| | 0.00/852 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "1544c9cc98bc469b98ae804569204420"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Framework not specified. Using pt to export to ONNX.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/2.24G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "e53d77092ea646b5bff9e5c4051f0709"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "ee4965a2b5ad435c8c82b377006aa73e"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/9.10M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "0bd10f7cb5244da29d0a7da73ae52335"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Using the export variant default. Available variants are:\n",
- " - default: The default ONNX variant.\n",
- "Using framework PyTorch: 2.1.0+cu121\n",
- "Overriding 1 configuration item(s)\n",
- "\t- use_cache -> False\n",
- "Saving external data to one file...\n"
- ]
- }
- ],
- "source": [
- "from optimum.onnxruntime import ORTModelForTokenClassification\n",
- "import tensorflow as tf\n",
- "\n",
- "MODEL_NAME = 'xlm-roberta-large-finetuned-conll03-english'\n",
- "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
- "\n",
- "ort_model = ORTModelForTokenClassification.from_pretrained(MODEL_NAME, export=True)\n",
- "\n",
- "# Save the ONNX model\n",
- "ort_model.save_pretrained(ONNX_MODEL)"
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/9.10M [00:00, ?B/s]"
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "lH-d7yGXqpqZ"
- },
- "source": [
- "Let's have a look inside the directory and see what we are dealing with:"
- ]
- },
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using the export variant default. Available variants are:\n",
+ " - default: The default ONNX variant.\n",
+ "Using framework PyTorch: 2.1.0+cu121\n",
+ "Overriding 1 configuration item(s)\n",
+ "\t- use_cache -> False\n",
+ "Saving external data to one file...\n"
+ ]
+ }
+ ],
+ "source": [
+ "from optimum.onnxruntime import ORTModelForTokenClassification\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "MODEL_NAME = 'xlm-roberta-large-finetuned-conll03-english'\n",
+ "ONNX_MODEL = f\"onnx_models/{MODEL_NAME}\"\n",
+ "\n",
+ "ort_model = ORTModelForTokenClassification.from_pretrained(MODEL_NAME, export=True)\n",
+ "\n",
+ "# Save the ONNX model\n",
+ "ort_model.save_pretrained(ONNX_MODEL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look inside the directory and see what we are dealing with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "id": "fp6t5TETqpqZ",
- "outputId": "3eab09a6-51c3-48f8-d8e5-74e76fe22585",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "total 2205260\n",
- "-rw-r--r-- 1 root root 1046 Jan 9 19:26 config.json\n",
- "-rw-r--r-- 1 root root 617783 Jan 9 19:26 model.onnx\n",
- "-rw-r--r-- 1 root root 2235396096 Jan 9 19:26 model.onnx_data\n",
- "-rw-r--r-- 1 root root 5069051 Jan 9 19:26 sentencepiece.bpe.model\n",
- "-rw-r--r-- 1 root root 280 Jan 9 19:26 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 418 Jan 9 19:26 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 17082660 Jan 9 19:26 tokenizer.json\n"
- ]
- }
- ],
- "source": [
- "!ls -l {ONNX_MODEL}"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 2205260\n",
+ "-rw-r--r-- 1 root root 1046 Jan 9 19:26 config.json\n",
+ "-rw-r--r-- 1 root root 617783 Jan 9 19:26 model.onnx\n",
+ "-rw-r--r-- 1 root root 2235396096 Jan 9 19:26 model.onnx_data\n",
+ "-rw-r--r-- 1 root root 5069051 Jan 9 19:26 sentencepiece.bpe.model\n",
+ "-rw-r--r-- 1 root root 280 Jan 9 19:26 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 418 Jan 9 19:26 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 17082660 Jan 9 19:26 tokenizer.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mkdir {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to `assets` folder which Spark NLP will look for\n",
+ "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get label2id dictionary\n",
+ "labels = ort_model.config.id2label\n",
+ "# sort the dictionary based on the id\n",
+ "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
+ "\n",
+ "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
+ " f.write('\\n'.join(labels))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Voila! We have our `sentencepiece.bpe.model` inside assets directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "id": "u5kdjGpdqpqZ"
- },
- "outputs": [],
- "source": [
- "!mkdir {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "onnx_models/xlm-roberta-large-finetuned-conll03-english:\n",
+ "total 2200312\n",
+ "drwxr-xr-x 2 root root 4096 Jan 9 19:26 assets\n",
+ "-rw-r--r-- 1 root root 1046 Jan 9 19:26 config.json\n",
+ "-rw-r--r-- 1 root root 617783 Jan 9 19:26 model.onnx\n",
+ "-rw-r--r-- 1 root root 2235396096 Jan 9 19:26 model.onnx_data\n",
+ "-rw-r--r-- 1 root root 280 Jan 9 19:26 special_tokens_map.json\n",
+ "-rw-r--r-- 1 root root 418 Jan 9 19:26 tokenizer_config.json\n",
+ "-rw-r--r-- 1 root root 17082660 Jan 9 19:26 tokenizer.json\n",
+ "\n",
+ "onnx_models/xlm-roberta-large-finetuned-conll03-english/assets:\n",
+ "total 4956\n",
+ "-rw-r--r-- 1 root root 45 Jan 9 19:26 labels.txt\n",
+ "-rw-r--r-- 1 root root 5069051 Jan 9 19:26 sentencepiece.bpe.model\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -lR {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and Save XlmRoBertaForTokenClassification in Spark NLP\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's install and setup Spark NLP in Google Colab\n",
+ "- This part is pretty easy via our simple script"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "q53KnN90qpqZ"
- },
- "source": [
- "- As you can see, we need to move `sentencepiece.bpe.model` from the tokenizer to `assets` folder which Spark NLP will look for\n",
- "- We also need `labels` and their `ids` which is saved inside the model's config. We will save this inside `labels.txt`"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start Spark with Spark NLP included via our simple `start()` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "NYYI6xnTqpqa"
- },
- "outputs": [],
- "source": [
- "# get label2id dictionary\n",
- "labels = ort_model.config.id2label\n",
- "# sort the dictionary based on the id\n",
- "labels = [value for key,value in sorted(labels.items(), reverse=False)]\n",
- "\n",
- "with open(ONNX_MODEL + '/assets/labels.txt', 'w') as f:\n",
- " f.write('\\n'.join(labels))"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Apache Spark version: 3.2.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "import sparknlp\n",
+ "# let's start Spark with Spark NLP\n",
+ "spark = sparknlp.start()\n",
+ "\n",
+ "print(\"Apache Spark version: {}\".format(spark.version))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's use `loadSavedModel` functon in `RoBertaForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n",
+ "- Most params can be set later when you are loading this model in `RoBertaForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
+ "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
+ "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sparknlp.annotator import *\n",
+ "from sparknlp.base import *\n",
+ "\n",
+ "tokenClassifier = XlmRoBertaForTokenClassification\\\n",
+ " .loadSavedModel(ONNX_MODEL, spark)\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"ner\")\\\n",
+ " .setCaseSensitive(True)\\\n",
+ " .setMaxSentenceLength(128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's clean up stuff we don't need anymore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm -rf {ONNX_MODEL}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Awesome 😎 !\n",
+ "\n",
+ "This is your XlmRoBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "hx6uf2PPqpqa"
- },
- "outputs": [],
- "source": [
- "!mv {ONNX_MODEL}/sentencepiece.bpe.model {ONNX_MODEL}/assets"
- ]
- },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 318696\n",
+ "drwxr-xr-x 5 root root 4096 Oct 16 22:21 fields\n",
+ "drwxr-xr-x 2 root root 4096 Oct 16 22:21 metadata\n",
+ "-rw-r--r-- 1 root root 326328924 Oct 16 22:21 roberta_classification_onnx\n"
+ ]
+ }
+ ],
+ "source": [
+ "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny XlmRoBertaForTokenClassification model 😊"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenClassifier_loaded = XlmRoBertaForTokenClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
+ " .setInputCols([\"document\",'token'])\\\n",
+ " .setOutputCol(\"ner\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see what labels were used to train this model via `getClasses` function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "idrz2RCWqpqa"
- },
- "source": [
- "Voila! We have our `sentencepiece.bpe.model` inside assets directory"
+ "data": {
+ "text/plain": [
+ "['B-LOC', 'I-ORG', 'I-LOC', 'I-PER', 'B-ORG', 'O', 'B-PER']"
]
- },
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# .getClasses was introduced in spark-nlp==3.4.0\n",
+ "tokenClassifier_loaded.getClasses()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "T5YSOXhLqpqa",
- "outputId": "028c70df-7f80-4bf6-9779-e8b26ee574aa",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "onnx_models/xlm-roberta-large-finetuned-conll03-english:\n",
- "total 2200312\n",
- "drwxr-xr-x 2 root root 4096 Jan 9 19:26 assets\n",
- "-rw-r--r-- 1 root root 1046 Jan 9 19:26 config.json\n",
- "-rw-r--r-- 1 root root 617783 Jan 9 19:26 model.onnx\n",
- "-rw-r--r-- 1 root root 2235396096 Jan 9 19:26 model.onnx_data\n",
- "-rw-r--r-- 1 root root 280 Jan 9 19:26 special_tokens_map.json\n",
- "-rw-r--r-- 1 root root 418 Jan 9 19:26 tokenizer_config.json\n",
- "-rw-r--r-- 1 root root 17082660 Jan 9 19:26 tokenizer.json\n",
- "\n",
- "onnx_models/xlm-roberta-large-finetuned-conll03-english/assets:\n",
- "total 4956\n",
- "-rw-r--r-- 1 root root 45 Jan 9 19:26 labels.txt\n",
- "-rw-r--r-- 1 root root 5069051 Jan 9 19:26 sentencepiece.bpe.model\n"
- ]
- }
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+--------------------+\n",
+ "| text| result|\n",
+ "+--------------------+--------------------+\n",
+ "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
+ "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
+ "+--------------------+--------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "document_assembler = DocumentAssembler() \\\n",
+ " .setInputCol('text') \\\n",
+ " .setOutputCol('document')\n",
+ "\n",
+ "tokenizer = Tokenizer() \\\n",
+ " .setInputCols(['document']) \\\n",
+ " .setOutputCol('token')\n",
+ "\n",
+ "pipeline = Pipeline(stages=[\n",
+ " document_assembler,\n",
+ " tokenizer,\n",
+ " tokenClassifier_loaded\n",
+ "])\n",
+ "\n",
+ "# couple of simple examples\n",
+ "example = spark.createDataFrame([[\"My name is Clara and I live in Berkeley, California.\"], ['My name is Clara and I live in Berkeley, California.']]).toDF(\"text\")\n",
+ "\n",
+ "result = pipeline.fit(example).transform(example)\n",
+ "\n",
+ "# result is a DataFrame\n",
+ "result.select(\"text\", \"ner.result\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That's it! You can now go wild and use hundreds of `RoBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "0bd10f7cb5244da29d0a7da73ae52335": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_825857db473849d2bb498ffb5fcfb962",
+ "IPY_MODEL_a88c300d3a81439fb3da9d46a023dc47",
+ "IPY_MODEL_636fccb3b002475a90c888f987b36400"
],
- "source": [
- "!ls -lR {ONNX_MODEL}"
- ]
+ "layout": "IPY_MODEL_89c7d83dc8e640cbb93ccfe2bb3030f0"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yC4lfOb_qpqb"
- },
- "source": [
- "## Import and Save RoBertaForTokenClassification in Spark NLP\n"
- ]
+ "0faedd1c4d4148fa965bcec52325bd08": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "_T57R-wBqpqb"
- },
- "source": [
- "- Let's install and setup Spark NLP in Google Colab\n",
- "- This part is pretty easy via our simple script"
- ]
+ "106c462bc57243018162577b103db007": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "vqzkT2Tbqpqb",
- "outputId": "3d1b295e-e6e9-409c-f73f-e778352aa7ff"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Installing PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "setup Colab for PySpark 3.2.3 and Spark NLP 5.1.3\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m537.5/537.5 kB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
- ]
- }
+ "119f12a1e8204bf7b7bbf1b4d7cca247": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1544c9cc98bc469b98ae804569204420": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_94da2b3a4e7743a2b7dd362292c5496f",
+ "IPY_MODEL_6494c0f839ff4418a93df6e88c012d07",
+ "IPY_MODEL_df8afa14db524e2992b832b30bc0f692"
],
- "source": [
- "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash"
- ]
+ "layout": "IPY_MODEL_78b0860d0ed643d785ef00633d9e17e8"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "e_C8Rt6Iqpqb"
- },
- "source": [
- "Let's start Spark with Spark NLP included via our simple `start()` function"
- ]
+ "1826afdb3fd94748941c71d8621682e3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "44kpKSG-qpqb",
- "outputId": "d556353a-cd63-4e2a-a5e6-fcfbfa72fa57"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Apache Spark version: 3.2.3\n"
- ]
- }
- ],
- "source": [
- "import sparknlp\n",
- "# let's start Spark with Spark NLP\n",
- "spark = sparknlp.start()\n",
- "\n",
- "print(\"Apache Spark version: {}\".format(spark.version))"
- ]
+ "21ca8729098a4bd498b29de51a92e8bd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "IHPVcE9nqpqc"
- },
- "source": [
- "- Let's use `loadSavedModel` functon in `RoBertaForTokenClassification` which allows us to load TensorFlow model in SavedModel format\n",
- "- Most params can be set later when you are loading this model in `RoBertaForTokenClassification` in runtime like `setMaxSentenceLength`, so don't worry what you are setting them now\n",
- "- `loadSavedModel` accepts two params, first is the path to the TF SavedModel. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n",
- "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. Keep in mind the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively.st and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file systems natively."
- ]
+ "238132625e604ddf85bdbf4931889d51": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c2ab7313174c4231825be28c4a3181b8",
+ "placeholder": "",
+ "style": "IPY_MODEL_45123f4cdc0a4aa8ac90aa29d357240e",
+ "value": " 5.07M/5.07M [00:00<00:00, 41.3MB/s]"
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "-_OWtRBHqpqc"
- },
- "outputs": [],
- "source": [
- "from sparknlp.annotator import *\n",
- "from sparknlp.base import *\n",
- "\n",
- "tokenClassifier = RoBertaForTokenClassification\\\n",
- " .loadSavedModel(ONNX_MODEL, spark)\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"ner\")\\\n",
- " .setCaseSensitive(True)\\\n",
- " .setMaxSentenceLength(128)"
- ]
+ "2510ee2600af466a851566f4634e7fe9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "cgoFul55qpqc"
- },
- "source": [
- "- Let's save it on disk so it is easier to be moved around and also be used later via `.load` function"
- ]
+ "349c635d1e8c47d6ab1d0fe3819dd837": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "EzK8inoxqpqc"
- },
- "outputs": [],
- "source": [
- "tokenClassifier.write().overwrite().save(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))"
- ]
+ "44e1f77b21e04d32ad83a405ab62ca38": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0VwXvPlbqpqc"
- },
- "source": [
- "Let's clean up stuff we don't need anymore"
- ]
+ "45123f4cdc0a4aa8ac90aa29d357240e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1Ce2ZAEtqpqc"
- },
- "outputs": [],
- "source": [
- "!rm -rf {ONNX_MODEL}"
- ]
+ "451d9aec16b8417fb3b9565e5a73cb52": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Z4QXBzVsqpqd"
- },
- "source": [
- "Awesome 😎 !\n",
- "\n",
- "This is your RoBertaForTokenClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
- ]
+ "547e218edc3d47cab11d876641955409": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "wRKVUu9tqpqd",
- "outputId": "2278d83a-63be-4e1a-b574-29c963b4b7a1"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "total 318696\n",
- "drwxr-xr-x 5 root root 4096 Oct 16 22:21 fields\n",
- "drwxr-xr-x 2 root root 4096 Oct 16 22:21 metadata\n",
- "-rw-r--r-- 1 root root 326328924 Oct 16 22:21 roberta_classification_onnx\n"
- ]
- }
- ],
- "source": [
- "! ls -l {ONNX_MODEL}_spark_nlp_onnx"
- ]
+ "59a857c998734b0a95af2b96252aa130": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "qXl-kXeLqpqd"
- },
- "source": [
- "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny RoBertaForTokenClassification model 😊"
- ]
+ "61335821e4c94fcba501cb1c94541d07": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "QTrtB8u7qpqd"
- },
- "outputs": [],
- "source": [
- "tokenClassifier_loaded = RoBertaForTokenClassification.load(\"./{}_spark_nlp_onnx\".format(ONNX_MODEL))\\\n",
- " .setInputCols([\"document\",'token'])\\\n",
- " .setOutputCol(\"ner\")"
- ]
+ "636fccb3b002475a90c888f987b36400": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1826afdb3fd94748941c71d8621682e3",
+ "placeholder": "",
+ "style": "IPY_MODEL_21ca8729098a4bd498b29de51a92e8bd",
+ "value": " 9.10M/9.10M [00:00<00:00, 25.6MB/s]"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "UDHQves4qpqd"
- },
- "source": [
- "You can see what labels were used to train this model via `getClasses` function:"
- ]
+ "6494c0f839ff4418a93df6e88c012d07": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_547e218edc3d47cab11d876641955409",
+ "max": 852,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_86abb923170a4927a99c0289540ecdf4",
+ "value": 852
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "01Aw5e47qpqe",
- "outputId": "69cfded4-763c-41a3-f7ea-4ef56a744741"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['B-LOC', 'I-ORG', 'I-LOC', 'I-PER', 'B-ORG', 'O', 'B-PER']"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# .getClasses was introduced in spark-nlp==3.4.0\n",
- "tokenClassifier_loaded.getClasses()"
- ]
+ "68e4ba7bf6c5483abcae494fcdd46c6a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "F7Kbxqvxqpqe"
- },
- "source": [
- "This is how you can use your loaded classifier model in Spark NLP 🚀 pipeline:"
- ]
+ "7277b2423de14e3aada27e5191f096e5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "2f0E2Gvxqpqe",
- "outputId": "fc05a614-cc89-417b-fad8-1290b34905e0"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+--------------------+--------------------+\n",
- "| text| result|\n",
- "+--------------------+--------------------+\n",
- "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
- "|My name is Clara ...|[O, O, O, B-PER, ...|\n",
- "+--------------------+--------------------+\n",
- "\n"
- ]
- }
- ],
- "source": [
- "document_assembler = DocumentAssembler() \\\n",
- " .setInputCol('text') \\\n",
- " .setOutputCol('document')\n",
- "\n",
- "tokenizer = Tokenizer() \\\n",
- " .setInputCols(['document']) \\\n",
- " .setOutputCol('token')\n",
- "\n",
- "pipeline = Pipeline(stages=[\n",
- " document_assembler,\n",
- " tokenizer,\n",
- " tokenClassifier_loaded\n",
- "])\n",
- "\n",
- "# couple of simple examples\n",
- "example = spark.createDataFrame([[\"My name is Clara and I live in Berkeley, California.\"], ['My name is Clara and I live in Berkeley, California.']]).toDF(\"text\")\n",
- "\n",
- "result = pipeline.fit(example).transform(example)\n",
- "\n",
- "# result is a DataFrame\n",
- "result.select(\"text\", \"ner.result\").show()"
- ]
+ "7538f72687754318ac6657cd98f1f5ae": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e0e53ea997404498850c7dff6f80a5fb",
+ "placeholder": "",
+ "style": "IPY_MODEL_68e4ba7bf6c5483abcae494fcdd46c6a",
+ "value": "model.safetensors: 100%"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "WOGLNugSqpqe"
- },
- "source": [
- "That's it! You can now go wild and use hundreds of `RoBertaForTokenClassification` models from HuggingFace 🤗 in Spark NLP 🚀\n"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": []
+ "78b0860d0ed643d785ef00633d9e17e8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
+ "825857db473849d2bb498ffb5fcfb962": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_87671904fcdc4af993d4ba61f3a5f9e9",
+ "placeholder": "",
+ "style": "IPY_MODEL_2510ee2600af466a851566f4634e7fe9",
+ "value": "tokenizer.json: 100%"
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3"
+ "86abb923170a4927a99c0289540ecdf4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "87671904fcdc4af993d4ba61f3a5f9e9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "883c34b77a4c4558b74d5dde797e22ab": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
},
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "1544c9cc98bc469b98ae804569204420": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_94da2b3a4e7743a2b7dd362292c5496f",
- "IPY_MODEL_6494c0f839ff4418a93df6e88c012d07",
- "IPY_MODEL_df8afa14db524e2992b832b30bc0f692"
- ],
- "layout": "IPY_MODEL_78b0860d0ed643d785ef00633d9e17e8"
- }
- },
- "94da2b3a4e7743a2b7dd362292c5496f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_451d9aec16b8417fb3b9565e5a73cb52",
- "placeholder": "",
- "style": "IPY_MODEL_e7df5ab266744d59b29e8c11106dcb65",
- "value": "config.json: 100%"
- }
- },
- "6494c0f839ff4418a93df6e88c012d07": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_547e218edc3d47cab11d876641955409",
- "max": 852,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_86abb923170a4927a99c0289540ecdf4",
- "value": 852
- }
- },
- "df8afa14db524e2992b832b30bc0f692": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_dff16b39b66a4422a407096064fa182e",
- "placeholder": "",
- "style": "IPY_MODEL_119f12a1e8204bf7b7bbf1b4d7cca247",
- "value": " 852/852 [00:00<00:00, 13.4kB/s]"
- }
- },
- "78b0860d0ed643d785ef00633d9e17e8": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "451d9aec16b8417fb3b9565e5a73cb52": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e7df5ab266744d59b29e8c11106dcb65": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "547e218edc3d47cab11d876641955409": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "86abb923170a4927a99c0289540ecdf4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "dff16b39b66a4422a407096064fa182e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "119f12a1e8204bf7b7bbf1b4d7cca247": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "e53d77092ea646b5bff9e5c4051f0709": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_7538f72687754318ac6657cd98f1f5ae",
- "IPY_MODEL_b80a55635ff342eaa98451556f4908d3",
- "IPY_MODEL_b3ee928046b94c9194b5b5e30c61becb"
- ],
- "layout": "IPY_MODEL_a2f65e14834a47e69687d32ee896d31d"
- }
- },
- "7538f72687754318ac6657cd98f1f5ae": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e0e53ea997404498850c7dff6f80a5fb",
- "placeholder": "",
- "style": "IPY_MODEL_68e4ba7bf6c5483abcae494fcdd46c6a",
- "value": "model.safetensors: 100%"
- }
- },
- "b80a55635ff342eaa98451556f4908d3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_349c635d1e8c47d6ab1d0fe3819dd837",
- "max": 2239643256,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_eded9c7da1eb4f6c9e713c8b23b4327a",
- "value": 2239643256
- }
- },
- "b3ee928046b94c9194b5b5e30c61becb": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_59a857c998734b0a95af2b96252aa130",
- "placeholder": "",
- "style": "IPY_MODEL_ace71ecb2fae4e4196f5c75b86a522f8",
- "value": " 2.24G/2.24G [00:17<00:00, 173MB/s]"
- }
- },
- "a2f65e14834a47e69687d32ee896d31d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e0e53ea997404498850c7dff6f80a5fb": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "68e4ba7bf6c5483abcae494fcdd46c6a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "349c635d1e8c47d6ab1d0fe3819dd837": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "eded9c7da1eb4f6c9e713c8b23b4327a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "59a857c998734b0a95af2b96252aa130": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ace71ecb2fae4e4196f5c75b86a522f8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ee4965a2b5ad435c8c82b377006aa73e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_9c3ec5377e884b3faebd24fb815b6a85",
- "IPY_MODEL_9ca46de0beaa4c5d9ec526820d7aa94f",
- "IPY_MODEL_238132625e604ddf85bdbf4931889d51"
- ],
- "layout": "IPY_MODEL_61335821e4c94fcba501cb1c94541d07"
- }
- },
- "9c3ec5377e884b3faebd24fb815b6a85": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_883c34b77a4c4558b74d5dde797e22ab",
- "placeholder": "",
- "style": "IPY_MODEL_a85eea6afaf0480eac17817e4844539a",
- "value": "sentencepiece.bpe.model: 100%"
- }
- },
- "9ca46de0beaa4c5d9ec526820d7aa94f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7277b2423de14e3aada27e5191f096e5",
- "max": 5069051,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_0faedd1c4d4148fa965bcec52325bd08",
- "value": 5069051
- }
- },
- "238132625e604ddf85bdbf4931889d51": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c2ab7313174c4231825be28c4a3181b8",
- "placeholder": "",
- "style": "IPY_MODEL_45123f4cdc0a4aa8ac90aa29d357240e",
- "value": " 5.07M/5.07M [00:00<00:00, 41.3MB/s]"
- }
- },
- "61335821e4c94fcba501cb1c94541d07": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "883c34b77a4c4558b74d5dde797e22ab": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a85eea6afaf0480eac17817e4844539a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "7277b2423de14e3aada27e5191f096e5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0faedd1c4d4148fa965bcec52325bd08": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "c2ab7313174c4231825be28c4a3181b8": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "45123f4cdc0a4aa8ac90aa29d357240e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "0bd10f7cb5244da29d0a7da73ae52335": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_825857db473849d2bb498ffb5fcfb962",
- "IPY_MODEL_a88c300d3a81439fb3da9d46a023dc47",
- "IPY_MODEL_636fccb3b002475a90c888f987b36400"
- ],
- "layout": "IPY_MODEL_89c7d83dc8e640cbb93ccfe2bb3030f0"
- }
- },
- "825857db473849d2bb498ffb5fcfb962": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_87671904fcdc4af993d4ba61f3a5f9e9",
- "placeholder": "",
- "style": "IPY_MODEL_2510ee2600af466a851566f4634e7fe9",
- "value": "tokenizer.json: 100%"
- }
- },
- "a88c300d3a81439fb3da9d46a023dc47": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_44e1f77b21e04d32ad83a405ab62ca38",
- "max": 9096718,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_106c462bc57243018162577b103db007",
- "value": 9096718
- }
- },
- "636fccb3b002475a90c888f987b36400": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1826afdb3fd94748941c71d8621682e3",
- "placeholder": "",
- "style": "IPY_MODEL_21ca8729098a4bd498b29de51a92e8bd",
- "value": " 9.10M/9.10M [00:00<00:00, 25.6MB/s]"
- }
- },
- "89c7d83dc8e640cbb93ccfe2bb3030f0": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "87671904fcdc4af993d4ba61f3a5f9e9": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "2510ee2600af466a851566f4634e7fe9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "44e1f77b21e04d32ad83a405ab62ca38": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "106c462bc57243018162577b103db007": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "1826afdb3fd94748941c71d8621682e3": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "21ca8729098a4bd498b29de51a92e8bd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "89c7d83dc8e640cbb93ccfe2bb3030f0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "94da2b3a4e7743a2b7dd362292c5496f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_451d9aec16b8417fb3b9565e5a73cb52",
+ "placeholder": "",
+ "style": "IPY_MODEL_e7df5ab266744d59b29e8c11106dcb65",
+ "value": "config.json: 100%"
+ }
+ },
+ "9c3ec5377e884b3faebd24fb815b6a85": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_883c34b77a4c4558b74d5dde797e22ab",
+ "placeholder": "",
+ "style": "IPY_MODEL_a85eea6afaf0480eac17817e4844539a",
+ "value": "sentencepiece.bpe.model: 100%"
+ }
+ },
+ "9ca46de0beaa4c5d9ec526820d7aa94f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7277b2423de14e3aada27e5191f096e5",
+ "max": 5069051,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_0faedd1c4d4148fa965bcec52325bd08",
+ "value": 5069051
+ }
+ },
+ "a2f65e14834a47e69687d32ee896d31d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a85eea6afaf0480eac17817e4844539a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a88c300d3a81439fb3da9d46a023dc47": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_44e1f77b21e04d32ad83a405ab62ca38",
+ "max": 9096718,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_106c462bc57243018162577b103db007",
+ "value": 9096718
+ }
+ },
+ "ace71ecb2fae4e4196f5c75b86a522f8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b3ee928046b94c9194b5b5e30c61becb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_59a857c998734b0a95af2b96252aa130",
+ "placeholder": "",
+ "style": "IPY_MODEL_ace71ecb2fae4e4196f5c75b86a522f8",
+ "value": " 2.24G/2.24G [00:17<00:00, 173MB/s]"
+ }
+ },
+ "b80a55635ff342eaa98451556f4908d3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_349c635d1e8c47d6ab1d0fe3819dd837",
+ "max": 2239643256,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_eded9c7da1eb4f6c9e713c8b23b4327a",
+ "value": 2239643256
+ }
+ },
+ "c2ab7313174c4231825be28c4a3181b8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "df8afa14db524e2992b832b30bc0f692": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dff16b39b66a4422a407096064fa182e",
+ "placeholder": "",
+ "style": "IPY_MODEL_119f12a1e8204bf7b7bbf1b4d7cca247",
+ "value": " 852/852 [00:00<00:00, 13.4kB/s]"
+ }
+ },
+ "dff16b39b66a4422a407096064fa182e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e0e53ea997404498850c7dff6f80a5fb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e53d77092ea646b5bff9e5c4051f0709": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_7538f72687754318ac6657cd98f1f5ae",
+ "IPY_MODEL_b80a55635ff342eaa98451556f4908d3",
+ "IPY_MODEL_b3ee928046b94c9194b5b5e30c61becb"
+ ],
+ "layout": "IPY_MODEL_a2f65e14834a47e69687d32ee896d31d"
+ }
+ },
+ "e7df5ab266744d59b29e8c11106dcb65": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "eded9c7da1eb4f6c9e713c8b23b4327a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "ee4965a2b5ad435c8c82b377006aa73e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9c3ec5377e884b3faebd24fb815b6a85",
+ "IPY_MODEL_9ca46de0beaa4c5d9ec526820d7aa94f",
+ "IPY_MODEL_238132625e604ddf85bdbf4931889d51"
+ ],
+ "layout": "IPY_MODEL_61335821e4c94fcba501cb1c94541d07"
+ }
}
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaSentenceEmbeddings.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaSentenceEmbeddings.ipynb
index d27928d67b9441..4cff73dd823aa2 100644
--- a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaSentenceEmbeddings.ipynb
+++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_XlmRoBertaSentenceEmbeddings.ipynb
@@ -421,11 +421,10 @@
"gpuType": "T4",
"provenance": []
},
- "kernelspec": {
- "display_name": "Python [conda env:sparknlp_dev]",
- "language": "python",
- "name": "conda-env-sparknlp_dev-py"
- },
+ "kernelspec": ,{
+ "display_name": "Python 3",
+ "name": "python3"
+ }
"language_info": {
"codemirror_mode": {
"name": "ipython",
diff --git a/examples/python/transformers/onnx/ONNX_Configs_in_Spark_NLP_AlbertForQuestionAnswering.ipynb b/examples/python/transformers/onnx/ONNX_Configs_in_Spark_NLP_AlbertForQuestionAnswering.ipynb
index b8134b3d7703b5..d3a6a9e0e8759e 100644
--- a/examples/python/transformers/onnx/ONNX_Configs_in_Spark_NLP_AlbertForQuestionAnswering.ipynb
+++ b/examples/python/transformers/onnx/ONNX_Configs_in_Spark_NLP_AlbertForQuestionAnswering.ipynb
@@ -93,7 +93,7 @@
"metadata": {},
"source": [
"- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n",
- "- We lock `transformers` on version `4.29.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
+ "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully.\n",
"- Albert uses SentencePiece, so we will have to install that as well"
]
},
@@ -162,7 +162,7 @@
}
],
"source": [
- "!pip install -q --upgrade transformers[onnx]==4.29.1 optimum sentencepiece tensorflow"
+ "!pip install -q --upgrade transformers[onnx]==4.34.1 optimum sentencepiece tensorflow"
]
},
{
diff --git a/python/sparknlp/annotator/embeddings/bge_embeddings.py b/python/sparknlp/annotator/embeddings/bge_embeddings.py
index fcb9e7a1b2b9ed..ea2d2a72aedbdf 100644
--- a/python/sparknlp/annotator/embeddings/bge_embeddings.py
+++ b/python/sparknlp/annotator/embeddings/bge_embeddings.py
@@ -26,6 +26,8 @@ class BGEEmbeddings(AnnotatorModel,
BGE, or BAAI General Embeddings, a model that can map any text to a low-dimensional dense
vector which can be used for tasks like retrieval, classification, clustering, or semantic search.
+
+ Note that this annotator is only supported for Spark Versions 3.4 and up.
Pretrained models can be loaded with `pretrained` of the companion object:
diff --git a/python/sparknlp/annotator/embeddings/e5_embeddings.py b/python/sparknlp/annotator/embeddings/e5_embeddings.py
index ee372290b1a333..c06a708bb9f12e 100644
--- a/python/sparknlp/annotator/embeddings/e5_embeddings.py
+++ b/python/sparknlp/annotator/embeddings/e5_embeddings.py
@@ -25,6 +25,8 @@ class E5Embeddings(AnnotatorModel,
"""Sentence embeddings using E5.
E5, a weakly supervised text embedding model that can generate text embeddings tailored to any task (e.g., classification, retrieval, clustering, text evaluation, etc.)
+ Note that this annotator is only supported for Spark Versions 3.4 and up.
+
Pretrained models can be loaded with :meth:`.pretrained` of the companion
object:
diff --git a/python/sparknlp/annotator/embeddings/mpnet_embeddings.py b/python/sparknlp/annotator/embeddings/mpnet_embeddings.py
index a1393462d41b0d..0d72cf3c388319 100644
--- a/python/sparknlp/annotator/embeddings/mpnet_embeddings.py
+++ b/python/sparknlp/annotator/embeddings/mpnet_embeddings.py
@@ -28,6 +28,8 @@ class MPNetEmbeddings(AnnotatorModel,
to inherit the advantages of masked language modeling and permuted language modeling for
natural language understanding.
+ Note that this annotator is only supported for Spark Versions 3.4 and up.
+
Pretrained models can be loaded with :meth:`.pretrained` of the companion
object:
diff --git a/src/main/scala/com/johnsnowlabs/nlp/embeddings/BGEEmbeddings.scala b/src/main/scala/com/johnsnowlabs/nlp/embeddings/BGEEmbeddings.scala
index 139f9efd2dbf66..3f1428d27a8506 100644
--- a/src/main/scala/com/johnsnowlabs/nlp/embeddings/BGEEmbeddings.scala
+++ b/src/main/scala/com/johnsnowlabs/nlp/embeddings/BGEEmbeddings.scala
@@ -42,6 +42,8 @@ import org.slf4j.{Logger, LoggerFactory}
* vector which can be used for tasks like retrieval, classification, clustering, or semantic
* search.
*
+ * Note that this annotator is only supported for Spark Versions 3.4 and up.
+ *
* Pretrained models can be loaded with `pretrained` of the companion object:
* {{{
* val embeddings = BGEEmbeddings.pretrained()
@@ -117,7 +119,7 @@ import org.slf4j.{Logger, LoggerFactory}
* | result|
* +--------------------------------------------------------------------------------+
* |[[8.0190285E-4, -0.005974853, -0.072875895, 0.007944068, 0.026059335, -0.0080...|
- * [[0.050514214, 0.010061974, -0.04340176, -0.020937217, 0.05170225, 0.01157857...|
+ * |[[0.050514214, 0.010061974, -0.04340176, -0.020937217, 0.05170225, 0.01157857...|
* +--------------------------------------------------------------------------------+
* }}}
*
diff --git a/src/main/scala/com/johnsnowlabs/nlp/embeddings/E5Embeddings.scala b/src/main/scala/com/johnsnowlabs/nlp/embeddings/E5Embeddings.scala
index 38ead9b55ac086..1e516d26199b28 100644
--- a/src/main/scala/com/johnsnowlabs/nlp/embeddings/E5Embeddings.scala
+++ b/src/main/scala/com/johnsnowlabs/nlp/embeddings/E5Embeddings.scala
@@ -41,6 +41,8 @@ import org.slf4j.{Logger, LoggerFactory}
* E5, an instruction-finetuned text embedding model that can generate text embeddings tailored
* to any task (e.g., classification, retrieval, clustering, text evaluation, etc.)
*
+ * Note that this annotator is only supported for Spark Versions 3.4 and up.
+ *
* Pretrained models can be loaded with `pretrained` of the companion object:
* {{{
* val embeddings = E5Embeddings.pretrained()
diff --git a/src/main/scala/com/johnsnowlabs/nlp/embeddings/MPNetEmbeddings.scala b/src/main/scala/com/johnsnowlabs/nlp/embeddings/MPNetEmbeddings.scala
index 0f9b0288a14436..710dce6a25db5d 100644
--- a/src/main/scala/com/johnsnowlabs/nlp/embeddings/MPNetEmbeddings.scala
+++ b/src/main/scala/com/johnsnowlabs/nlp/embeddings/MPNetEmbeddings.scala
@@ -43,6 +43,8 @@ import org.slf4j.{Logger, LoggerFactory}
* pre-training method, named masked and permuted language modeling, to inherit the advantages of
* masked language modeling and permuted language modeling for natural language understanding.
*
+ * Note that this annotator is only supported for Spark Versions 3.4 and up.
+ *
* Pretrained models can be loaded with `pretrained` of the companion object:
* {{{
* val embeddings = MPNetEmbeddings.pretrained()
From 66163235b73230b1d0a5da63d0a8e723ece872d7 Mon Sep 17 00:00:00 2001
From: Lev
Date: Mon, 27 May 2024 17:06:00 +0300
Subject: [PATCH 2/3] Fixies (#14307)
---
docs/en/auxiliary.md | 2 +-
docs/en/install.md | 2 +-
docs/en/transformers.md | 2 +-
docs/index.md | 4 ++--
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/docs/en/auxiliary.md b/docs/en/auxiliary.md
index 0ad000ca9e72a1..1c65c3621ca3cb 100644
--- a/docs/en/auxiliary.md
+++ b/docs/en/auxiliary.md
@@ -66,7 +66,7 @@ import com.johnsnowlabs.nlp.Annotation
**Examples:**
Complete usage examples can be seen here:
-https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/234-release-candidate/jupyter/annotation/english/spark-nlp-basics/spark-nlp-basics-functions.ipynb
+[https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/234-release-candidate/jupyter/annotation/english/spark-nlp-basics/spark-nlp-basics-functions.ipynb](https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/234-release-candidate/jupyter/annotation/english/spark-nlp-basics/spark-nlp-basics-functions.ipynb)
diff --git a/docs/en/install.md b/docs/en/install.md
index f405cab46030d8..dbd4ade00c0e26 100644
--- a/docs/en/install.md
+++ b/docs/en/install.md
@@ -760,7 +760,7 @@ Finally, use **jupyter_notebook_config.json** for the password:
In order to fully take advantage of Spark NLP on Windows (8 or 10), you need to setup/install Apache Spark, Apache Hadoop, Java and a Pyton environment correctly by following the following instructions: [https://github.com/JohnSnowLabs/spark-nlp/discussions/1022](https://github.com/JohnSnowLabs/spark-nlp/discussions/1022)
-
\
+
### How to correctly install Spark NLP on Windows
diff --git a/docs/en/transformers.md b/docs/en/transformers.md
index f73f644d3f3606..e17cf3de529afc 100644
--- a/docs/en/transformers.md
+++ b/docs/en/transformers.md
@@ -9,7 +9,7 @@ modify_date: "2023-06-18"
use_language_switcher: "Python-Scala-Java"
show_nav: true
sidebar:
-nav: sparknlp
+ nav: sparknlp
---
diff --git a/docs/index.md b/docs/index.md
index fcce82b00d9d01..154824a38a2398 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -314,7 +314,7 @@ data:
- title:
image:
- src: https://upload.wikimedia.org/wikipedia/fr/thumb/8/8e/Centre_national_de_la_recherche_scientifique.svg/2048px-Centre_national_de_la_recherche_scientifique.svg.png
+ src: https://iscpif.fr/wp-content/uploads/2023/11/Logo-CNRS-ISCPIF.png
url: https://iscpif.fr/
style: "padding: 30px;"
is_row: true
@@ -344,7 +344,7 @@ data:
is_row: true
- title:
image:
- src: https://upload.wikimedia.org/wikipedia/commons/thumb/f/f1/Columbia_University_shield.svg/1184px-Columbia_University_shield.svg.png
+ src: https://miro.medium.com/v2/resize:fit:1024/0*3qIWoFnZgVUtsXB-.png
url: https://www.columbia.edu/
style: "padding: 25px;"
is_row: true
From a5e4a7ab17ca5e1a8f7563dc5d506a7f8fb4fa58 Mon Sep 17 00:00:00 2001
From: ahmedlone127
Date: Tue, 11 Jun 2024 18:33:39 +0500
Subject: [PATCH 3/3] adding fix for broken annotators
---
.../scala/com/johnsnowlabs/ml/ai/Bert.scala | 24 ++++++++++-----
.../com/johnsnowlabs/ml/ai/XlmRoberta.scala | 29 ++++++++++++-------
2 files changed, 34 insertions(+), 19 deletions(-)
diff --git a/src/main/scala/com/johnsnowlabs/ml/ai/Bert.scala b/src/main/scala/com/johnsnowlabs/ml/ai/Bert.scala
index 6de0eabd36ce1f..d218426dfaeed8 100644
--- a/src/main/scala/com/johnsnowlabs/ml/ai/Bert.scala
+++ b/src/main/scala/com/johnsnowlabs/ml/ai/Bert.scala
@@ -16,12 +16,12 @@
package com.johnsnowlabs.ml.ai
-import ai.onnxruntime.OnnxTensor
+import ai.onnxruntime.{OnnxTensor, TensorInfo}
import com.johnsnowlabs.ml.ai.util.PrepareEmbeddings
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
-import com.johnsnowlabs.ml.util.{ModelArch, ONNX, TensorFlow}
+import com.johnsnowlabs.ml.util.{LinAlg, ModelArch, ONNX, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.slf4j.{Logger, LoggerFactory}
@@ -206,10 +206,13 @@ private[johnsnowlabs] class Bert(
val tokenTensors =
OnnxTensor.createTensor(env, batch.map(x => x.map(x => x.toLong)).toArray)
+ val attentionMask = batch
+ .map(sentence => sentence.map(x => if (x == 0) 0L else 1L))
+ .toArray
val maskTensors =
OnnxTensor.createTensor(
env,
- batch.map(sentence => sentence.map(x => if (x == 0L) 0L else 1L)).toArray)
+ attentionMask)
val segmentTensors =
OnnxTensor.createTensor(env, batch.map(x => Array.fill(maxSentenceLength)(0L)).toArray)
@@ -222,8 +225,11 @@ private[johnsnowlabs] class Bert(
try {
val results = runner.run(inputs)
+ val lastHiddenState = results.get("last_hidden_state").get()
+ val info = lastHiddenState.getInfo.asInstanceOf[TensorInfo]
+ val shape = info.getShape
try {
- val embeddings = results
+ val flattenEmbeddings = results
.get("last_hidden_state")
.get()
.asInstanceOf[OnnxTensor]
@@ -235,7 +241,9 @@ private[johnsnowlabs] class Bert(
// runner.close()
// env.close()
//
- embeddings
+ val embeddings = LinAlg.avgPooling(flattenEmbeddings, attentionMask, shape)
+ val normalizedEmbeddings = LinAlg.l2Normalize(embeddings)
+ LinAlg.denseMatrixToArray(normalizedEmbeddings)
} finally if (results != null) results.close()
} catch {
case e: Exception =>
@@ -287,12 +295,12 @@ private[johnsnowlabs] class Bert(
tensors.clearSession(outs)
tensors.clearTensors()
- embeddings
+ val dim = embeddings.length / batchLength
+ embeddings.grouped(dim).toArray
}
- val dim = embeddings.length / batchLength
- embeddings.grouped(dim).toArray
+ embeddings
}
def tagSequenceSBert(batch: Seq[Array[Int]]): Array[Array[Float]] = {
diff --git a/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoberta.scala b/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoberta.scala
index df7dbeb4a106a9..ed8055a0b72674 100644
--- a/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoberta.scala
+++ b/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoberta.scala
@@ -16,13 +16,13 @@
package com.johnsnowlabs.ml.ai
-import ai.onnxruntime.OnnxTensor
+import ai.onnxruntime.{OnnxTensor, TensorInfo}
import com.johnsnowlabs.ml.ai.util.PrepareEmbeddings
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.tensorflow.sentencepiece.{SentencePieceWrapper, SentencepieceEncoder}
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
-import com.johnsnowlabs.ml.util.{ModelArch, ONNX, TensorFlow}
+import com.johnsnowlabs.ml.util.{LinAlg, ModelArch, ONNX, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.slf4j.{Logger, LoggerFactory}
@@ -213,22 +213,26 @@ private[johnsnowlabs] class XlmRoberta(
val embeddings = detectedEngine match {
case ONNX.name =>
val (runner, env) = onnxWrapper.get.getSession(onnxSessionOptions)
-
+ val attentionMask = batch
+ .map(sentence => sentence.map(x => if (x == SentencePadTokenId) 0L else 1L))
+ .toArray
val tokenTensors =
OnnxTensor.createTensor(env, batch.map(x => x.map(x => x.toLong)).toArray)
val maskTensors =
OnnxTensor.createTensor(
env,
- batch
- .map(sentence => sentence.map(x => if (x == SentencePadTokenId) 0L else 1L))
- .toArray)
+ attentionMask
+ )
val inputs =
Map("input_ids" -> tokenTensors, "attention_mask" -> maskTensors).asJava
val results = runner.run(inputs)
+ val lastHiddenState = results.get("last_hidden_state").get()
+ val info = lastHiddenState.getInfo.asInstanceOf[TensorInfo]
+ val shape = info.getShape
try {
- val embeddings = results
+ val flattenEmbeddings = results
.get("last_hidden_state")
.get()
.asInstanceOf[OnnxTensor]
@@ -236,8 +240,10 @@ private[johnsnowlabs] class XlmRoberta(
.array()
tokenTensors.close()
maskTensors.close()
- embeddings
+ val embeddings = LinAlg.avgPooling(flattenEmbeddings, attentionMask, shape)
+ val normalizedEmbeddings = LinAlg.l2Normalize(embeddings)
+ LinAlg.denseMatrixToArray(normalizedEmbeddings)
} finally if (results != null) results.close()
case TensorFlow.name =>
val tensors = new TensorResources()
@@ -277,11 +283,12 @@ private[johnsnowlabs] class XlmRoberta(
tensors.clearSession(outs)
tensors.clearTensors()
- embeddings
+ val dim = embeddings.length / batchLength
+ embeddings.grouped(dim).toArray
}
- val dim = embeddings.length / batchLength
- embeddings.grouped(dim).toArray
+ embeddings
+
}
def predict(