Merge branch 'huggingface:main' into ig/hs-2880-llama3fp8

imangohari1 · Sep 16, 2024 · cb876df · cb876df
2 parents daf7429 + 520c875
commit cb876df
Show file tree

Hide file tree

Showing 110 changed files with 3,537 additions and 339 deletions.
diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml
@@ -35,9 +35,13 @@ jobs:
         source venv/bin/activate
         ruff check . setup.py
         ruff format --check . setup.py
-  post-comment:
-    if: failure() && github.event_name == 'pull_request'
-    needs: check
-    uses: ./.github/workflows/failed_code_quality_check_comment.yml
-    with:
-      pr-number: ${{ github.event.number }}
+    - name: Store PR number if failure to post comment
+      if: failure() && github.event_name == 'pull_request'
+      env:
+        PR_NUMBER: ${{ github.event.number }}
+      run: echo $PR_NUMBER > ./pr_number
+    - uses: actions/upload-artifact@v4
+      if: failure() && github.event_name == 'pull_request'
+      with:
+        name: pr-number
+        path: ./pr_number
diff --git a/.github/workflows/failed_code_quality_check_comment.yml b/.github/workflows/failed_code_quality_check_comment.yml
@@ -1,18 +1,36 @@
 name: Post comment in PR for failed code quality check
 
 on:
-  workflow_call:
-    inputs:
-      pr-number:
-        required: true
-        type: number
+  workflow_run:
+    workflows: ["Check code quality"]
+    types:
+      - completed
 
 jobs:
   post-comment:
     runs-on: ubuntu-latest
+    if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'failure'
     name: Post comment to run make style
+    permissions:
+      pull-requests: write
     steps:
+      - name: Download artifact
+        uses: dawidd6/action-download-artifact@v6
+        with:
+          name: pr-number
+          run_id: ${{github.event.workflow_run.id }}
+      - name: Get PR number
+        id: github-context
+        run: |
+          content_pr_number=$(cat ./pr_number)
+          if [[ $content_pr_number =~ ^[0-9]+$ ]]; then
+            echo "pr_number=$content_pr_number" >> $GITHUB_OUTPUT
+            rm -rf ./pr_number
+          else
+            echo "Encountered an invalid PR number"
+            exit 1
+          fi
       - uses: peter-evans/create-or-update-comment@v4
         with:
-          issue-number: ${{ inputs.pr-number }}
+          issue-number: ${{ steps.github-context.outputs.pr_number }}
           body: The code quality check failed, please run `make style`.
diff --git a/.github/workflows/slow_tests_gaudi2.yml b/.github/workflows/slow_tests_gaudi2.yml
@@ -21,12 +21,15 @@ jobs:
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
             -e GAUDI2_CI=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
@@ -47,17 +50,20 @@ jobs:
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
             -e GAUDI2_CI=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
             vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \
-            /bin/bash tests/ci/slow_tests_deepspeed.sh
+            /bin/bash -c "pip install huggingface_hub && huggingface-cli login --token ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} && /bin/bash tests/ci/slow_tests_deepspeed.sh"  # bash -c keeps the whole && chain inside the container instead of the host shell
   fsdp:
     name: Test FSDP models
     if: ${{ !cancelled() && (success() || failure()) }}
@@ -73,12 +79,15 @@ jobs:
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
             -e GAUDI2_CI=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
@@ -99,12 +108,15 @@ jobs:
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
             -e GAUDI2_CI=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
@@ -122,17 +134,20 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest:latest
+            docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
             -e GAUDI2_CI=1 \
             -e RUN_ALBERT_XXL_1X=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
@@ -155,12 +170,15 @@ jobs:
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
             -e GAUDI2_CI=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
@@ -181,12 +199,15 @@ jobs:
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
             -e GAUDI2_CI=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
@@ -215,12 +236,15 @@ jobs:
       - name: Run tests
         run: |
             docker run \
+            --rm \
             -v $PWD:/root/workspace \
+            -v /scratch-1:/data \
             --workdir=/root/workspace \
             --runtime=habana \
             -e HABANA_VISIBLE_DEVICES=all \
-            -e GAUDI2_CI=1 \
             -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
+            -e GAUDI2_CI=1 \
+            -e HF_HOME=/data \
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \

diff --git a/README.md b/README.md
@@ -59,9 +59,9 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up
 To use the example associated with the latest stable release, run:
 > ```
 > git clone https://github.com/huggingface/optimum-habana
-> cd optimum-habana && git checkout v1.13.0
+> cd optimum-habana && git checkout v1.13.1
 > ```
-> with `v1.13.0` the version number of this release.
+> with `v1.13.1` the version number of this release.
 
 ### Option 2: Use the latest main branch under development
 
@@ -72,6 +72,18 @@ pip install git+https://github.com/huggingface/optimum-habana.git
 git clone https://github.com/huggingface/optimum-habana
 ```
 
+### Option 3: Use the `transformers_future` branch to have the latest changes from Transformers
+
+The `transformers_future` branch is regularly updated with the latest changes from the main branches of Optimum Habana and Transformers. This enables you to try out new Transformers features that have not been merged into the main branch yet.
+
+> [!WARNING]
+> The `transformers_future` branch may have some regressions or bugs and may be less stable than the main branch.
+
+```bash
+pip install git+https://github.com/huggingface/optimum-habana.git@transformers_future
+git clone -b transformers_future https://github.com/huggingface/optimum-habana
+```
+
 ## Install dependencies
 
 To use DeepSpeed on HPUs, you also need to run the following command:
@@ -141,7 +153,7 @@ You can generate images from prompts using Stable Diffusion on Intel Gaudi using
 + from optimum.habana.diffusers import GaudiDDIMScheduler, GaudiStableDiffusionPipeline
 
 
-model_name = "runwayml/stable-diffusion-v1-5"
+model_name = "CompVis/stable-diffusion-v1-4"
 
 - scheduler = DDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
 + scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
@@ -229,7 +241,9 @@ The following model architectures, tasks and device distributions have been vali
 |------------------|:--------:|:--------------------:|:------|
 | Stable Diffusion | <li>[textual inversion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#textual-inversion)</li><li>[ControlNet](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#controlnet-training)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 | Stable Diffusion XL | <li>[fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Stable Diffusion Depth2img | | <li>Single card</li> | <li>[depth-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 | LDM3D            |          | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Text to Video    |          | <li>Single card</li> | <li>[text-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-video)</li> |
 
 </div>
 

diff --git a/docs/source/index.mdx b/docs/source/index.mdx
@@ -83,7 +83,9 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 |---------------------|:--------:|:---------:|:------|
 | Stable Diffusion    | <li>[textual inversion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#textual-inversion)</li><li>[ControlNet](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#controlnet-training)</li> | <div style="text-align:left"><li>Single card</li></div> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 | Stable Diffusion XL | <li>[fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)</li> | <div style="text-align:left"><li>Single card</li></div> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Stable Diffusion Depth2img | | <div style="text-align:left"><li>Single card</li></div> | <li>[depth-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 | LDM3D               |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Text to Video    |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-video)</li> |
 
 - PyTorch Image Models/TIMM:
 

diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
@@ -62,7 +62,7 @@ Here is how to use it and the differences with the 🤗 Diffusers library:
 + from optimum.habana.diffusers import GaudiDDIMScheduler, GaudiStableDiffusionPipeline
 
 
-model_name = "runwayml/stable-diffusion-v1-5"
+model_name = "CompVis/stable-diffusion-v1-4"
 
 - scheduler = DDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
 + scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler")

diff --git a/docs/source/tutorials/stable_diffusion.mdx b/docs/source/tutorials/stable_diffusion.mdx
@@ -33,7 +33,7 @@ Finally, you will need to specify a [Gaudi configuration](https://huggingface.co
 ```python
 from optimum.habana.diffusers import GaudiDDIMScheduler, GaudiStableDiffusionPipeline
 
-model_name = "runwayml/stable-diffusion-v1-5"
+model_name = "CompVis/stable-diffusion-v1-4"
 
 scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
 
@@ -166,7 +166,7 @@ Here is how to do it:
 import torch
 
 pipeline = GaudiStableDiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
+    "CompVis/stable-diffusion-v1-4",
     scheduler=scheduler,
     use_habana=True,
     use_hpu_graphs=True,

diff --git a/docs/source/usage_guides/accelerate_inference.mdx b/docs/source/usage_guides/accelerate_inference.mdx
@@ -76,7 +76,7 @@ trainer = GaudiTrainer(
 ```python
 from optimum.habana.diffusers import GaudiDDIMScheduler, GaudiStableDiffusionPipeline
 
-model_name = "runwayml/stable-diffusion-v1-5"
+model_name = "CompVis/stable-diffusion-v1-4"
 
 scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
 

diff --git a/examples/audio-classification/run_audio_classification.py b/examples/audio-classification/run_audio_classification.py
@@ -47,7 +47,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.43.0")
-check_optimum_habana_min_version("1.13.0")
+check_optimum_habana_min_version("1.14.0.dev0")
 
 require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")
 

diff --git a/examples/contrastive-image-text/run_bridgetower.py b/examples/contrastive-image-text/run_bridgetower.py
@@ -57,7 +57,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.43.0")
-check_optimum_habana_min_version("1.13.0")
+check_optimum_habana_min_version("1.14.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
 

diff --git a/examples/contrastive-image-text/run_clip.py b/examples/contrastive-image-text/run_clip.py
@@ -62,7 +62,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.43.0")
-check_optimum_habana_min_version("1.13.0")
+check_optimum_habana_min_version("1.14.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
 

diff --git a/examples/image-classification/run_image_classification.py b/examples/image-classification/run_image_classification.py
@@ -64,7 +64,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.43.0")
-check_optimum_habana_min_version("1.13.0")
+check_optimum_habana_min_version("1.14.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")