Merge branch 'master' into AgenticRAGapp

adrianboguszewski authored Nov 29, 2024
2 parents 648fcd8 + f6500e4 commit 0d98c96

Showing 40 changed files with 1,871 additions and 705 deletions.
10 changes: 10 additions & 0 deletions .github/reusable-steps/setup-os/action.yml
```yaml
name: OS setup

runs:
  using: 'composite'
  steps:
    - name: Install coreutils
      if: runner.os == 'macOS'
      shell: bash
      run: |
        brew install coreutils
```
29 changes: 29 additions & 0 deletions .github/reusable-steps/setup-python/action.yml
```yaml
name: Python setup

inputs:
  python:
    required: true
  project:
    required: true

runs:
  using: 'composite'
  steps:
    - name: Download sample video file
      shell: bash
      run: |
        cd ${{ inputs.project }}
        curl -L -o sample_video.mp4 https://sample-videos.com/video321/mp4/720/big_buck_bunny_720p_1mb.mp4
    - name: Set up Python ${{ inputs.python }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python }}
    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install -r ${{ inputs.project }}/requirements.txt
    - name: List dependencies
      shell: bash
      run: |
        pip list
```
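A composite action like the one above is consumed as a local step from any workflow job. A minimal hypothetical caller (the subproject path is illustrative):

```yaml
# Hypothetical workflow step invoking the composite action above
jobs:
  demo:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/reusable-steps/setup-python
        with:
          python: '3.12'
          project: demos/some_demo   # hypothetical subproject path
```

The `inputs.python` and `inputs.project` values declared in the action map directly to the `with:` keys here.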
25 changes: 25 additions & 0 deletions .github/reusable-steps/timeouted-action/action.yml
```yaml
name: Run action with timeout

inputs:
  command:
    required: true
  project:
    required: true
  timeout:
    required: false
    default: 1h

runs:
  using: 'composite'
  steps:
    - name: Run JS Project
      shell: bash
      run: |
        cd ${{ inputs.project }}
        # linux requires a virtual display
        if [ "${{ runner.os }}" == "Linux" ]; then
          # the timeout trick "gracefully" kills the app after specified time (waiting for user input otherwise)
          timeout ${{ inputs.timeout }} xvfb-run ${{ inputs.command }} || [[ $? -eq 124 ]]
        else
          timeout ${{ inputs.timeout }} ${{ inputs.command }} || [[ $? -eq 124 ]]
        fi
```
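The `|| [[ $? -eq 124 ]]` idiom is worth seeing in isolation: GNU `timeout` exits with status 124 when it has to kill the command, so the guard turns a timeout into success while letting genuine failures fail. A minimal sketch, with `sleep` standing in for a demo app that would otherwise wait forever:

```shell
# `sleep 10` stands in for an app waiting on user input.
# timeout kills it after 1 second and exits 124; the guard accepts exactly that code.
timeout 1s sleep 10 || [[ $? -eq 124 ]]
echo "timed-out run treated as success: $?"   # prints 0

# A real failure (non-124 exit) still propagates past the guard:
timeout 5s false || [[ $? -eq 124 ]] || echo "genuine failure detected"
```

This is why the workflow jobs below can run GUI demos that never exit on their own without hanging or reporting spurious failures.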
179 changes: 179 additions & 0 deletions .github/workflows/sanity-check.yml
```yaml
name: Sanity check

on:
  schedule:
    - cron: "0 2 * * *"
  pull_request:
    branches: [master]
  push:
    branches: [master]

permissions:
  contents: read

jobs:
  find-subprojects:
    runs-on: ubuntu-latest
    outputs:
      notebook: ${{ steps.categorize-subprojects.outputs.notebook }}
      gradio: ${{ steps.categorize-subprojects.outputs.gradio }}
      webcam: ${{ steps.categorize-subprojects.outputs.webcam }}
      js: ${{ steps.categorize-subprojects.outputs.js }}
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Determine subprojects to test
        run: |
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            # Get unique subproject directories from changed files in PR
            subproject_dirs=$(git diff --name-only origin/master..HEAD | grep '^demos/' | xargs -I{} dirname "{}" | sort -u)
          else
            # For scheduled runs, process all subprojects
            subproject_dirs=$(find demos -type d -mindepth 1 -maxdepth 1 ! -name utils)
          fi
          echo "subproject_dirs=$subproject_dirs" >> $GITHUB_ENV
      - name: Categorize subprojects
        id: categorize-subprojects
        run: |
          notebook=()
          gradio=()
          webcam=()
          js=()
          for dir in ${{ env.subproject_dirs }}; do
            if [ -f "$dir/package.json" ]; then
              js+=("$dir")
            elif [ -f "$dir/main.ipynb" ]; then
              notebook+=("$dir")
            elif grep -q "gradio" "$dir/requirements.txt"; then
              gradio+=("$dir")
            elif grep -q -- "--stream" "$dir/main.py"; then
              webcam+=("$dir")
            fi
          done
          notebook_json=$(printf '%s\n' "${notebook[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
          gradio_json=$(printf '%s\n' "${gradio[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
          webcam_json=$(printf '%s\n' "${webcam[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
          js_json=$(printf '%s\n' "${js[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
          echo "notebook=$notebook_json" >> $GITHUB_OUTPUT
          echo "gradio=$gradio_json" >> $GITHUB_OUTPUT
          echo "webcam=$webcam_json" >> $GITHUB_OUTPUT
          echo "js=$js_json" >> $GITHUB_OUTPUT
      - name: Print subprojects to test
        run: |
          echo "Notebook subprojects: ${{ steps.categorize-subprojects.outputs.notebook }}"
          echo "Gradio subprojects: ${{ steps.categorize-subprojects.outputs.gradio }}"
          echo "Webcam subprojects: ${{ steps.categorize-subprojects.outputs.webcam }}"
          echo "JS subprojects: ${{ steps.categorize-subprojects.outputs.js }}"

  notebook:
    needs: find-subprojects
    if: ${{ needs.find-subprojects.outputs.notebook != '[]' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python: [3.9, 3.12]
        subproject: ${{ fromJson(needs.find-subprojects.outputs.notebook) }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
      - uses: ./.github/reusable-steps/setup-python
        with:
          python: ${{ matrix.python }}
          project: ${{ matrix.subproject }}
      - uses: ./.github/reusable-steps/timeouted-action
        name: Run Notebook
        with:
          command: jupyter nbconvert --to notebook --execute main.ipynb
          project: ${{ matrix.subproject }}

  gradio:
    needs: find-subprojects
    if: ${{ needs.find-subprojects.outputs.gradio != '[]' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python: [3.9, 3.12]
        subproject: ${{ fromJson(needs.find-subprojects.outputs.gradio) }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/reusable-steps/setup-os
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
      - uses: ./.github/reusable-steps/setup-python
        with:
          python: ${{ matrix.python }}
          project: ${{ matrix.subproject }}
      - name: Login to HF
        shell: bash
        run: |
          huggingface-cli login --token ${{ secrets.HF_TOKEN }}
      - uses: ./.github/reusable-steps/timeouted-action
        name: Run Gradio App
        with:
          command: python main.py
          project: ${{ matrix.subproject }}
          timeout: 30m

  webcam:
    needs: find-subprojects
    if: ${{ needs.find-subprojects.outputs.webcam != '[]' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python: [3.9, 3.12]
        subproject: ${{ fromJson(needs.find-subprojects.outputs.webcam) }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/reusable-steps/setup-os
      - uses: ./.github/reusable-steps/setup-python
        with:
          python: ${{ matrix.python }}
          project: ${{ matrix.subproject }}
      - uses: ./.github/reusable-steps/timeouted-action
        name: Run Webcam Demo
        with:
          command: python main.py --stream sample_video.mp4
          project: ${{ matrix.subproject }}

  js:
    needs: find-subprojects
    if: ${{ needs.find-subprojects.outputs.js != '[]' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        subproject: ${{ fromJson(needs.find-subprojects.outputs.js) }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/reusable-steps/setup-os
      - name: Install Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'
      - name: Install dependencies
        run: |
          cd ${{ matrix.subproject }}
          npm install
      - uses: ./.github/reusable-steps/timeouted-action
        name: Run JS Project
        with:
          command: npm start
          project: ${{ matrix.subproject }}
          timeout: 1m
```
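The `categorize-subprojects` step relies on two shell idioms worth seeing in isolation: deriving unique subproject directories from a changed-file list, and converting a bash array into the compact JSON that `fromJson` consumes in the job matrices. A minimal sketch with hypothetical directory names:

```shell
# Hypothetical changed-file list, as `git diff --name-only` would print it
changed_files=$'demos/app_a/main.py\ndemos/app_a/requirements.txt\ndemos/app_b/main.ipynb'

# Unique subproject directories: keep demos/ paths, strip filenames, deduplicate
dirs=$(echo "$changed_files" | grep '^demos/' | xargs -I{} dirname "{}" | sort -u)
echo "$dirs"   # demos/app_a and demos/app_b, one per line

# Bash array -> compact JSON array, ready for a matrix via fromJson
arr=($dirs)
json=$(printf '%s\n' "${arr[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
echo "$json"   # ["demos/app_a","demos/app_b"]
```

The `jq -R -s -c` invocation reads the raw lines as one string, splits on newlines, and drops the trailing empty element, which is why each output is a clean JSON array even when an array is empty.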
22 changes: 22 additions & 0 deletions .github/workflows/stale.yml
```yaml
name: Close inactive issues
on:
  schedule:
    - cron: "30 1 * * *"

jobs:
  close-issues:
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
    steps:
      - uses: actions/stale@v9.0.0
        with:
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
          stale-issue-message: "This issue has been marked because it has been open for 30 days with no activity. It is scheduled to close in 14 days."
          close-issue-message: "This issue was closed automatically because it has been inactive for 14 days since being marked as stale. Please reopen if needed."
          days-before-pr-stale: -1
          days-before-pr-close: -1
          repo-token: ${{ secrets.GITHUB_TOKEN }}
```
3 changes: 2 additions & 1 deletion ai_ref_kits/agentic_llm_rag/requirements.txt
```
pyyaml==6.0.1
PyMuPDF==1.24.10

gradio==4.42.0
fastapi==0.112.4
pydantic==2.9.2
```
30 changes: 18 additions & 12 deletions ai_ref_kits/conversational_ai_chatbot/README.md
This kit uses the following technology stack:
- [OpenVINO toolkit](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) ([Docs](https://docs.openvino.ai/))
- [Meta’s Llama 3.2](https://llama.meta.com/llama3/)
- [OpenAI Whisper](https://openai.com/index/whisper/)
- [MeloTTS](https://github.com/myshell-ai/MeloTTS/tree/main)
- [Gradio interface](https://www.gradio.app/docs/gradio/chatinterface)

For other Intel AI kits, see the [Edge AI Reference Kits repository](/).
This activates the virtual environment and changes your shell's prompt to indicate this.

### Install the Packages

MeloTTS is a high-quality multilingual text-to-speech library from MIT and MyShell.ai. Its dependencies must be installed separately from the rest of the requirements because of potential version conflicts. Details of the model can be found [here](https://github.com/myshell-ai/MeloTTS). Use the following commands to install MeloTTS locally:

```shell
python -m pip install --upgrade pip
pip install git+https://github.com/myshell-ai/MeloTTS.git@5b538481e24e0d578955be32a95d88fcbde26dc8 --no-deps
python -m unidic download
```

To install the other packages, run the following commands:

```shell
pip install -r requirements.txt
```

## Get Access to Llama

_NOTE: If you already have access to the Llama model weights, you can proceed to the authentication step, which is mandatory to convert the Llama model._
python convert_and_optimize_chat.py --chat_model_type llama3.2-3B --embedding_mo

### Step 3. Text-to-Speech (TTS) Model Conversion

The text-to-speech (TTS) model converts the chatbot's text responses to spoken words, which enables voice output. The application uses the MeloTTS model for TTS. The TTS model doesn't require conversion; it is compiled at runtime using ``torch.compile`` with the OpenVINO backend.

After you run the conversion scripts, you can run app.py to launch the application.

_NOTE: This application requires more than 16GB of memory because the models are

For the python script, you must include the following model directory arguments.


- `--asr_model path/to/asr_model`: The path to your ASR (Automatic Speech Recognition) model directory, which uses `int8` precision (for example, `model/distil-whisper-large-v3-int8`) for efficient speech recognition.

- `--chat_model path/to/chat_model`: The path to your chat model directory (for example, `model/llama3.2-3B-INT4`) that drives conversation flow and response generation.

- `--reranker_model path/to/reranker_model`: The path to your reranker model directory (for example, `model/bge-reranker-large-FP32`). This model reranks responses to ensure relevance and accuracy.

- `--personality path/to/personality.yaml`: The path to your custom personality YAML file (for example, `concierge_personality.yaml`).
This file defines the assistant's personality, including instructions, system configuration, and greeting prompts. You can create and specify your own custom personality file.

- `--example_pdf path/to/example.pdf`: The path to your custom PDF file, which provides additional context (for example, `Grand_Azure_Resort_Spa_Full_Guide.pdf`).
This file defines the knowledge of the resort in this concierge use case. You can use your own custom file to build a local knowledge base.

- `--public`: Include this flag to make the Gradio interface publicly accessible over the network. Without this flag, the interface is only available on your local machine.

To run the application, execute the `app.py` script with the following command. Make sure to include all necessary model directory arguments.
```shell
python app.py \
  --asr_model path/to/asr_model \
  --chat_model path/to/chat_model \
  --embedding_model path/to/embedding_model \
  --reranker_model path/to/reranker_model \
  --personality concierge_personality.yaml \
  --example_pdf Grand_Azure_Resort_Spa_Full_Guide.pdf \
  --public
```

Enjoy exploring the capabilities of your Conversational AI Chatbot!
- Learn more about [OpenVINO](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html)
- Explore [OpenVINO’s documentation](https://docs.openvino.ai/2024/home.html)

<p align="right"><a href="#top">Back to top ⬆️</a></p>