Merge branch 'main' of https://github.com/microsoft/onnxruntime-genai into baijumeswani/whisper
microsoft · Sep 16, 2024 · 028a792 · 028a792
2 parents a3ddc38 + f852ae8
commit 028a792
Show file tree

Hide file tree

Showing 49 changed files with 869 additions and 184 deletions.
diff --git a/.github/workflows/linux-cpu-arm64-build.yml b/.github/workflows/linux-cpu-arm64-build.yml
@@ -92,4 +92,4 @@ jobs:
         run: |
           docker run --rm \
           --volume $GITHUB_WORKSPACE:/onnxruntime_src \
-          -w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/onnxruntime_src/build/cpu/ /onnxruntime_src/build/cpu/test/unit_tests"
+          -w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/onnxruntime_src/ort/lib/ /onnxruntime_src/build/cpu/test/unit_tests"
diff --git a/.github/workflows/linux-cpu-x64-build.yml b/.github/workflows/linux-cpu-x64-build.yml
@@ -78,29 +78,28 @@ jobs:
           echo "::add-mask::$HF_TOKEN"
           echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV
 
-      - name: Remove the ort lib and header files
+      - name: Verify Build Artifacts
+        if: always()
+        continue-on-error: true
         run: |
-          rm -rf ort
+          ls -l ${{ github.workspace }}/build/cpu
 
       # This will also download all the test models to the test/test_models directory
       # These models are used by the python tests as well as C#, C++ and others.
       - name: Run the python tests
         run: |
+          export ORTGENAI_LOG_ORT_LIB=1
           python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models
 
       - name: Build the C# API and Run the C# Tests
         run: |
+          export ORTGENAI_LOG_ORT_LIB=1
           cd test/csharp
-          dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/"
-
-      - name: Verify Build Artifacts
-        if: always()
-        continue-on-error: true
-        run: |
-          ls -l ${{ github.workspace }}/build/cpu
+          dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/" /p:OrtLibDir="../../ort/lib/"
 
       - name: Run tests
         run: |
           set -e -x
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GITHUB_WORKSPACE/build/cpu/
+          export ORTGENAI_LOG_ORT_LIB=1
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GITHUB_WORKSPACE/ort/lib
           ./build/cpu/test/unit_tests
diff --git a/.github/workflows/linux-gpu-x64-build.yml b/.github/workflows/linux-gpu-x64-build.yml
@@ -116,10 +116,6 @@ jobs:
           echo "::add-mask::$HF_TOKEN"
           echo "HF_TOKEN=$HF_TOKEN" >> $GITHUB_ENV
 
-      - name: Remove the ort lib and header files
-        run: |
-          rm -rf ort
-
       - name: Install the onnxruntime-genai Python wheel and run python test
         run: |
           echo "Installing the onnxruntime-genai Python wheel and running the Python tests"
@@ -154,4 +150,4 @@ jobs:
             --rm \
             --volume /data/ortgenai_pytorch_models:/data/ortgenai_pytorch_models \
             --volume $GITHUB_WORKSPACE:/ort_genai_src \
-            -w /ort_genai_src onnxruntimecudabuildx64 bash -c "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/ort_genai_src/build/cuda/ /ort_genai_src/build/cuda/test/unit_tests"
+            -w /ort_genai_src onnxruntimecudabuildx64 bash -c "ORTGENAI_LOG_ORT_LIB=1 LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/ort_genai_src/build/cuda/ /ort_genai_src/build/cuda/test/unit_tests"
diff --git a/.github/workflows/mac-cpu-arm64-build.yml b/.github/workflows/mac-cpu-arm64-build.yml
@@ -14,7 +14,7 @@ env:
   ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
 jobs:
   mac-cpu-arm64-build:
-    runs-on: macos-latest
+    runs-on: macos-latest # arm64
     steps:
       - name: Checkout OnnxRuntime GenAI repo
         uses: actions/checkout@v4
@@ -36,26 +36,50 @@ jobs:
           mv ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/
           mv ${{ env.ORT_PACKAGE_NAME }}/runtimes/osx-arm64/native/* ort/lib/
 
-
       - name: Configure CMake
         run: |
-          cmake --preset macos_cpu_release
+          cmake --preset macos_arm64_cpu_release
 
       - name: Build with CMake
         run: |
-          cmake --build --preset macos_cpu_release --parallel
+          cmake --build --preset macos_arm64_cpu_release --parallel
         continue-on-error: false
 
+      - name: Install the python wheel and test dependencies
+        run: |
+          python3 -m venv genai-macos-venv
+          source genai-macos-venv/bin/activate
+          python3 -m pip install -r test/python/requirements.txt
+          python3 -m pip install -r test/python/requirements-macos.txt
+          python3 -m pip install build/cpu/osx-arm64/wheel/onnxruntime_genai*.whl --no-deps
+
+      - name: Remove the ort lib and header files
+        run: |
+          rm -rf ort
+
       - name: Verify Build Artifacts
         if: always()
         continue-on-error: true
         run: |
-          ls -l ${{ github.workspace }}/build
+          ls -l ${{ github.workspace }}/build/cpu/osx-arm64
 
-      - name: Upload Build Artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: onnxruntime-genai-mac-cpu-arm64
-          path: ${{ github.workspace }}/build/**/*.a
+      # This will also download all the test models to the test/test_models directory
+      # These models are used by the python tests as well as C#, C++ and others.
+      - name: Run the python tests
+        run: |
+          source genai-macos-venv/bin/activate
+          export ORTGENAI_LOG_ORT_LIB=1
+          python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models
 
+      - name: Build the C# API and Run the C# Tests
+        run: |
+          export ORTGENAI_LOG_ORT_LIB=1
+          cd test/csharp
+          dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/osx-arm64"
 
+      - name: Run tests
+        run: |
+          set -e -x
+          export ORTGENAI_LOG_ORT_LIB=1
+          export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:$GITHUB_WORKSPACE/build/cpu/osx-arm64
+          ./build/cpu/osx-arm64/test/unit_tests
diff --git a/.github/workflows/win-cpu-arm64-build.yml b/.github/workflows/win-cpu-arm64-build.yml
@@ -64,7 +64,7 @@ jobs:
     - name: Build the C# API and Run the C# Tests
       run: |
         cd test\csharp
-        dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release"
+        dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib"
 
     - name: Install the Python Wheel and Test Dependencies
       run: |
@@ -85,4 +85,5 @@ jobs:
 
     - name: Run tests
       run: |
+        copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\test\Release
         & .\$env:binaryDir\test\Release\unit_tests.exe
diff --git a/.github/workflows/win-cpu-x64-build.yml b/.github/workflows/win-cpu-x64-build.yml
@@ -92,7 +92,7 @@ jobs:
     - name: Build the C# API and Run the C# Tests
       run: |
         cd test\csharp
-        dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release"
+        dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib"
 
     - name: Verify Build Artifacts
       if: always()
@@ -103,6 +103,7 @@ jobs:
 
     - name: Run tests
       run: |
+        copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\test\Release
         & .\$env:binaryDir\test\Release\unit_tests.exe
 
     - name: Perform CodeQL Analysis

diff --git a/.github/workflows/win-cuda-x64-build.yml b/.github/workflows/win-cuda-x64-build.yml
@@ -76,7 +76,7 @@ jobs:
     - name: Build the C# API and Run the C# Tests
       run: |
         cd test\csharp
-        dotnet test /p:Configuration=release /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release"
+        dotnet test /p:Configuration=release /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib"
 
     - name: Install the Python Wheel and Test Dependencies
       run: |
@@ -106,4 +106,5 @@ jobs:
       run: |
         $env:PATH = "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin;" + $env:PATH 
         echo "Current PATH variable is: $env:PATH" 
+        copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\test\Release
         & .\$env:binaryDir\test\Release\unit_tests.exe
diff --git a/.github/workflows/win-directml-x64-build.yml b/.github/workflows/win-directml-x64-build.yml
@@ -14,16 +14,14 @@ concurrency:
 env:
   AZCOPY_AUTO_LOGIN_TYPE: MSI
   AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
-  ort_dir: "Microsoft.ML.OnnxRuntime.DirectML.1.17.3"
-  ort_zip: "Microsoft.ML.OnnxRuntime.DirectML.1.17.3.zip"
-  # TODO: Update with nightly ORT-DML build
-  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/Microsoft.ML.OnnxRuntime.DirectML.1.17.3.zip"
-  dml_dir: "Microsoft.AI.DirectML.1.15.1"
-  dml_zip: "Microsoft.AI.DirectML.1.15.1.zip"
-  dml_url: "https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.15.1"
-  d3d12_dir: "Microsoft.Direct3D.D3D12.1.614.0"
-  d3d12_zip: "Microsoft.Direct3D.D3D12.1.614.0.zip"
-  d3d12_url: "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/1.614.0"
+  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
+  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime.DirectML"
+  dml_dir: "Microsoft.AI.DirectML.1.15.2"
+  dml_zip: "Microsoft.AI.DirectML.1.15.2.zip"
+  dml_url: "https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.15.2"
+  d3d12_dir: "Microsoft.Direct3D.D3D12.1.614.1"
+  d3d12_zip: "Microsoft.Direct3D.D3D12.1.614.1.zip"
+  d3d12_url: "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/1.614.1"
   binaryDir: 'build/directml/win-x64'
 
 
@@ -41,9 +39,14 @@ jobs:
         python-version: '3.11.x'
         architecture: 'x64'
 
-    - name: Download OnnxRuntime
+    - name: Download OnnxRuntime Nightly
+      shell: pwsh
       run: |
-        Invoke-WebRequest -Uri $env:ort_url -OutFile $env:ort_zip
+        $resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
+        $ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
+        Write-Host "$ORT_NIGHTLY_VERSION"
+        "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
+        nuget install ${{ env.ORT_PACKAGE_NAME }} -version $ORT_NIGHTLY_VERSION -x -NonInteractive
 
     - name: Download DirectML
       run: |
@@ -53,10 +56,11 @@ jobs:
       run: |
         Invoke-WebRequest -Uri $env:d3d12_url -OutFile $env:d3d12_zip
 
-    - name: Unzip OnnxRuntime
+    - name: Extract OnnxRuntime library and header files
       run: |
-        Expand-Archive $env:ort_zip -DestinationPath $env:ort_dir
-        Remove-Item -Path $env:ort_zip
+        mkdir ort/lib
+        move ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/
+        move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-x64/native/* ort/lib/
 
     - name: Unzip DirectML
       run: |
@@ -70,14 +74,8 @@ jobs:
 
     - name: Move the files to the ort directory
       run: |
-        mkdir ort/lib
-        mkdir ort/include
-        mv $env:ort_dir\runtimes\win-x64\native\onnxruntime.dll ort\lib
-        mv $env:ort_dir\runtimes\win-x64\native\onnxruntime.lib ort\lib
         mv $env:dml_dir\bin\x64-win\DirectML.dll ort\lib
         mv $env:d3d12_dir\build\native\bin\x64\D3D12Core.dll ort\lib
-        mv $env:ort_dir\build\native\include\dml_provider_factory.h ort\include
-        mv $env:ort_dir\build\native\include\onnxruntime_c_api.h ort\include
         mv $env:dml_dir\include\DirectML.h ort\include
 
     - name: Configure CMake
@@ -102,4 +100,5 @@ jobs:
 
     - name: Run tests
       run: |
+        copy $env:GITHUB_WORKSPACE\ort\lib\* $env:GITHUB_WORKSPACE\$env:binaryDir\test\Release
         & .\$env:binaryDir\test\Release\unit_tests.exe
diff --git a/.pipelines/nuget-publishing.yml b/.pipelines/nuget-publishing.yml
@@ -33,6 +33,11 @@ parameters:
   type: boolean
   default: true
 
+- name: enable_macos_cpu
+  displayName: 'Whether MacOS CPU package is built.'
+  type: boolean
+  default: true
+
 - name: ort_version
   displayName: 'OnnxRuntime version'
   type: string
@@ -83,6 +88,7 @@ stages:
     enable_linux_cuda: ${{ parameters.enable_linux_cuda }}
     enable_win_dml: ${{ parameters.enable_win_dml }}
     enable_win_arm64: ${{ parameters.enable_win_arm64 }}
+    enable_macos_cpu: ${{ parameters.enable_macos_cpu }}
     ort_version: ${{ parameters.ort_version }}
     ort_cuda_version: ${{ parameters.ort_cuda_version }}
     ort_dml_version: ${{ parameters.ort_dml_version }}
@@ -97,6 +103,7 @@ stages:
     enable_linux_cuda: ${{ parameters.enable_linux_cuda }}
     enable_win_dml: ${{ parameters.enable_win_dml }}
     enable_win_arm64: ${{ parameters.enable_win_arm64 }}
+    enable_macos_cpu: ${{ parameters.enable_macos_cpu }}
     ort_version: ${{ parameters.ort_version }}
     ort_cuda_version: ${{ parameters.ort_cuda_version }}
     ort_dml_version: ${{ parameters.ort_dml_version }}
@@ -111,6 +118,7 @@ stages:
       enable_linux_cuda: ${{ parameters.enable_linux_cuda }}
       enable_win_dml: ${{ parameters.enable_win_dml }}
       enable_win_arm64: ${{ parameters.enable_win_arm64 }}
+      enable_macos_cpu: ${{ parameters.enable_macos_cpu }}
       ort_version: ${{ parameters.ort_version }}
       ort_cuda_version: ${{ parameters.ort_cuda_version }}
       ort_dml_version: ${{ parameters.ort_dml_version }}

diff --git a/.pipelines/pypl-publishing.yml b/.pipelines/pypl-publishing.yml
@@ -39,6 +39,11 @@ parameters:
   type: boolean
   default: true
 
+- name: enable_macos_cpu
+  displayName: 'Whether MacOS CPU package is built.'
+  type: boolean
+  default: true
+
 - name: ort_version
   displayName: 'OnnxRuntime version'
   type: string
@@ -97,6 +102,7 @@ stages:
     enable_win_cuda: ${{ parameters.enable_win_cuda }}
     enable_win_dml: ${{ parameters.enable_win_dml }}
     enable_win_arm64_cpu: ${{ parameters.enable_win_arm64_cpu }}
+    enable_macos_cpu: ${{ parameters.enable_macos_cpu }}
     ort_version: ${{ parameters.ort_version }}
     ort_cuda_118_version: ${{ parameters.ort_cuda_118_version }}
     ort_cuda_122_version: ${{ parameters.ort_cuda_122_version }}
@@ -113,6 +119,7 @@ stages:
       enable_win_cuda: ${{ parameters.enable_win_cuda }}
       enable_win_dml: ${{ parameters.enable_win_dml }}
       enable_win_arm64_cpu: ${{ parameters.enable_win_arm64_cpu }}
+      enable_macos_cpu: ${{ parameters.enable_macos_cpu }}
       ort_version: ${{ parameters.ort_version }}
       ort_cuda_118_version: ${{ parameters.ort_cuda_118_version }}
       ort_cuda_122_version: ${{ parameters.ort_cuda_122_version }}

diff --git a/.pipelines/stages/capi-packaging-stage.yml b/.pipelines/stages/capi-packaging-stage.yml
@@ -11,6 +11,8 @@ parameters:
   type: boolean
 - name: enable_linux_cuda
   type: boolean
+- name: enable_macos_cpu
+  type: boolean
 - name: ort_version
   type: string
 - name: ort_cuda_version
@@ -91,4 +93,21 @@ stages:
         ep: 'cuda'
         ort_version: ${{ parameters.ort_cuda_version }}
         os: 'linux'
-        build_config: ${{ parameters.build_config }}
+        build_config: ${{ parameters.build_config }}
+
+  - ${{ if eq(parameters.enable_macos_cpu, true) }}:
+    - template: jobs/capi-packaging-job.yml
+      parameters:
+        arch: 'x64'
+        ep: 'cpu'
+        ort_version: ${{ parameters.ort_version }}
+        os: 'osx'
+        build_config: ${{ parameters.build_config }}
+
+    - template: jobs/capi-packaging-job.yml
+      parameters:
+        arch: 'arm64'
+        ep: 'cpu'
+        ort_version: ${{ parameters.ort_version }}
+        os: 'osx'
+        build_config: ${{ parameters.build_config }}
diff --git a/.pipelines/stages/jobs/capi-packaging-job.yml b/.pipelines/stages/jobs/capi-packaging-job.yml
@@ -13,6 +13,7 @@ parameters:
   values:
   - 'linux'
   - 'win'
+  - 'osx'
 - name: build_config
   type: string
   default: 'release'
@@ -33,6 +34,10 @@ jobs:
       pool: 'onnxruntime-genai-windows-vs-2022-arm64'
     ${{ else }}:
       pool: 'onnxruntime-Win-CPU-2022'
+  ${{ if eq(parameters.os, 'osx') }}:
+    pool:
+      vmImage: 'macOS-latest'
+
   timeoutInMinutes: 180
   #  set variables here to be used in the template and steps
   variables:
@@ -130,4 +135,12 @@ jobs:
         ep: ${{ parameters.ep }}
         build_config: ${{ parameters.build_config }}
 
+  - ${{ if eq(parameters.os, 'osx') }}:
+    - template: steps/capi-macos-step.yml
+      parameters:
+        target: 'onnxruntime-genai'
+        arch: ${{ parameters.arch }}
+        ep: ${{ parameters.ep }}
+        build_config: ${{ parameters.build_config }}
+
   - template: steps/compliant-and-cleanup-step.yml