diff --git a/examples/c/.gitignore b/examples/c/.gitignore index 8bcf73af8..6f8fabd2e 100644 --- a/examples/c/.gitignore +++ b/examples/c/.gitignore @@ -1,3 +1,9 @@ build include/* lib/* +*.vcxproj +*.vcxproj.filters +*.sln +*.zip +*.tar.gz + diff --git a/examples/c/README.md b/examples/c/README.md index fe4d8952c..866b45145 100644 --- a/examples/c/README.md +++ b/examples/c/README.md @@ -9,45 +9,55 @@ git clone https://github.com/microsoft/onnxruntime-genai.git cd onnxruntime-genai/examples/c ``` -## Download a model +## Phi-3 mini -This example uses the [Phi-3 mini model](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) and the [Phi-3 vision model](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) optimized to run on CPU. You can clone this entire model repository or download individual model variants. To download individual variants, you need to install the HuggingFace CLI. For example: +### Download model + +This example uses the [Phi-3 mini model](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct). + +You can clone this entire model repository or download individual model variants. To download individual variants, you need to install the HuggingFace CLI. ```bash huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir . 
``` -## Install the onnxruntime and onnxruntime-genai binaries +### Install the onnxruntime and onnxruntime-genai binaries -### Windows +#### Windows ``` -curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.18.1/onnxruntime-win-x64-1.18.1.zip -o onnxruntime-win-x64-1.18.1.zip -tar xvf onnxruntime-win-x64-1.18.1.zip -copy onnxruntime-win-x64-1.18.1\include\* include -copy onnxruntime-win-x64-1.18.1\lib\* lib -curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.3.0/onnxruntime-genai-0.3.0-win-x64.zip -o onnxruntime-genai-0.3.0-win-x64.zip -tar xvf onnxruntime-genai-0.3.0-win-x64.zip -copy onnxruntime-genai-0.3.0-win-x64\include\* include -copy onnxruntime-genai-0.3.0-win-x64\lib\* lib +curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.0/onnxruntime-win-x64-1.19.0.zip -o onnxruntime-win-x64-1.19.0.zip +tar xvf onnxruntime-win-x64-1.19.0.zip +copy onnxruntime-win-x64-1.19.0\include\* include +copy onnxruntime-win-x64-1.19.0\lib\* lib +curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-win-cpu-x64-capi.zip -o onnxruntime-genai-win-cpu-x64-capi.zip +tar xvf onnxruntime-genai-win-cpu-x64-capi.zip +cd onnxruntime-genai-win-cpu-x64-capi +tar xvf onnxruntime-genai-0.4.0-win-x64.zip +copy onnxruntime-genai-0.4.0-win-x64\include\* ..\include +copy onnxruntime-genai-0.4.0-win-x64\lib\* ..\lib +cd .. 
``` -### Linux +#### Linux ``` -curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.18.1/onnxruntime-linux-x64-gpu-1.18.1.tgz -o onnxruntime-linux-x64-gpu-1.18.1.tgz -tar xvzf onnxruntime-linux-x64-gpu-1.18.1.tgz -cp onnxruntime-linux-x64-gpu-1.18.1/include/* include -cp onnxruntime-linux-x64-gpu-1.18.1/lib/* lib -curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.3.0/onnxruntime-genai-0.3.0-linux-x64.tar.gz -o onnxruntime-genai-0.3.0-linux-x64.tar.gz -tar xvzf onnxruntime-genai-0.3.0-linux-x64.tar.gz -cp onnxruntime-genai-0.3.0-linux-x64/include/* include -cp onnxruntime-genai-0.3.0-linux-x64/lib/* lib +curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.0/onnxruntime-linux-x64-1.19.0.tgz -o onnxruntime-linux-x64-1.19.0.tgz +tar xvzf onnxruntime-linux-x64-1.19.0.tgz +cp onnxruntime-linux-x64-1.19.0/include/* include +cp onnxruntime-linux-x64-1.19.0/lib/* lib +curl -L https://github.com/microsoft/onnxruntime-genai/releases/download/v0.4.0/onnxruntime-genai-linux-cpu-x64-capi.zip -o onnxruntime-genai-linux-cpu-x64-capi.zip +unzip onnxruntime-genai-linux-cpu-x64-capi.zip +cd onnxruntime-genai-linux-cpu-x64-capi +tar xvzf onnxruntime-genai-0.4.0-linux-x64.tar.gz +cp onnxruntime-genai-0.4.0-linux-x64/include/* ../include +cp onnxruntime-genai-0.4.0-linux-x64/lib/* ../lib +cd .. ``` -## Build this sample +### Build this sample -### Windows +#### Windows ```bash cmake -G "Visual Studio 17 2022" -A x64 -S . -B build @@ -55,7 +65,7 @@ cd build cmake --build . --config Release ``` -### Linux +#### Linux Build with CUDA: @@ -69,25 +79,123 @@ cmake --build . --config Release Build for CPU: ```bash -mkdir build +cmake . -B build cd build -cmake ../ cmake --build . 
--config Release ``` -## Run the sample - -### Run the language model +### Run the sample ```bash cd build\\Release .\phi3.exe path_to_model ``` -### Run the vision model +## Phi-3 vision + +### Download model + +You can use one of the following models for this sample: +* [Phi-3 vision model for CPU](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu) +* [Phi-3 vision model for CUDA](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cuda) +* [Phi-3 vision model for DirectML](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-directml) + +Clone one of the models above. + +### Run on Windows + +#### Install the required headers and binaries + +Change into the onnxruntime-genai folder. + +1. Install onnxruntime + +```cmd +cd examples\c +curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.0/onnxruntime-win-x64-1.19.0.zip -o onnxruntime-win-x64-1.19.0.zip +tar xvf onnxruntime-win-x64-1.19.0.zip +copy onnxruntime-win-x64-1.19.0\include\* include +copy onnxruntime-win-x64-1.19.0\lib\* lib +``` + +2. Install onnxruntime-genai + +This example requires onnxruntime-genai to be built from source. + +```cmd +cd ..\.. +python build.py --config Release --ort_home examples\c +copy src\ort_genai.h examples\c\include +copy src\ort_genai_c.h examples\c\include +copy build\Windows\Release\Release\*.dll examples\c\lib +cd examples\c +``` + +#### Build this sample + +```bash +cmake -G "Visual Studio 17 2022" -A x64 -S . -B build +cd build +cmake --build . --config Release +``` + +#### Run the sample ```bash cd build\\Release .\phi3v.exe path_to_model ``` +### Run on Linux + +#### Install the required headers and binaries + +Change into the onnxruntime-genai directory. + +1. 
Install onnxruntime + +```bash +cd examples/c +curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.19.0/onnxruntime-linux-x64-1.19.0.tgz -o onnxruntime-linux-x64-1.19.0.tgz +tar xvzf onnxruntime-linux-x64-1.19.0.tgz +cp onnxruntime-linux-x64-1.19.0/include/* include +cp onnxruntime-linux-x64-1.19.0/lib/* lib +cd ../.. +``` + +2. Build onnxruntime-genai from source and install + +```bash +# This should be run from the root of the onnxruntime-genai folder +python build.py --config Release --ort_home examples/c +cp src/ort_genai.h examples/c/include +cp src/ort_genai_c.h examples/c/include +cp build/Linux/release/libonnxruntime-genai.so examples/c/lib +cd examples/c +``` + +#### Build this sample + +Build to run with CUDA: + +```bash +cmake . -B build -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=80 -DUSE_CUDA=ON +cd build +cmake --build . --config Release +``` + +Build for CPU: + +```bash +cmake . -B build +cd build +cmake --build . --config Release +``` + +#### Run the sample + +```bash +cd build/Release +./phi3v path_to_model +``` + diff --git a/examples/c/src/main.cpp b/examples/c/src/main.cpp index 5017108e6..669dedebc 100644 --- a/examples/c/src/main.cpp +++ b/examples/c/src/main.cpp @@ -35,6 +35,19 @@ void CXX_API(const char* model_path) { generator->ComputeLogits(); generator->GenerateNextToken(); + // Show usage of GetOutput + std::unique_ptr<OgaTensor> output_logits = generator->GetOutput("logits"); + + // Assuming output_logits.Type() is float as it's logits + // Assuming shape is 1 dimensional with shape[0] being the size + auto logits = reinterpret_cast<float*>(output_logits->Data()); + + // Print out the logits using the following snippet, if needed + //auto shape = output_logits->Shape(); + //for (size_t i=0; i < shape[0]; i++) + // std::cout << logits[i] << " "; + //std::cout << std::endl; + + const auto num_tokens = generator->GetSequenceCount(0); const auto new_token = generator->GetSequenceData(0)[num_tokens - 1]; 
std::cout << tokenizer_stream->Decode(new_token) << std::flush; diff --git a/examples/csharp/Genny/Genny/Genny.csproj b/examples/csharp/Genny/Genny/Genny.csproj index 1aad829e8..357e5190e 100644 --- a/examples/csharp/Genny/Genny/Genny.csproj +++ b/examples/csharp/Genny/Genny/Genny.csproj @@ -13,9 +13,9 @@ - - - + + + diff --git a/examples/csharp/HelloPhi/HelloPhi.csproj b/examples/csharp/HelloPhi/HelloPhi.csproj index 083a2d89e..3cc34b8e9 100644 --- a/examples/csharp/HelloPhi/HelloPhi.csproj +++ b/examples/csharp/HelloPhi/HelloPhi.csproj @@ -6,12 +6,13 @@ enable enable Debug;Release;Debug_Cuda;Release_Cuda;Debug_DirectML;Release_DirectML + AnyCPU;x64 - - - + + + diff --git a/examples/csharp/HelloPhi/HelloPhi.sln b/examples/csharp/HelloPhi/HelloPhi.sln index ed4468ddc..2caee7ca4 100644 --- a/examples/csharp/HelloPhi/HelloPhi.sln +++ b/examples/csharp/HelloPhi/HelloPhi.sln @@ -8,25 +8,43 @@ EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug_Cuda|Any CPU = Debug_Cuda|Any CPU + Debug_Cuda|x64 = Debug_Cuda|x64 Debug_DirectML|Any CPU = Debug_DirectML|Any CPU + Debug_DirectML|x64 = Debug_DirectML|x64 Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 Release_Cuda|Any CPU = Release_Cuda|Any CPU + Release_Cuda|x64 = Release_Cuda|x64 Release_DirectML|Any CPU = Release_DirectML|Any CPU + Release_DirectML|x64 = Release_DirectML|x64 Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_Cuda|Any CPU.ActiveCfg = Debug_Cuda|Any CPU {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_Cuda|Any CPU.Build.0 = Debug_Cuda|Any CPU + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_Cuda|x64.ActiveCfg = Debug_Cuda|x64 + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_Cuda|x64.Build.0 = Debug_Cuda|x64 {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_DirectML|Any CPU.ActiveCfg = Debug_DirectML|Any CPU 
{89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_DirectML|Any CPU.Build.0 = Debug_DirectML|Any CPU + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_DirectML|x64.ActiveCfg = Debug_DirectML|x64 + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug_DirectML|x64.Build.0 = Debug_DirectML|x64 {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug|x64.ActiveCfg = Debug|x64 + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Debug|x64.Build.0 = Debug|x64 {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_Cuda|Any CPU.ActiveCfg = Release_Cuda|Any CPU {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_Cuda|Any CPU.Build.0 = Release_Cuda|Any CPU + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_Cuda|x64.ActiveCfg = Release_Cuda|x64 + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_Cuda|x64.Build.0 = Release_Cuda|x64 {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_DirectML|Any CPU.ActiveCfg = Release_DirectML|Any CPU {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_DirectML|Any CPU.Build.0 = Release_DirectML|Any CPU + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_DirectML|x64.ActiveCfg = Release_DirectML|x64 + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release_DirectML|x64.Build.0 = Release_DirectML|x64 {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release|Any CPU.ActiveCfg = Release|Any CPU {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release|Any CPU.Build.0 = Release|Any CPU + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release|x64.ActiveCfg = Release|x64 + {89932021-18FC-490C-8675-73F2AD1DEB2A}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE