Cherry Pick v4 (#801)
Cherry pick PRs and update version to 0.4.0
ajindal1 authored Aug 21, 2024
1 parent a61454c commit b77e768
Showing 26 changed files with 373 additions and 162 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/linux-cpu-arm64-build.yml
@@ -72,7 +72,7 @@ jobs:
--container-registry onnxruntimebuildcache \
--repository ort_genai_linux_arm64_gha
- - name: Doker -- Configure with CMake and GCC
+ - name: Docker -- Configure with CMake and GCC
run: |
docker run --rm \
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
@@ -84,7 +84,7 @@
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "/usr/bin/cmake --build --preset linux_gcc_cpu_release"
- - name: Dokcer -- check test directory
+ - name: Docker -- Check test directory
run: |
docker run --rm \
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
4 changes: 3 additions & 1 deletion .github/workflows/linux-gpu-x64-build.yml
@@ -129,19 +129,21 @@ jobs:
docker run \
--gpus all \
--rm \
+ --volume /data/ortgenai_pytorch_models:/data/ortgenai_pytorch_models \
--volume $GITHUB_WORKSPACE:/ort_genai_src \
-e HF_TOKEN=$HF_TOKEN \
-w /ort_genai_src onnxruntimecudabuildx64 bash -c " \
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements.txt --user && \
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements-cuda.txt --user && \
${{ env.PYTHON_EXECUTABLE }} -m pip install /ort_genai_src/build/cuda/wheel/onnxruntime_genai*manylinux*.whl --user && \
- ${{ env.PYTHON_EXECUTABLE }} test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models"
+ ${{ env.PYTHON_EXECUTABLE }} test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e"
- name: Docker -- Run unit tests
run: |
echo "Running docker image onnxruntimecudabuildx64"
docker run \
--gpus all \
--rm \
+ --volume /data/ortgenai_pytorch_models:/data/ortgenai_pytorch_models \
--volume $GITHUB_WORKSPACE:/ort_genai_src \
-w /ort_genai_src onnxruntimecudabuildx64 bash -c "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/ort_genai_src/build/cuda/ /ort_genai_src/build/cuda/test/unit_tests"
2 changes: 0 additions & 2 deletions .github/workflows/win-cpu-x64-build.yml
@@ -94,8 +94,6 @@ jobs:
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Verify Build Artifacts
if: always()
continue-on-error: true
3 changes: 1 addition & 2 deletions .github/workflows/win-cuda-x64-build.yml
@@ -93,8 +93,7 @@ jobs:
- name: Run the Python Tests
run: |
- python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
+ python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e
- name: Verify Build Artifacts
if: always()
21 changes: 11 additions & 10 deletions .pipelines/stages/jobs/nuget-validation-job.yml
@@ -116,15 +116,16 @@ jobs:
inputs:
version: '8.x'

- - template: steps/utils/download-huggingface-model.yml
- parameters:
- StepName: 'Download Model from HuggingFace'
- HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
- RepoFolder: $(prebuild_phi3_mini_model_folder)
- LocalFolder: 'models'
- WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/HelloPhi'
- HuggingFaceToken: $(HF_TOKEN)
- os: ${{ parameters.os }}
+ - ${{ if ne(parameters.arch, 'arm64') }}:
+ - template: steps/utils/download-huggingface-model.yml
+ parameters:
+ StepName: 'Download Model from HuggingFace'
+ HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
+ RepoFolder: $(prebuild_phi3_mini_model_folder)
+ LocalFolder: 'models'
+ WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/HelloPhi'
+ HuggingFaceToken: $(HF_TOKEN)
+ os: ${{ parameters.os }}

- template: steps/utils//flex-download-pipeline-artifact.yml
parameters:
@@ -134,7 +135,7 @@
SpecificArtifact: ${{ parameters.specificArtifact }}
BuildId: ${{ parameters.BuildId }}

- - ${{ if eq(parameters.os, 'win') }}:
+ - ${{ if and(eq(parameters.os, 'win'), ne(parameters.arch, 'arm64')) }}:
- ${{ if eq(parameters.ep, 'cuda') }}:
- powershell: |
$env:AZCOPY_MSI_CLIENT_ID = "63b63039-6328-442f-954b-5a64d124e5b4";
27 changes: 14 additions & 13 deletions .pipelines/stages/jobs/py-validation-job.yml
@@ -164,15 +164,16 @@ jobs:
SpecificArtifact: ${{ parameters.specificArtifact }}
BuildId: ${{ parameters.BuildId }}

- - template: steps/utils/download-huggingface-model.yml
- parameters:
- StepName: 'Download Model from HuggingFace'
- HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
- RepoFolder: $(prebuild_phi3_mini_model_folder)
- LocalFolder: 'models'
- WorkingDirectory: '$(Build.Repository.LocalPath)/examples/python'
- HuggingFaceToken: $(HF_TOKEN)
- os: ${{ parameters.os }}
+ - ${{ if ne(parameters.arch, 'arm64') }}:
+ - template: steps/utils/download-huggingface-model.yml
+ parameters:
+ StepName: 'Download Model from HuggingFace'
+ HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
+ RepoFolder: $(prebuild_phi3_mini_model_folder)
+ LocalFolder: 'models'
+ WorkingDirectory: '$(Build.Repository.LocalPath)/examples/python'
+ HuggingFaceToken: $(HF_TOKEN)
+ os: ${{ parameters.os }}

- ${{ if eq(parameters.os, 'linux') }}:
- ${{ if eq(parameters.ep, 'cuda') }}:
@@ -195,7 +196,7 @@
$python_exe -m pip install -r /ort_genai_src/test/python/requirements.txt && \
$python_exe -m pip install -r /ort_genai_src/test/python/requirements-cuda.txt && \
cd /ort_genai_src/examples/python && \
- $python_exe -m pip install --no-index --find-links=/ort_genai_binary/wheel $(pip_package_name) && \
+ $python_exe -m pip install --find-links=/ort_genai_binary/wheel $(pip_package_name) && \
$python_exe model-generate.py -m ./models/$(prebuild_phi3_mini_model_folder) --min_length 25 --max_length 50 --verbose"
displayName: 'Run Example With Artifact'
@@ -206,12 +207,12 @@
python -m pip install -r test/python/requirements.txt
python -m pip install -r test/python/requirements-cpu.txt
cd examples/python
- python -m pip install --no-index --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
+ python -m pip install --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
python model-generate.py -m ./models/$(prebuild_phi3_mini_model_folder) --min_length 25 --max_length 50 --verbose
displayName: 'Run Example With Artifact'
workingDirectory: '$(Build.Repository.LocalPath)'
- - ${{ if eq(parameters.os, 'win') }}:
+ - ${{ if and(eq(parameters.os, 'win'), ne(parameters.arch, 'arm64'), ne(parameters.ep, 'directml')) }}:
- ${{ if eq(parameters.ep, 'cuda') }}:
- powershell: |
$env:AZCOPY_MSI_CLIENT_ID = "63b63039-6328-442f-954b-5a64d124e5b4";
@@ -233,7 +234,7 @@
python -m pip install -r test/python/requirements-cpu.txt
}
cd examples\python
- python -m pip install --no-index --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
+ python -m pip install --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
python model-generate.py -m .\models\$(prebuild_phi3_mini_model_folder) --min_length 25 --max_length 50 --verbose
displayName: 'Run Example With Artifact'
2 changes: 1 addition & 1 deletion VERSION_INFO
@@ -1 +1 @@
- 0.4.0-rc1
+ 0.4.0
8 changes: 7 additions & 1 deletion cmake/global_variables.cmake
@@ -13,7 +13,13 @@ set(VERSION_INFO ${ver})
# VERSION_PATCH: 0
string(REPLACE "-" ";" VERSION_LIST ${VERSION_INFO})
list(GET VERSION_LIST 0 VERSION_STR)
- list(GET VERSION_LIST 1 VERSION_SUFFIX)
+ # Check if it is a stable or dev version
+ list(LENGTH VERSION_LIST VERSION_LIST_LENGTH)
+ if(VERSION_LIST_LENGTH GREATER 1)
+ list(GET VERSION_LIST 1 VERSION_SUFFIX)
+ else()
+ set(VERSION_SUFFIX "") # Set VERSION_SUFFIX to empty if stable version
+ endif()
string(REPLACE "." ";" VERSION_LIST ${VERSION_STR})
list(GET VERSION_LIST 0 VERSION_MAJOR)
list(GET VERSION_LIST 1 VERSION_MINOR)
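The guarded split makes the pre-release suffix optional: "0.4.0-rc1" still yields VERSION_STR=0.4.0 and VERSION_SUFFIX=rc1, while the new stable "0.4.0" now parses with an empty suffix instead of failing the list(GET ... 1 ...) call (Microsoft.ML.OnnxRuntimeGenAI.csproj later in this diff gets the same guard). A minimal C++ sketch of the equivalent split logic, for illustration only; it is not part of the commit:

```cpp
#include <iostream>
#include <string>

int main() {
  // Mirrors the CMake logic above: split the version string on '-';
  // the suffix is empty for stable versions such as "0.4.0".
  for (const std::string version : {"0.4.0-rc1", "0.4.0"}) {
    const auto dash = version.find('-');
    const std::string version_str = version.substr(0, dash);
    const std::string version_suffix =
        dash == std::string::npos ? "" : version.substr(dash + 1);
    std::cout << version_str << " suffix='" << version_suffix << "'\n";
  }
  return 0;
}
```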
6 changes: 3 additions & 3 deletions examples/csharp/HelloPhi/HelloPhi.csproj
@@ -9,9 +9,9 @@
</PropertyGroup>

<ItemGroup>
- <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="[0.4.0-rc1]" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
- <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="[0.4.0-rc1]" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
- <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="[0.4.0-rc1]" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
+ <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="[0.4.0]" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
+ <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="[0.4.0]" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
+ <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="[0.4.0]" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
</ItemGroup>

<ItemGroup>
18 changes: 9 additions & 9 deletions examples/csharp/HelloPhi/Program.cs
@@ -5,18 +5,18 @@ void PrintUsage()
{
Console.WriteLine("Usage:");
Console.WriteLine(" -m model_path");
- Console.WriteLine(" -i (optional): Intereactive mode");
+ Console.WriteLine(" -i (optional): Interactive mode");
}

- OgaHandle ogaHandle = new OgaHandle();
+ using OgaHandle ogaHandle = new OgaHandle();

if (args.Length < 1)
{
PrintUsage();
Environment.Exit(-1);
}

- bool intereactive = false;
+ bool interactive = false;
string modelPath = string.Empty;

uint i = 0;
@@ -25,7 +25,7 @@ void PrintUsage()
var arg = args[i];
if (arg == "-i")
{
- intereactive = true;
+ interactive = true;
}
else if (arg == "-m")
{
@@ -47,13 +47,13 @@ void PrintUsage()
Console.WriteLine("-------------");

Console.WriteLine("Model path: " + modelPath);
- Console.WriteLine("Intereactive: " + intereactive);
+ Console.WriteLine("Interactive: " + interactive);

using Model model = new Model(modelPath);
using Tokenizer tokenizer = new Tokenizer(model);

var option = 2;
- if (intereactive)
+ if (interactive)
{
Console.WriteLine("Please enter option number:");
Console.WriteLine("1. Complete Output");
@@ -64,15 +64,15 @@ void PrintUsage()
do
{
string prompt = "def is_prime(num):"; // Example prompt
- if (intereactive)
+ if (interactive)
{
Console.WriteLine("Prompt:");
prompt = Console.ReadLine();
}
if (string.IsNullOrEmpty(prompt))
{
continue;
- }
+ }
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");

using GeneratorParams generatorParams = new GeneratorParams(model);
Expand All @@ -99,4 +99,4 @@ void PrintUsage()
}
Console.WriteLine();
}
- } while (intereactive);
+ } while (interactive);
3 changes: 3 additions & 0 deletions nuget/MANAGED_PACKAGE.md
@@ -0,0 +1,3 @@
+ ## About
+
+ This package is a dependency of [Microsoft.ML.OnnxRuntimeGenAI](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntimeGenAI) and does not need to be installed directly.
2 changes: 1 addition & 1 deletion nuget/Microsoft.ML.OnnxRuntimeGenAI.Managed.nuspec
@@ -19,7 +19,7 @@
</metadata>
<files>
<file src="..\LICENSE" target="LICENSE" />
- <file src="..\src\csharp\README.md" target="README.md" />
+ <file src="MANAGED_PACKAGE.md" target="README.md" />
<file src="..\ThirdPartyNotices.txt" target="ThirdPartyNotices.txt" />

<file src="..\src\csharp\bin\$configuration$\netstandard2.0\Microsoft.ML.OnnxRuntimeGenAI.dll" target="lib\netstandard2.0" />
File renamed without changes.
7 changes: 6 additions & 1 deletion src/csharp/Microsoft.ML.OnnxRuntimeGenAI.csproj
@@ -36,10 +36,15 @@
<Output TaskParameter="Lines" PropertyName="VersionInfoStr" />
</ReadLinesFromFile>

- <PropertyGroup>
+ <PropertyGroup Condition=" '$(VersionInfoStr.Contains(-))' == 'true' ">
<VersionInfo>$(VersionInfoStr.Split(-)[0])</VersionInfo>
<VersionSuffix>$(VersionInfoStr.Split(-)[1])</VersionSuffix>
</PropertyGroup>

+ <PropertyGroup Condition=" '$(VersionInfoStr.Contains(-))' == 'false' ">
+ <VersionInfo>$(VersionInfoStr)</VersionInfo>
+ <VersionSuffix></VersionSuffix>
+ </PropertyGroup>
</Target>

<Target Name="WriteAssemblyInfo" BeforeTargets="CoreCompile" DependsOnTargets="PrepareForBuild;ReadVersionFromFile">
24 changes: 23 additions & 1 deletion src/csharp/Utils.cs
@@ -7,11 +7,33 @@

namespace Microsoft.ML.OnnxRuntimeGenAI
{
- public class OgaHandle
+ public class OgaHandle: IDisposable
{
+ private bool _disposed = false;

public OgaHandle()
{
}

+ ~OgaHandle()
+ {
+ Dispose(false);
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (_disposed)
+ {
+ return;
+ }
+ NativeMethods.OgaShutdown();
+ _disposed = true;
+ }
}

6 changes: 6 additions & 0 deletions src/ort_genai.h
@@ -232,6 +232,12 @@ struct OgaGenerator : OgaAbstract {
return OgaGenerator_GetSequenceData(this, index);
}

+ std::unique_ptr<OgaTensor> GetOutput(const char* name) {
+ OgaTensor* out;
+ OgaCheckResult(OgaGenerator_GetOutput(this, name, &out));
+ return std::unique_ptr<OgaTensor>(out);
+ }

#if __cplusplus >= 202002L
std::span<const int32_t> GetSequence(size_t index) const {
return {GetSequenceData(index), GetSequenceCount(index)};
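The new wrapper gives C++ callers access to any named model output. A minimal sketch of how it might be used with the 0.4.x generation loop (ComputeLogits/GenerateNextToken); the output name "logits" is model-dependent and the setup mirrors the repository's existing examples, so treat this as an assumption-laden illustration rather than a canonical usage:

```cpp
#include <memory>

#include "ort_genai.h"

// Assumes `model` and `params` were created as in the existing examples
// (OgaModel::Create, OgaGeneratorParams::Create, input ids already set).
void GenerateAndInspect(const OgaModel& model, const OgaGeneratorParams& params) {
  auto generator = OgaGenerator::Create(model, params);
  while (!generator->IsDone()) {
    generator->ComputeLogits();
    generator->GenerateNextToken();
    // New in this commit: fetch a named output. The returned OgaTensor is
    // a CPU-side copy (see the C implementation below), so it remains
    // valid after the next decode step.
    std::unique_ptr<OgaTensor> logits = generator->GetOutput("logits");
  }
}
```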
44 changes: 44 additions & 0 deletions src/ort_genai_c.cpp
@@ -208,6 +208,50 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator) {
OGA_CATCH
}

+ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out) {
+ OGA_TRY
+ auto& generator = *reinterpret_cast<const Generators::Generator*>(oga_generator);
+ auto* ortvalue_output = generator.state_->GetOutput(name);
+ auto type_info = ortvalue_output->GetTensorTypeAndShapeInfo();
+ std::unique_ptr<OrtValue> ortvalue_clone = OrtValue::CreateTensor(generator.model_->allocator_cpu_,
+ type_info->GetShape(),
+ type_info->GetElementType());
+ // Copy data to ortvalue_clone
+ auto element_size = Generators::SizeOf(type_info->GetElementType());
+ auto data_size = type_info->GetElementCount() * element_size;
+ if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) {
+ #if USE_CUDA
+ cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost);
+ #endif
+ } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) {
+ #if USE_DML
+ ComPtr<ID3D12Resource> gpu_resource;
+ Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation(
+ generator.model_->allocator_device_,
+ ortvalue_output->GetTensorMutableRawData(),
+ &gpu_resource));
+ auto cpu_tensor = ortvalue_clone->GetTensorMutableRawData();
+ generator.model_->GetDmlReadbackHeap()->ReadbackFromGpu(
+ std::span(reinterpret_cast<uint8_t*>(cpu_tensor), data_size),
+ gpu_resource.Get(),
+ 0,
+ D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+ #endif
+ } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) {
+ std::copy(static_cast<uint8_t*>(ortvalue_output->GetTensorMutableRawData()),
+ static_cast<uint8_t*>(ortvalue_output->GetTensorMutableRawData()) + data_size,
+ static_cast<uint8_t*>(ortvalue_clone->GetTensorMutableRawData()));
+ } else {
+ throw std::runtime_error("Unsupported Device type: " + ortvalue_output->GetTensorMemoryInfo().GetDeviceType());
+ }
+
+ auto tensor = std::make_shared<Generators::Tensor>(std::move(ortvalue_clone));
+ tensor->external_owner_ = tensor;
+ *out = reinterpret_cast<OgaTensor*>(tensor.get());
+ return nullptr;
+ OGA_CATCH
+ }

size_t OGA_API_CALL OgaGenerator_GetSequenceCount(const OgaGenerator* oga_generator, size_t index) {
auto& generator = *reinterpret_cast<const Generators::Generator*>(oga_generator);
return generator.GetSequence(static_cast<int>(index)).GetCPU().size();
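Note the design: the output is always cloned into a CPU tensor (cudaMemcpy for CUDA, the DML readback heap for DirectML, std::copy for CPU), so the caller owns a stable snapshot rather than a view into device memory. A hedged sketch of calling the new entry point through the raw C API follows; the error and teardown helpers (OgaResultGetError, OgaDestroyResult, OgaDestroyTensor) are assumed to follow the existing ort_genai_c.h naming conventions and should be verified against the header:

```cpp
#include <cstdio>

#include "ort_genai_c.h"

// Assumes `generator` was created via the C API and has computed logits
// at least once, so the named output exists.
void PrintOutputStatus(OgaGenerator* generator) {
  OgaTensor* out = nullptr;
  OgaResult* result = OgaGenerator_GetOutput(generator, "logits", &out);
  if (result != nullptr) {
    std::printf("GetOutput failed: %s\n", OgaResultGetError(result));
    OgaDestroyResult(result);
    return;
  }
  // ... read the CPU-side copy through the OgaTensor accessors ...
  OgaDestroyTensor(out);
}
```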