Skip to content

Commit b77e768

Browse files
authored
Cherry Pick v4 (#801)
Cherry pick PRs and update version to 0.4.0
1 parent a61454c commit b77e768

26 files changed

+373
-162
lines changed

.github/workflows/linux-cpu-arm64-build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ jobs:
7272
--container-registry onnxruntimebuildcache \
7373
--repository ort_genai_linux_arm64_gha
7474
75-
- name: Doker -- Configure with CMake and GCC
75+
- name: Docker -- Configure with CMake and GCC
7676
run: |
7777
docker run --rm \
7878
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
@@ -84,7 +84,7 @@ jobs:
8484
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
8585
-w /onnxruntime_src ort_genai_linux_arm64_gha bash -c "/usr/bin/cmake --build --preset linux_gcc_cpu_release"
8686
87-
- name: Dokcer -- check test directory
87+
- name: Docker -- Check test directory
8888
run: |
8989
docker run --rm \
9090
--volume $GITHUB_WORKSPACE:/onnxruntime_src \

.github/workflows/linux-gpu-x64-build.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,19 +129,21 @@ jobs:
129129
docker run \
130130
--gpus all \
131131
--rm \
132+
--volume /data/ortgenai_pytorch_models:/data/ortgenai_pytorch_models \
132133
--volume $GITHUB_WORKSPACE:/ort_genai_src \
133134
-e HF_TOKEN=$HF_TOKEN \
134135
-w /ort_genai_src onnxruntimecudabuildx64 bash -c " \
135136
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements.txt --user && \
136137
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements-cuda.txt --user && \
137138
${{ env.PYTHON_EXECUTABLE }} -m pip install /ort_genai_src/build/cuda/wheel/onnxruntime_genai*manylinux*.whl --user && \
138-
${{ env.PYTHON_EXECUTABLE }} test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models"
139+
${{ env.PYTHON_EXECUTABLE }} test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e"
139140
140141
- name: Docker -- Run unit tests
141142
run: |
142143
echo "Running docker image onnxruntimecudabuildx64"
143144
docker run \
144145
--gpus all \
145146
--rm \
147+
--volume /data/ortgenai_pytorch_models:/data/ortgenai_pytorch_models \
146148
--volume $GITHUB_WORKSPACE:/ort_genai_src \
147149
-w /ort_genai_src onnxruntimecudabuildx64 bash -c "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/ort_genai_src/build/cuda/ /ort_genai_src/build/cuda/test/unit_tests"

.github/workflows/win-cpu-x64-build.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,6 @@ jobs:
9494
run: |
9595
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
9696
97-
98-
9997
- name: Verify Build Artifacts
10098
if: always()
10199
continue-on-error: true

.github/workflows/win-cuda-x64-build.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ jobs:
9393
9494
- name: Run the Python Tests
9595
run: |
96-
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
97-
96+
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e
9897
9998
- name: Verify Build Artifacts
10099
if: always()

.pipelines/stages/jobs/nuget-validation-job.yml

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,16 @@ jobs:
116116
inputs:
117117
version: '8.x'
118118

119-
- template: steps/utils/download-huggingface-model.yml
120-
parameters:
121-
StepName: 'Download Model from HuggingFace'
122-
HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
123-
RepoFolder: $(prebuild_phi3_mini_model_folder)
124-
LocalFolder: 'models'
125-
WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/HelloPhi'
126-
HuggingFaceToken: $(HF_TOKEN)
127-
os: ${{ parameters.os }}
119+
- ${{ if ne(parameters.arch, 'arm64') }}:
120+
- template: steps/utils/download-huggingface-model.yml
121+
parameters:
122+
StepName: 'Download Model from HuggingFace'
123+
HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
124+
RepoFolder: $(prebuild_phi3_mini_model_folder)
125+
LocalFolder: 'models'
126+
WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/HelloPhi'
127+
HuggingFaceToken: $(HF_TOKEN)
128+
os: ${{ parameters.os }}
128129

129130
- template: steps/utils//flex-download-pipeline-artifact.yml
130131
parameters:
@@ -134,7 +135,7 @@ jobs:
134135
SpecificArtifact: ${{ parameters.specificArtifact }}
135136
BuildId: ${{ parameters.BuildId }}
136137

137-
- ${{ if eq(parameters.os, 'win') }}:
138+
- ${{ if and(eq(parameters.os, 'win'), ne(parameters.arch, 'arm64')) }}:
138139
- ${{ if eq(parameters.ep, 'cuda') }}:
139140
- powershell: |
140141
$env:AZCOPY_MSI_CLIENT_ID = "63b63039-6328-442f-954b-5a64d124e5b4";

.pipelines/stages/jobs/py-validation-job.yml

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,16 @@ jobs:
164164
SpecificArtifact: ${{ parameters.specificArtifact }}
165165
BuildId: ${{ parameters.BuildId }}
166166

167-
- template: steps/utils/download-huggingface-model.yml
168-
parameters:
169-
StepName: 'Download Model from HuggingFace'
170-
HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
171-
RepoFolder: $(prebuild_phi3_mini_model_folder)
172-
LocalFolder: 'models'
173-
WorkingDirectory: '$(Build.Repository.LocalPath)/examples/python'
174-
HuggingFaceToken: $(HF_TOKEN)
175-
os: ${{ parameters.os }}
167+
- ${{ if ne(parameters.arch, 'arm64') }}:
168+
- template: steps/utils/download-huggingface-model.yml
169+
parameters:
170+
StepName: 'Download Model from HuggingFace'
171+
HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx'
172+
RepoFolder: $(prebuild_phi3_mini_model_folder)
173+
LocalFolder: 'models'
174+
WorkingDirectory: '$(Build.Repository.LocalPath)/examples/python'
175+
HuggingFaceToken: $(HF_TOKEN)
176+
os: ${{ parameters.os }}
176177

177178
- ${{ if eq(parameters.os, 'linux') }}:
178179
- ${{ if eq(parameters.ep, 'cuda') }}:
@@ -195,7 +196,7 @@ jobs:
195196
$python_exe -m pip install -r /ort_genai_src/test/python/requirements.txt && \
196197
$python_exe -m pip install -r /ort_genai_src/test/python/requirements-cuda.txt && \
197198
cd /ort_genai_src/examples/python && \
198-
$python_exe -m pip install --no-index --find-links=/ort_genai_binary/wheel $(pip_package_name) && \
199+
$python_exe -m pip install --find-links=/ort_genai_binary/wheel $(pip_package_name) && \
199200
$python_exe model-generate.py -m ./models/$(prebuild_phi3_mini_model_folder) --min_length 25 --max_length 50 --verbose"
200201
201202
displayName: 'Run Example With Artifact'
@@ -206,12 +207,12 @@ jobs:
206207
python -m pip install -r test/python/requirements.txt
207208
python -m pip install -r test/python/requirements-cpu.txt
208209
cd examples/python
209-
python -m pip install --no-index --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
210+
python -m pip install --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
210211
python model-generate.py -m ./models/$(prebuild_phi3_mini_model_folder) --min_length 25 --max_length 50 --verbose
211212
displayName: 'Run Example With Artifact'
212213
workingDirectory: '$(Build.Repository.LocalPath)'
213214
214-
- ${{ if eq(parameters.os, 'win') }}:
215+
- ${{ if and(eq(parameters.os, 'win'), ne(parameters.arch, 'arm64'), ne(parameters.ep, 'directml')) }}:
215216
- ${{ if eq(parameters.ep, 'cuda') }}:
216217
- powershell: |
217218
$env:AZCOPY_MSI_CLIENT_ID = "63b63039-6328-442f-954b-5a64d124e5b4";
@@ -233,7 +234,7 @@ jobs:
233234
python -m pip install -r test/python/requirements-cpu.txt
234235
}
235236
cd examples\python
236-
python -m pip install --no-index --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
237+
python -m pip install --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name)
237238
238239
python model-generate.py -m .\models\$(prebuild_phi3_mini_model_folder) --min_length 25 --max_length 50 --verbose
239240
displayName: 'Run Example With Artifact'

VERSION_INFO

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.4.0-rc1
1+
0.4.0

cmake/global_variables.cmake

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,13 @@ set(VERSION_INFO ${ver})
1313
# VERSION_PATCH: 0
1414
string(REPLACE "-" ";" VERSION_LIST ${VERSION_INFO})
1515
list(GET VERSION_LIST 0 VERSION_STR)
16-
list(GET VERSION_LIST 1 VERSION_SUFFIX)
16+
# Check if it is a stable or dev version
17+
list(LENGTH VERSION_LIST VERSION_LIST_LENGTH)
18+
if(VERSION_LIST_LENGTH GREATER 1)
19+
list(GET VERSION_LIST 1 VERSION_SUFFIX)
20+
else()
21+
set(VERSION_SUFFIX "") # Set VERSION_SUFFIX to empty if stable version
22+
endif()
1723
string(REPLACE "." ";" VERSION_LIST ${VERSION_STR})
1824
list(GET VERSION_LIST 0 VERSION_MAJOR)
1925
list(GET VERSION_LIST 1 VERSION_MINOR)

examples/csharp/HelloPhi/HelloPhi.csproj

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
</PropertyGroup>
1010

1111
<ItemGroup>
12-
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="[0.4.0-rc1]" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
13-
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="[0.4.0-rc1]" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
14-
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="[0.4.0-rc1]" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
12+
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI" Version="[0.4.0]" Condition=" '$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Release' " />
13+
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="[0.4.0]" Condition=" '$(Configuration)' == 'Debug_Cuda' OR '$(Configuration)' == 'Release_Cuda' " />
14+
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="[0.4.0]" Condition=" '$(Configuration)' == 'Debug_DirectML' OR '$(Configuration)' == 'Release_DirectML' " />
1515
</ItemGroup>
1616

1717
<ItemGroup>

examples/csharp/HelloPhi/Program.cs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,18 @@ void PrintUsage()
55
{
66
Console.WriteLine("Usage:");
77
Console.WriteLine(" -m model_path");
8-
Console.WriteLine(" -i (optional): Intereactive mode");
8+
Console.WriteLine(" -i (optional): Interactive mode");
99
}
1010

11-
OgaHandle ogaHandle = new OgaHandle();
11+
using OgaHandle ogaHandle = new OgaHandle();
1212

1313
if (args.Length < 1)
1414
{
1515
PrintUsage();
1616
Environment.Exit(-1);
1717
}
1818

19-
bool intereactive = false;
19+
bool interactive = false;
2020
string modelPath = string.Empty;
2121

2222
uint i = 0;
@@ -25,7 +25,7 @@ void PrintUsage()
2525
var arg = args[i];
2626
if (arg == "-i")
2727
{
28-
intereactive = true;
28+
interactive = true;
2929
}
3030
else if (arg == "-m")
3131
{
@@ -47,13 +47,13 @@ void PrintUsage()
4747
Console.WriteLine("-------------");
4848

4949
Console.WriteLine("Model path: " + modelPath);
50-
Console.WriteLine("Intereactive: " + intereactive);
50+
Console.WriteLine("Interactive: " + interactive);
5151

5252
using Model model = new Model(modelPath);
5353
using Tokenizer tokenizer = new Tokenizer(model);
5454

5555
var option = 2;
56-
if (intereactive)
56+
if (interactive)
5757
{
5858
Console.WriteLine("Please enter option number:");
5959
Console.WriteLine("1. Complete Output");
@@ -64,15 +64,15 @@ void PrintUsage()
6464
do
6565
{
6666
string prompt = "def is_prime(num):"; // Example prompt
67-
if (intereactive)
67+
if (interactive)
6868
{
6969
Console.WriteLine("Prompt:");
7070
prompt = Console.ReadLine();
7171
}
7272
if (string.IsNullOrEmpty(prompt))
7373
{
7474
continue;
75-
}
75+
}
7676
var sequences = tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");
7777

7878
using GeneratorParams generatorParams = new GeneratorParams(model);
@@ -99,4 +99,4 @@ void PrintUsage()
9999
}
100100
Console.WriteLine();
101101
}
102-
} while (intereactive);
102+
} while (interactive);

nuget/MANAGED_PACKAGE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## About
2+
3+
This package is a dependency of [Microsoft.ML.OnnxRuntimeGenAI](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntimeGenAI) and does not need to be installed directly.

nuget/Microsoft.ML.OnnxRuntimeGenAI.Managed.nuspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
</metadata>
2020
<files>
2121
<file src="..\LICENSE" target="LICENSE" />
22-
<file src="..\src\csharp\README.md" target="README.md" />
22+
<file src="MANAGED_PACKAGE.md" target="README.md" />
2323
<file src="..\ThirdPartyNotices.txt" target="ThirdPartyNotices.txt" />
2424

2525
<file src="..\src\csharp\bin\$configuration$\netstandard2.0\Microsoft.ML.OnnxRuntimeGenAI.dll" target="lib\netstandard2.0" />
File renamed without changes.

src/csharp/Microsoft.ML.OnnxRuntimeGenAI.csproj

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,15 @@
3636
<Output TaskParameter="Lines" PropertyName="VersionInfoStr" />
3737
</ReadLinesFromFile>
3838

39-
<PropertyGroup>
39+
<PropertyGroup Condition=" '$(VersionInfoStr.Contains(-))' == 'true' ">
4040
<VersionInfo>$(VersionInfoStr.Split(-)[0])</VersionInfo>
4141
<VersionSuffix>$(VersionInfoStr.Split(-)[1])</VersionSuffix>
4242
</PropertyGroup>
43+
44+
<PropertyGroup Condition=" '$(VersionInfoStr.Contains(-))' == 'false' ">
45+
<VersionInfo>$(VersionInfoStr)</VersionInfo>
46+
<VersionSuffix></VersionSuffix>
47+
</PropertyGroup>
4348
</Target>
4449

4550
<Target Name="WriteAssemblyInfo" BeforeTargets="CoreCompile" DependsOnTargets="PrepareForBuild;ReadVersionFromFile">

src/csharp/Utils.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,33 @@
77

88
namespace Microsoft.ML.OnnxRuntimeGenAI
99
{
10-
public class OgaHandle
10+
/// <summary>
/// Disposable handle whose cleanup calls <c>NativeMethods.OgaShutdown()</c>.
/// The shutdown call is made at most once per instance, whether cleanup is
/// triggered by <see cref="Dispose()"/> or by the finalizer.
/// </summary>
public class OgaHandle : IDisposable
{
    // Set once the native shutdown call has been made for this instance.
    private bool _disposed;

    public OgaHandle()
    {
    }

    // Finalizer path: runs the same cleanup when Dispose() was never called.
    ~OgaHandle()
    {
        Dispose(false);
    }

    /// <summary>
    /// Runs the cleanup and asks the GC to skip this instance's finalizer.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Calls <c>NativeMethods.OgaShutdown()</c> unless it was already called
    /// through this instance.
    /// </summary>
    /// <param name="disposing">
    /// True when invoked from <see cref="Dispose()"/>, false when invoked from
    /// the finalizer. The value is not inspected here.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            NativeMethods.OgaShutdown();
            _disposed = true;
        }
    }
}
1739

src/ort_genai.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ struct OgaGenerator : OgaAbstract {
232232
return OgaGenerator_GetSequenceData(this, index);
233233
}
234234

235+
// Fetches the output called `name` from this generator.
//
// Forwards to OgaGenerator_GetOutput and hands its result to OgaCheckResult
// (presumably throws on a non-null error result -- confirm against its
// definition). The tensor pointer written by the C call is returned wrapped in
// a std::unique_ptr.
std::unique_ptr<OgaTensor> GetOutput(const char* name) {
  // Start from nullptr so an indeterminate pointer can never reach unique_ptr.
  OgaTensor* out = nullptr;
  OgaCheckResult(OgaGenerator_GetOutput(this, name, &out));
  return std::unique_ptr<OgaTensor>(out);
}
240+
235241
#if __cplusplus >= 202002L
236242
std::span<const int32_t> GetSequence(size_t index) const {
237243
return {GetSequenceData(index), GetSequenceCount(index)};

src/ort_genai_c.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,50 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator)
208208
OGA_CATCH
209209
}
210210

211+
// Copies the generator state's output called `name` into a newly created
// tensor and returns that tensor through `out`.
//
// The clone is created with the model's `allocator_cpu_` using the shape and
// element type of the source value, then filled according to where the source
// data lives:
//   - GPU memory with a CUDA model: cudaMemcpy, device to host (USE_CUDA builds)
//   - GPU memory with a DML model:  readback through the DML readback heap
//                                   (USE_DML builds)
//   - CPU memory:                   plain byte copy
// Any other combination throws std::runtime_error. Exceptions raised between
// OGA_TRY and OGA_CATCH are handled by those macros (defined elsewhere in this
// file); on success the function returns nullptr.
OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out) {
  OGA_TRY
  auto& generator = *reinterpret_cast<const Generators::Generator*>(oga_generator);
  auto* ortvalue_output = generator.state_->GetOutput(name);
  auto type_info = ortvalue_output->GetTensorTypeAndShapeInfo();
  std::unique_ptr<OrtValue> ortvalue_clone = OrtValue::CreateTensor(generator.model_->allocator_cpu_,
                                                                    type_info->GetShape(),
                                                                    type_info->GetElementType());
  // Copy data to ortvalue_clone
  const auto element_size = Generators::SizeOf(type_info->GetElementType());
  const auto data_size = type_info->GetElementCount() * element_size;
  // Query the source location once instead of once per branch.
  const auto memory_device = ortvalue_output->GetTensorMemoryInfo().GetDeviceType();
  if (memory_device == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) {
#if USE_CUDA
    // A failed copy would leave the clone uninitialised, so surface the error
    // instead of returning garbage to the caller.
    const cudaError_t cuda_status = cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(),
                                               ortvalue_output->GetTensorMutableRawData(),
                                               data_size,
                                               cudaMemcpyDeviceToHost);
    if (cuda_status != cudaSuccess) {
      throw std::runtime_error(std::string("cudaMemcpy failed: ") + cudaGetErrorString(cuda_status));
    }
#endif
  } else if (memory_device == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) {
#if USE_DML
    ComPtr<ID3D12Resource> gpu_resource;
    Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation(
        generator.model_->allocator_device_,
        ortvalue_output->GetTensorMutableRawData(),
        &gpu_resource));
    auto cpu_tensor = ortvalue_clone->GetTensorMutableRawData();
    generator.model_->GetDmlReadbackHeap()->ReadbackFromGpu(
        std::span(reinterpret_cast<uint8_t*>(cpu_tensor), data_size),
        gpu_resource.Get(),
        0,
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
#endif
  } else if (memory_device == OrtMemoryInfoDeviceType_CPU) {
    std::copy(static_cast<uint8_t*>(ortvalue_output->GetTensorMutableRawData()),
              static_cast<uint8_t*>(ortvalue_output->GetTensorMutableRawData()) + data_size,
              static_cast<uint8_t*>(ortvalue_clone->GetTensorMutableRawData()));
  } else {
    // Convert the enum to text explicitly: `"literal" + enum` is pointer
    // arithmetic on the literal, not string concatenation.
    throw std::runtime_error("Unsupported Device type: " + std::to_string(static_cast<int>(memory_device)));
  }

  auto tensor = std::make_shared<Generators::Tensor>(std::move(ortvalue_clone));
  // The tensor stores a shared_ptr to itself, so it stays alive after the
  // local `tensor` goes out of scope. NOTE(review): presumably the reference
  // is dropped by the tensor destroy API -- confirm.
  tensor->external_owner_ = tensor;
  *out = reinterpret_cast<OgaTensor*>(tensor.get());
  return nullptr;
  OGA_CATCH
}
254+
211255
size_t OGA_API_CALL OgaGenerator_GetSequenceCount(const OgaGenerator* oga_generator, size_t index) {
212256
auto& generator = *reinterpret_cast<const Generators::Generator*>(oga_generator);
213257
return generator.GetSequence(static_cast<int>(index)).GetCPU().size();

0 commit comments

Comments
 (0)