Support tensor dumping for DML (#931)
PatriceVignola authored Sep 27, 2024
1 parent c842508 commit a7ed199
Showing 3 changed files with 34 additions and 9 deletions.
33 changes: 29 additions & 4 deletions src/models/debugging.cpp
@@ -4,6 +4,11 @@
#include "utils.h"
#include <cinttypes>

#if USE_DML
#include "../dml/dml_helpers.h"
#include "model.h"
#endif

namespace Generators {
static constexpr size_t c_value_count = 10; // Dump this many values from the start of a tensor

@@ -71,7 +76,7 @@ void DumpValues(std::ostream& stream, ONNXTensorElementDataType type, const void
   stream << SGR::Fg_Green << "]" << SGR::Reset << std::endl;
 }
 
-void DumpTensor(std::ostream& stream, OrtValue* value, bool dump_value) {
+void DumpTensor(const Model& model, std::ostream& stream, OrtValue* value, bool dump_value) {
   auto type_info = value->GetTensorTypeAndShapeInfo();
   auto shape = type_info->GetShape();
   stream << SGR::Fg_Green << "Shape[ " << SGR::Reset;
@@ -100,9 +105,29 @@ void DumpTensor(std::ostream& stream, OrtValue* value, bool dump_value) {
       size_t element_size = SizeOf(type);
       auto cpu_copy = std::make_unique<uint8_t[]>(element_size * element_count);
       CudaCheck() == cudaMemcpy(cpu_copy.get(), value->GetTensorRawData(), element_size * element_count, cudaMemcpyDeviceToHost);
       DumpValues(stream, type, cpu_copy.get(), element_count);
+#elif USE_DML
+      auto type = type_info->GetElementType();
+      size_t element_size = SizeOf(type);
+      auto cpu_copy = std::make_unique<uint8_t[]>(element_size * element_count);
+
+      if (value->GetTensorMutableRawData()) {
+        ComPtr<ID3D12Resource> gpu_resource;
+        Ort::ThrowOnError(model.GetOrtDmlApi()->GetD3D12ResourceFromAllocation(
+            model.allocator_device_,
+            value->GetTensorMutableRawData(),
+            &gpu_resource));
+
+        model.GetDmlReadbackHeap()->ReadbackFromGpu(
+            std::span(cpu_copy.get(), element_size * element_count),
+            gpu_resource.Get(),
+            0,
+            D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+      }
+
+      DumpValues(stream, type, cpu_copy.get(), element_count);
 #else
-      stream << "Unexpected, using GPU memory but not compiled with CUDA?";
+      stream << "Unexpected, using GPU memory but not compiled with CUDA or DML?";
 #endif
       break;
     }
@@ -112,10 +137,10 @@ void DumpTensor(std::ostream& stream, OrtValue* value, bool dump_value) {
   }
 }
 
-void DumpTensors(std::ostream& stream, OrtValue** values, const char** names, size_t count, bool dump_values) {
+void DumpTensors(const Model& model, std::ostream& stream, OrtValue** values, const char** names, size_t count, bool dump_values) {
   for (size_t i = 0; i < count; i++) {
     stream << SGR::Fg_Green << "Name: " << SGR::Reset << names[i] << ' ';
-    DumpTensor(stream, values[i], dump_values);
+    DumpTensor(model, stream, values[i], dump_values);
   }
 }

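Note: the new #elif USE_DML branch cannot memcpy from the tensor pointer the way the CUDA branch does. A DML-allocated OrtValue holds an opaque allocation handle, so the code first resolves it to an ID3D12Resource via the OrtDmlApi's GetD3D12ResourceFromAllocation, then stages the bytes into CPU memory through the model's readback heap. For readers without the repository at hand, here is a minimal, self-contained sketch of what such a GPU-to-CPU readback involves in raw D3D12. It is NOT the repository's DmlReadbackHeap (which reuses its staging buffer and command objects across calls); all names below are illustrative, the queue is assumed to be a DIRECT queue on which any producing work was already submitted, and the source's current state is passed in by the caller, as the diff does with D3D12_RESOURCE_STATE_UNORDERED_ACCESS.

#include <windows.h>
#include <d3d12.h>
#include <wrl/client.h>
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <utility>
#include <vector>

using Microsoft::WRL::ComPtr;

static void Check(HRESULT hr) {
  if (FAILED(hr)) throw std::runtime_error("D3D12 call failed");
}

std::vector<uint8_t> ReadbackFromGpu(ID3D12Device* device, ID3D12CommandQueue* queue,
                                     ID3D12Resource* source, D3D12_RESOURCE_STATES source_state,
                                     uint64_t byte_count) {
  // 1) Staging buffer in the READBACK heap: GPU-writable via copy, CPU-mappable.
  D3D12_HEAP_PROPERTIES heap{};
  heap.Type = D3D12_HEAP_TYPE_READBACK;
  D3D12_RESOURCE_DESC desc{};
  desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
  desc.Width = byte_count;
  desc.Height = 1;
  desc.DepthOrArraySize = 1;
  desc.MipLevels = 1;
  desc.SampleDesc.Count = 1;
  desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
  ComPtr<ID3D12Resource> staging;
  Check(device->CreateCommittedResource(&heap, D3D12_HEAP_FLAG_NONE, &desc,
                                        D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
                                        IID_PPV_ARGS(&staging)));

  // 2) Record the copy, transitioning the source to COPY_SOURCE and back.
  ComPtr<ID3D12CommandAllocator> allocator;
  ComPtr<ID3D12GraphicsCommandList> list;
  Check(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&allocator)));
  Check(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, allocator.Get(), nullptr,
                                  IID_PPV_ARGS(&list)));
  D3D12_RESOURCE_BARRIER barrier{};
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
  barrier.Transition.pResource = source;
  barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
  barrier.Transition.StateBefore = source_state;  // e.g. UNORDERED_ACCESS, as in the diff
  barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
  list->ResourceBarrier(1, &barrier);
  list->CopyBufferRegion(staging.Get(), 0, source, 0, byte_count);
  std::swap(barrier.Transition.StateBefore, barrier.Transition.StateAfter);
  list->ResourceBarrier(1, &barrier);  // restore the caller's state
  Check(list->Close());

  // 3) Submit and block until the GPU finishes the copy.
  ID3D12CommandList* lists[] = {list.Get()};
  queue->ExecuteCommandLists(1, lists);
  ComPtr<ID3D12Fence> fence;
  Check(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)));
  Check(queue->Signal(fence.Get(), 1));
  HANDLE done = CreateEventW(nullptr, FALSE, FALSE, nullptr);
  Check(fence->SetEventOnCompletion(1, done));
  WaitForSingleObject(done, INFINITE);
  CloseHandle(done);

  // 4) Map the staging buffer and copy the bytes out to CPU memory.
  std::vector<uint8_t> cpu(byte_count);
  void* mapped = nullptr;
  D3D12_RANGE read_range{0, static_cast<SIZE_T>(byte_count)};
  Check(staging->Map(0, &read_range, &mapped));
  std::memcpy(cpu.data(), mapped, byte_count);
  D3D12_RANGE no_write{0, 0};
  staging->Unmap(0, &no_write);
  return cpu;
}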
4 changes: 2 additions & 2 deletions src/models/debugging.h
@@ -2,8 +2,8 @@
 // Licensed under the MIT License.
 namespace Generators {
 
-void DumpTensor(std::ostream& stream, OrtValue* value, bool dump_value);
-void DumpTensors(std::ostream& stream, OrtValue** values, const char** names, size_t count, bool dump_values);
+void DumpTensor(const Model& model, std::ostream& stream, OrtValue* value, bool dump_value);
+void DumpTensors(const Model& model, std::ostream& stream, OrtValue** values, const char** names, size_t count, bool dump_values);
 
 template <typename T>
 void DumpSpan(std::ostream& stream, std::span<const T> values);
6 changes: 3 additions & 3 deletions src/models/model.cpp
@@ -58,21 +58,21 @@ void State::Run(OrtSession& session, OrtRunOptions& run_options, int new_batch_s
   if (g_log.enabled && g_log.model_input_values) {
     auto& stream = Log("model_input_values");
     stream << std::endl;
-    DumpTensors(stream, inputs_.data(), input_names_.data(), input_names_.size(), true);
+    DumpTensors(model_, stream, inputs_.data(), input_names_.data(), input_names_.size(), true);
   }
 
   if (g_log.enabled && g_log.model_output_shapes) {
     auto& stream = Log("model_output_shapes");
     stream << std::endl;
-    DumpTensors(stream, outputs_.data(), output_names_.data(), output_names_.size(), false);
+    DumpTensors(model_, stream, outputs_.data(), output_names_.data(), output_names_.size(), false);
   }
 
   session.Run(&run_options, input_names_.data(), inputs_.data(), input_names_.size(), output_names_.data(), outputs_.data(), output_names_.size());
 
   if (g_log.enabled && g_log.model_output_values) {
     auto& stream = Log("model_output_values");
     stream << std::endl;
-    DumpTensors(stream, outputs_.data(), output_names_.data(), output_names_.size(), true);
+    DumpTensors(model_, stream, outputs_.data(), output_names_.data(), output_names_.size(), true);
   }
 }

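For context on when these dumps fire: State::Run consults a process-wide log configuration before and after each session run, as the context lines above show. The flags named in this diff are g_log.enabled (the master switch), model_input_values, model_output_shapes, and model_output_values. A minimal sketch of turning them all on follows; setting the fields directly and the "logging.h" include path are assumptions for illustration, and the project may expose a setter API instead.

#include "logging.h"  // assumed header; declares Generators::g_log and its flags

// Enable every dump site that State::Run checks in the diff above.
void EnableTensorDumps() {
  auto& log = Generators::g_log;
  log.enabled = true;              // master switch; nothing dumps without it
  log.model_input_values = true;   // dump full input tensors before session.Run
  log.model_output_shapes = true;  // dump output shapes before session.Run
  log.model_output_values = true;  // dump full output tensors after session.Run
}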
