Commit 4645de3: [webgpu] dump test

axinging committed Oct 17, 2023
1 parent 6832b68 commit 4645de3

Showing 9 changed files with 123 additions and 12 deletions.
1 change: 1 addition & 0 deletions cmake/onnxruntime_webassembly.cmake
@@ -265,6 +265,7 @@ else()
if (onnxruntime_USE_WEBNN)
set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT")
endif()
set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT")

# Set link flag to enable exceptions support, this will override default disabling exception throwing behavior when disable exceptions.
target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s DISABLE_EXCEPTION_THROWING=0")
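Note that the added line applies "--bind -sWASM_BIGINT" unconditionally, where it was previously set only for WebNN builds. The dump code introduced later in this commit runs EM_ASM JavaScript against 64-bit shape data, and -sWASM_BIGINT is Emscripten's flag for moving 64-bit integers across the JS boundary as BigInt (--bind pulls in Embind). A minimal standalone sketch of what the flag buys, assuming a hypothetical sketch.cc outside this repository:

// sketch.cc -- build with: em++ sketch.cc -sWASM_BIGINT -o sketch.html
#include <cstdint>
#include <emscripten/emscripten.h>

int main() {
  int64_t big = 1099511627776LL;  // 2^40: does not fit in 32 bits
  // With -sWASM_BIGINT the i64 argument reaches JS intact as a BigInt;
  // without the flag it is lowered through a double, and large values
  // can lose precision.
  EM_ASM({ console.log('value:', $0, typeof $0); }, big);
  return 0;
}
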
2 changes: 2 additions & 0 deletions js/web/lib/index.ts
@@ -7,6 +7,8 @@
// So we import code inside the if-clause to allow bundler remove the code safely.

export * from 'onnxruntime-common';
export * from './onnxjs/model';
export * as onnxProto from './onnxjs/ort-schema/protobuf/onnx';
import {registerBackend, env} from 'onnxruntime-common';
import {version} from './version';

6 changes: 6 additions & 0 deletions js/web/lib/onnxjs/graph.ts
@@ -118,15 +118,19 @@ class Node implements Graph.Node {
this.attributes = new Attribute(ProtoUtil.tensorAttributesFromORTFormat(_nodeProto));
}

this.inputNames = [];
this.inputs = [];
this.outputs = [];
this.outputNames = [];
this.executeNode = true;
}

name: string;
opType: string;
inputs: number[];
inputNames: string[];
outputs: number[];
outputNames: string[];
attributes: Attribute;
executeNode: boolean;
}
@@ -297,6 +301,7 @@ class GraphImpl implements Graph, Graph.Transformer {
dataIndices.set(output, dataIndex);
}
node.outputs.push(dataIndex);
node.outputNames.push(output);

if (this._allData[dataIndex]._from !== undefined) {
throw new Error(`multiple nodes output to one data value: ${dataIndex}`);
@@ -340,6 +345,7 @@ class GraphImpl implements Graph, Graph.Transformer {
throw new Error(`unrecognized input '${input}' for node: ${nodeProto.name}`);
}
node.inputs.push(dataIndex);
node.inputNames.push(input);

this._allData[dataIndex]._to.push(i);
}
2 changes: 1 addition & 1 deletion js/web/package.json
@@ -45,7 +45,7 @@
"@webgpu/types": "^0.1.30",
"base64-js": "^1.5.1",
"chai": "^4.3.7",
"electron": "^23.1.2",
"electron": "^23.3.13",
"globby": "^13.1.3",
"karma": "^6.4.1",
"karma-browserstack-launcher": "^1.6.0",
97 changes: 90 additions & 7 deletions onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc
@@ -2,7 +2,9 @@
// Licensed under the MIT License.

#ifdef DEBUG_NODE_INPUTS_OUTPUTS

//#include <iostream>
//#include <string>
#include <emscripten/emscripten.h>
#include "core/framework/debug_node_inputs_outputs_utils.h"
#include "core/framework/print_tensor_utils.h"
#include "core/framework/print_tensor_statistics_utils.h"
@@ -59,7 +61,73 @@ bool FilterNode(const NodeDumpOptions& dump_options, const Node& node) {
}

template <typename T>
void DumpTensorToStdOut(const Tensor& tensor, const NodeDumpOptions& dump_options) {
void DumpTensorToStdOut(const Tensor& tensor, const std::string tensor_name, const NodeDumpOptions& dump_options) {
auto data = tensor.Data<T>();
const auto& shape = tensor.Shape();
auto num_items = shape.Size();
auto numDimensions = shape.NumDimensions();
int64_t shape_array[numDimensions];
for (size_t i = 0; i < numDimensions; i++) {
shape_array[i] = shape[i];
}
auto tensor_type = DataTypeImpl::ToString(tensor.DataType());
std::cout<<"tensor data type: "<<DataTypeImpl::ToString(tensor.DataType())<<"\n";

EM_ASM(
{
DataView.prototype.getUint64 = function(byteOffset, littleEndian) {
// split 64-bit number into two 32-bit parts
const left = this.getUint32(byteOffset, littleEndian);
const right = this.getUint32(byteOffset+4, littleEndian);
const combined = littleEndian? left + 2**32*right : 2**32*left + right;

if (!Number.isSafeInteger(combined))
console.warn(combined, 'exceeds MAX_SAFE_INTEGER. Precision may be lost');
return combined;
};

BigInt.prototype.toJSON = function () {
return Number(this.toString(16));
};
function SaveObjectsToFile(json_object, name) {
// const name = json_object['name'];
const object = json_object;
const file_name = `${name}.json`;
const a = document.createElement('a');
const file = new Blob([JSON.stringify(object)], {
type: 'application/json'
});
a.href = URL.createObjectURL(file);
a.download = file_name;
a.click();
}

const name = UTF8ToString($0);

const buffer = $1;
const buffer_size = $2;
console.log(buffer_size);
const bytes = new Uint8Array(buffer_size);
bytes.set(HEAPU8.subarray(buffer, buffer + buffer_size));

const tensor_type = UTF8ToString($3);
const shape_ptr = $4;
const shape_size = $5 * 8;
console.log(shape_size);
const shape_bytes = new Uint8Array(shape_size);
shape_bytes.set(HEAPU8.subarray(shape_ptr, shape_ptr + shape_size));

const shape_int64 = new BigInt64Array(shape_bytes.buffer);
SaveObjectsToFile({'data': Array.from(new Float32Array(bytes.buffer)),
'dims':Array.from(shape_int64), 'type': tensor_type}, name);
},
reinterpret_cast<int32_t>(tensor_name.c_str()),
reinterpret_cast<int32_t>(data),
static_cast<int32_t>(num_items*4),
reinterpret_cast<int32_t>(tensor_type),
shape_array,
numDimensions);

onnxruntime::utils::PrintCpuTensor<T>(tensor, dump_options.snippet_threshold, dump_options.snippet_edge_items);
if (dump_options.dump_flags & NodeDumpOptions::DumpFlags::StatisticsData) {
onnxruntime::utils::PrintCpuTensorStats<T>(tensor);
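
This EM_ASM block is the heart of the commit: it copies the tensor bytes and the int64 shape out of the wasm heap, decodes the shape as a BigInt64Array, and triggers a browser download of a {data, dims, type} JSON file (the DataView.getUint64 helper is defined but never called in the path shown). Two caveats: the payload is always reinterpreted as Float32Array and sized as num_items*4, which only round-trips 4-byte element types, and the BigInt.prototype.toJSON patch converts through a base-16 string, so any dim value above 9 serializes wrongly (Number(this) would be faithful for safe-integer dims). Below is a distilled, float-only sketch of the same download technique; DumpFloatTensorAsJson is a hypothetical name, not an ORT API:

// Hypothetical distillation of the technique above, not part of this commit.
#include <cstddef>
#include <emscripten/emscripten.h>

void DumpFloatTensorAsJson(const char* name, const float* data, size_t count) {
  EM_ASM({
    const file_name = UTF8ToString($0) + '.json';
    // Copy out of the wasm heap first; the heap may grow or be reused.
    const bytes = HEAPU8.slice($1, $1 + $2 * 4);
    const values = Array.from(new Float32Array(bytes.buffer));
    const blob = new Blob([JSON.stringify({data: values})],
                          {type: 'application/json'});
    // Same download trick as above: a temporary anchor element.
    const a = document.createElement('a');
    a.href = URL.createObjectURL(blob);
    a.download = file_name;
    a.click();
  }, name, data, count);
}
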
@@ -298,11 +366,12 @@ void DumpCpuTensor(
const Tensor& tensor, const TensorMetadata& tensor_metadata) {
switch (dump_options.data_destination) {
case NodeDumpOptions::DataDestination::StdOut: {
DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, dump_options);
DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, tensor_metadata.name, dump_options);
break;
}
case NodeDumpOptions::DataDestination::TensorProtoFiles: {
const Path tensor_file = dump_options.output_dir / Path::Parse(MakeTensorFileName(tensor_metadata.name, dump_options));
std::cout<<" tensor_file =" <<tensor_file.ToPathString() <<", tensor_metadata.name="<<tensor_metadata.name<<"\n";
DumpTensorToFile(tensor, tensor_metadata.name, tensor_file);
break;
}
@@ -325,6 +394,7 @@ void DumpTensor(
const SessionState& session_state) {
// check tensor is on CPU before dumping it
auto& tensor_location = tensor.Location();
std::cout<< __FUNCTION__<<":"<<__LINE__<<"\n";
if (tensor_location.device.Type() == OrtDevice::CPU ||
tensor_location.mem_type == OrtMemTypeCPUInput ||
tensor_location.mem_type == OrtMemTypeCPUOutput) {
@@ -447,6 +517,17 @@ static void PrintIf(bool boolean_expression, const std::string& message) {
}
}

void DumpCpuTensorFromFrame(const Tensor& tensor, const SessionState& session_state, const std::string& name) {
TensorMetadata tensor_metadata;
tensor_metadata.name = name + "_Dump";
tensor_metadata.step = 1;
tensor_metadata.consumer = "unknownConsumer";
utils::NodeDumpOptions opts{};
opts.dump_flags |= utils::NodeDumpOptions::DumpFlags::InputData;
opts.dump_flags |= utils::NodeDumpOptions::DumpFlags::OutputData;
DumpTensor(opts, tensor, tensor_metadata, session_state);
}

void DumpNodeInputs(
const NodeDumpOptions& dump_options,
const NodeDumpContext& dump_context,
@@ -491,12 +572,12 @@ void DumpNodeInputs(
const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0;
PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n"));

if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::InputData) != 0) {
//if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::InputData) != 0) {
tensor_metadata.name = input_defs[i]->Name();
tensor_metadata.step = dump_context.iteration;
tensor_metadata.consumer = node.Name() + ":" + std::to_string(i);
DumpTensor(dump_options, *tensor, tensor_metadata, session_state);
}
//}
} else {
std::cout << " is empty optional tensor.\n";
}
@@ -562,12 +643,14 @@ void DumpNodeOutputs(
const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0;
PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n"));

if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0) {
//if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0) {
tensor_metadata.name = output_defs[i]->Name();
tensor_metadata.step = dump_context.iteration;
tensor_metadata.producer = node.Name() + ":" + std::to_string(i);
std::cout<< __FUNCTION__<<":"<<__LINE__<<"\n";
DumpTensor(dump_options, *tensor, tensor_metadata, session_state);
}
std::cout<< __FUNCTION__<<":"<<__LINE__<<"\n";
//}
} else {
std::cout << " is empty optional tensor.\n";
}
2 changes: 2 additions & 0 deletions onnxruntime/core/framework/debug_node_inputs_outputs_utils.h
@@ -158,6 +158,8 @@ void DumpNodeOutputs(
const Node& node,
const SessionState& session_state);

void DumpCpuTensorFromFrame(const Tensor& tensor, const SessionState& session_state, const std::string& name);

} // namespace utils
} // namespace onnxruntime

2 changes: 1 addition & 1 deletion onnxruntime/core/framework/print_tensor_statistics_utils.h
@@ -139,7 +139,7 @@ void PrintCpuTensorStats(const Tensor& tensor) {
}

const T* data = tensor.Data<T>();
PrintTensorStats<T>(data, num_items);
PrintTensorStats<T>(data, (size_t)num_items);
std::cout << std::endl;
}

21 changes: 19 additions & 2 deletions onnxruntime/core/framework/sequential_executor.cc
@@ -327,7 +327,7 @@ class KernelScope {
#endif

#ifdef DEBUG_NODE_INPUTS_OUTPUTS
utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_);
// utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_);
#endif

#ifdef ENABLE_NVTX_PROFILE
@@ -401,6 +401,7 @@ class KernelScope {
#endif

#ifdef DEBUG_NODE_INPUTS_OUTPUTS
utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_);
utils::DumpNodeOutputs(dump_context_, kernel_context_, kernel_.Node(), session_state_);
#endif
} //~KernelScope
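
Taken together with the earlier hunk, this moves the DumpNodeInputs call out of the KernelScope constructor and into its destructor, next to DumpNodeOutputs, so node inputs are now captured after the kernel has executed rather than before. A minimal illustration of that lifetime, using a hypothetical DumpScope rather than ORT's actual class:

#include <cstdio>

// Hypothetical stand-in for KernelScope: constructed before the kernel's
// Compute() call, destroyed right after it returns.
struct DumpScope {
  DumpScope() { /* inputs used to be dumped here, pre-execution */ }
  ~DumpScope() {
    // Both dumps now happen post-execution, so input tensors are observed
    // in whatever state the kernel left them.
    std::puts("dump inputs");
    std::puts("dump outputs");
  }
};

void RunKernel() {
  DumpScope scope;  // constructed before Compute()
  /* ... Compute() runs here ... */
}                   // destructor fires after Compute() returns
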
@@ -607,7 +608,23 @@ onnxruntime::Status ExecuteThePlan(const SessionState& session_state, gsl::span<
ORT_RETURN_IF_ERROR(session_state.UpdateMemoryPatternGroupCache(feeds, std::move(mem_patterns)));
}
}

{
/*
// auto frame = ctx.GetExecutionFrame();
//auto ort_value_idx_map = session_state.GetOrtValueNameIdxMap()
auto num_tensor = static_cast<size_t>(session_state.GetOrtValueNameIdxMap().MaxIdx()) + 1;
std::cout<<"ort_value_idx_map: "<<num_tensor<< "\n";
for (size_t i =0 ; i < num_tensor; i ++) {
std::string name;// = '';
auto status = session_state.GetOrtValueNameIdxMap().GetName(i, name);
std::cout<<status<<", name: "<<name<< ", "<< i <<"\n";
OrtValue* p_ml_value = ctx.GetExecutionFrame().GetMutableNodeInputOrOutputMLValue(i);
Tensor* tensor = p_ml_value ? p_ml_value->GetMutable<Tensor>() : nullptr;
utils::DumpCpuTensorFromFrame(*tensor, session_state, name);
}*/
}
return Status::OK();
}
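
The braced block added above carries a commented-out sketch: walk every entry in the session's OrtValue name/index map and funnel each tensor through the new DumpCpuTensorFromFrame helper. A hedged reconstruction of what that loop needs before it could be enabled is below; the signatures are taken from the commented code itself rather than re-verified against the ORT headers, and the null/tensor guards are additions (the original dereferences tensor unconditionally):

// Fragment from inside ExecuteThePlan: a cleaned-up version of the
// commented-out dump loop above, with guards for empty slots.
const auto& idx_map = session_state.GetOrtValueNameIdxMap();
const auto num_values = static_cast<size_t>(idx_map.MaxIdx()) + 1;
for (size_t i = 0; i < num_values; ++i) {
  std::string name;
  if (!idx_map.GetName(static_cast<int>(i), name).IsOK()) continue;
  OrtValue* value = ctx.GetExecutionFrame().GetMutableNodeInputOrOutputMLValue(static_cast<int>(i));
  if (value == nullptr || !value->IsTensor()) continue;  // skip empty or non-tensor slots
  utils::DumpCpuTensorFromFrame(*value->GetMutable<Tensor>(), session_state, name);
}
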

@@ -116,7 +116,7 @@ std::vector<std::unique_ptr<GraphTransformer>> GeneratePreTrainingTransformers(
// Put ConstantSharing before CommonSubexpressionElimination by intention as it can create more opportunities for
// CSE. For example, if A and B nodes both do Add operation with a same value but different initializers, by
// default, CSE will not merge them, because the different initializers are represented by different NodeArg.
transformers.emplace_back(std::make_unique<ConstantSharing>(compatible_eps));
// transformers.emplace_back(std::make_unique<ConstantSharing>(compatible_eps));
// LayerNormFusion must be applied before CommonSubexpressionElimination as the latter will break the pattern when 2 LayerNormFusion share the same input.
transformers.emplace_back(std::make_unique<LayerNormFusion>(compatible_eps));
// Remove duplicate nodes. Must be applied before any recompute transformations.
