From 4645de320e4d30b4e4ebf06ac721755890a19fab Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Thu, 12 Oct 2023 13:34:26 +0800 Subject: [PATCH] [webgpu] dump test --- cmake/onnxruntime_webassembly.cmake | 1 + js/web/lib/index.ts | 2 + js/web/lib/onnxjs/graph.ts | 6 ++ js/web/package.json | 2 +- .../debug_node_inputs_outputs_utils.cc | 97 +++++++++++++++++-- .../debug_node_inputs_outputs_utils.h | 2 + .../framework/print_tensor_statistics_utils.h | 2 +- .../core/framework/sequential_executor.cc | 21 +++- .../core/optimizer/graph_transformer_utils.cc | 2 +- 9 files changed, 123 insertions(+), 12 deletions(-) diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index c6510c97a617e..dc8c803bd315c 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ -265,6 +265,7 @@ else() if (onnxruntime_USE_WEBNN) set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT") endif() + set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT") # Set link flag to enable exceptions support, this will override default disabling exception throwing behavior when disable exceptions. target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s DISABLE_EXCEPTION_THROWING=0") diff --git a/js/web/lib/index.ts b/js/web/lib/index.ts index c5c27a4318049..c3b1c82a8788d 100644 --- a/js/web/lib/index.ts +++ b/js/web/lib/index.ts @@ -7,6 +7,8 @@ // So we import code inside the if-clause to allow bundler remove the code safely. export * from 'onnxruntime-common'; +export * from './onnxjs/model'; +export * as onnxProto from './onnxjs/ort-schema/protobuf/onnx'; import {registerBackend, env} from 'onnxruntime-common'; import {version} from './version'; diff --git a/js/web/lib/onnxjs/graph.ts b/js/web/lib/onnxjs/graph.ts index f16da42815957..3f71e8628da4b 100644 --- a/js/web/lib/onnxjs/graph.ts +++ b/js/web/lib/onnxjs/graph.ts @@ -118,15 +118,19 @@ class Node implements Graph.Node { this.attributes = new Attribute(ProtoUtil.tensorAttributesFromORTFormat(_nodeProto)); } + this.inputNames = []; this.inputs = []; this.outputs = []; + this.outputNames = []; this.executeNode = true; } name: string; opType: string; inputs: number[]; + inputNames: string[]; outputs: number[]; + outputNames: string[]; attributes: Attribute; executeNode: boolean; } @@ -297,6 +301,7 @@ class GraphImpl implements Graph, Graph.Transformer { dataIndices.set(output, dataIndex); } node.outputs.push(dataIndex); + node.outputNames.push(output); if (this._allData[dataIndex]._from !== undefined) { throw new Error(`multiple nodes output to one data value: ${dataIndex}`); @@ -340,6 +345,7 @@ class GraphImpl implements Graph, Graph.Transformer { throw new Error(`unrecognized input '${input}' for node: ${nodeProto.name}`); } node.inputs.push(dataIndex); + node.inputNames.push(input); this._allData[dataIndex]._to.push(i); } diff --git a/js/web/package.json b/js/web/package.json index 15f13600c050e..ff72409861ec4 100644 --- a/js/web/package.json +++ b/js/web/package.json @@ -45,7 +45,7 @@ "@webgpu/types": "^0.1.30", "base64-js": "^1.5.1", "chai": "^4.3.7", - "electron": "^23.1.2", + "electron": "^23.3.13", "globby": "^13.1.3", "karma": "^6.4.1", "karma-browserstack-launcher": "^1.6.0", diff --git a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc index ec50bb7d6a5cb..2b96a445fe240 100644 --- a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc +++ b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc @@ -2,7 +2,9 @@ // Licensed under the MIT License. #ifdef DEBUG_NODE_INPUTS_OUTPUTS - +//#include +//#include +#include #include "core/framework/debug_node_inputs_outputs_utils.h" #include "core/framework/print_tensor_utils.h" #include "core/framework/print_tensor_statistics_utils.h" @@ -59,7 +61,73 @@ bool FilterNode(const NodeDumpOptions& dump_options, const Node& node) { } template -void DumpTensorToStdOut(const Tensor& tensor, const NodeDumpOptions& dump_options) { +void DumpTensorToStdOut(const Tensor& tensor, const std::string tensor_name, const NodeDumpOptions& dump_options) { + auto data = tensor.Data(); + const auto& shape = tensor.Shape(); + auto num_items = shape.Size(); + auto numDimensions = shape.NumDimensions(); + int64_t shape_array[numDimensions]; + for (size_t i =0 ; i < numDimensions; i ++) { + shape_array[i] = shape[i]; + } + auto tensor_type = DataTypeImpl::ToString(tensor.DataType()); + std::cout<<"tensor data type: "<(tensor_name.c_str()), + reinterpret_cast(data), + static_cast(num_items*4), + reinterpret_cast(tensor_type), + shape_array, + numDimensions); + onnxruntime::utils::PrintCpuTensor(tensor, dump_options.snippet_threshold, dump_options.snippet_edge_items); if (dump_options.dump_flags & NodeDumpOptions::DumpFlags::StatisticsData) { onnxruntime::utils::PrintCpuTensorStats(tensor); @@ -298,11 +366,12 @@ void DumpCpuTensor( const Tensor& tensor, const TensorMetadata& tensor_metadata) { switch (dump_options.data_destination) { case NodeDumpOptions::DataDestination::StdOut: { - DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, dump_options); + DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, tensor_metadata.name, dump_options); break; } case NodeDumpOptions::DataDestination::TensorProtoFiles: { const Path tensor_file = dump_options.output_dir / Path::Parse(MakeTensorFileName(tensor_metadata.name, dump_options)); + std::cout<<" tensor_file =" <Name(); tensor_metadata.step = dump_context.iteration; tensor_metadata.consumer = node.Name() + ":" + std::to_string(i); DumpTensor(dump_options, *tensor, tensor_metadata, session_state); - } + //} } else { std::cout << " is empty optional tensor.\n"; } @@ -562,12 +643,14 @@ void DumpNodeOutputs( const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0; PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n")); - if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0) { + //if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0) { tensor_metadata.name = output_defs[i]->Name(); tensor_metadata.step = dump_context.iteration; tensor_metadata.producer = node.Name() + ":" + std::to_string(i); + std::cout<< __FUNCTION__<<":"<<__LINE__<<"\n"; DumpTensor(dump_options, *tensor, tensor_metadata, session_state); - } + std::cout<< __FUNCTION__<<":"<<__LINE__<<"\n"; + //} } else { std::cout << " is empty optional tensor.\n"; } diff --git a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h index bde005fc204c8..f5b0a44ffe118 100644 --- a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h +++ b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h @@ -158,6 +158,8 @@ void DumpNodeOutputs( const Node& node, const SessionState& session_state); +void DumpCpuTensorFromFrame(const Tensor& tensor, const SessionState& session_state, const std::string& name); + } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/framework/print_tensor_statistics_utils.h b/onnxruntime/core/framework/print_tensor_statistics_utils.h index fd036114f3e76..40341c5547dd2 100644 --- a/onnxruntime/core/framework/print_tensor_statistics_utils.h +++ b/onnxruntime/core/framework/print_tensor_statistics_utils.h @@ -139,7 +139,7 @@ void PrintCpuTensorStats(const Tensor& tensor) { } const T* data = tensor.Data(); - PrintTensorStats(data, num_items); + PrintTensorStats(data, (size_t)num_items); std::cout << std::endl; } diff --git a/onnxruntime/core/framework/sequential_executor.cc b/onnxruntime/core/framework/sequential_executor.cc index ba68bc1d7d834..e47397567fc4e 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -327,7 +327,7 @@ class KernelScope { #endif #ifdef DEBUG_NODE_INPUTS_OUTPUTS - utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_); + // utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_); #endif #ifdef ENABLE_NVTX_PROFILE @@ -401,6 +401,7 @@ class KernelScope { #endif #ifdef DEBUG_NODE_INPUTS_OUTPUTS + utils::DumpNodeInputs(dump_context_, kernel_context_, kernel_.Node(), session_state_); utils::DumpNodeOutputs(dump_context_, kernel_context_, kernel_.Node(), session_state_); #endif } //~KernelScope @@ -607,7 +608,23 @@ onnxruntime::Status ExecuteThePlan(const SessionState& session_state, gsl::span< ORT_RETURN_IF_ERROR(session_state.UpdateMemoryPatternGroupCache(feeds, std::move(mem_patterns))); } } - + { + /* + // auto frame = ctx.GetExecutionFrame(); + //auto ort_value_idx_map = session_state.GetOrtValueNameIdxMap() + auto num_tensor = static_cast(session_state.GetOrtValueNameIdxMap().MaxIdx()) + 1; + + std::cout<<"ort_value_idx_map: "<GetMutable() : nullptr; + utils::DumpCpuTensorFromFrame(*tensor, session_state, name); + }*/ + } return Status::OK(); } diff --git a/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc b/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc index e5c65b2a96d8c..3a461e2214317 100644 --- a/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc +++ b/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc @@ -116,7 +116,7 @@ std::vector> GeneratePreTrainingTransformers( // Put ConstantSharing before CommonSubexpressionElimination by intention as it can create more opportunities for // CSE. For example, if A and B nodes both do Add operation with a same value but different initializers, by // default, CSE will not merge them, because the different initializers are represented by different NodeArg. - transformers.emplace_back(std::make_unique(compatible_eps)); + // transformers.emplace_back(std::make_unique(compatible_eps)); // LayerNormFusion must be applied before CommonSubexpressionElimination as the latter will break the pattern when 2 LayerNormFusion share the same input. transformers.emplace_back(std::make_unique(compatible_eps)); // Remove duplicate nodes. Must be applied before any recompute transformations.