From 5cbb81efb0f17e993f5e653d17be1609a0533abc Mon Sep 17 00:00:00 2001 From: Xu Xing Date: Thu, 12 Oct 2023 13:34:26 +0800 Subject: [PATCH] [webgpu] dump test --- cmake/onnxruntime_webassembly.cmake | 1 + js/web/lib/index.ts | 3 + js/web/lib/onnxjs/graph.ts | 6 + js/web/package.json | 2 +- .../debug_node_inputs_outputs_utils.cc | 107 ++++++++++++++++-- .../debug_node_inputs_outputs_utils.h | 2 + .../framework/print_tensor_statistics_utils.h | 2 +- .../core/framework/sequential_executor.cc | 1 - onnxruntime/core/graph/model.cc | 9 +- .../core/optimizer/graph_transformer_utils.cc | 2 + .../core/optimizer/graph_transformer_utils.cc | 2 +- 11 files changed, 121 insertions(+), 16 deletions(-) diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index c6510c97a617e..e64878feef558 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ -265,6 +265,7 @@ else() if (onnxruntime_USE_WEBNN) set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT") endif() + # set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT") # Set link flag to enable exceptions support, this will override default disabling exception throwing behavior when disable exceptions. target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s DISABLE_EXCEPTION_THROWING=0") diff --git a/js/web/lib/index.ts b/js/web/lib/index.ts index 6060271ced156..44e42c854a71c 100644 --- a/js/web/lib/index.ts +++ b/js/web/lib/index.ts @@ -10,6 +10,9 @@ export * from 'onnxruntime-common'; import * as ort from 'onnxruntime-common'; export default ort; +export * from './onnxjs/model'; +export * as JsTensor from './onnxjs/tensor'; +export * as OnnxProto from './onnxjs/ort-schema/protobuf/onnx'; import {registerBackend, env} from 'onnxruntime-common'; import {version} from './version'; diff --git a/js/web/lib/onnxjs/graph.ts b/js/web/lib/onnxjs/graph.ts index f16da42815957..3f71e8628da4b 100644 --- a/js/web/lib/onnxjs/graph.ts +++ b/js/web/lib/onnxjs/graph.ts @@ -118,15 +118,19 @@ class Node implements Graph.Node { this.attributes = new Attribute(ProtoUtil.tensorAttributesFromORTFormat(_nodeProto)); } + this.inputNames = []; this.inputs = []; this.outputs = []; + this.outputNames = []; this.executeNode = true; } name: string; opType: string; inputs: number[]; + inputNames: string[]; outputs: number[]; + outputNames: string[]; attributes: Attribute; executeNode: boolean; } @@ -297,6 +301,7 @@ class GraphImpl implements Graph, Graph.Transformer { dataIndices.set(output, dataIndex); } node.outputs.push(dataIndex); + node.outputNames.push(output); if (this._allData[dataIndex]._from !== undefined) { throw new Error(`multiple nodes output to one data value: ${dataIndex}`); @@ -340,6 +345,7 @@ class GraphImpl implements Graph, Graph.Transformer { throw new Error(`unrecognized input '${input}' for node: ${nodeProto.name}`); } node.inputs.push(dataIndex); + node.inputNames.push(input); this._allData[dataIndex]._to.push(i); } diff --git a/js/web/package.json b/js/web/package.json index 7271fed99d709..66e3b093c4bb3 100644 --- a/js/web/package.json +++ b/js/web/package.json @@ -45,7 +45,7 @@ "@webgpu/types": "^0.1.30", "base64-js": "^1.5.1", "chai": "^4.3.7", - "electron": "^23.1.2", + "electron": "^23.3.13", "globby": "^13.1.3", "karma": "^6.4.1", "karma-browserstack-launcher": "^1.6.0", diff --git a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc index ec50bb7d6a5cb..14ac25a8f54a5 100644 --- a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc +++ b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc @@ -2,7 +2,9 @@ // Licensed under the MIT License. #ifdef DEBUG_NODE_INPUTS_OUTPUTS - +//#include +//#include +#include #include "core/framework/debug_node_inputs_outputs_utils.h" #include "core/framework/print_tensor_utils.h" #include "core/framework/print_tensor_statistics_utils.h" @@ -59,8 +61,83 @@ bool FilterNode(const NodeDumpOptions& dump_options, const Node& node) { } template -void DumpTensorToStdOut(const Tensor& tensor, const NodeDumpOptions& dump_options) { - onnxruntime::utils::PrintCpuTensor(tensor, dump_options.snippet_threshold, dump_options.snippet_edge_items); +void DumpTensorToStdOut(const Tensor& tensor, const std::string tensor_name, const NodeDumpOptions& dump_options) { + auto data = tensor.Data(); + const auto& shape = tensor.Shape(); + auto num_items = shape.Size(); + auto numDimensions = shape.NumDimensions(); + int64_t shape_array[numDimensions]; + for (size_t i =0 ; i < numDimensions; i ++) { + shape_array[i] = shape[i]; + } + auto tensor_type = DataTypeImpl::ToString(tensor.DataType()); + + EM_ASM( + { + if (window.dump != 1) { + return; + } + + DataView.prototype.getUint64 = function(byteOffset, littleEndian) { + // split 64-bit number into two 32-bit parts + const left = this.getUint32(byteOffset, littleEndian); + const right = this.getUint32(byteOffset+4, littleEndian); + const combined = littleEndian? left + 2**32*right : 2**32*left + right; + + if (!Number.isSafeInteger(combined)) + console.warn(combined, 'exceeds MAX_SAFE_INTEGER. Precision may be lost'); + return combined; + }; + + BigInt.prototype.toJSON = function () { + return Number(this.toString()); + }; + + function SaveObjectToFile(object, name) { + if (window.dumpBlobUrlMap == null) { + window.dumpBlobUrlMap = new Map(); + } + const file = new Blob([JSON.stringify(object)], { + type: 'application/json' + }); + console.log(name); + const url = URL.createObjectURL(file); + window.dumpBlobUrlMap.set(name, url); + } + + const name = UTF8ToString($0); + const buffer = $1; + const tensor_type = UTF8ToString($3); + let data_buffer; + if (tensor_type === 'int64') { + const buffer_size = $2*8; + const bytes = new Uint8Array(buffer_size); + bytes.set(HEAPU8.subarray(buffer, buffer + buffer_size)); + data_buffer = new BigInt64Array(bytes.buffer); + } else { + const buffer_size = $2*4; + const bytes = new Uint8Array(buffer_size); + bytes.set(HEAPU8.subarray(buffer, buffer + buffer_size)); + data_buffer = new Float32Array(bytes.buffer) + } + + const shape_ptr = $4; + const shape_size = $5 * 8; + const shape_bytes = new Uint8Array(shape_size); + shape_bytes.set(HEAPU8.subarray(shape_ptr, shape_ptr + shape_size)); + + const shape_int64 = new BigInt64Array(shape_bytes.buffer); + SaveObjectToFile({'data': Array.from(data_buffer), + 'dims':Array.from(shape_int64), 'type': tensor_type}, name); + }, + reinterpret_cast(tensor_name.c_str()), + reinterpret_cast(data), + static_cast(num_items), + reinterpret_cast(tensor_type), + shape_array, + numDimensions); + + // onnxruntime::utils::PrintCpuTensor(tensor, dump_options.snippet_threshold, dump_options.snippet_edge_items); if (dump_options.dump_flags & NodeDumpOptions::DumpFlags::StatisticsData) { onnxruntime::utils::PrintCpuTensorStats(tensor); } @@ -298,11 +375,12 @@ void DumpCpuTensor( const Tensor& tensor, const TensorMetadata& tensor_metadata) { switch (dump_options.data_destination) { case NodeDumpOptions::DataDestination::StdOut: { - DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, dump_options); + DispatchOnTensorType(tensor.DataType(), DumpTensorToStdOut, tensor, tensor_metadata.name, dump_options); break; } case NodeDumpOptions::DataDestination::TensorProtoFiles: { const Path tensor_file = dump_options.output_dir / Path::Parse(MakeTensorFileName(tensor_metadata.name, dump_options)); + std::cout<<" tensor_file =" <Exists()) { std::cout << "Input " << i << " Name: " << input_defs[i]->Name() << "\n"; - const auto* type = context.InputType(i); - if (type) { if (type->IsTensorType()) { if (const auto* tensor = context.Input(i); tensor != nullptr) { @@ -491,12 +578,12 @@ void DumpNodeInputs( const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0; PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n")); - if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::InputData) != 0) { + //if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::InputData) != 0) { tensor_metadata.name = input_defs[i]->Name(); tensor_metadata.step = dump_context.iteration; tensor_metadata.consumer = node.Name() + ":" + std::to_string(i); DumpTensor(dump_options, *tensor, tensor_metadata, session_state); - } + //} } else { std::cout << " is empty optional tensor.\n"; } @@ -562,12 +649,12 @@ void DumpNodeOutputs( const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0; PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n")); - if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0) { + //if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0) { tensor_metadata.name = output_defs[i]->Name(); tensor_metadata.step = dump_context.iteration; tensor_metadata.producer = node.Name() + ":" + std::to_string(i); DumpTensor(dump_options, *tensor, tensor_metadata, session_state); - } + //} } else { std::cout << " is empty optional tensor.\n"; } diff --git a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h index bde005fc204c8..f5b0a44ffe118 100644 --- a/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h +++ b/onnxruntime/core/framework/debug_node_inputs_outputs_utils.h @@ -158,6 +158,8 @@ void DumpNodeOutputs( const Node& node, const SessionState& session_state); +void DumpCpuTensorFromFrame(const Tensor& tensor, const SessionState& session_state, const std::string& name); + } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/framework/print_tensor_statistics_utils.h b/onnxruntime/core/framework/print_tensor_statistics_utils.h index fd036114f3e76..40341c5547dd2 100644 --- a/onnxruntime/core/framework/print_tensor_statistics_utils.h +++ b/onnxruntime/core/framework/print_tensor_statistics_utils.h @@ -139,7 +139,7 @@ void PrintCpuTensorStats(const Tensor& tensor) { } const T* data = tensor.Data(); - PrintTensorStats(data, num_items); + PrintTensorStats(data, (size_t)num_items); std::cout << std::endl; } diff --git a/onnxruntime/core/framework/sequential_executor.cc b/onnxruntime/core/framework/sequential_executor.cc index ba68bc1d7d834..a127f000f173b 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -607,7 +607,6 @@ onnxruntime::Status ExecuteThePlan(const SessionState& session_state, gsl::span< ORT_RETURN_IF_ERROR(session_state.UpdateMemoryPatternGroupCache(feeds, std::move(mem_patterns))); } } - return Status::OK(); } diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index 076332a65c8f2..be016d596139b 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -541,6 +541,10 @@ static Status SaveModel(Model& model, const T& file_path) { model_proto.SerializeToArray(buffer, buffer_size); EM_ASM(({ + if (window.dump != 2) { + console.log("not dump"); + return; + } const buffer = $0; const buffer_size = $1; const file_path = UTF8ToString($2); @@ -552,8 +556,9 @@ static Status SaveModel(Model& model, const T& file_path) { } else { // Browser const file = new File([bytes], file_path, {type: "application/octet-stream" }); - const url = URL.createObjectURL(file); - window.open(url, '_blank'); + // const url = URL.createObjectURL(file); + // window.open(url, '_blank'); + window.optmizedModelBlobUrl = URL.createObjectURL(file); } }), reinterpret_cast(buffer), diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc index 5a441b1d1701e..9331283492098 100644 --- a/onnxruntime/core/optimizer/graph_transformer_utils.cc +++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc @@ -219,7 +219,9 @@ InlinedVector> GenerateTransformers( excluded_initializers.insert(p.first); } const InlinedHashSet no_limit_empty_ep_list = {}; + #ifndef DEBUG_NODE_INPUTS_OUTPUTS transformers.emplace_back(std::make_unique(no_limit_empty_ep_list, excluded_initializers)); + #endif transformers.emplace_back(std::make_unique()); transformers.emplace_back(std::make_unique(cpu_execution_provider, !disable_quant_qdq)); diff --git a/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc b/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc index 57d76577f1ba7..51ea0b5cad75b 100644 --- a/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc +++ b/orttraining/orttraining/core/optimizer/graph_transformer_utils.cc @@ -117,7 +117,7 @@ std::vector> GeneratePreTrainingTransformers( // Put ConstantSharing before CommonSubexpressionElimination by intention as it can create more opportunities for // CSE. For example, if A and B nodes both do Add operation with a same value but different initializers, by // default, CSE will not merge them, because the different initializers are represented by different NodeArg. - transformers.emplace_back(std::make_unique(compatible_eps)); + // transformers.emplace_back(std::make_unique(compatible_eps)); // LayerNormFusion must be applied before CommonSubexpressionElimination as the latter will break the pattern when 2 LayerNormFusion share the same input. transformers.emplace_back(std::make_unique(compatible_eps)); // Remove duplicate nodes. Must be applied before any recompute transformations.