CoreML: Aggregated changes to add all required ops for priority model #21472

Merged · 11 commits · Jul 25, 2024
1 change: 1 addition & 0 deletions .lintrunner.toml
@@ -137,6 +137,7 @@ exclude_patterns = [
'onnxruntime/core/mickey/gemm/**', # CUTLASS based libs recommends NO automatic code formatting
'winml/lib/Api.Image/shaders/**', # Contains data chunks
'onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_launch_template.h', # Bool Switches hang Clang
'onnxruntime/core/providers/coreml/mlprogram_test_scripts/**', # test scripts only
]
command = [
'python',
24 changes: 15 additions & 9 deletions include/onnxruntime/core/graph/graph.h
@@ -1408,6 +1408,11 @@
RuntimeOptimizationRecordContainer& MutableRuntimeOptimizations() {
return runtime_optimizations_;
}

// We don't run Graph::Resolve() on an ORT format model, but a compiling EP may copy initializers to its
// compiled model during partitioning, leaving them unused in the ORT Graph. To allow the memory to be freed
// we need to manually run the cleanup that would usually happen as part of Graph::Resolve.
Status RemovedUnusedInitializersOrtFormat();
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

// This friendship relationship should only be used to call Graph::Graph and
@@ -1541,12 +1546,6 @@

common::Status PerformTypeAndShapeInferencing(const ResolveOptions& options);

// Recursively find all subgraphs including nested subgraphs
void FindAllSubgraphs(std::vector<Graph*>& subgraphs);

// Iterate this Graph instance and all subgraphs, calling the provided function for each.
common::Status ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func);

common::Status InferAndVerifyTypeMatch(Node& node, const ONNX_NAMESPACE::OpSchema& op, const ResolveOptions& options);

// perform type and shape inferencing on the subgraph and Resolve to validate
@@ -1576,9 +1575,6 @@
// Implementation for initializer replacement
Status ReplaceInitializedTensorImpl(ONNX_NAMESPACE::TensorProto new_initializer, bool is_external);

// Clear all unused initializers and NodeArgs
void CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve = nullptr);

std::vector<NodeArg*> CreateNodeArgs(const google::protobuf::RepeatedPtrField<std::string>& names,
const ArgNameToTypeMap& name_to_type_map);

@@ -1587,6 +1583,16 @@
#endif // !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

// Recursively find all subgraphs including nested subgraphs
void FindAllSubgraphs(std::vector<Graph*>& subgraphs);

// Iterate this Graph instance and all subgraphs, calling the provided function for each.
common::Status ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func);

// Clear all unused initializers and NodeArgs
void CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve = nullptr);

[cpplint] reviewdog 🐶: include/onnxruntime/core/graph/graph.h:1594: Lines should be <= 120 characters long [whitespace/line_length] [2]

Status PopulateNodeArgToProducerConsumerLookupsFromNodes();

template <typename TInstance>
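For context on the new public API above: a minimal sketch of how a caller might use Graph::RemovedUnusedInitializersOrtFormat() after partitioning. The wrapper function below is hypothetical and not part of this PR; only the Graph method itself comes from the diff.

```cpp
#include "core/common/status.h"
#include "core/graph/graph.h"

// Hypothetical helper: after partitioning an ORT format model, a compiling EP
// may have copied initializers into its compiled blob, leaving the originals
// unused in the Graph. Graph::Resolve() never runs for ORT format models, so
// the usual cleanup must be triggered manually to free that memory.
onnxruntime::common::Status FreeUnusedInitializers(onnxruntime::Graph& graph) {
  // Walks this graph and all nested subgraphs, removing unused initializers
  // and NodeArgs (the same cleanup Graph::Resolve would normally perform).
  return graph.RemovedUnusedInitializersOrtFormat();
}
```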
60 changes: 37 additions & 23 deletions onnxruntime/core/graph/graph.cc
@@ -3254,27 +3254,6 @@ Status Graph::PerformTypeAndShapeInferencing(const ResolveOptions& options) {
return Status::OK();
}

void Graph::FindAllSubgraphs(std::vector<Graph*>& subgraphs) {
for (auto& node : Nodes()) {
for (auto& subgraph : node.MutableSubgraphs()) {
subgraphs.push_back(subgraph.get());
subgraph->FindAllSubgraphs(subgraphs);
}
}
}

Status Graph::ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func) {
auto status = func(*this);
ORT_RETURN_IF_ERROR(status);

for (auto& subgraph : subgraphs) {
status = func(*subgraph);
ORT_RETURN_IF_ERROR(status);
}

return status;
}

Status Graph::Resolve(const ResolveOptions& options) {
if (parent_graph_) {
// Resolve must start at the top level graph in-order to handle outer scope
@@ -3387,6 +3366,39 @@ void Graph::AddInitializedTensor(const TensorProto& tensor) {
ORT_IGNORE_RETURN_VALUE(GetOrCreateNodeArg(tensor.name(), &t));
}
}

void Graph::FindAllSubgraphs(std::vector<Graph*>& subgraphs) {
for (auto& node : Nodes()) {
for (auto& subgraph : node.MutableSubgraphs()) {
subgraphs.push_back(subgraph.get());
subgraph->FindAllSubgraphs(subgraphs);
}
}
}

Status Graph::ForThisAndAllSubgraphs(const std::vector<Graph*>& subgraphs, std::function<Status(Graph&)> func) {
auto status = func(*this);
ORT_RETURN_IF_ERROR(status);

for (auto& subgraph : subgraphs) {
status = func(*subgraph);
ORT_RETURN_IF_ERROR(status);
}

return status;
}

Status Graph::RemovedUnusedInitializersOrtFormat() {
std::vector<Graph*> all_subgraphs;
FindAllSubgraphs(all_subgraphs);
auto cleanup_func = [](Graph& graph) {
graph.CleanUnusedInitializersAndNodeArgs(nullptr);
return Status::OK();
};

auto result = ForThisAndAllSubgraphs(all_subgraphs, cleanup_func);
return result;
}
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

const std::string& Graph::Name() const noexcept {
@@ -4122,6 +4134,9 @@ void Graph::ToGraphProtoInternal(ONNX_NAMESPACE::GraphProto& graph_proto) const
}
}

#endif // !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
void Graph::CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::string>* initializer_names_to_preserve) {
// Node Args being used
std::unordered_set<const NodeArg*> used_args;
@@ -4253,8 +4268,7 @@ void Graph::CleanUnusedInitializersAndNodeArgs(const std::unordered_set<std::str
}
}
}

#endif // !defined(ORT_MINIMAL_BUILD)
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

void Graph::ComputeOverridableInitializers() {
graph_overridable_initializers_.clear();
onnxruntime/core/providers/coreml/builders/impl/activation_op_builder.cc
@@ -83,19 +83,30 @@
using namespace CoreML::Specification::MILSpec;
// https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.activation
std::string_view coreml_op_type;
bool add_alpha = false;
if (op_type == "Sigmoid") {
coreml_op_type = "sigmoid";
} else if (op_type == "Tanh") {
coreml_op_type = "tanh";
} else if (op_type == "Relu") {
coreml_op_type = "relu";
} else if (op_type == "LeakyRelu") {
coreml_op_type = "leaky_relu";
add_alpha = true;
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"ActivationOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", node.InputDefs()[0]->Name());

if (add_alpha) {
NodeAttrHelper helper(node);
const auto alpha = helper.Get("alpha", 0.01f);
AddOperationInput(*op, "alpha", model_builder.AddScalarConstant(op->type(), "alpha", alpha));
}

AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
@@ -198,7 +209,7 @@

#if defined(COREML_ENABLE_MLPROGRAM)
if (input_params.create_mlprogram) {
if (op_type == "PRelu" || op_type == "LeakyRelu") {
if (op_type == "PRelu") { // TODO: ML Program supports this so should be easy to enable

[cpplint] reviewdog 🐶: onnxruntime/core/providers/coreml/builders/impl/activation_op_builder.cc:212: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
return false;
}
} else
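As a reference for the new leaky_relu mapping above: ONNX LeakyRelu and CoreML ML Program leaky_relu share the same element-wise definition, so the builder only needs to forward the alpha attribute (ONNX default 0.01). A sketch of the shared semantics, not code from this PR:

```cpp
// Element-wise semantics shared by ONNX LeakyRelu and MIL leaky_relu:
//   f(x) = x          if x >= 0
//   f(x) = alpha * x  otherwise
float leaky_relu(float x, float alpha = 0.01f) {
  return x >= 0.0f ? x : alpha * x;
}
```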
24 changes: 23 additions & 1 deletion onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -309,11 +309,33 @@

void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std::string_view value_name) {
MILSpec::Argument arg;
arg.mutable_arguments()->Add()->set_name(std::string(value_name));
arg.mutable_arguments()->Add()->set_name(value_name.data(), value_name.size());

(*op.mutable_inputs())[input_name] = std::move(arg);
}

void AddOperationVariadicInput(MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names) {
MILSpec::Argument arg;
for (const auto& value : value_names) {
arg.mutable_arguments()->Add()->set_name(value.data(), value.size());
}

(*op.mutable_inputs())[input_name] = std::move(arg);

[cpplint] reviewdog 🐶: onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc:324: Add #include <utility> for move [build/include_what_you_use] [4]
}

void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::string_view output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape) {
auto& outputs = *op.mutable_outputs();
auto& output_arg = *outputs.Add();
output_arg.set_name(output_name.data(), output_name.size());

MILSpec::ValueType& value = *output_arg.mutable_type();
MILSpec::TensorType& tensor_type = *value.mutable_tensortype();

SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(element_type), shape, /*convert_scalar*/ true);
}

void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
std::optional<int32_t> override_element_type) {
auto& outputs = *op.mutable_outputs();
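A hedged usage sketch of the two new helpers, as an op builder might call them from inside AddToModelBuilderImpl. Here model_builder, node, and the tensor names are assumed context, not code from this PR:

```cpp
using namespace CoreML::Specification::MILSpec;

// One logical op built from an ONNX node; MIL "concat" takes a variadic
// "values" input rather than numbered inputs.
std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "concat");

// All value names are attached to the single variadic argument.
std::vector<std::string_view> value_names{"tensor_a", "tensor_b"};
AddOperationVariadicInput(*op, "values", value_names);

// For a multi-op decomposition there is no NodeArg for intermediate values,
// so the element type (and shape, when known) are supplied explicitly.
AddIntermediateOperationOutput(*op, model_builder.GetUniqueName(node, "out"),
                               ONNX_NAMESPACE::TensorProto_DataType_FLOAT,
                               std::nullopt /* shape unknown here */);
```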
20 changes: 20 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -129,6 +129,26 @@
void AddOperationInput(COREML_SPEC::MILSpec::Operation& op,
std::string_view input_name, std::string_view value_name);

/// <summary>
/// Add a variadic input argument to a MILSpec::Operation
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="input name">The input name defined by the spec for the operation. </param>
/// <param name="value_names">The input value names.</param>
void AddOperationVariadicInput(COREML_SPEC::MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names);

[cpplint] reviewdog 🐶: onnxruntime/core/providers/coreml/builders/impl/builder_utils.h:139: Add #include <vector> for vector<> [build/include_what_you_use] [4]

/// <summary>
/// Add an output to a MILSpec::Operation for an intermediate operation when the implementation is composed of
/// multiple MLProgram operations. In this case we don't have a NodeArg for the output.
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="output_name">Name of the intermediate output. Create using ModelBuilder::GetUniqueName.</param>
/// <param name="element_type">onnx::TensorProto_DataType element type of the output.
/// int32_t as that is what TensorShapeProto uses to store the value.</param>
/// <param name="shape">Shape of the output if known.</param>
void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::string_view output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape);

/// <summary>
/// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg.
/// </summary>
onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc
@@ -4,6 +4,7 @@
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/shape_utils.h"
@@ -18,27 +19,51 @@

bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;

bool SupportsMLProgram() const override { return true; }
};

Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
const Node& node,
const logging::Logger& logger) const {
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

layer->mutable_concat()->set_sequenceconcat(false);

for (const auto* input : node.InputDefs()) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
*layer->mutable_input()->Add() = input->Name();
#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec; // NOLINT

NodeAttrHelper helper(node);
const auto axis = helper.GetInt64("axis"); // required
const auto interleave = false;

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "concat");
std::vector<std::string_view> input_names;
for (const auto* input : node.InputDefs()) {
input_names.emplace_back(input->Name());
}
AddOperationVariadicInput(*op, "values", input_names);
AddOperationInput(*op, "axis", model_builder.AddScalarConstant(op->type(), "axis", *axis));
AddOperationInput(*op, "interleave", model_builder.AddScalarConstant(op->type(), "interleave", interleave));
AddOperationOutput(*op, *node.OutputDefs()[0]);
model_builder.AddOperation(std::move(op));
} else // NOLINT
#endif // defined(COREML_ENABLE_MLPROGRAM)
{
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

layer->mutable_concat()->set_sequenceconcat(false);

for (const auto* input : node.InputDefs()) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
*layer->mutable_input()->Add() = input->Name();
}

*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
}

*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
return Status::OK();
}

bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */,
bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();
if (input_defs.size() < 2) {
@@ -50,23 +75,25 @@
if (!GetShape(*input_defs[0], input_shape, logger))
return false;

auto rank = input_shape.size();
if (rank != 4) {
// For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
// Instead of concat on axis 0, it will concat on axis 1
// Disable Concat support for 3d tensor for now
// TODO, add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d
LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
<< rank << "d shape";
return false;
}

NodeAttrHelper helper(node);
auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
if (rank != axis + 3) {
LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
<< ", actual rank: " << rank;
return false;
if (!input_params.create_mlprogram) {
auto rank = input_shape.size();
if (rank != 4) {
// For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
// Instead of concat on axis 0, it will concat on axis 1
// Disable Concat support for 3d tensor for now
// TODO: add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d

[cpplint] reviewdog 🐶: onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc:84: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
<< rank << "d shape";
return false;
}

NodeAttrHelper helper(node);
auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
if (rank != axis + 3) {
LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
<< ", actual rank: " << rank;
return false;
}
}

return true;
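The practical effect of gating the rank-4 check behind !input_params.create_mlprogram: with ML Program enabled, a 3-D Concat can now be handled by the CoreML EP directly. A hedged test-style sketch using ORT's OpTester; the test name, tensor names, and EP registration details are illustrative, not from this PR:

```cpp
#include "test/providers/provider_test_utils.h"

TEST(CoreMLConcatTest, Concat3dMLProgram) {
  onnxruntime::test::OpTester test("Concat", 13);
  test.AddAttribute<int64_t>("axis", 1);

  // Two rank-3 inputs; previously rejected by the CoreML NeuralNetwork path,
  // which required rank-4 shapes.
  test.AddInput<float>("input_0", {1, 2, 2}, {0.f, 1.f, 2.f, 3.f});
  test.AddInput<float>("input_1", {1, 2, 2}, {4.f, 5.f, 6.f, 7.f});
  test.AddOutput<float>("concat_result", {1, 4, 2},
                        {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f});

  // Running against the CoreML EP additionally requires the usual
  // execution-provider registration for the test.
  test.Run();
}
```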