CoreML: Aggregated changes to add all required ops for priority model #21472

Merged: 11 commits, Jul 25, 2024
32 changes: 28 additions & 4 deletions onnxruntime/core/framework/allocation_planner.cc
@@ -225,7 +225,8 @@ class PlannerImpl {
   }
 
   int& UseCount(OrtValueIndex n) {
-    ORT_ENFORCE(n >= 0 && static_cast<size_t>(n) < ort_value_info_.size(), "invalid value index: ", n, " against size ", ort_value_info_.size());
+    ORT_ENFORCE(n >= 0 && static_cast<size_t>(n) < ort_value_info_.size(),
+                "invalid value index: ", n, " against size ", ort_value_info_.size());
     return ort_value_info_[n].usecount;
   }
   int& UseCount(const OrtValueName& name) { return UseCount(Index(name)); }
@@ -642,9 +643,21 @@ class PlannerImpl {
   }
 
   // All initializers should be treated as input
+  //
+  // Special case: ORT format model where an EP takes nodes and copies initializers into the compiled model.
+  // Those initializers become unused so don't end up in ort_value_name_idx_map_, but as we don't run
+  // Graph::Resolve with an ORT format model they will still exist in GetAllInitializedTensors.
+  // We can ignore lookup failures in this case.
+  const bool unresolved_graph = graph_viewer_.GetGraph().GraphResolveNeeded();
   for (const auto& pair : graph_viewer_.GetAllInitializedTensors()) {
     const auto& initializer_name = pair.first;
-    UseCount(initializer_name)++;
+    OrtValueIndex index = -1;
+    auto status = ort_value_name_idx_map_.GetIdx(initializer_name, index);
+    if (status.IsOK()) {
+      UseCount(initializer_name)++;
+    } else {
+      ORT_ENFORCE(unresolved_graph, status.ErrorMessage());
+    }
   }
 
   for (auto& stream_execution_order : stream_nodes_) {
@@ -709,10 +722,21 @@ class PlannerImpl {
   }
 
   // All initializers should be treated as input
+  //
+  // Special case: ORT format model where an EP takes nodes and copies initializers into the compiled model.
+  // Those initializers become unused so don't end up in ort_value_name_idx_map_, but as we don't run
+  // Graph::Resolve with an ORT format model they will still exist in GetAllInitializedTensors.
+  // We can ignore lookup failures in this case.
+  const bool unresolved_graph = graph_viewer_.GetGraph().GraphResolveNeeded();
   for (const auto& pair : graph_viewer_.GetAllInitializedTensors()) {
     const auto& initializer_name = pair.first;
-    OrtValueIndex index = Index(initializer_name);
-    ProcessDef(index, graph_viewer_.GetNodeArg(pair.first));
+    OrtValueIndex index = -1;
+    auto status = ort_value_name_idx_map_.GetIdx(initializer_name, index);
+    if (status.IsOK()) {
+      ProcessDef(index, graph_viewer_.GetNodeArg(initializer_name));
+    } else {
+      ORT_ENFORCE(unresolved_graph, status.ErrorMessage());
+    }
   }
 
   InlinedHashSet<OrtValueIndex> set_node_arg_has_explicit_consumer;
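Both hunks above apply the same pattern: the initializer-name lookup is now allowed to fail, but only while the graph is unresolved (an ORT format model, where an EP may have copied the initializer into its compiled model). A minimal standalone sketch of that contract, using a plain std::unordered_map rather than the ORT types:

#include <cassert>
#include <string>
#include <unordered_map>

// Returns true and sets 'index' when 'name' is known. A lookup miss is
// tolerated only for an unresolved graph, where an EP may have absorbed the
// initializer into its compiled model; for a resolved graph it is a hard error.
bool TryGetInitializerIndex(const std::unordered_map<std::string, int>& name_to_idx,
                            const std::string& name, bool unresolved_graph,
                            int& index) {
  auto it = name_to_idx.find(name);
  if (it == name_to_idx.end()) {
    assert(unresolved_graph && "initializer missing from the name-to-index map");
    return false;
  }
  index = it->second;
  return true;
}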
15 changes: 14 additions & 1 deletion onnxruntime/core/framework/session_state_utils.cc
@@ -228,9 +228,22 @@ common::Status SaveInitializedTensors(
   id_to_initialized_tensor.reserve(initialized_tensor_set.size());
   user_supplied_initializer_ids.reserve(initialized_tensor_set.size());
 
+  // Special case: ORT format model where an EP takes nodes and copies initializers into the compiled model.
+  // Those initializers become unused so don't end up in ort_value_name_idx_map, but as we don't run
+  // Graph::Resolve with an ORT format model they will still exist in GetAllInitializedTensors.
+  // We can ignore lookup failures in this case.
+  const bool unresolved_graph = graph.GetGraph().GraphResolveNeeded();
   for (const auto& entry : initialized_tensor_set) {
     int ort_value_index;
-    ORT_RETURN_IF_ERROR(ort_value_name_idx_map.GetIdx(entry.first, ort_value_index));
+
+    if (auto status = ort_value_name_idx_map.GetIdx(entry.first, ort_value_index); !status.IsOK()) {
+      if (unresolved_graph) {
+        continue;
+      }
+
+      return status;
+    }
 
     if (use_user_supplied_initializer(entry.first)) {
       user_supplied_initializer_ids.insert(ort_value_index);
     }
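The rewritten lookup above uses a C++17 if-statement initializer so 'status' stays scoped to the failure path. A self-contained illustration of the same shape:

#include <iostream>
#include <map>
#include <string>

int main() {
  const std::map<std::string, int> idx_map{{"weights", 3}};
  // 'it' is visible only inside the if/else, mirroring how 'status' is scoped.
  if (auto it = idx_map.find("absorbed_initializer"); it == idx_map.end()) {
    std::cout << "lookup failed; skip (unresolved graph) or return the error\n";
  } else {
    std::cout << "found index " << it->second << '\n';
  }
  return 0;
}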
onnxruntime/core/providers/coreml/builders/impl/activation_op_builder.cc
@@ -83,19 +83,30 @@
using namespace CoreML::Specification::MILSpec;
// https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.activation
std::string_view coreml_op_type;
bool add_alpha = false;
if (op_type == "Sigmoid") {
coreml_op_type = "sigmoid";
} else if (op_type == "Tanh") {
coreml_op_type = "tanh";
} else if (op_type == "Relu") {
coreml_op_type = "relu";
} else if (op_type == "LeakyRelu") {
coreml_op_type = "leaky_relu";
add_alpha = true;
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"ActivationOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", node.InputDefs()[0]->Name());

if (add_alpha) {
NodeAttrHelper helper(node);
const auto alpha = helper.Get("alpha", 0.01f);
AddOperationInput(*op, "alpha", model_builder.AddScalarConstant(op->type(), "alpha", alpha));
}

AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
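For reference, ONNX LeakyRelu and CoreML's leaky_relu share the same element-wise definition, which is why forwarding the alpha attribute is all the new branch needs. A scalar reference implementation:

// f(x) = x for x >= 0, alpha * x otherwise. ONNX defaults alpha to 0.01,
// matching the helper.Get("alpha", 0.01f) fallback above.
float LeakyRelu(float x, float alpha = 0.01f) {
  return x >= 0.0f ? x : alpha * x;
}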
@@ -198,7 +209,7 @@
 
 #if defined(COREML_ENABLE_MLPROGRAM)
   if (input_params.create_mlprogram) {
-    if (op_type == "PRelu" || op_type == "LeakyRelu") {
+    if (op_type == "PRelu") {  // TODO: ML Program supports this so should be easy to enable
       return false;
     }
   } else
22 changes: 22 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -314,6 +314,28 @@
(*op.mutable_inputs())[input_name] = std::move(arg);
}

void AddOperationInputs(MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names) {
MILSpec::Argument arg;
for (const auto& value : value_names) {
arg.mutable_arguments()->Add()->set_name(std::string(value));
}

(*op.mutable_inputs())[input_name] = std::move(arg);

}
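A hypothetical call site for the new helper (the tensor names are made up for illustration):

// Binds two existing tensor names to the variadic "values" input of 'op';
// each name becomes one Argument entry in op.inputs()["values"].
std::vector<std::string_view> values{"concat_in_0", "concat_in_1"};
AddOperationInputs(op, "values", values);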

void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, const std::string& output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape) {
auto& outputs = *op.mutable_outputs();
auto& output_arg = *outputs.Add();
output_arg.set_name(output_name);

MILSpec::ValueType& value = *output_arg.mutable_type();
MILSpec::TensorType& tensor_type = *value.mutable_tensortype();

SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(element_type), shape, /*convert_scalar*/ true);
}

void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
std::optional<int32_t> override_element_type) {
auto& outputs = *op.mutable_outputs();
20 changes: 20 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -129,6 +129,26 @@
void AddOperationInput(COREML_SPEC::MILSpec::Operation& op,
std::string_view input_name, std::string_view value_name);

/// <summary>
/// Add a variadic input argument to a MILSpec::Operation
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="input name">The input name defined by the spec for the operation. </param>
/// <param name="value_names">The input value names.</param>
void AddOperationInputs(COREML_SPEC::MILSpec::Operation& op, std::string_view input_name,
const std::vector<std::string_view>& value_names);

/// <summary>
/// Add an output to a MILSpec::Operation for an intermediate operation when the implementation is composed of
/// multiple MLProgram operations. In this case we don't have a NodeArg for the output.
/// </summary>
/// <param name="op">Operation to update.</param>
/// <param name="output_name">Name of the intermediate output. Create using ModelBuilder::GetUniqueName.</param>
/// <param name="element_type">onnx::TensorProto_DataType element type of the output.
/// int32_t as that is what TensorShapeProto uses to store the value.</param>
/// <param name="shape">Shape of the output if known.</param>
void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, const std::string& output_name,
int32_t element_type, std::optional<gsl::span<const int64_t>> shape);
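A sketch of how a composed implementation might chain these helpers. It assumes a ModelBuilder::GetUniqueName overload taking the node and a suffix; the operation choice and names are illustrative:

// Hypothetical first step of a 3d Concat composition (expand_dims -> concat -> squeeze).
// The expand_dims output has no NodeArg, so the intermediate-output helper is used.
auto expand = model_builder.CreateOperation(node, "expand_dims");
AddOperationInput(*expand, "x", node.InputDefs()[0]->Name());
const std::string expanded = model_builder.GetUniqueName(node, "expanded");
AddIntermediateOperationOutput(*expand, expanded,
                               ONNX_NAMESPACE::TensorProto_DataType_FLOAT,
                               std::nullopt);  // shape not known here
model_builder.AddOperation(std::move(expand));
// 'expanded' can now be wired in as an input name of the follow-up operation.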

/// <summary>
/// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg.
/// </summary>
onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc
@@ -4,6 +4,7 @@
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/coreml/shape_utils.h"
@@ -18,27 +19,52 @@

   bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                          const logging::Logger& logger) const override;
+
+  bool SupportsMLProgram() const override { return true; }
 };
 
 Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
                                               const Node& node,
                                               const logging::Logger& logger) const {
-  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
-
-  layer->mutable_concat()->set_sequenceconcat(false);
-
-  for (const auto* input : node.InputDefs()) {
-    LOGS(logger, VERBOSE) << "input name " << input->Name();
-    *layer->mutable_input()->Add() = input->Name();
-  }
-
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
-
-  model_builder.AddLayer(std::move(layer));
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;  // NOLINT
+
+    NodeAttrHelper helper(node);
+    const auto axis = helper.GetInt64("axis");  // required
+    const auto interleave = false;
+
+    std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "concat");
+    std::vector<std::string_view> input_names;
+    for (const auto* input : node.InputDefs()) {
+      input_names.emplace_back(input->Name());
+    }
+    AddOperationInputs(*op, "values", input_names);
+    AddOperationInput(*op, "axis", model_builder.AddScalarConstant(op->type(), "axis", *axis));
+    AddOperationInput(*op, "interleave", model_builder.AddScalarConstant(op->type(), "interleave", interleave));
+    AddOperationOutput(*op, *node.OutputDefs()[0]);
+    model_builder.AddOperation(std::move(op));
+
+  } else
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
+  {
+    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+
+    layer->mutable_concat()->set_sequenceconcat(false);
+
+    for (const auto* input : node.InputDefs()) {
+      LOGS(logger, VERBOSE) << "input name " << input->Name();
+      *layer->mutable_input()->Add() = input->Name();
+    }
+
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+
+    model_builder.AddLayer(std::move(layer));
+  }
 
   return Status::OK();
 }
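For reference, ONNX Concat (and MIL concat with interleave=false) joins its inputs along one axis; every other dimension must match across inputs. A minimal shape computation that makes the semantics concrete:

#include <cstdint>
#include <vector>

// Output shape of Concat: same as the inputs except along 'axis', where the
// sizes are summed. A negative axis counts from the back, as in ONNX.
std::vector<int64_t> ConcatOutputShape(const std::vector<std::vector<int64_t>>& in_shapes,
                                       int64_t axis) {
  std::vector<int64_t> out = in_shapes.front();
  const size_t a = static_cast<size_t>(axis < 0 ? axis + static_cast<int64_t>(out.size()) : axis);
  for (size_t i = 1; i < in_shapes.size(); ++i) {
    out[a] += in_shapes[i][a];
  }
  return out;
}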

-bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */,
+bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                         const logging::Logger& logger) const {
   const auto& input_defs = node.InputDefs();
   if (input_defs.size() < 2) {
@@ -50,23 +76,25 @@
   if (!GetShape(*input_defs[0], input_shape, logger))
     return false;
 
-  auto rank = input_shape.size();
-  if (rank != 4) {
-    // For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
-    // Instead of concat on axis 0, it will concat on axis 1
-    // Disable Concat support for 3d tensor for now
-    // TODO, add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d
-    LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
-                          << rank << "d shape";
-    return false;
-  }
-
-  NodeAttrHelper helper(node);
-  auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
-  if (rank != axis + 3) {
-    LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
-                          << ", actual rank: " << rank;
-    return false;
-  }
+  if (!input_params.create_mlprogram) {
+    auto rank = input_shape.size();
+    if (rank != 4) {
+      // For some reason, the concat in CoreML running on 3d tensor will concat on wrong axis
+      // Instead of concat on axis 0, it will concat on axis 1
+      // Disable Concat support for 3d tensor for now
+      // TODO, add ExpandDims and Squeeze, 3d -ExpandDims-> 4d -> Concat -Squeeze-> 3d
+      LOGS(logger, VERBOSE) << "Concat only support 4d shape for now, input is "
+                            << rank << "d shape";
+      return false;
+    }
+
+    NodeAttrHelper helper(node);
+    auto axis = static_cast<size_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));
+    if (rank != axis + 3) {
+      LOGS(logger, VERBOSE) << "Concat only support axis to be -3, actual axis: " << axis
+                            << ", actual rank: " << rank;
+      return false;
+    }
+  }
 
   return true;