Commit a470da1

[CoreML] Add support for int64 (#24462)
### Description

Add int64 as a supported data type for moving nodes to the CoreML EP. We already convert constants from int64 to int32 for CoreML automatically by calling narrow; this change adds the same conversion for outputs.

### Motivation and Context

- More nodes supported on CoreML

### Note on the Unsqueeze op

According to #22975 there is a bug with the Unsqueeze op with scalar inputs on x86. I was running into a bug for unsqueezes that expand a scalar input to a tensor of shape [1], since CoreML doesn't support scalar values for MLProgram. I adapted the HandleX86ArchUnsqueeze method, though alternatively we could replace the op with an identity operator or add some additional checks. I went with adapting HandleX86ArchUnsqueeze since it seemed like the fastest solution.
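For context, here is a minimal standalone sketch of the checked narrowing idea (illustrative names only, not the EP's actual helper): each int64 constant value is range-checked and copied into an int32 buffer, so out-of-range values fail loudly instead of silently wrapping.

```cpp
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

// Hypothetical helper: narrow int64 tensor data to int32, throwing if a value
// does not fit. The CoreML EP performs an equivalent checked narrowing when
// converting int64 constants for use in an MLProgram.
std::vector<int32_t> NarrowToInt32(const std::vector<int64_t>& src) {
  std::vector<int32_t> dst;
  dst.reserve(src.size());
  for (int64_t v : src) {
    if (v < std::numeric_limits<int32_t>::min() ||
        v > std::numeric_limits<int32_t>::max()) {
      throw std::out_of_range("int64 value does not fit in int32");
    }
    dst.push_back(static_cast<int32_t>(v));
  }
  return dst;
}
```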
1 parent 838b97e commit a470da1

12 files changed (+55, -68 lines)

onnxruntime/core/providers/coreml/builders/impl/argmax_op_builder.cc

Lines changed: 1 addition & 3 deletions

```diff
@@ -41,9 +41,7 @@ Status ArgMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
     AddOperationInput(*op, "axis", model_builder.AddScalarConstant(op->type(), "axis", axis));
     AddOperationInput(*op, "keep_dims", model_builder.AddScalarConstant(op->type(), "keep_dims", bool(keepdims)));
 
-    int32_t output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32;
-    // the output of ArgMax must be int32
-    AddOperationOutput(*op, *node.OutputDefs()[0], output_datatype);
+    AddOperationOutput(*op, *node.OutputDefs()[0]);
     model_builder.AddOperation(std::move(op));
   } else {
     auto* coreml_argmax = layer->mutable_argmax();
```

onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc

Lines changed: 3 additions & 2 deletions

```diff
@@ -115,8 +115,9 @@ bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx,
   }
 
 #if CAN_BUILD_COREML6_OR_LATER
-  // only MLProgram support FP16
-  if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
+  // only MLProgram support FP16 and INT64
+  if (input_params.create_mlprogram && (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 ||
+                                        input_type == ONNX_NAMESPACE::TensorProto_DataType_INT64)) {
     return true;
   }
 #endif
```

onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc

Lines changed: 18 additions & 3 deletions

```diff
@@ -54,6 +54,17 @@ bool CheckIfBothInputShapesMatch(const Node& node, const logging::Logger& logger
                     y_shape_proto->dim().begin(), y_shape_proto->dim().end(),
                     dim_eq);
 }
+
+bool ShouldUseFloorDiv(const Node& node, const logging::Logger& logger) {
+  // since ONNX spec requires both inputs to have the same type, we only need
+  // to check the first input type
+  const auto& input0 = *node.InputDefs()[0];
+  int32_t input_type0 = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
+  GetType(input0, input_type0, logger);
+
+  return input_type0 == ONNX_NAMESPACE::TensorProto_DataType_INT32 ||
+         input_type0 == ONNX_NAMESPACE::TensorProto_DataType_INT64;
+}
 }  // namespace
 
 static std::vector<int64_t> InferOutputShape(const std::vector<int64_t>& a, const std::vector<int64_t>& b) {
@@ -131,9 +142,13 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   } else if (op_type == "Sub") {
     coreml_op_type = "sub";
   } else if (op_type == "Div") {
-    // we support fp32/fp16 currently. when we add support for integers we need to check the type and use
-    // "floor_div" or "real_div" accordingly
-    coreml_op_type = "real_div";
+    // Use "floor_div" op for integer division (int32 or int64)
+    // use "real_div" for float division (fp16 or fp32)
+    if (ShouldUseFloorDiv(node, logger)) {
+      coreml_op_type = "floor_div";
+    } else {
+      coreml_op_type = "real_div";
+    }
   } else if (op_type == "Pow") {
     coreml_op_type = "pow";
   } else {
```
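The Div dispatch above is purely type-driven, and since the ONNX spec requires both Div inputs to share a type, checking the first input is sufficient. A minimal standalone sketch of the same decision, using a hypothetical enum in place of the ONNX type constants:

```cpp
#include <cstdint>
#include <string>

// Hypothetical stand-in for ONNX_NAMESPACE::TensorProto_DataType values.
enum class ElemType : int32_t { kFloat16, kFloat, kInt32, kInt64 };

// Integer inputs route to CoreML's "floor_div"; floating-point inputs
// route to "real_div", mirroring the new ShouldUseFloorDiv check.
std::string DivOpNameFor(ElemType t) {
  return (t == ElemType::kInt32 || t == ElemType::kInt64) ? "floor_div"
                                                          : "real_div";
}
```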

onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc

Lines changed: 5 additions & 8 deletions

```diff
@@ -261,9 +261,10 @@ MILSpec::DataType OnnxDataTypeToMILSpec(int onnx_type) {
     case ONNX_NAMESPACE::TensorProto_DataType_INT16:
       return MILSpec::DataType::INT16;
     case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-      return MILSpec::DataType::INT32;
     case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-      return MILSpec::DataType::INT64;
+      // CoreML only supports int32 for its operations and can only produce int32 values so
+      // we convert any int64 to int32.
+      return MILSpec::DataType::INT32;
 
     case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
       return MILSpec::DataType::UINT8;
@@ -367,19 +368,15 @@ void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::st
   SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(element_type), shape, /*convert_scalar*/ true);
 }
 
-void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
-                        std::optional<int32_t> override_element_type) {
+void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output) {
   auto& outputs = *op.mutable_outputs();
   auto& output_arg = *outputs.Add();
   output_arg.set_name(output.Name());
 
   MILSpec::ValueType& value = *output_arg.mutable_type();
   MILSpec::TensorType& tensor_type = *value.mutable_tensortype();
 
-  auto elem_type = override_element_type ? *override_element_type
-                                         : output.TypeAsProto()->tensor_type().elem_type();
-
-  SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(elem_type), output.Shape(), /*convert_scalar*/ true);
+  SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(output.TypeAsProto()->tensor_type().elem_type()), output.Shape(), /*convert_scalar*/ true);
 }
 
 void AddPadTypeAndPads(COREML_SPEC::MILSpec::Operation& op, ModelBuilder& model_builder, std::string_view op_type,
```
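The refactor above relies on a single invariant: int64 is collapsed to int32 at the one point where tensor type info is written, so callers no longer override output types case by case. A standalone sketch of that invariant, with hypothetical enums standing in for the ONNX and MIL type enums:

```cpp
#include <cstdint>

// Hypothetical stand-ins for ONNX_NAMESPACE::TensorProto_DataType and
// COREML_SPEC::MILSpec::DataType.
enum class OnnxType : int32_t { kFloat, kInt32, kInt64 };
enum class MilType : int32_t { kFloat32, kInt32 };

// Mirror of the new OnnxDataTypeToMILSpec behavior: CoreML ops only
// consume and produce int32, so int64 never reaches the MLProgram.
MilType ToMilType(OnnxType t) {
  switch (t) {
    case OnnxType::kFloat:
      return MilType::kFloat32;
    case OnnxType::kInt32:
    case OnnxType::kInt64:  // fall through: int64 is represented as int32
      return MilType::kInt32;
  }
  return MilType::kInt32;  // unreachable for well-formed input
}
```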

onnxruntime/core/providers/coreml/builders/impl/builder_utils.h

Lines changed: 2 additions & 6 deletions

```diff
@@ -98,6 +98,7 @@ COREML_SPEC::MILSpec::DataType DataTypeToMILSpec() {
 
 // The TensorProto.data_type field is an int, but must be a valid TensorProto_DataType value.
 // Use int for the arg so the caller can pass TensorProto.data_type() value and do the cast to enum internally
+// This method also automatically converts int64 to int32 since only int32 is supported for CoreML operations.
 COREML_SPEC::MILSpec::DataType OnnxDataTypeToMILSpec(int onnx_type);
 
 /// <summary>
@@ -156,12 +157,7 @@ void AddIntermediateOperationOutput(COREML_SPEC::MILSpec::Operation& op, std::st
 /// </summary>
 /// <param name="op">Operation to update.</param>
 /// <param name="output">NodeArg with details of output to add.</param>
-/// <param name="override_element_type">
-/// Override the element type. Only set to handle cases where we believe the data at runtime will be int32 but
-/// the original ONNX node has type int64.
-/// </param>
-void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output,
-                        std::optional<int32_t> override_element_type = std::nullopt);
+void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output);
 
 /// <summary>
 /// Add pad_type and pad values.
```

onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc

Lines changed: 1 addition & 2 deletions

```diff
@@ -44,7 +44,6 @@ Status CastOpBuilder::AddToModelBuilderImpl([[maybe_unused]] ModelBuilder& model
     // CoreML operators can only produce int32 and not int64 values.
     // Due to that there should be no actual int64 values inside the CoreML model and we can infer any
     // ONNX_NAMESPACE::TensorProto::INT64 values to be int32.
-    cast_to_type = ONNX_NAMESPACE::TensorProto::INT32;
   } else if (cast_to_type == ONNX_NAMESPACE::TensorProto::FLOAT) {
     to_dtype = "fp32";
   } else if (cast_to_type == ONNX_NAMESPACE::TensorProto::FLOAT16) {
@@ -69,7 +68,7 @@ Status CastOpBuilder::AddToModelBuilderImpl([[maybe_unused]] ModelBuilder& model
   if (op_type == "cast") {
     AddOperationInput(*op, "dtype", model_builder.AddScalarConstant(op->type(), "dtype", std::string(to_dtype)));
   }
-  AddOperationOutput(*op, *node.OutputDefs()[0], cast_to_type);
+  AddOperationOutput(*op, *node.OutputDefs()[0]);
   model_builder.AddOperation(std::move(op));
 }
 
```

onnxruntime/core/providers/coreml/builders/impl/gather_op_builder.cc

Lines changed: 1 addition & 10 deletions

```diff
@@ -35,23 +35,14 @@ Status GatherOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     using CoreML::Specification::MILSpec::Operation;
     std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "gather");
 
-    std::optional<int32_t> output_datatype;
-
-    int32_t input_type;
-    ORT_RETURN_IF_NOT(GetType(*node.InputDefs()[0], input_type, logger), "Failed to get input type");
-
-    if (input_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
-      output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32;
-    }
-
     const auto axis = GetAxisAttribute(node);
     // coreml docs claims validate_indices is optional but in practice it is required
     const auto validate_indices = false;
     AddOperationInput(*op, "x", node.InputDefs()[0]->Name());        // data
     AddOperationInput(*op, "indices", node.InputDefs()[1]->Name());  // indices
     AddOperationInput(*op, "axis", model_builder.AddScalarConstant(op->type(), "axis", axis));  // axis attr
     AddOperationInput(*op, "validate_indices", model_builder.AddScalarConstant(op->type(), "validate_indices", validate_indices));
-    AddOperationOutput(*op, *node.OutputDefs()[0], output_datatype);  // output
+    AddOperationOutput(*op, *node.OutputDefs()[0]);  // output
     model_builder.AddOperation(std::move(op));
   } else {
     auto layer = model_builder.CreateNNLayer(node);
```

onnxruntime/core/providers/coreml/builders/impl/pad_op_builder.cc

Lines changed: 8 additions & 0 deletions

```diff
@@ -150,6 +150,14 @@ bool PadOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParam
       LOGS(logger, VERBOSE) << "constant_value must be a constant initializer.";
       return false;
     }
+
+    int32_t constant_value_type;
+    GetType(*input_defs[2], constant_value_type, logger);
+
+    if (constant_value_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+      LOGS(logger, VERBOSE) << "Only float constant_value is supported, got type: " << constant_value_type;
+      return false;
+    }
   }
 
   {
```

onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc

Lines changed: 4 additions & 3 deletions

```diff
@@ -56,10 +56,10 @@ Status ShapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
       std::vector<int64_t> sizes = {size};
       AddOperationInput(*slice_op, "begin", model_builder.AddConstant(slice_op->type(), "begin", starts));
       AddOperationInput(*slice_op, "size", model_builder.AddConstant(slice_op->type(), "size", sizes));
-      AddOperationOutput(*slice_op, *node.OutputDefs()[0], output_datatype);
+      AddOperationOutput(*slice_op, *node.OutputDefs()[0]);
       model_builder.AddOperation(std::move(slice_op));
     } else {
-      AddOperationOutput(*op, *node.OutputDefs()[0], output_datatype);
+      AddOperationOutput(*op, *node.OutputDefs()[0]);
       model_builder.AddOperation(std::move(op));
     }
   } else {
@@ -127,7 +127,8 @@ bool ShapeOpBuilder::HasSupportedInputsImpl(const Node& node,
   if (input_params.create_mlprogram) {
     if ((input_type == ONNX_NAMESPACE::TensorProto_DataType_INT32 ||
          input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
-         input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16)) {
+         input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 ||
+         input_type == ONNX_NAMESPACE::TensorProto_DataType_INT64)) {
       return true;
     } else {
       LOGS(logger, VERBOSE) << "[" << node.OpType()
```

onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc

Lines changed: 1 addition & 16 deletions

```diff
@@ -143,21 +143,6 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     }
   }
 
-  // Int32, float and float16 are supported by CoreML slice_by_index.
-  // We convert any int64 model input to int32 when running the CoreML model for the partition.
-  // Any other integer data created at runtime is the output from CoreML operations, and should int32 not int64.
-  // Based on that, we assume that the actual input when running will be int32, so we override the output data
-  // type to reflect this.
-  // If we were to leave it as TensorProto_DataType_INT64 the CoreML model would be invalid.
-  std::optional<int32_t> output_datatype;
-
-  int32_t input_type;
-  ORT_RETURN_IF_NOT(GetType(*node.InputDefs()[0], input_type, logger), "Failed to get input type");
-
-  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
-    output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32;
-  }
-
   auto op = model_builder.CreateOperation(node, "slice_by_index");
 
   auto begin = model_builder.AddConstant(op->type(), "begin", AsSpan(compute_metadata.starts_));
@@ -173,7 +158,7 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   AddOperationInput(*op, "begin_mask", begin_mask);
   AddOperationInput(*op, "end_mask", end_mask);
 
-  AddOperationOutput(*op, *output_defs[0], output_datatype);
+  AddOperationOutput(*op, *output_defs[0]);
 
   model_builder.AddOperation(std::move(op));
```