[WebNN EP] Enable IO Bindings with MLTensor (microsoft#21301)
### Description
Enables using MLTensor to pass data between models.


### Motivation and Context
Using MLTensor instead of ArrayBuffers reduces the number of copies
between the CPU and devices, as well as between the renderer and the
GPU process in Chromium.
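
A rough usage sketch (not part of this PR's diff): with IO binding, one session's MLTensor output can be fed directly into another session. The `preferredOutputLocation` option and the model/input/output names below are illustrative assumptions, modeled on the WebGPU EP's IO-binding API.

```ts
// Hypothetical sketch of chaining two models via MLTensor IO binding.
// Option, file, and tensor names are assumptions, not part of this PR.
import * as ort from 'onnxruntime-web';

async function runPipeline(input: ort.Tensor): Promise<unknown> {
  const options: ort.InferenceSession.SessionOptions = {
    executionProviders: ['webnn'],
    // Assumption: keep outputs on the device as MLTensors instead of copying them to the CPU.
    preferredOutputLocation: 'ml-tensor',
  };
  const encoder = await ort.InferenceSession.create('encoder.onnx', options);
  const decoder = await ort.InferenceSession.create('decoder.onnx', options);

  // The encoder output stays on the device as an MLTensor ('hidden' is an assumed output name)...
  const { hidden } = await encoder.run({ input });
  // ...and is bound directly as a decoder input, avoiding a renderer/GPU-process round trip.
  const { logits } = await decoder.run({ hidden });

  // Only the final result is downloaded to the CPU.
  return logits.getData();
}
```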
egalli authored Sep 28, 2024
1 parent ebda23b commit 52a8c1c
Showing 33 changed files with 1,287 additions and 73 deletions.
1 change: 1 addition & 0 deletions include/onnxruntime/core/framework/allocator.h
@@ -53,6 +53,7 @@ constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
constexpr const char* OpenVINO_RT = "OpenVINO_RT";
constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU";
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
constexpr const char* WEBNN_TENSOR = "WebNN_Tensor";

constexpr size_t kAllocAlignment = 256;

12 changes: 12 additions & 0 deletions js/common/lib/tensor-factory-impl.ts
@@ -11,6 +11,7 @@ import {
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLTensorOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
} from './tensor-factory.js';
@@ -310,6 +311,17 @@ export const tensorFromGpuBuffer = <T extends TensorInterface.GpuBufferDataTypes
return new Tensor({ location: 'gpu-buffer', type: dataType ?? 'float32', gpuBuffer, dims, download, dispose });
};

/**
* implementation of Tensor.fromMLTensor().
*/
export const tensorFromMLTensor = <T extends TensorInterface.MLTensorDataTypes>(
mlTensor: TensorInterface.MLTensorType,
options: TensorFromMLTensorOptions<T>,
): Tensor => {
const { dataType, dims, download, dispose } = options;
return new Tensor({ location: 'ml-tensor', type: dataType ?? 'float32', mlTensor, dims, download, dispose });
};

/**
* implementation of Tensor.fromPinnedBuffer().
*/
46 changes: 46 additions & 0 deletions js/common/lib/tensor-factory.ts
@@ -86,6 +86,20 @@ export interface GpuBufferConstructorParameters<T extends Tensor.GpuBufferDataTy
readonly gpuBuffer: Tensor.GpuBufferType;
}

export interface MLTensorConstructorParameters<T extends Tensor.MLTensorDataTypes = Tensor.MLTensorDataTypes>
extends CommonConstructorParameters<T>,
GpuResourceConstructorParameters<T> {
/**
* Specify the location of the data to be 'ml-tensor'.
*/
readonly location: 'ml-tensor';

/**
* Specify the WebNN MLTensor that holds the tensor data.
*/
readonly mlTensor: Tensor.MLTensorType;
}

// #endregion

// the following region contains type definitions of each individual options.
@@ -219,6 +233,15 @@ export interface TensorFromGpuBufferOptions<T extends Tensor.GpuBufferDataTypes>
dataType?: T;
}

export interface TensorFromMLTensorOptions<T extends Tensor.MLTensorDataTypes>
extends Pick<Tensor, 'dims'>,
GpuResourceConstructorParameters<T> {
/**
* Describes the data type of the tensor.
*/
dataType?: T;
}

// #endregion

/**
@@ -336,6 +359,29 @@ export interface TensorFactory {
options: TensorFromGpuBufferOptions<T>,
): TypedTensor<T>;

/**
* create a tensor from a WebNN MLTensor
*
* @param tensor - the MLTensor object to create tensor from
* @param options - An optional object representing options for creating tensor from a WebNN MLTensor.
*
* The options include following properties:
* - `dataType`: the data type of the tensor. If omitted, assume 'float32'.
* - `dims`: the dimension of the tensor. Required.
* - `download`: an optional function to download the tensor data from the MLTensor to the CPU. If omitted, the
* MLTensor data cannot be downloaded. Usually, this is provided by the WebNN backend for the inference outputs.
* Users don't need to provide this function.
* - `dispose`: an optional function to dispose the tensor data on the WebNN MLTensor. If omitted, the MLTensor will
* not be disposed. Usually, this is provided by the WebNN backend for the inference outputs. Users don't need to
* provide this function.
*
* @returns a tensor object
*/
fromMLTensor<T extends Tensor.MLTensorDataTypes>(
tensor: Tensor.MLTensorType,
options: TensorFromMLTensorOptions<T>,
): TypedTensor<T>;

/**
* create a tensor from a pre-allocated buffer. The buffer will be used as a pinned buffer.
*
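
For reference, a minimal sketch of calling the new `Tensor.fromMLTensor()` factory described above, using the documented options. `myMLTensor` and the callbacks are placeholders; how the MLTensor itself is created is left out, since the WebNN MLTensor API is still in flux.

```ts
import { Tensor } from 'onnxruntime-web';

declare const myMLTensor: unknown; // placeholder: an MLTensor obtained from the WebNN API

const tensor = Tensor.fromMLTensor(myMLTensor, {
  dataType: 'float32',    // defaults to 'float32' when omitted
  dims: [1, 3, 224, 224], // required
  // Optional callbacks; normally supplied by the WebNN backend for inference outputs.
  download: async () => new Float32Array(1 * 3 * 224 * 224),
  dispose: () => {
    /* release the underlying MLTensor here */
  },
});

console.log(tensor.location); // 'ml-tensor'
```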
59 changes: 57 additions & 2 deletions js/common/lib/tensor-impl.ts
@@ -6,16 +6,19 @@ import { TensorToDataUrlOptions, TensorToImageDataOptions } from './tensor-conve
import {
tensorFromGpuBuffer,
tensorFromImage,
tensorFromMLTensor,
tensorFromPinnedBuffer,
tensorFromTexture,
} from './tensor-factory-impl.js';
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLTensorConstructorParameters,
TensorFromGpuBufferOptions,
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLTensorOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
TextureConstructorParameters,
@@ -37,6 +40,7 @@ type TensorDataType = TensorInterface.DataType;
type TensorDataLocation = TensorInterface.DataLocation;
type TensorTextureType = TensorInterface.TextureType;
type TensorGpuBufferType = TensorInterface.GpuBufferType;
type TensorMLTensorType = TensorInterface.MLTensorType;

/**
* the implementation of Tensor interface.
@@ -86,6 +90,15 @@ export class Tensor implements TensorInterface {
*/
constructor(params: GpuBufferConstructorParameters);

/**
* Construct a new tensor object from the WebNN MLTensor with the given type and dims.
*
* Tensor's location will be set to 'ml-tensor'.
*
* @param params - Specify the parameters to construct the tensor.
*/
constructor(params: MLTensorConstructorParameters);

/**
* implementation.
*/
@@ -98,7 +111,8 @@ export class Tensor implements TensorInterface {
| readonly boolean[]
| CpuPinnedConstructorParameters
| TextureConstructorParameters
| GpuBufferConstructorParameters,
| GpuBufferConstructorParameters
| MLTensorConstructorParameters,
arg1?: TensorDataType | Uint8ClampedArray | readonly number[] | readonly string[] | readonly boolean[],
arg2?: readonly number[],
) {
@@ -155,6 +169,25 @@ export class Tensor implements TensorInterface {
this.disposer = arg0.dispose;
break;
}
case 'ml-tensor': {
if (
type !== 'float32' &&
type !== 'float16' &&
type !== 'int32' &&
type !== 'int64' &&
type !== 'uint32' &&
type !== 'uint64' &&
type !== 'int8' &&
type !== 'uint8' &&
type !== 'bool'
) {
throw new TypeError(`unsupported type "${type}" to create tensor from MLTensor`);
}
this.mlTensorData = arg0.mlTensor;
this.downloader = arg0.download;
this.disposer = arg0.dispose;
break;
}
default:
throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`);
}
@@ -325,6 +358,13 @@ export class Tensor implements TensorInterface {
return tensorFromGpuBuffer(gpuBuffer, options);
}

static fromMLTensor<T extends TensorInterface.MLTensorDataTypes>(
mlTensor: TensorMLTensorType,
options: TensorFromMLTensorOptions<T>,
): TensorInterface {
return tensorFromMLTensor(mlTensor, options);
}

static fromPinnedBuffer<T extends TensorInterface.CpuPinnedDataTypes>(
type: T,
buffer: TensorInterface.DataTypeMap[T],
@@ -373,6 +413,11 @@ export class Tensor implements TensorInterface {
*/
private gpuBufferData?: TensorGpuBufferType;

/**
* stores the underlying WebNN MLTensor when location is 'ml-tensor'. otherwise empty.
*/
private mlTensorData?: TensorMLTensorType;

/**
* stores an optional downloader function to download data from GPU to CPU.
*/
@@ -420,6 +465,14 @@ export class Tensor implements TensorInterface {
}
return this.gpuBufferData;
}

get mlTensor(): TensorMLTensorType {
this.ensureValid();
if (!this.mlTensorData) {
throw new Error('The data is not stored as a WebNN MLTensor.');
}
return this.mlTensorData;
}
// #endregion

// #region methods
@@ -431,7 +484,8 @@ export class Tensor implements TensorInterface {
case 'cpu-pinned':
return this.data;
case 'texture':
case 'gpu-buffer': {
case 'gpu-buffer':
case 'ml-tensor': {
if (!this.downloader) {
throw new Error('The current tensor is not created with a specified data downloader.');
}
@@ -472,6 +526,7 @@ export class Tensor implements TensorInterface {
this.cpuData = undefined;
this.gpuTextureData = undefined;
this.gpuBufferData = undefined;
this.mlTensorData = undefined;
this.downloader = undefined;
this.isDownloading = undefined;

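
A short sketch of the read-back path added in tensor-impl.ts above: for a tensor whose location is 'ml-tensor', `getData()` defers to the `download` callback supplied at construction (and throws if none was provided), while `dispose()` clears the stored MLTensor and invokes the disposer. `output` below is a placeholder for an inference output with location 'ml-tensor'.

```ts
import type { Tensor } from 'onnxruntime-web';

async function readBack(output: Tensor): Promise<void> {
  if (output.location === 'ml-tensor') {
    // Runs the downloader registered for this tensor; throws if the tensor
    // was created without a data downloader.
    const cpuData = await output.getData();
    console.log(cpuData);
  }
  // Clears the stored MLTensor reference and calls the disposer, if any.
  output.dispose();
}
```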
8 changes: 8 additions & 0 deletions js/common/lib/tensor-utils-impl.ts
@@ -4,6 +4,7 @@
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLTensorConstructorParameters,
TextureConstructorParameters,
} from './tensor-factory.js';
import { Tensor } from './tensor-impl.js';
@@ -56,6 +57,13 @@ export const tensorReshape = (tensor: Tensor, dims: readonly number[]): Tensor =
type: tensor.type as GpuBufferConstructorParameters['type'],
dims,
});
case 'ml-tensor':
return new Tensor({
location: 'ml-tensor',
mlTensor: tensor.mlTensor,
type: tensor.type as MLTensorConstructorParameters['type'],
dims,
});
default:
throw new Error(`tensorReshape: tensor location ${tensor.location} is not supported`);
}
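
A sketch of the reshape behavior added above: reshaping an 'ml-tensor' tensor produces a new Tensor that wraps the same underlying MLTensor with different dims, so no data is copied. `t` is a placeholder for an existing 'ml-tensor' tensor with dims [2, 3].

```ts
import type { Tensor } from 'onnxruntime-web';

declare const t: Tensor; // placeholder: location === 'ml-tensor', dims [2, 3]

const reshaped = t.reshape([3, 2]);
console.log(reshaped.location);                // 'ml-tensor'
console.log(reshaped.mlTensor === t.mlTensor); // true: same MLTensor, no copy
```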
30 changes: 29 additions & 1 deletion js/common/lib/tensor.ts
@@ -42,6 +42,13 @@ interface TypedTensorBase<T extends Tensor.Type> {
*/
readonly gpuBuffer: Tensor.GpuBufferType;

/**
* Get the WebNN MLTensor that holds the tensor data.
*
* If the data is not in a WebNN MLTensor, throw error.
*/
readonly mlTensor: Tensor.MLTensorType;

/**
* Get the buffer data of the tensor.
*
@@ -136,15 +143,36 @@ export declare namespace Tensor {
*/
export type GpuBufferType = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };

/**
* type alias for WebNN MLTensor
*
* The specification for WebNN's MLTensor is currently in flux.
*/
export type MLTensorType = unknown;

/**
* supported data types for constructing a tensor from a WebGPU buffer
*/
export type GpuBufferDataTypes = 'float32' | 'float16' | 'int32' | 'int64' | 'uint32' | 'uint8' | 'bool';

/**
* supported data types for constructing a tensor from a WebNN MLTensor
*/
export type MLTensorDataTypes =
| 'float32'
| 'float16'
| 'int8'
| 'uint8'
| 'int32'
| 'uint32'
| 'int64'
| 'uint64'
| 'bool';

/**
* represent where the tensor data is stored
*/
export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer';
export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer' | 'ml-tensor';

/**
* represent the data type of a tensor