[Web] [E:onnxruntime:Default, webgpu_context.cc:120 operator()] WebGPU device error(2): Out of memory #25195

Description

Describe the issue

Using the model referenced below (see the repro steps) with the WebGPU EP, I trigger an out-of-memory error on both Firefox Nightly and Chrome. This is the same model and test as in #25029, except that here I'm using the WebGPU EP, since JSEP produces the wrong output.

To reproduce

  1. Host the following page; the required files are this image and this model.
<html>

<head>
  <script src="onnx/ort.webgpu.min.js"></script>
</head>

<body>
    <h1>Example</h1>
    <div>
        <button id="webinference" onclick="runBrowser()">Run Browser Inference</button>
        <img id="image" src="turtle2.png" alt="Image preview..."  hidden>
        <canvas id="mycanvas"></canvas>
    </div>
    <div>
      <img id="resized" />
    </div>

</body>
<script>
  // This polyfill is needed for Firefox Nightly until https://bugzilla.mozilla.org/show_bug.cgi?id=1973770
  // is merged.
  (async () => {
    if (!navigator.gpu) {
      console.warn("WebGPU is not supported in this browser.");
      return;
    }

    const originalRequestAdapter = navigator.gpu.requestAdapter.bind(navigator.gpu);

    navigator.gpu.requestAdapter = async function(...args) {
      const adapter = await originalRequestAdapter(...args);
      if (!adapter) {
        console.warn("Failed to get GPU adapter.");
        return null;
      }

      const adapterInfo = adapter.info || {};
      const {
        vendor = "unknown",
        architecture = "unknown",
        device = "unknown",
        description = "No description available",
        subgroupMaxSize = 1,
        subgroupMinSize = 1,
      } = adapterInfo;

      const stubAdapterInfo = {
        vendor,
        architecture,
        device,
        description,
        subgroupMaxSize,
        subgroupMinSize,
      };

      const originalRequestDevice = adapter.requestDevice.bind(adapter);
      adapter.requestDevice = async function(...deviceArgs) {
        const device = await originalRequestDevice(...deviceArgs);

        if (!('adapterInfo' in device)) {
          Object.defineProperty(device, "adapterInfo", {
            value: stubAdapterInfo,
            writable: false,
            enumerable: true,
            configurable: true
          });
        }

        return device;
      };

      return adapter;
    };
  })();

    async function startInferenceSession(modelPath) {
      const session = await ort
        .InferenceSession
        .create(
          modelPath,
          {
            executionProviders: [
              // TODO: switch to 'wasm' for debugging; it will take tens of minutes to run!
              //'wasm',
              'webgpu'
            ],
            graphOptimizationLevel: 'all',
          }
        );
      console.log('Inference session created');
      return session;
    }


    async function runInference(session, input, inputSize) {
      const feeds = {};
      feeds[session.inputNames[0]] = input;
      if (session.inputNames[1]) {
        feeds[session.inputNames[1]] = inputSize;
      }
      const outputData = await session.run(feeds);
      return outputData;
    }
    
  
    async function testInference(session, imageData) {
      console.log('Testing inference on', imageData);

      const tensor = await ort.Tensor.fromImage(imageData)
      console.log('Tensor:', tensor)

      const inputDims = new ort.Tensor(
        "int64",
        new BigInt64Array([BigInt(640), BigInt(640)]),
        [1, 2],
      )

      const outData = await runInference(session, tensor, inputDims)

      console.log('Output data:', outData)
      console.log('Best score: ', Math.max(...outData.scores.data))
    }

    async function runBrowser() {
      let img = document.getElementById('image');
      let canvas = document.getElementById("mycanvas");
      canvas.width = img.width;
      canvas.height = img.height;
      const ctx = canvas.getContext("2d");
      ctx.drawImage(img, 0, 0, img.width, img.height);
      const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height)

      let session = await startInferenceSession(
        "./test-model.onnx"
      );
      await testInference(session, imageData);
    }

    async function main() {
        let img = document.getElementById('image');
        let canvas = document.getElementById("mycanvas");
        canvas.width = img.width;
        canvas.height = img.height;
        const ctx = canvas.getContext("2d");
        ctx.drawImage(img, 0, 0, img.width, img.height);
    }
    main();

</script>
</html>
  2. Load the page, click the "Run Browser Inference" button, and inspect the browser console.
  3. The following log will be shown:

**Firefox Nightly**

2025-06-27 10:31:06.537998 [W:onnxruntime:, session_state.cc:1276 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf. ort-wasm-simd-threaded.asyncify.mjs:7:90109
2025-06-27 10:31:06.538999 [W:onnxruntime:, session_state.cc:1278 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments. ort-wasm-simd-threaded.asyncify.mjs:7:90109
Inference session created minimal-to-share.html:91:15
Testing inference on
ImageData { width: 3840, height: 2160, data: Uint8ClampedArray(33177600) }
minimal-to-share.html:114:15
Tensor:
Object { cpuData: Float32Array(24883200), dataLocation: "cpu", type: "float32", dims: (4) […], size: 24883200 }
minimal-to-share.html:117:15
Uncaptured WebGPU error: Out of memory
2025-06-27 10:31:07.955999 [E:onnxruntime:Default, webgpu_context.cc:120 operator()] WebGPU device error(2): Out of memory ort-wasm-simd-threaded.asyncify.mjs:7:90109
Uncaught (in promise) Error: failed to call OrtRun(). ERROR_CODE: 1, ERROR_MESSAGE: WebGPU validation failed. Buffer with '' label is invalid
S wasm-utils.ts:64
Re wasm-core-impl.ts:855
Fr proxy-wrapper.ts:253
run session-handler-inference.ts:137
run inference-session-impl.ts:112
runInference minimal-to-share.html:108
testInference minimal-to-share.html:125
runBrowser minimal-to-share.html:150
onclick minimal-to-share.html:1
ort.webgpu.min.js:6:26149

**Chrome**

wasm-utils.ts:64 Uncaught (in promise) Error: failed to call OrtRun(). ERROR_CODE: 1, ERROR_MESSAGE: WebGPU validation failed. Buffer size (3185049600) exceeds the max buffer size limit (1073741824).

  • While calling [Device].CreateBuffer([BufferDescriptor]).

    at S (wasm-utils.ts:64:11)
    at Re (wasm-core-impl.ts:855:7)
    at async Ze.run (session-handler-inference.ts:137:21)
    at async t.run (inference-session-impl.ts:112:21)
    at async runInference (minimal-to-share.html:54:26)
    at async testInference (minimal-to-share.html:71:23)
    at async runBrowser (minimal-to-share.html:95:7)
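
To put the Chrome number in context (back-of-the-envelope only, using figures from the logs above; the snippet can be pasted into the browser console): the input tensor is 1×3×2160×3840 float32, about 99.5 MB, while the buffer ORT fails to create is 3,185,049,600 bytes, exactly 32× the input and presumably an intermediate activation, i.e. roughly 2.97 GiB against the 1 GiB (1,073,741,824-byte) limit the device was created with.

// Numbers taken verbatim from the logs above; nothing here is measured.
const inputBytes = 1 * 3 * 2160 * 3840 * 4;   // 99,532,800 bytes of float32 input
const failingBytes = 3185049600;              // size from the Chrome CreateBuffer error
console.log(failingBytes / inputBytes);       // 32, so a buffer 32x the input tensor
console.log(failingBytes / 2 ** 30);          // ~2.97 GiB vs. the 1 GiB device limit

// The adapter itself may advertise larger limits (standard WebGPU API):
const adapter = await navigator.gpu.requestAdapter();
console.log(adapter.limits.maxBufferSize, adapter.limits.maxStorageBufferBindingSize);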


Urgency

My research project is unfortunately blocked on this, as the JSEP implementation can't be used due to #25029.
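
For reference, a possible stopgap along the same lines as the adapterInfo polyfill in the repro page would be to intercept requestDevice and request the adapter's maximum buffer limits instead of the defaults. This is only a sketch: it assumes the adapter actually advertises limits above 1 GiB, and whether the WebGPU EP makes use of raised limits is unverified.

// Sketch only: bump the device's buffer limits to whatever the adapter
// supports. requiredLimits is standard WebGPU; whether ORT's WebGPU EP
// benefits from the larger limits has not been verified.
const origRequestAdapter = navigator.gpu.requestAdapter.bind(navigator.gpu);
navigator.gpu.requestAdapter = async (...args) => {
  const adapter = await origRequestAdapter(...args);
  if (!adapter) return null;
  const origRequestDevice = adapter.requestDevice.bind(adapter);
  adapter.requestDevice = (descriptor = {}) =>
    origRequestDevice({
      ...descriptor,
      requiredLimits: {
        ...(descriptor.requiredLimits || {}),
        maxBufferSize: adapter.limits.maxBufferSize,
        maxStorageBufferBindingSize: adapter.limits.maxStorageBufferBindingSize,
      },
    });
  return adapter;
};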

ONNX Runtime Installation

Built from Source

ONNX Runtime Version or Commit ID

7a6cef6

Execution Provider

'webgpu' (WebGPU)

Labels

ep:WebGPU (ort-web webgpu provider), platform:web (issues related to ONNX Runtime web; typically submitted using template)
