Add WebGPU sample application and update other web demos.
This is still rough around the edges, but it demonstrates usage and will be used for development
* Build script compiles programs with `--iree-hal-target-backends=webgpu`
* Build script configures CMake with `-DIREE_EXTERNAL_HAL_DRIVERS=webgpu`
* Code shows how to interface between runtime C code, browser JavaScript APIs, and Emscripten
diff --git a/build_tools/cmake/iree_copts.cmake b/build_tools/cmake/iree_copts.cmake
index 61f92c2..0687050 100644
--- a/build_tools/cmake/iree_copts.cmake
+++ b/build_tools/cmake/iree_copts.cmake
@@ -378,13 +378,17 @@
"-natvis:${IREE_ROOT_DIR}/runtime/iree.natvis"
)
-# Our Emscripten library code uses dynCall, which needs these link flags.
-# TODO(scotttodd): Find a way to refactor this, this is nasty to always set :(
-if(EMSCRIPTEN)
+if(EMSCRIPTEN AND IREE_EXTERNAL_WEBGPU_HAL_DRIVER_FOUND)
iree_select_compiler_opts(IREE_DEFAULT_LINKOPTS
ALL
- "-sDYNCALLS=1"
- "-sEXPORTED_RUNTIME_METHODS=['dynCall']"
+ # TODO(scotttodd): Only add when using WebGPU in a library/binary?
+ "-sUSE_WEBGPU"
+ # Hack: Used to create sync versions of requestAdapter and requestDevice
+ # TODO(scotttodd): Only set for test binaries, avoid sync code in apps
+ # this doesn't _break_ apps that don't use the sync functions, but it
+ # does bloat their binary size (and each Emscripten flag comes with
+ # some risk of breaking compatibility with other features)
+ "-sASYNCIFY"
)
endif()
diff --git a/experimental/web/sample_dynamic/CMakeLists.txt b/experimental/web/sample_dynamic/CMakeLists.txt
index cc4acd0..8577022 100644
--- a/experimental/web/sample_dynamic/CMakeLists.txt
+++ b/experimental/web/sample_dynamic/CMakeLists.txt
@@ -38,7 +38,7 @@
target_link_options(${_NAME} PRIVATE
# https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html#interacting-with-code-ccall-cwrap
- "-sEXPORTED_FUNCTIONS=['_setup_sample', '_cleanup_sample', '_load_program', '_inspect_program', '_unload_program', '_call_function', '_malloc']"
+ # '_free' is exported because iree_worker.js calls Module._free() on the
+ # string pointer returned by call_function.
+ "-sEXPORTED_FUNCTIONS=['_setup_sample', '_cleanup_sample', '_load_program', '_inspect_program', '_unload_program', '_call_function', '_malloc', '_free']"
- "-sEXPORTED_RUNTIME_METHODS=['ccall','cwrap']"
+ "-sEXPORTED_RUNTIME_METHODS=['ccall','cwrap','UTF8ToString']"
#
"-sASSERTIONS=1"
#
diff --git a/experimental/web/sample_dynamic/build_sample.sh b/experimental/web/sample_dynamic/build_sample.sh
index f826200..0b0deb7 100755
--- a/experimental/web/sample_dynamic/build_sample.sh
+++ b/experimental/web/sample_dynamic/build_sample.sh
@@ -91,6 +91,7 @@
-DIREE_BUILD_EXPERIMENTAL_WEB_SAMPLES=ON \
-DIREE_HAL_DRIVER_DEFAULTS=OFF \
-DIREE_HAL_DRIVER_LOCAL_SYNC=ON \
+ -UIREE_EXTERNAL_HAL_DRIVERS \
-DIREE_BUILD_COMPILER=OFF \
-DIREE_BUILD_TESTS=OFF \
.
diff --git a/experimental/web/sample_dynamic/iree_worker.js b/experimental/web/sample_dynamic/iree_worker.js
index 3a3c862..bb08616 100644
--- a/experimental/web/sample_dynamic/iree_worker.js
+++ b/experimental/web/sample_dynamic/iree_worker.js
@@ -32,7 +32,7 @@
wasmInspectProgramFn = Module.cwrap('inspect_program', null, ['number']);
wasmUnloadProgramFn = Module.cwrap('unload_program', null, ['number']);
wasmCallFunctionFn = Module.cwrap(
- 'call_function', 'string', ['number', 'string', 'string', 'number']);
+ 'call_function', 'number', ['number', 'string', 'string', 'number']);
sampleState = wasmSetupSampleFn();
@@ -120,8 +120,11 @@
return;
}
- const returnValue =
+ // Receive as a pointer, convert, then free. This avoids a memory leak, see
+ // https://github.com/emscripten-core/emscripten/issues/6484
+ const returnValuePtr =
wasmCallFunctionFn(programState, functionName, inputsJoined, iterations);
+ const returnValue = Module.UTF8ToString(returnValuePtr);
if (returnValue === '') {
postMessage({
@@ -130,16 +133,12 @@
'error': 'Wasm module error, check console for details',
});
} else {
+ Module._free(returnValuePtr);
postMessage({
'messageType': 'callResult',
'id': id,
'payload': JSON.parse(returnValue),
});
- // TODO(scotttodd): free char* buffer? Or does Emscripten handle that?
- // Could refactor to
- // 1) return void*
- // 2) convert to String manually using UTF8ToString(pointer)
- // 3) Module._free(pointer)
}
}
diff --git a/experimental/web/sample_dynamic/main.c b/experimental/web/sample_dynamic/main.c
index 0daf821..4f7d2d5 100644
--- a/experimental/web/sample_dynamic/main.c
+++ b/experimental/web/sample_dynamic/main.c
@@ -6,6 +6,7 @@
#include <stdint.h>
#include <stdio.h>
+#include <string.h>
#include "iree/base/api.h"
#include "iree/hal/api.h"
@@ -436,5 +437,7 @@
}
// Note: this leaks the buffer. It's up to the caller to free it after use.
- return iree_string_builder_buffer(&outputs_builder);
+ char* outputs = strdup(iree_string_builder_buffer(&outputs_builder));
+ iree_string_builder_deinitialize(&outputs_builder);
+ return outputs;
}
diff --git a/experimental/web/sample_webgpu/CMakeLists.txt b/experimental/web/sample_webgpu/CMakeLists.txt
new file mode 100644
index 0000000..d26c00a
--- /dev/null
+++ b/experimental/web/sample_webgpu/CMakeLists.txt
@@ -0,0 +1,49 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+if(NOT EMSCRIPTEN)
+ return()
+endif()
+
+set(_NAME "iree_experimental_web_sample_webgpu")
+add_executable(${_NAME} "")
+target_sources(${_NAME}
+ PRIVATE
+ main.c
+ device_webgpu.c
+)
+set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "web-sample-webgpu")
+
+target_compile_options(${_NAME} PRIVATE ${IREE_DEFAULT_COPTS})
+
+# Note: we have to be very careful about dependencies here.
+#
+# The general purpose libraries link in multiple executable loaders and HAL
+# drivers/devices, which include code not compatible with Emscripten.
+# Link with an explicit PRIVATE scope: this is an executable, so nothing
+# consumes its usage requirements, and the keyword-less signature has legacy
+# semantics.
+target_link_libraries(${_NAME} PRIVATE
+  iree_runtime_runtime
+  iree_experimental_webgpu_webgpu
+  iree_experimental_webgpu_platform_emscripten_emscripten
+)
+
+target_link_options(${_NAME} PRIVATE
+ # https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html#interacting-with-code-ccall-cwrap
+ "-sEXPORTED_FUNCTIONS=['_setup_sample', '_cleanup_sample', '_load_program', '_inspect_program', '_unload_program', '_call_function', '_malloc', '_free']"
+ "-sEXPORTED_RUNTIME_METHODS=['ccall','cwrap','UTF8ToString']"
+ #
+ "-sASSERTIONS=1"
+ #
+ # Programs loaded dynamically can require additional memory, so allow growth.
+ "-sALLOW_MEMORY_GROWTH"
+ #
+ # For https://emscripten.org/docs/debugging/Sanitizers.html#address-sanitizer
+ # "-fsanitize=address"
+ # "-sALLOW_MEMORY_GROWTH"
+ #
+ # https://developer.chrome.com/blog/wasm-debugging-2020/
+ "-g"
+ "-gseparate-dwarf"
+)
diff --git a/experimental/web/sample_webgpu/README.md b/experimental/web/sample_webgpu/README.md
new file mode 100644
index 0000000..43eccf4
--- /dev/null
+++ b/experimental/web/sample_webgpu/README.md
@@ -0,0 +1,42 @@
+# WebGPU Sample
+
+This experimental sample demonstrates one way to target the web platform with
+IREE, using WebGPU. The output artifact is a web page that loads separately
+provided IREE `.vmfb` (compiled ML model) files and allows for calling
+functions on them.
+
+## Quickstart
+
+**Note**: you will need a WebGPU-compatible browser. Chrome Canary with the
+`#enable-unsafe-webgpu` flag is a good choice (you may need the flag or an
+origin trial token for `localhost`).
+
+1. Install IREE's host tools (e.g. by building the `install` target with CMake)
+2. Install the Emscripten SDK by
+ [following these directions](https://emscripten.org/docs/getting_started/downloads.html)
+3. Initialize your Emscripten environment (e.g. run `emsdk_env.bat`)
+4. From this directory, run `bash ./build_sample.sh [path to install] && bash ./serve_sample.sh`
+5. Open the localhost address linked in the script output
+
+To rebuild most parts of the sample (C runtime, sample HTML, CMake config,
+etc.), press `Ctrl+C` to stop the local webserver, then rerun the script.
+
+## How it works
+
+[Emscripten](https://emscripten.org/) is used (via the `emcmake` CMake wrapper)
+to compile the runtime into WebAssembly and JavaScript files.
+
+Any supported IREE program, such as
+[simple_abs.mlir](../../../samples/models/simple_abs.mlir), is compiled using
+the WebGPU compiler target. This generates WGSL shader code and IREE VM
+bytecode, which the IREE runtime is able to load and run using the browser's
+WebGPU APIs.
+
+### Asynchronous API
+
+[`iree_api_webgpu.js`](./iree_api_webgpu.js)
+
+* exposes a Promise-based API to the hosting application in
+ [`index.html`](./index.html)
+* preinitializes a WebGPU adapter and device
+* includes Emscripten's JS code and instantiates the WebAssembly module
diff --git a/experimental/web/sample_webgpu/build_sample.sh b/experimental/web/sample_webgpu/build_sample.sh
new file mode 100755
index 0000000..9360374
--- /dev/null
+++ b/experimental/web/sample_webgpu/build_sample.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Builds the sample, running host tools, Emscripten, and CMake as needed.
+#
+# Prerequisites:
+# * Environment must be configured for Emscripten
+# * Host tools must be built (default at IREE_SOURCE_DIR/build-host/install).
+# The build_tools/cmake/build_host_tools.sh script can do this for you.
+#
+# Usage:
+# build_sample.sh (optional install path) && serve_sample.sh
+
+set -e
+
+###############################################################################
+# Setup and checking for dependencies #
+###############################################################################
+
+if ! command -v emcmake &> /dev/null
+then
+ echo "'emcmake' not found, setup environment according to https://emscripten.org/docs/getting_started/downloads.html"
+ exit 1
+fi
+
+CMAKE_BIN=${CMAKE_BIN:-$(which cmake)}
+ROOT_DIR=$(git rev-parse --show-toplevel)
+SOURCE_DIR=${ROOT_DIR}/experimental/web/sample_webgpu
+
+BUILD_DIR=${ROOT_DIR?}/build-emscripten
+mkdir -p ${BUILD_DIR}
+
+BINARY_DIR=${BUILD_DIR}/experimental/web/sample_webgpu
+mkdir -p ${BINARY_DIR}
+
+INSTALL_ROOT="${1:-${ROOT_DIR}/build-host/install}"
+
+###############################################################################
+# Compile from .mlir input to portable .vmfb file using host tools #
+###############################################################################
+
+echo "=== Compiling sample MLIR files to VM FlatBuffer outputs (.vmfb) ==="
+COMPILE_TOOL="${INSTALL_ROOT?}/bin/iree-compile"
+
+# TODO(#11321): Enable iree-codegen-gpu-native-math-precision by default?
+compile_sample() {
+ echo " Compiling '$1' sample for WebGPU..."
+ ${COMPILE_TOOL?} $3 \
+ --iree-input-type=$2 \
+ --iree-hal-target-backends=webgpu \
+ --iree-codegen-gpu-native-math-precision=true \
+ --o ${BINARY_DIR}/$1_webgpu.vmfb
+}
+
+compile_sample "simple_abs" "none" "${ROOT_DIR?}/samples/models/simple_abs.mlir"
+compile_sample "fullyconnected" "mhlo" "${ROOT_DIR?}/tests/e2e/models/fullyconnected.mlir"
+
+# Does not run yet (uses internal readback, which needs async buffer mapping?)
+# compile_sample "collatz" "${ROOT_DIR?}/tests/e2e/models/collatz.mlir"
+
+# Slow, so just run on demand
+# compile_sample "mobilebert" "tosa" "D:/dev/projects/iree-data/models/2022_10_28/mobilebertsquad.tflite.mlir"
+# compile_sample "posenet" "tosa" "D:/dev/projects/iree-data/models/2022_10_28/posenet.tflite.mlir"
+# compile_sample "mobilessd" "tosa" "D:/dev/projects/iree-data/models/2022_10_28/mobile_ssd_v2_float_coco.tflite.mlir"
+
+###############################################################################
+# Build the web artifacts using Emscripten #
+###############################################################################
+
+echo "=== Building web artifacts using Emscripten ==="
+
+# Quoted so a build directory path containing spaces is not word-split.
+pushd "${BUILD_DIR}"
+
+# Configure using Emscripten's CMake wrapper, then build.
+# Note: The sample creates a device directly, so no drivers are required.
+emcmake "${CMAKE_BIN?}" -G Ninja .. \
+ -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+ -DIREE_HOST_BIN_DIR="${INSTALL_ROOT}/bin" \
+ -DIREE_BUILD_EXPERIMENTAL_WEB_SAMPLES=ON \
+ -DIREE_ENABLE_THREADING=OFF \
+ -DIREE_HAL_DRIVER_DEFAULTS=OFF \
+ -DIREE_HAL_DRIVER_LOCAL_SYNC=OFF \
+ -DIREE_HAL_DRIVER_LOCAL_TASK=OFF \
+ -DIREE_EXTERNAL_HAL_DRIVERS=webgpu \
+ -DIREE_ENABLE_ASAN=OFF \
+ -DIREE_BUILD_COMPILER=OFF \
+ -DIREE_BUILD_TESTS=OFF
+
+"${CMAKE_BIN?}" --build . --target \
+ iree_experimental_web_sample_webgpu
+
+popd
+
+echo "=== Copying static files (.html, .js) to the build directory ==="
+
+# Quote every path consistently so directories containing spaces work.
+cp "${SOURCE_DIR?}/index.html" "${BINARY_DIR}"
+cp "${ROOT_DIR}/docs/website/overrides/.icons/iree/ghost.svg" "${BINARY_DIR}"
+cp "${SOURCE_DIR?}/iree_api_webgpu.js" "${BINARY_DIR}"
diff --git a/experimental/web/sample_webgpu/device_webgpu.c b/experimental/web/sample_webgpu/device_webgpu.c
new file mode 100644
index 0000000..41b1c8a
--- /dev/null
+++ b/experimental/web/sample_webgpu/device_webgpu.c
@@ -0,0 +1,30 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <emscripten/html5.h>
+#include <emscripten/html5_webgpu.h>
+
+#include "experimental/webgpu/api.h"
+#include "experimental/webgpu/platform/webgpu.h"
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+// Creates a HAL device that wraps the WGPUDevice returned by
+// emscripten_webgpu_get_device().
+//
+// Returns IREE_STATUS_UNAVAILABLE if Emscripten has no WGPUDevice to hand
+// out; otherwise returns the status of iree_hal_webgpu_wrap_device(), which is
+// given |host_allocator| and |out_device|.
+// NOTE(review): presumably |out_device| receives a retained device that the
+// caller must release - confirm against iree_hal_webgpu_wrap_device().
+iree_status_t create_device(iree_allocator_t host_allocator,
+ iree_hal_device_t** out_device) {
+ WGPUDevice wgpu_device = emscripten_webgpu_get_device();
+ if (!wgpu_device) {
+ return iree_make_status(
+ IREE_STATUS_UNAVAILABLE,
+ "emscripten_webgpu_get_device() failed to return a WGPUDevice");
+ }
+
+ // Use the driver's default device options; nothing is customized here.
+ iree_hal_webgpu_device_options_t default_options;
+ iree_hal_webgpu_device_options_initialize(&default_options);
+
+ return iree_hal_webgpu_wrap_device(IREE_SV("webgpu-emscripten"),
+ &default_options, wgpu_device,
+ host_allocator, out_device);
+}
diff --git a/experimental/web/sample_webgpu/index.html b/experimental/web/sample_webgpu/index.html
new file mode 100644
index 0000000..1196e72
--- /dev/null
+++ b/experimental/web/sample_webgpu/index.html
@@ -0,0 +1,392 @@
+<!DOCTYPE html>
+<html>
+
+<!--
+Copyright 2022 The IREE Authors
+
+Licensed under the Apache License v2.0 with LLVM Exceptions.
+See https://llvm.org/LICENSE.txt for license information.
+SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+-->
+
+<head>
+ <meta charset="utf-8" />
+ <title>IREE WebGPU Sample</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <link rel="icon" href="./ghost.svg" type="image/svg+xml">
+
+ <style>
+ body {
+ padding: 16px;
+ }
+
+ .drop-target {
+ border: 3px solid #2244CC;
+ background-color: #c0c0c0;
+ color: #222222;
+ width: 300px;
+ height: 140px;
+ margin: 20px;
+ padding: 8px;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ user-select: none;
+ }
+
+ .drop-target p {
+ pointer-events: none;
+ }
+ </style>
+
+ <!-- https://getbootstrap.com/ for some webpage styling-->
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-ka7Sk0Gln4gmtz2MlQnikT1wXgYsOg+OMhuP+IlRH9sENBO0LRn5q+8nbTov4+1p" crossorigin="anonymous"></script>
+
+ <script src="./iree_api_webgpu.js"></script>
+</head>
+
+<body>
+ <div class="container">
+ <h1>IREE WebGPU Sample</h1>
+
+    <p>
+      This tool works similarly to
+      <a href="https://github.com/iree-org/iree/blob/main/tools/iree-run-module-main.cc"><code>iree-run-module</code></a>
+      (<a href="https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/developer_overview.md#iree-run-module">docs</a>).
+      <br>It loads a compiled IREE program then lets you call exported functions.
+      <br><b>Note:</b> Some outputs are logged to the console.
+    </p>
+
+ <h2>1. Load a program</h2>
+
+ <div id="drop-zone" class="drop-target">
+ <p style="margin:0px">Drag a compiled IREE program<br>(.vmfb file) here to load it</p>
+ </div>
+ <p>
+ Currently loaded program:
+ <b><span id="loaded-program-name" style="display: inline;">(None)</span></b>
+ </p>
+
+ <h2>2. Call functions on a loaded program</h2>
+
+ <form>
+ <p>
+ <label for="function-name-input" class="form-label">Function name:</label>
+ <input type="text" id="function-name-input" class="form-control"
+ style="width:400px; font-family: monospace;" value="main"></input>
+ </p>
+
+ <p>
+ <label for="function-arguments-input" class="form-label">Function arguments:</label>
+ <br><span class="form-text">In the form <code>dim1xdim2xtype=val1,val2,...</code>, one per line</span>
+ <textarea type="text" id="function-arguments-input" spellcheck="false" class="form-control"
+ style="min-width:400px; width:initial; min-height:100px; resize:both; font-family: monospace;"></textarea>
+ </p>
+
+ <p>
+ <label for="benchmark-iterations-input" class="form-label">
+ Benchmark iterations (inner invoke call):</label>
+ <input type="number" id="benchmark-iterations-input" class="form-control"
+ style="width:400px; font-family: monospace;" value="1" min="1"></input>
+ </p>
+
+      <!-- Element ids must be unique within the document. -->
+      <button id="call-function" class="btn btn-primary" type="button"
+          onclick="callFunctionWithFormInputs()" disabled>Call function</button>
+      <button id="update-url" class="btn btn-secondary" type="button"
+          onclick="updateUrlWithFormValues()">Update URL</button>
+      <button id="clear-url" class="btn btn-secondary" type="button"
+          onclick="clearUrl()">Clear URL</button>
+ </form>
+
+ <p>
+ <h4><label for="function-outputs" class="form-label">Function outputs:</label></h4>
+ <textarea type="text" id="function-outputs" readonly spellcheck="false" class="form-control"
+ style="min-width:400px; width:initial; height:100px; resize:both; font-family: monospace;"></textarea>
+ </p>
+
+ <p>Total time (including overheads):
+ <code id="benchmark-time-js-output" style="font-family: monospace;"></code></p>
+ <p>Mean inference time (Wasm only):
+ <code id="benchmark-time-wasm-output" style="font-family: monospace;"></code></p>
+
+ <hr>
+ <h2>Samples</h2>
+
+ <p>
+ Click to load a sample program, function, and arguments list.
+ <br>These links will automatically update the URL.
+ </p>
+
+ <div class="container" style="width:fit-content; margin-left:0px">
+      <div class="row" style="padding:4px">
+        <div class="col-sm">
+          simple_abs
+          (<a href="https://github.com/iree-org/iree/blob/main/samples/models/simple_abs.mlir">source</a>)
+        </div>
+        <div class="col-sm-auto">
+          <button class="btn btn-secondary" onclick="loadSample('simple_abs')">Load sample</button>
+        </div>
+      </div>
+ <div class="row" style="padding:4px">
+ <div class="col-sm">
+ fullyconnected
+ (<a href="https://github.com/iree-org/iree/blob/main/tests/e2e/models/fullyconnected.mlir">source</a>)
+ </div>
+ <div class="col-sm-auto">
+ <button class="btn btn-secondary" onclick="loadSample('fullyconnected')">Load sample</button>
+ </div>
+ </div>
+ <div class="row" style="padding:4px">
+ <div class="col-sm">
+ mobilebert
+ (<a href="https://tfhub.dev/iree/lite-model/mobilebert/fp32/1">source</a>)
+ </div>
+ <div class="col-sm-auto">
+ <button class="btn btn-secondary" onclick="loadSample('mobilebert')">Load sample</button>
+ </div>
+ </div>
+ <div class="row" style="padding:4px">
+ <div class="col-sm">
+ posenet
+ (<a href="https://tfhub.dev/tensorflow/lite-model/posenet/mobilenet/float/075/1/default/1">source</a>)
+ </div>
+ <div class="col-sm-auto">
+ <button class="btn btn-secondary" onclick="loadSample('posenet')">Load sample</button>
+ </div>
+ </div>
+ <div class="row" style="padding:4px">
+ <div class="col-sm">
+ mobilessd
+ (<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/gpu/mobile_ssd_v2_float_coco.tflite">source</a>)
+ </div>
+ <div class="col-sm-auto">
+ <button class="btn btn-secondary" onclick="loadSample('mobilessd')">Load sample</button>
+ </div>
+ </div>
+ </div>
+
+ <hr>
+ <h2>Compile your own program</h2>
+
+ <p>
+ Programs must be compiled for WebGPU to run on this page, using options
+ to <code>iree-compile</code> such as:
+ </p>
+
+ <textarea type="text" readonly spellcheck="false"
+ class="form-control" style="width:610px; height:90px; resize:none; font-family: monospace;">
+--iree-hal-target-backends=webgpu \
+--iree-codegen-gpu-native-math-precision=true \</textarea>
+
+ </div>
+
+ <script>
+ const initializePromise = ireeInitialize();
+ initializePromise.then(() => {
+ console.log("IREE initialized, ready to load programs.");
+ }).catch((error) => {
+ console.error("Failed to initialize IREE, error:");
+ console.error(error);
+ });
+
+ let loadedProgram = null;
+ const programNameElement = document.getElementById("loaded-program-name");
+ const callFunctionButton = document.getElementById("call-function");
+ const functionNameInput = document.getElementById("function-name-input");
+ const functionArgumentsInput = document.getElementById("function-arguments-input");
+ const benchmarkIterationsInput = document.getElementById("benchmark-iterations-input");
+ const functionOutputsElement = document.getElementById("function-outputs");
+ const timeJsOutputElement = document.getElementById("benchmark-time-js-output");
+ const timeWasmOutputElement = document.getElementById("benchmark-time-wasm-output");
+
+ async function finishLoadingProgram(newProgram, newProgramName) {
+ if (loadedProgram !== null) {
+ // Unload the previous program. We could keep a list of loaded programs
+ // and let users select between them.
+ await ireeUnloadProgram(loadedProgram);
+ }
+
+ await ireeInspectProgram(newProgram);
+
+ loadedProgram = newProgram;
+ programNameElement.innerText = newProgramName;
+ callFunctionButton.disabled = false;
+ }
+
+ async function tryLoadFromUrlParams() {
+ // Fetch IREE program from ?program=[file.vmfb] URL query parameter.
+ const searchParams = new URLSearchParams(window.location.search);
+
+ if (searchParams.has("function")) {
+ functionNameInput.value = searchParams.get("function");
+ }
+
+ if (searchParams.has("arguments")) {
+ functionArgumentsInput.value = searchParams.get("arguments");
+ }
+
+ if (searchParams.has("iterations")) {
+ benchmarkIterationsInput.value = searchParams.get("iterations");
+ }
+
+ if (searchParams.has("program")) {
+ const programPath = searchParams.get("program");
+
+ await initializePromise;
+ const program = await ireeLoadProgram(programPath);
+
+ // Set name to what is hopefully the file component of the path.
+ finishLoadingProgram(program, programPath.split("/").pop());
+ }
+ }
+
+ async function tryLoadFromBuffer(programDataBuffer, programName) {
+ // Clear 'program' from the URL.
+ const searchParams = new URLSearchParams(window.location.search);
+ searchParams.delete("program");
+ replaceUrlWithSearchParams(searchParams);
+
+ await initializePromise;
+ const program = await ireeLoadProgram(programDataBuffer);
+
+ finishLoadingProgram(program, programName);
+ }
+
+ // ------------------------------------------------------------------------
+ // Drag-and-drop to load from your local filesystem.
+ const dropZone = document.getElementById("drop-zone");
+ dropZone.addEventListener("drop", (dropEvent) => {
+ dropEvent.preventDefault();
+ dropEvent.target.style.border = "";
+
+ // Assume exactly one file was dropped.
+ const uploadedFile = dropEvent.dataTransfer.items[0].getAsFile();
+ const fileReader = new FileReader();
+ fileReader.onload = (fileLoadEvent) => {
+ tryLoadFromBuffer(fileLoadEvent.target.result, uploadedFile.name)
+ .catch((error) => {
+ console.error("Error loading program from drop: '" + error + "'");
+ });
+ };
+ fileReader.readAsArrayBuffer(uploadedFile);
+ });
+ dropZone.addEventListener("dragover", (event) => {
+ event.preventDefault();
+ });
+ dropZone.addEventListener("dragenter", (event) => {
+ if (event.target !== dropZone) return;
+ event.target.style.border = "3px dotted red";
+ });
+ dropZone.addEventListener("dragleave", (event) => {
+ if (event.target !== dropZone) return;
+ event.target.style.border = "";
+ });
+ // ------------------------------------------------------------------------
+
+ // ------------------------------------------------------------------------
+ // Form inputs.
+ function callFunctionWithFormInputs() {
+ if (loadedProgram === null) {
+ console.error("Can't call a function with no loaded program");
+ return;
+ }
+
+ const functionName = functionNameInput.value;
+ const inputs = functionArgumentsInput.value.split("\n");
+ const iterations = benchmarkIterationsInput.value;
+ const startJsTime = performance.now();
+
+ ireeCallFunction(loadedProgram, functionName, inputs, iterations)
+ .then((resultObject) => {
+ functionOutputsElement.value =
+ resultObject['outputs'].replace(";", "\n");
+
+ const endJsTime = performance.now();
+ const totalJsTime = endJsTime - startJsTime;
+ timeJsOutputElement.textContent = totalJsTime.toFixed(3) + "ms";
+
+ const totalWasmTimeMs = resultObject['total_invoke_time_ms'];
+ const meanWasmTimeMs = totalWasmTimeMs / iterations;
+ timeWasmOutputElement.textContent = meanWasmTimeMs.toFixed(3) +
+ "ms / iteration over " + iterations + " iteration(s)";
+ })
+ .catch((error) => {
+ console.error("Function call error: '" + error + "'");
+ });
+ }
+
+ function replaceUrlWithSearchParams(searchParams) {
+ let newUrl = window.location.protocol + "//" + window.location.host +
+ window.location.pathname;
+ const searchString = searchParams.toString();
+ if (searchString !== "") newUrl += "?" + searchParams;
+ window.history.replaceState({path: newUrl}, "", newUrl);
+ }
+
+ function updateUrlWithFormValues() {
+ const searchParams = new URLSearchParams(window.location.search);
+ searchParams.set("function", functionNameInput.value);
+ searchParams.set("arguments", functionArgumentsInput.value);
+ searchParams.set("iterations", benchmarkIterationsInput.value);
+ replaceUrlWithSearchParams(searchParams);
+ }
+
+ function clearUrl() {
+ const searchParams = new URLSearchParams(window.location.search);
+ searchParams.delete("program");
+ searchParams.delete("function");
+ searchParams.delete("arguments");
+ searchParams.delete("iterations");
+ replaceUrlWithSearchParams(searchParams);
+ }
+ // ------------------------------------------------------------------------
+
+ // ------------------------------------------------------------------------
+ // Load samples programs / inputs.
+ function loadSample(sampleName) {
+ const searchParams = new URLSearchParams(window.location.search);
+ searchParams.set("program", sampleName + "_webgpu.vmfb");
+ replaceUrlWithSearchParams(searchParams);
+
+ if (sampleName === "simple_abs") {
+ functionNameInput.value = "abs";
+ functionArgumentsInput.value = "f32=-1.23";
+ } else if (sampleName === "fullyconnected") {
+ functionNameInput.value = "main";
+ functionArgumentsInput.value = [
+ "1x5xf32=1,-2,-3,4,-5",
+ "1x5x3x1xf32=15,14,13,12,11,10,9,8,7,6,5,4,3,2,1",
+ ].join("\n");
+ } else if (sampleName === "mobilebert") {
+ functionNameInput.value = "main";
+ functionArgumentsInput.value = [
+ "1x384xi32",
+ "1x384xi32",
+ "1x384xi32",
+ ].join("\n");
+ } else if (sampleName === "posenet") {
+ functionNameInput.value = "main";
+ functionArgumentsInput.value = "1x353x257x3xf32";
+ } else if (sampleName === "mobilessd") {
+ functionNameInput.value = "main";
+ functionArgumentsInput.value = "1x320x320x3xf32";
+ }
+
+ updateUrlWithFormValues();
+
+ tryLoadFromUrlParams().catch((error) => {
+ console.error("Error loading sample program: '" + error + "'");
+ });
+ }
+ // ------------------------------------------------------------------------
+
+ window.addEventListener("load", () => {
+ tryLoadFromUrlParams().catch((error) => {
+ console.error("Error loading program from URL: '" + error + "'");
+ });
+ });
+ </script>
+</body>
+
+</html>
diff --git a/experimental/web/sample_webgpu/iree_api_webgpu.js b/experimental/web/sample_webgpu/iree_api_webgpu.js
new file mode 100644
index 0000000..e97afef
--- /dev/null
+++ b/experimental/web/sample_webgpu/iree_api_webgpu.js
@@ -0,0 +1,219 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Promise-based API for interacting with the IREE runtime.
+
+const EMSCRIPTEN_SCRIPT_URL = 'web-sample-webgpu.js';
+
+// ------------------------------------------------------------------------- //
+// - API - //
+// ------------------------------------------------------------------------- //
+
+// Initializes IREE's runtime.
+async function ireeInitialize() {
+ return _ireeInitialize();
+}
+
+// Loads an IREE program stored in a .vmfb file.
+//
+// Accepts either a string path to a file (XMLHttpRequest compatible) or an
+// ArrayBuffer containing an already loaded file.
+//
+// In order to call functions on the program it must be compiled in a supported
+// configuration, such as with these flags:
+// --iree-hal-target-backends=webgpu
+//
+// Resolves with an opaque pointer to the program state on success.
+async function ireeLoadProgram(vmfbPathOrBuffer) {
+ return _ireeLoadProgram(vmfbPathOrBuffer);
+}
+
+// Inspects a program.
+async function ireeInspectProgram(programState) {
+ return _ireeInspectProgram(programState);
+}
+
+// Unloads a program.
+async function ireeUnloadProgram(programState) {
+ return _ireeUnloadProgram(programState);
+}
+
+// Calls a function on a loaded program.
+//
+// Resolves with a parsed JSON object on success:
+// {
+// "total_invoke_time_ms": [number],
+// "outputs": [semicolon delimited list of formatted outputs]
+// }
+async function ireeCallFunction(
+ programState, functionName, inputs, iterations) {
+ return _ireeCallFunction(programState, functionName, inputs, iterations);
+}
+
+// ------------------------------------------------------------------------- //
+// - Implementation - //
+// ------------------------------------------------------------------------- //
+
+// TODO(scotttodd): namespace / scope these (don't pollute window object)
+let wasmSetupSampleFn;
+let wasmCleanupSampleFn;
+let wasmLoadProgramFn;
+let wasmInspectProgramFn;
+let wasmUnloadProgramFn;
+let wasmCallFunctionFn;
+
+let initializedPromise, initializePromiseResolve, initializePromiseReject;
+let sampleState;
+
+var Module = {
+ print: function(text) {
+ console.log('(C)', text);
+ },
+ printErr: function(text) {
+ console.error('(C)', text);
+ },
+ onRuntimeInitialized: function() {
+ wasmSetupSampleFn = Module.cwrap('setup_sample', 'number', []);
+ wasmCleanupSampleFn = Module.cwrap('cleanup_sample', null, ['number']);
+ wasmLoadProgramFn = Module.cwrap(
+ 'load_program',
+ 'number',
+ ['number', 'number', 'number'],
+ );
+ wasmInspectProgramFn = Module.cwrap('inspect_program', null, ['number']);
+ wasmUnloadProgramFn = Module.cwrap('unload_program', null, ['number']);
+ wasmCallFunctionFn = Module.cwrap(
+ 'call_function',
+ 'number',
+ ['number', 'string', 'string', 'number'],
+ );
+
+ sampleState = wasmSetupSampleFn();
+ if (!sampleState) {
+ initializePromiseReject('Runtime initialization failed');
+ return;
+ }
+ initializePromiseResolve();
+ },
+ noInitialRun: true,
+};
+
+async function _ireeInitialize() {
+ if (initializedPromise) return initializedPromise;
+
+ initializedPromise = new Promise((resolve, reject) => {
+ initializePromiseResolve = resolve;
+ initializePromiseReject = reject;
+ });
+
+ // Preinitialize a WebGPU device here. We could let the C program request the
+ // adapter and device itself, but that would jump through layers of Emscripten
+ // binding code and C/JS callbacks. This is much more concise.
+ // const instance = -1; // No wgpuCreateInstance function in JS (yet?).
+ if (navigator['gpu'] === undefined) {
+ throw 'No \'gpu\' property on navigator, can\'t initialize WebGPU (missing #enable-unsafe-webgpu or an origin trial?)';
+ }
+ const adapter = await navigator['gpu']['requestAdapter']();
+ const deviceDescriptor = {
+ 'label': 'IREE WebGPU device',
+ 'requiredFeatures': [],
+ 'requiredLimits': {
+ 'maxBindGroups': 4,
+ 'maxStorageBuffersPerShaderStage': 8,
+ },
+ 'defaultQueue': {},
+ };
+ const device = await adapter['requestDevice'](deviceDescriptor);
+ // Emscripten makes this available via emscripten_webgpu_get_device() in C.
+ Module['preinitializedWebGPUDevice'] = device;
+
+ const mainScript = document.createElement('script');
+ mainScript.setAttribute('src', EMSCRIPTEN_SCRIPT_URL);
+ document.body.appendChild(mainScript);
+
+ return initializedPromise;
+}
+
+function _ireeLoadProgramBuffer(programDataBuffer) {
+ const programDataView = new Int8Array(programDataBuffer);
+
+ const programDataWasmBuffer = Module._malloc(
+ programDataView.length * programDataView.BYTES_PER_ELEMENT);
+ Module.HEAP8.set(programDataView, programDataWasmBuffer);
+
+ // Note: we transfer ownership of the FlatBuffer data here, so there is
+ // no need to call `Module._free(programDataWasmBuffer)` later.
+ const programState = wasmLoadProgramFn(
+ sampleState, programDataWasmBuffer, programDataBuffer.byteLength);
+ return programState;
+}
+
+function _ireeLoadProgram(vmfbPathOrBuffer) {
+ if (vmfbPathOrBuffer instanceof ArrayBuffer) {
+ const programState = _ireeLoadProgramBuffer(vmfbPathOrBuffer);
+ if (programState !== 0) {
+ return Promise.resolve(programState);
+ } else {
+ return Promise.reject('Wasm module error loading program');
+ }
+ }
+
+ return new Promise((resolve, reject) => {
+ const fetchRequest = new XMLHttpRequest();
+ fetchRequest.onload = function(progressEvent) {
+ const programState =
+ _ireeLoadProgramBuffer(progressEvent.target.response);
+ if (programState !== 0) {
+ resolve(programState);
+ } else {
+ reject('Wasm module error loading program');
+ }
+ };
+ fetchRequest.onerror = function(progressEvent) {
+ reject(progressEvent.error);
+ };
+ fetchRequest.open('GET', vmfbPathOrBuffer);
+ fetchRequest.responseType = 'arraybuffer';
+ fetchRequest.send();
+ });
+}
+
+function _ireeInspectProgram(programState) {
+ wasmInspectProgramFn(programState);
+ return Promise.resolve();
+}
+
+function _ireeUnloadProgram(programState) {
+ wasmUnloadProgramFn(programState);
+ return Promise.resolve();
+}
+
+// Calls |functionName| on the program identified by |programState|.
+//
+// |inputs| is either a string or an array of strings; an array is joined with
+// ';' before being passed to the Wasm `call_function`. |iterations| defaults
+// to 1 when undefined.
+//
+// Returns a Promise that resolves with the parsed JSON result, or rejects if
+// |inputs| has an unsupported type or the Wasm call returns an empty string.
+function _ireeCallFunction(programState, functionName, inputs, iterations) {
+ iterations = iterations !== undefined ? iterations : 1;
+
+ let inputsJoined;
+ if (Array.isArray(inputs)) {
+ inputsJoined = inputs.join(';');
+ } else if (typeof (inputs) === 'string') {
+ inputsJoined = inputs;
+ } else {
+ return Promise.reject(
+ 'Expected \'inputs\' to be a String or an array of Strings');
+ }
+
+ // Receive as a pointer, convert, then free. This avoids a memory leak, see
+ // https://github.com/emscripten-core/emscripten/issues/6484
+ const returnValuePtr =
+ wasmCallFunctionFn(programState, functionName, inputsJoined, iterations);
+ const returnValue = Module.UTF8ToString(returnValuePtr);
+
+ if (returnValue === '') {
+ // NOTE(review): the pointer is not freed on this path. Whether that leaks
+ // depends on what call_function returns on failure (a heap-allocated
+ // empty string vs. a static string or NULL) - confirm against main.c.
+ return Promise.reject('Wasm module error calling function');
+ } else {
+ // The string has been copied into a JS value above, so the Wasm-side
+ // buffer can be released before parsing.
+ Module._free(returnValuePtr);
+ return Promise.resolve(JSON.parse(returnValue));
+ }
+}
diff --git a/experimental/web/sample_webgpu/main.c b/experimental/web/sample_webgpu/main.c
new file mode 100644
index 0000000..0e2ac66
--- /dev/null
+++ b/experimental/web/sample_webgpu/main.c
@@ -0,0 +1,691 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+// Must be first.
+#include "experimental/webgpu/platform/webgpu.h"
+
+// NOTE: include order matters.
+#include "experimental/webgpu/buffer.h"
+#include "experimental/webgpu/webgpu_device.h"
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/modules/hal/module.h"
+#include "iree/runtime/api.h"
+#include "iree/vm/bytecode/module.h"
+
+//===----------------------------------------------------------------------===//
+// Public API
+//===----------------------------------------------------------------------===//
+
+// Opaque state for the sample, shared between multiple loaded programs.
+typedef struct iree_sample_state_t iree_sample_state_t;
+
+// Initializes the sample and returns its state.
+// Returns NULL if initialization fails; the error is printed to stderr.
+iree_sample_state_t* setup_sample();
+
+// Shuts down the sample and frees its state.
+// Requires that all programs first be unloaded with |unload_program|.
+void cleanup_sample(iree_sample_state_t* sample_state);
+
+// Opaque state for an individual loaded program.
+typedef struct iree_program_state_t iree_program_state_t;
+
+// Loads a program into the sample from the provided data.
+// Note: this takes ownership of |vmfb_data|.
+// Returns NULL if loading fails; the error is printed to stderr.
+iree_program_state_t* load_program(iree_sample_state_t* sample_state,
+ uint8_t* vmfb_data, size_t length);
+
+// Inspects metadata about a loaded program, printing to stdout.
+void inspect_program(iree_program_state_t* program_state);
+
+// Unloads a program and frees its state.
+void unload_program(iree_program_state_t* program_state);
+
+// Calls a function and reports the result as a JSON string.
+//
+// On success returns a heap-allocated string that the caller must free, of
+// the form:
+//   { "total_invoke_time_ms": [number], "outputs": "[formatted outputs]" }
+// where the outputs are a semicolon-delimited list. Buffer view outputs are
+// not included in that list; they are read back and printed to stdout
+// separately. On failure returns the empty string literal, which must not be
+// freed. Note: This is in need of some real API bindings that marshal
+// structured data between C <-> JS.
+//
+// * |function_name| is the function name without the module prefix; the
+//   implementation prepends the loaded module's name.
+// * |inputs| is a semicolon delimited list of VM scalars and buffers, as
+// described in iree/tooling/vm_util and used in IREE's CLI tools.
+// For example, the CLI `--function_input=f32=1 --function_input=f32=2`
+// should be passed here as `f32=1;f32=2`.
+// * |iterations| is the number of times to call the function, for benchmarking
+//   (currently unused by the implementation, which invokes once).
+const char* call_function(iree_program_state_t* program_state,
+ const char* function_name, const char* inputs,
+ int iterations);
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+// State shared by every program loaded into the sample. Allocated by
+// |setup_sample| and released by |cleanup_sample|.
+typedef struct iree_sample_state_t {
+ // Runtime instance created in |setup_sample|.
+ iree_runtime_instance_t* instance;
+ // HAL device obtained from |create_device|; each program's session is
+ // created with this device.
+ iree_hal_device_t* device;
+} iree_sample_state_t;
+
+// Per-program state. Allocated by |load_program| and released by
+// |unload_program|.
+typedef struct iree_program_state_t {
+ // Session created with the sample's instance and device.
+ iree_runtime_session_t* session;
+ // Bytecode module created from the caller-provided VMFB data and appended to
+ // |session|.
+ iree_vm_module_t* module;
+} iree_program_state_t;
+
+// Creates the HAL device used by the sample. Only declared here; the
+// definition is not part of this file.
+extern iree_status_t create_device(iree_allocator_t host_allocator,
+ iree_hal_device_t** out_device);
+
+// Allocates the sample state, creates the runtime instance, and creates the
+// HAL device via |create_device|.
+//
+// Returns the new state on success. On failure prints the error to stderr,
+// releases whatever was created, and returns NULL.
+iree_sample_state_t* setup_sample() {
+  iree_sample_state_t* sample_state = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(iree_allocator_system(),
+                            sizeof(iree_sample_state_t), (void**)&sample_state);
+
+  iree_runtime_instance_options_t instance_options;
+  iree_runtime_instance_options_initialize(&instance_options);
+  // Note: no call to iree_runtime_instance_options_use_all_available_drivers().
+
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_instance_create(
+        &instance_options, iree_allocator_system(), &sample_state->instance);
+  }
+
+  if (iree_status_is_ok(status)) {
+    status = create_device(iree_allocator_system(), &sample_state->device);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+    // |sample_state| is still NULL when the allocation itself failed, and
+    // cleanup_sample dereferences its argument, so only call it when there is
+    // something to clean up.
+    // NOTE(review): partial cleanup assumes the allocation is zero-filled so
+    // unset members are NULL - confirm for iree_allocator_malloc.
+    if (sample_state) cleanup_sample(sample_state);
+    return NULL;
+  }
+
+  return sample_state;
+}
+
+// Releases the device and runtime instance held by |sample_state| and frees
+// the state itself. Passing NULL is a no-op.
+void cleanup_sample(iree_sample_state_t* sample_state) {
+  if (!sample_state) return;
+  iree_hal_device_release(sample_state->device);
+  iree_runtime_instance_release(sample_state->instance);
+  // Free with the same allocator that setup_sample allocated from.
+  iree_allocator_free(iree_allocator_system(), sample_state);
+}
+
+// Creates a session on the sample's device, wraps |vmfb_data| in a bytecode
+// module, and appends that module to the session.
+//
+// Takes ownership of |vmfb_data|. Returns the new program state on success.
+// On failure prints the error to stderr, releases whatever was created, and
+// returns NULL.
+iree_program_state_t* load_program(iree_sample_state_t* sample_state,
+                                   uint8_t* vmfb_data, size_t length) {
+  iree_program_state_t* program_state = NULL;
+  iree_status_t status = iree_allocator_malloc(iree_allocator_system(),
+                                               sizeof(iree_program_state_t),
+                                               (void**)&program_state);
+
+  iree_runtime_session_options_t session_options;
+  iree_runtime_session_options_initialize(&session_options);
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_session_create_with_device(
+        sample_state->instance, &session_options, sample_state->device,
+        iree_runtime_instance_host_allocator(sample_state->instance),
+        &program_state->session);
+  }
+
+  if (iree_status_is_ok(status)) {
+    // Take ownership of the FlatBuffer data so JavaScript doesn't need to
+    // explicitly call `Module._free()`.
+    // NOTE(review): confirm whether the module frees |vmfb_data| when its
+    // creation fails; if not, that path leaks the data.
+    status = iree_vm_bytecode_module_create(
+        iree_runtime_instance_vm_instance(sample_state->instance),
+        iree_make_const_byte_span(vmfb_data, length),
+        /*flatbuffer_allocator=*/iree_allocator_system(),
+        iree_allocator_system(), &program_state->module);
+  } else {
+    // Must clean up the FlatBuffer data directly.
+    iree_allocator_free(iree_allocator_system(), (void*)vmfb_data);
+  }
+
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_session_append_module(program_state->session,
+                                                program_state->module);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+    // |program_state| is still NULL when the allocation itself failed, and
+    // unload_program dereferences its argument.
+    if (program_state) unload_program(program_state);
+    return NULL;
+  }
+
+  return program_state;
+}
+
+// Prints the module name, the import/export/internal function counts, and the
+// name and calling convention of every exported function to stdout.
+// Functions whose lookup fails are reported on stderr and skipped.
+void inspect_program(iree_program_state_t* program_state) {
+  fprintf(stdout, "=== program properties ===\n");
+
+  iree_vm_module_t* module = program_state->module;
+  iree_string_view_t module_name = iree_vm_module_name(module);
+  fprintf(stdout, "  module name: '%.*s'\n", (int)module_name.size,
+          module_name.data);
+
+  iree_vm_module_signature_t module_signature =
+      iree_vm_module_signature(module);
+  fprintf(stdout, "  module signature:\n");
+  fprintf(stdout, "    %" PRIhsz " imported functions\n",
+          module_signature.import_function_count);
+  fprintf(stdout, "    %" PRIhsz " exported functions\n",
+          module_signature.export_function_count);
+  fprintf(stdout, "    %" PRIhsz " internal functions\n",
+          module_signature.internal_function_count);
+
+  fprintf(stdout, "  exported functions:\n");
+  for (iree_host_size_t i = 0; i < module_signature.export_function_count;
+       ++i) {
+    iree_vm_function_t function;
+    iree_status_t status = iree_vm_module_lookup_function_by_ordinal(
+        module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function);
+    if (!iree_status_is_ok(status)) {
+      iree_status_fprint(stderr, status);
+      iree_status_free(status);
+      continue;
+    }
+
+    iree_string_view_t function_name = iree_vm_function_name(&function);
+    iree_vm_function_signature_t function_signature =
+        iree_vm_function_signature(&function);
+    iree_string_view_t calling_convention =
+        function_signature.calling_convention;
+    // Both values are quoted; the calling convention previously had only a
+    // closing quote.
+    fprintf(stdout, "    function name: '%.*s', calling convention: '%.*s'\n",
+            (int)function_name.size, function_name.data,
+            (int)calling_convention.size, calling_convention.data);
+  }
+}
+
+// Releases the module and session held by |program_state| and frees the
+// state itself. Passing NULL is a no-op.
+void unload_program(iree_program_state_t* program_state) {
+  if (!program_state) return;
+  iree_vm_module_release(program_state->module);
+  iree_runtime_session_release(program_state->session);
+  // Free with the same allocator that load_program allocated from.
+  iree_allocator_free(iree_allocator_system(), program_state);
+}
+
+// Parses a single textual |input| and appends it to |call|'s input list.
+//
+// Text containing '=' or 'x' is parsed as a buffer view allocated from
+// |device_allocator|; with a leading '&' only the backing buffer is pushed.
+// Any other text is parsed as a scalar: f32 when it contains '.', i32
+// otherwise.
+static iree_status_t parse_input_into_call(
+    iree_runtime_call_t* call, iree_hal_allocator_t* device_allocator,
+    iree_string_view_t input) {
+  const bool looks_like_buffer =
+      iree_string_view_find_char(input, '=', 0) != IREE_STRING_VIEW_NPOS ||
+      iree_string_view_find_char(input, 'x', 0) != IREE_STRING_VIEW_NPOS;
+  if (looks_like_buffer) {
+    // Buffer view (either just a shape or a shape=value) or buffer.
+    const bool wants_storage_only =
+        iree_string_view_consume_prefix(&input, iree_make_cstring_view("&"));
+    iree_hal_buffer_view_t* parsed_view = NULL;
+    IREE_RETURN_IF_ERROR(
+        iree_hal_buffer_view_parse(input, device_allocator, &parsed_view),
+        "parsing value '%.*s'", (int)input.size, input.data);
+    iree_vm_ref_t input_ref;
+    if (wants_storage_only) {
+      // Storage buffer reference: keep only the storage of the buffer view.
+      // Its contents are whatever was specified (or 0); the view metadata is
+      // dropped.
+      input_ref =
+          iree_hal_buffer_retain_ref(iree_hal_buffer_view_buffer(parsed_view));
+      iree_hal_buffer_view_release(parsed_view);
+    } else {
+      input_ref = iree_hal_buffer_view_move_ref(parsed_view);
+    }
+    return iree_vm_list_push_ref_move(call->inputs, &input_ref);
+  } else {
+    // Scalar.
+    iree_vm_value_t scalar;
+    if (iree_string_view_find_char(input, '.', 0) != IREE_STRING_VIEW_NPOS) {
+      // Float.
+      scalar = iree_vm_value_make_f32(0.0f);
+      if (!iree_string_view_atof(input, &scalar.f32)) {
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "parsing value '%.*s' as f32", (int)input.size,
+                                input.data);
+      }
+    } else {
+      // Integer.
+      scalar = iree_vm_value_make_i32(0);
+      if (!iree_string_view_atoi_int32(input, &scalar.i32)) {
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "parsing value '%.*s' as i32", (int)input.size,
+                                input.data);
+      }
+    }
+    return iree_vm_list_push_value(call->inputs, &scalar);
+  }
+
+  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                          "Unhandled function input (unreachable?)");
+}
+
+// Splits the semicolon-delimited |inputs| and appends each entry to |call|
+// via |parse_input_into_call|. An empty |inputs| string adds nothing.
+// Stops at, and returns, the first parse failure.
+static iree_status_t parse_inputs_into_call(
+    iree_runtime_call_t* call, iree_hal_allocator_t* device_allocator,
+    iree_string_view_t inputs) {
+  if (inputs.size == 0) return iree_ok_status();
+
+  // Peel one entry off the front per pass; the pass in which no ';' is found
+  // handles the final entry and ends the loop.
+  iree_string_view_t rest = inputs;
+  for (;;) {
+    iree_string_view_t current;
+    const intptr_t separator_index =
+        iree_string_view_split(rest, ';', &current, &rest);
+    IREE_RETURN_IF_ERROR(
+        parse_input_into_call(call, device_allocator, current));
+    if (separator_index == -1) break;
+  }
+
+  return iree_ok_status();
+}
+
+// State handed from |print_buffer_view| to |buffer_map_sync_callback|. The
+// callback releases both members and frees the struct.
+typedef struct iree_buffer_map_userdata_t {
+ // Buffer view being printed; retained before the map request is issued.
+ iree_hal_buffer_view_t* source_buffer_view;
+ // Mappable buffer that the source contents were copied into.
+ iree_hal_buffer_t* readback_buffer;
+} iree_buffer_map_userdata_t;
+
+// Release callback for a heap buffer wrapping mapped WebGPU memory:
+// |user_data| carries the WGPUBuffer handle, which is unmapped here.
+// |buffer| is not used.
+static void iree_webgpu_mapped_buffer_release(void* user_data,
+                                              iree_hal_buffer_t* buffer) {
+  wgpuBufferUnmap((WGPUBuffer)user_data);
+}
+
+// Wraps the mapped contents of |userdata->readback_buffer| in a heap buffer
+// shaped like |userdata->source_buffer_view| and prints it to stdout.
+// Must only be called once the readback buffer has been mapped successfully.
+static iree_status_t print_mapped_readback(
+    iree_buffer_map_userdata_t* userdata) {
+  // The data was copied to, and mapped from, the start of the readback
+  // buffer, so the mapped range is addressed with the readback buffer's own
+  // offset rather than the offset of the source buffer.
+  iree_device_size_t data_offset =
+      iree_hal_buffer_byte_offset(userdata->readback_buffer);
+  iree_device_size_t data_length =
+      iree_hal_buffer_view_byte_length(userdata->source_buffer_view);
+  WGPUBuffer buffer_handle =
+      iree_hal_webgpu_buffer_handle(userdata->readback_buffer);
+
+  // For this sample we want to print arbitrary buffers, which is easiest
+  // using the |iree_hal_buffer_view_format| function. Internally, that
+  // function requires synchronous buffer mapping, so we'll first wrap the
+  // already (async) mapped GPU memory into a heap buffer. In a less general
+  // application (or one not requiring pretty logging like this), we could
+  // skip a few buffer copies and other data transformations here.
+
+  const void* data_ptr =
+      wgpuBufferGetConstMappedRange(buffer_handle, data_offset, data_length);
+  if (!data_ptr) {
+    // Nothing took over the mapping, so undo it here.
+    wgpuBufferUnmap(buffer_handle);
+    return iree_make_status(IREE_STATUS_INTERNAL,
+                            "unable to get mapped range of readback buffer");
+  }
+
+  // The buffer we get from WebGPU may not be aligned to 64.
+  iree_hal_memory_access_t memory_access =
+      IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_UNALIGNED;
+  iree_hal_buffer_t* heap_buffer = NULL;
+  iree_status_t status = iree_hal_heap_buffer_wrap(
+      userdata->readback_buffer->device_allocator,
+      IREE_HAL_MEMORY_TYPE_HOST_LOCAL, memory_access,
+      IREE_HAL_BUFFER_USAGE_MAPPING, data_length,
+      iree_make_byte_span((void*)data_ptr, data_length),
+      (iree_hal_buffer_release_callback_t){
+          .fn = iree_webgpu_mapped_buffer_release,
+          .user_data = buffer_handle,
+      },
+      &heap_buffer);
+
+  // Copy the original buffer_view, backed by the mapped heap buffer instead.
+  iree_hal_buffer_view_t* heap_buffer_view = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_buffer_view_create_like(
+        heap_buffer, userdata->source_buffer_view, iree_allocator_system(),
+        &heap_buffer_view);
+  }
+
+  if (iree_status_is_ok(status)) {
+    fprintf(stdout, "Call output:\n");
+    status = iree_hal_buffer_view_fprint(stdout, heap_buffer_view,
+                                         /*max_element_count=*/4096,
+                                         iree_allocator_system());
+    fprintf(stdout, "\n");
+  }
+  iree_hal_buffer_view_release(heap_buffer_view);
+  iree_hal_buffer_release(heap_buffer);
+  return status;
+}
+
+// TODO(scotttodd): move async mapping into webgpu/buffer.h/.c?
+//
+// Completion callback for the wgpuBufferMapAsync request issued by
+// |print_buffer_view|. |userdata_ptr| is an iree_buffer_map_userdata_t whose
+// members are released, and which is freed, before returning - whether or not
+// the mapping succeeded. On success the mapped contents are printed to stdout;
+// failures are reported on stderr.
+static void buffer_map_sync_callback(WGPUBufferMapAsyncStatus map_status,
+                                     void* userdata_ptr) {
+  iree_buffer_map_userdata_t* userdata =
+      (iree_buffer_map_userdata_t*)userdata_ptr;
+  switch (map_status) {
+    case WGPUBufferMapAsyncStatus_Success:
+      break;
+    case WGPUBufferMapAsyncStatus_Error:
+      fprintf(stderr, "  buffer_map_sync_callback status: Error\n");
+      break;
+    case WGPUBufferMapAsyncStatus_DeviceLost:
+      fprintf(stderr, "  buffer_map_sync_callback status: DeviceLost\n");
+      break;
+    case WGPUBufferMapAsyncStatus_Unknown:
+    default:
+      fprintf(stderr, "  buffer_map_sync_callback status: Unknown\n");
+      break;
+  }
+
+  // TODO(scotttodd): bubble result(s) up to the caller (async + callback API)
+
+  if (map_status == WGPUBufferMapAsyncStatus_Success) {
+    iree_status_t status = print_mapped_readback(userdata);
+    if (!iree_status_is_ok(status)) {
+      fprintf(stderr, "buffer_map_sync_callback error:\n");
+      iree_status_fprint(stderr, status);
+      iree_status_free(status);
+    }
+  }
+
+  // Single cleanup path shared by the success and failure cases.
+  iree_hal_buffer_view_release(userdata->source_buffer_view);
+  iree_hal_buffer_release(userdata->readback_buffer);
+  iree_allocator_free(iree_allocator_system(), userdata);
+}
+
+// Starts an asynchronous readback-and-print of |buffer_view|.
+//
+// Creates a mappable WebGPU buffer, copies the contents of |buffer_view|'s
+// buffer into it on |device|, waits for that copy, and then requests an
+// asynchronous map. The printing itself happens later in
+// |buffer_map_sync_callback|, which also releases the readback buffer and the
+// reference to |buffer_view| taken here.
+//
+// Returns a failure status if any step before the map request fails; in that
+// case nothing is handed to the callback.
+static iree_status_t print_buffer_view(iree_hal_device_t* device,
+                                       iree_hal_buffer_view_t* buffer_view) {
+  iree_status_t status = iree_ok_status();
+
+  iree_hal_buffer_t* buffer = iree_hal_buffer_view_buffer(buffer_view);
+  iree_device_size_t data_offset = iree_hal_buffer_byte_offset(buffer);
+  iree_device_size_t data_length =
+      iree_hal_buffer_view_byte_length(buffer_view);
+
+  // ----------------------------------------------
+  // Allocate mappable host memory.
+  // Note: iree_hal_webgpu_simple_allocator_allocate_buffer only supports
+  // CopySrc today, so we'll create the buffer directly with
+  // wgpuDeviceCreateBuffer and then wrap it using iree_hal_webgpu_buffer_wrap.
+  WGPUBufferDescriptor descriptor = {
+      .nextInChain = NULL,
+      .label = "IREE_readback",
+      .usage = WGPUBufferUsage_MapRead | WGPUBufferUsage_CopyDst,
+      .size = data_length,
+      .mappedAtCreation = false,
+  };
+  WGPUBuffer readback_buffer_handle = NULL;
+  if (iree_status_is_ok(status)) {
+    readback_buffer_handle = wgpuDeviceCreateBuffer(
+        iree_hal_webgpu_device_handle(device), &descriptor);
+    if (!readback_buffer_handle) {
+      status = iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                                "unable to allocate buffer of size %" PRIdsz,
+                                data_length);
+    }
+  }
+  iree_device_size_t target_offset = 0;
+  const iree_hal_buffer_params_t target_params = {
+      .usage = IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING,
+      .type =
+          IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+      .access = IREE_HAL_MEMORY_ACCESS_ALL,
+  };
+  iree_hal_buffer_t* readback_buffer = NULL;
+  if (iree_status_is_ok(status)) {
+    // NOTE(review): if wrapping fails, |readback_buffer_handle| is not
+    // released here - confirm who owns the handle in that case.
+    status = iree_hal_webgpu_buffer_wrap(
+        device, iree_hal_device_allocator(device), target_params.type,
+        target_params.access, target_params.usage, data_length,
+        /*byte_offset=*/0,
+        /*byte_length=*/data_length, readback_buffer_handle,
+        iree_allocator_system(), &readback_buffer);
+  }
+  // ----------------------------------------------
+
+  // ----------------------------------------------
+  // Transfer from device memory to mappable host memory.
+  // NOTE(review): |source_offset| is set to the source buffer's own byte
+  // offset; if the transfer API already treats offsets as relative to the
+  // buffer this double-counts for non-zero offsets - confirm.
+  const iree_hal_transfer_command_t transfer_command = {
+      .type = IREE_HAL_TRANSFER_COMMAND_TYPE_COPY,
+      .copy =
+          {
+              .source_buffer = buffer,
+              .source_offset = data_offset,
+              .target_buffer = readback_buffer,
+              .target_offset = target_offset,
+              .length = data_length,
+          },
+  };
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_create_transfer_command_buffer(
+        device, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+        IREE_HAL_QUEUE_AFFINITY_ANY, /*transfer_count=*/1, &transfer_command,
+        &command_buffer);
+  }
+  iree_hal_semaphore_t* fence_semaphore = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_semaphore_create(device, 0ull, &fence_semaphore);
+  }
+  uint64_t signal_value = 1ull;
+  if (iree_status_is_ok(status)) {
+    iree_hal_semaphore_list_t signal_semaphores = {
+        .count = 1,
+        .semaphores = &fence_semaphore,
+        .payload_values = &signal_value,
+    };
+    status = iree_hal_device_queue_execute(
+        device, IREE_HAL_QUEUE_AFFINITY_ANY, iree_hal_semaphore_list_empty(),
+        signal_semaphores, 1, &command_buffer);
+  }
+  // TODO(scotttodd): Make this async - pass a wait source to iree_loop_wait_one
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_semaphore_wait(fence_semaphore, signal_value,
+                                     iree_infinite_timeout());
+  }
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(fence_semaphore);
+  // ----------------------------------------------
+
+  iree_buffer_map_userdata_t* userdata = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_allocator_malloc(iree_allocator_system(),
+                                   sizeof(iree_buffer_map_userdata_t),
+                                   (void**)&userdata);
+  }
+
+  if (iree_status_is_ok(status)) {
+    // Only touch |userdata| once its allocation is known to have succeeded.
+    iree_hal_buffer_view_retain(buffer_view);  // Released in the callback.
+    userdata->source_buffer_view = buffer_view;
+    userdata->readback_buffer = readback_buffer;  // Released in the callback.
+    wgpuBufferMapAsync(readback_buffer_handle, WGPUMapMode_Read, /*offset=*/0,
+                       /*size=*/data_length, buffer_map_sync_callback,
+                       /*userdata=*/userdata);
+  } else {
+    // The callback will never run, so the readback buffer (if it was
+    // created) has to be released here.
+    iree_hal_buffer_release(readback_buffer);
+  }
+
+  return status;
+}
+
+// Formats the outputs of |call| into |outputs_builder| as a
+// semicolon-delimited list and then empties the call's output list.
+//
+// Scalars are appended as `type=value` ("?" for unhandled value types).
+// Buffer views append nothing; they are handed to |print_buffer_view|
+// instead. Other refs append "(no printer)" and empty variants "(null)".
+// Returns the first failure encountered, including one from clearing the
+// list.
+static iree_status_t print_outputs_from_call(
+    iree_runtime_call_t* call, iree_string_builder_t* outputs_builder) {
+  iree_vm_list_t* variants_list = iree_runtime_call_outputs(call);
+  for (iree_host_size_t i = 0; i < iree_vm_list_size(variants_list); ++i) {
+    iree_vm_variant_t variant = iree_vm_variant_empty();
+    IREE_RETURN_IF_ERROR(
+        iree_vm_list_get_variant_assign(variants_list, i, &variant),
+        "variant %" PRIhsz " not present", i);
+
+    if (iree_vm_variant_is_value(variant)) {
+      switch (iree_vm_type_def_as_value(variant.type)) {
+        case IREE_VM_VALUE_TYPE_I8: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i8=%" PRIi8, variant.i8));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_I16: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i16=%" PRIi16, variant.i16));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_I32: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i32=%" PRIi32, variant.i32));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_I64: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i64=%" PRIi64, variant.i64));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_F32: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "f32=%f", variant.f32));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_F64: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "f64=%lf", variant.f64));
+          break;
+        }
+        default: {
+          IREE_RETURN_IF_ERROR(
+              iree_string_builder_append_cstring(outputs_builder, "?"));
+          break;
+        }
+      }
+    } else if (iree_vm_variant_is_ref(variant)) {
+      if (iree_hal_buffer_view_isa(variant.ref)) {
+        iree_hal_buffer_view_t* buffer_view =
+            iree_hal_buffer_view_deref(variant.ref);
+        // TODO(scotttodd): join async outputs together and return to caller
+        iree_hal_device_t* device = iree_runtime_session_device(call->session);
+        IREE_RETURN_IF_ERROR(print_buffer_view(device, buffer_view));
+      } else {
+        IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+            outputs_builder, "(no printer)"));
+      }
+    } else {
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(outputs_builder, "(null)"));
+    }
+
+    // Separator between entries, but not after the last one.
+    if (i < iree_vm_list_size(variants_list) - 1) {
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(outputs_builder, ";"));
+    }
+  }
+
+  // Propagate the result instead of dropping it: an unhandled failure status
+  // would otherwise be leaked and the error lost.
+  return iree_vm_list_resize(variants_list, 0);
+}
+
+// Completion callback passed to iree_vm_async_invoke by |call_function|.
+//
+// Prints and frees a failing |status|, releases |outputs|, and frees the
+// invoke state passed as |user_data|. |loop| is unused. Always returns OK.
+iree_status_t invoke_callback(void* user_data, iree_loop_t loop,
+                              iree_status_t status, iree_vm_list_t* outputs) {
+  const bool invoke_failed = !iree_status_is_ok(status);
+  if (invoke_failed) {
+    fprintf(stderr, "iree_vm_async_invoke_callback_fn_t error:\n");
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+  }
+
+  iree_vm_list_release(outputs);
+
+  // |user_data| is the iree_vm_async_invoke_state_t allocated by the caller.
+  iree_allocator_free(iree_allocator_system(), user_data);
+
+  return iree_ok_status();
+}
+
+// Invokes |function_name| (qualified here with the loaded module's name) with
+// the semicolon-delimited |inputs| and returns the result as a JSON string:
+//   { "total_invoke_time_ms": [number], "outputs": "[formatted outputs]" }
+//
+// On success the returned string is heap-allocated and must be freed by the
+// caller. On failure the error is printed to stderr and the empty string
+// literal is returned, which must not be freed.
+// |iterations| is currently unused; the function is invoked once.
+const char* call_function(iree_program_state_t* program_state,
+                          const char* function_name, const char* inputs,
+                          int iterations) {
+  (void)iterations;  // TODO(scotttodd): benchmark iterations (somehow with async)
+
+  iree_status_t status = iree_ok_status();
+
+  // Fully qualify the function name. This sample only supports loading one
+  // module (i.e. 'program') per session, so we can do this.
+  iree_string_builder_t name_builder;
+  iree_string_builder_initialize(iree_allocator_system(), &name_builder);
+  if (iree_status_is_ok(status)) {
+    iree_string_view_t module_name = iree_vm_module_name(program_state->module);
+    status = iree_string_builder_append_format(&name_builder, "%.*s.%s",
+                                               (int)module_name.size,
+                                               module_name.data, function_name);
+  }
+
+  iree_runtime_call_t call;
+  // Tracks whether |call| holds resources that must be released before
+  // returning.
+  bool call_initialized = false;
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_call_initialize_by_name(
+        program_state->session, iree_string_builder_view(&name_builder), &call);
+    call_initialized = iree_status_is_ok(status);
+  }
+  iree_string_builder_deinitialize(&name_builder);
+
+  if (iree_status_is_ok(status)) {
+    status = parse_inputs_into_call(
+        &call, iree_runtime_session_device_allocator(program_state->session),
+        iree_make_cstring_view(inputs));
+  }
+
+  // Note: Timing has ~millisecond precision on the web to mitigate timing /
+  // side-channel security threats.
+  // https://developer.mozilla.org/en-US/docs/Web/API/Performance/now#reduced_time_precision
+  iree_time_t start_time = iree_time_now();
+
+  iree_vm_async_invoke_state_t* invoke_state = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_allocator_malloc(iree_allocator_system(),
+                                   sizeof(iree_vm_async_invoke_state_t),
+                                   (void**)&invoke_state);
+  }
+  // TODO(scotttodd): emscripten / browser loop here
+  iree_status_t loop_status = iree_ok_status();
+  iree_loop_t loop = iree_loop_inline(&loop_status);
+  if (iree_status_is_ok(status)) {
+    iree_vm_context_t* vm_context = iree_runtime_session_context(call.session);
+    iree_vm_function_t vm_function = call.function;
+    iree_vm_list_t* call_inputs = call.inputs;
+    iree_vm_list_t* call_outputs = call.outputs;
+
+    // |invoke_state| is freed by |invoke_callback|.
+    status = iree_vm_async_invoke(loop, invoke_state, vm_context, vm_function,
+                                  IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL,
+                                  call_inputs, call_outputs,
+                                  iree_allocator_system(), invoke_callback,
+                                  /*user_data=*/invoke_state);
+  }
+  // Surface failures recorded by the inline loop; if an earlier error is
+  // already being reported, just free the loop's status so it does not leak.
+  if (iree_status_is_ok(status)) {
+    status = loop_status;
+  } else {
+    iree_status_free(loop_status);
+  }
+
+  // TODO(scotttodd): record end time in async callback instead of here
+  // TODO(scotttodd): print outputs in async callback instead of here
+
+  iree_time_t end_time = iree_time_now();
+  iree_time_t time_elapsed = end_time - start_time;
+
+  iree_string_builder_t outputs_builder;
+  iree_string_builder_initialize(iree_allocator_system(), &outputs_builder);
+
+  // Output a JSON object as a string:
+  // {
+  //   "total_invoke_time_ms": [number],
+  //   "outputs": [semicolon delimited list of formatted outputs]
+  // }
+  if (iree_status_is_ok(status)) {
+    status = iree_string_builder_append_format(
+        &outputs_builder,
+        "{ \"total_invoke_time_ms\": %" PRId64 ", \"outputs\": \"",
+        time_elapsed / 1000000);
+  }
+  if (iree_status_is_ok(status)) {
+    status = print_outputs_from_call(&call, &outputs_builder);
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_string_builder_append_cstring(&outputs_builder, "\"}");
+  }
+
+  // Release the call's references now that the outputs have been consumed.
+  // Buffer views still being read back hold their own references.
+  if (call_initialized) {
+    iree_runtime_call_deinitialize(&call);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    iree_string_builder_deinitialize(&outputs_builder);
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+    return "";
+  }
+
+  // Note: this leaks the buffer. It's up to the caller to free it after use.
+  char* outputs_string = strdup(iree_string_builder_buffer(&outputs_builder));
+  iree_string_builder_deinitialize(&outputs_builder);
+  return outputs_string;
+}
diff --git a/experimental/web/sample_webgpu/serve_sample.sh b/experimental/web/sample_webgpu/serve_sample.sh
new file mode 100755
index 0000000..7f43439
--- /dev/null
+++ b/experimental/web/sample_webgpu/serve_sample.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+ROOT_DIR=$(git rev-parse --show-toplevel)
+BUILD_DIR=${ROOT_DIR?}/build-emscripten
+BINARY_DIR=${BUILD_DIR}/experimental/web/sample_webgpu
+
+echo "=== Running local webserver, open at http://localhost:8000/ ==="
+
+python3 ${ROOT_DIR?}/build_tools/scripts/local_web_server.py --directory ${BINARY_DIR}
diff --git a/experimental/web/testing/build_tests.sh b/experimental/web/testing/build_tests.sh
old mode 100644
new mode 100755
index 692804d..280f89d
--- a/experimental/web/testing/build_tests.sh
+++ b/experimental/web/testing/build_tests.sh
@@ -53,12 +53,16 @@
-DIREE_HOST_BIN_DIR="${INSTALL_ROOT}/bin" \
-DIREE_BUILD_COMPILER=OFF \
-DIREE_HAL_DRIVER_DEFAULTS=OFF \
+ -DIREE_EXTERNAL_HAL_DRIVERS=webgpu \
-DIREE_HAL_DRIVER_LOCAL_SYNC=ON \
-DIREE_HAL_DRIVER_LOCAL_TASK=ON \
-DIREE_HAL_EXECUTABLE_LOADER_DEFAULTS=OFF \
-DIREE_HAL_EXECUTABLE_LOADER_VMVX_MODULE=ON \
-DIREE_HAL_EXECUTABLE_PLUGIN_DEFAULTS=OFF \
+ -DIREE_HAL_EXECUTABLE_LOADER_SYSTEM_LIBRARY=ON \
+ -DIREE_BUILD_EXPERIMENTAL_WEB_SAMPLES=OFF \
-DIREE_BUILD_SAMPLES=OFF \
+ -DIREE_ENABLE_THREADING=ON \
-DIREE_ENABLE_CPUINFO=OFF \
-DIREE_ENABLE_ASAN=OFF \
-DIREE_BUILD_TESTS=ON
diff --git a/experimental/web/testing/serve_tests.sh b/experimental/web/testing/serve_tests.sh
old mode 100644
new mode 100755