Add WebGPU sample application and update other web demos.

This is still rough around the edges, but it demonstrates usage and will be used for development:
  * Build script compiles programs with `--iree-hal-target-backends=webgpu`
  * Build script configures CMake with `-DIREE_EXTERNAL_HAL_DRIVERS=webgpu`
  * Code shows how to interface between runtime C code, browser JavaScript APIs, and Emscripten
diff --git a/build_tools/cmake/iree_copts.cmake b/build_tools/cmake/iree_copts.cmake
index 61f92c2..0687050 100644
--- a/build_tools/cmake/iree_copts.cmake
+++ b/build_tools/cmake/iree_copts.cmake
@@ -378,13 +378,17 @@
     "-natvis:${IREE_ROOT_DIR}/runtime/iree.natvis"
 )
 
-# Our Emscripten library code uses dynCall, which needs these link flags.
-# TODO(scotttodd): Find a way to refactor this, this is nasty to always set :(
-if(EMSCRIPTEN)
+if(EMSCRIPTEN AND IREE_EXTERNAL_WEBGPU_HAL_DRIVER_FOUND)
   iree_select_compiler_opts(IREE_DEFAULT_LINKOPTS
     ALL
-      "-sDYNCALLS=1"
-      "-sEXPORTED_RUNTIME_METHODS=['dynCall']"
+      # TODO(scotttodd): Only add when using WebGPU in a library/binary?
+      "-sUSE_WEBGPU"
+      # Hack: Used to create sync versions of requestAdapter and requestDevice
+      # TODO(scotttodd): Only set for test binaries, avoid sync code in apps
+      #   this doesn't _break_ apps that don't use the sync functions, but it
+      #   does bloat their binary size (and each Emscripten flag comes with
+      #   some risk of breaking compatibility with other features)
+      "-sASYNCIFY"
   )
 endif()
 
diff --git a/experimental/web/sample_dynamic/CMakeLists.txt b/experimental/web/sample_dynamic/CMakeLists.txt
index cc4acd0..8577022 100644
--- a/experimental/web/sample_dynamic/CMakeLists.txt
+++ b/experimental/web/sample_dynamic/CMakeLists.txt
@@ -38,7 +38,8 @@
 target_link_options(${_NAME} PRIVATE
   # https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html#interacting-with-code-ccall-cwrap
-  "-sEXPORTED_FUNCTIONS=['_setup_sample', '_cleanup_sample', '_load_program', '_inspect_program', '_unload_program', '_call_function', '_malloc']"
+  # '_free' is exported so iree_worker.js can release the string returned by call_function.
+  "-sEXPORTED_FUNCTIONS=['_setup_sample', '_cleanup_sample', '_load_program', '_inspect_program', '_unload_program', '_call_function', '_malloc', '_free']"
-  "-sEXPORTED_RUNTIME_METHODS=['ccall','cwrap']"
+  "-sEXPORTED_RUNTIME_METHODS=['ccall','cwrap','UTF8ToString']"
   #
   "-sASSERTIONS=1"
   #
diff --git a/experimental/web/sample_dynamic/build_sample.sh b/experimental/web/sample_dynamic/build_sample.sh
index f826200..0b0deb7 100755
--- a/experimental/web/sample_dynamic/build_sample.sh
+++ b/experimental/web/sample_dynamic/build_sample.sh
@@ -91,6 +91,7 @@
   -DIREE_BUILD_EXPERIMENTAL_WEB_SAMPLES=ON \
   -DIREE_HAL_DRIVER_DEFAULTS=OFF \
   -DIREE_HAL_DRIVER_LOCAL_SYNC=ON \
+  -UIREE_EXTERNAL_HAL_DRIVERS \
   -DIREE_BUILD_COMPILER=OFF \
   -DIREE_BUILD_TESTS=OFF \
   .
diff --git a/experimental/web/sample_dynamic/iree_worker.js b/experimental/web/sample_dynamic/iree_worker.js
index 3a3c862..bb08616 100644
--- a/experimental/web/sample_dynamic/iree_worker.js
+++ b/experimental/web/sample_dynamic/iree_worker.js
@@ -32,7 +32,7 @@
     wasmInspectProgramFn = Module.cwrap('inspect_program', null, ['number']);
     wasmUnloadProgramFn = Module.cwrap('unload_program', null, ['number']);
     wasmCallFunctionFn = Module.cwrap(
-        'call_function', 'string', ['number', 'string', 'string', 'number']);
+        'call_function', 'number', ['number', 'string', 'string', 'number']);
 
     sampleState = wasmSetupSampleFn();
 
@@ -120,8 +120,11 @@
     return;
   }
 
-  const returnValue =
+  // Receive as a pointer, convert, then free. This avoids a memory leak, see
+  // https://github.com/emscripten-core/emscripten/issues/6484
+  const returnValuePtr =
       wasmCallFunctionFn(programState, functionName, inputsJoined, iterations);
+  const returnValue = Module.UTF8ToString(returnValuePtr);
 
   if (returnValue === '') {
     postMessage({
@@ -130,16 +133,12 @@
       'error': 'Wasm module error, check console for details',
     });
   } else {
+    Module._free(returnValuePtr);
     postMessage({
       'messageType': 'callResult',
       'id': id,
       'payload': JSON.parse(returnValue),
     });
-    // TODO(scotttodd): free char* buffer? Or does Emscripten handle that?
-    // Could refactor to
-    //   1) return void*
-    //   2) convert to String manually using UTF8ToString(pointer)
-    //   3) Module._free(pointer)
   }
 }
 
diff --git a/experimental/web/sample_dynamic/main.c b/experimental/web/sample_dynamic/main.c
index 0daf821..4f7d2d5 100644
--- a/experimental/web/sample_dynamic/main.c
+++ b/experimental/web/sample_dynamic/main.c
@@ -6,6 +6,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <string.h>
 
 #include "iree/base/api.h"
 #include "iree/hal/api.h"
@@ -436,5 +437,7 @@
   }
 
   // Note: this leaks the buffer. It's up to the caller to free it after use.
-  return iree_string_builder_buffer(&outputs_builder);
+  char* outputs = strdup(iree_string_builder_buffer(&outputs_builder));
+  iree_string_builder_deinitialize(&outputs_builder);
+  return outputs;
 }
diff --git a/experimental/web/sample_webgpu/CMakeLists.txt b/experimental/web/sample_webgpu/CMakeLists.txt
new file mode 100644
index 0000000..d26c00a
--- /dev/null
+++ b/experimental/web/sample_webgpu/CMakeLists.txt
@@ -0,0 +1,49 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+if(NOT EMSCRIPTEN)
+  return()
+endif()
+
+set(_NAME "iree_experimental_web_sample_webgpu")
+add_executable(${_NAME} "")
+target_sources(${_NAME}
+  PRIVATE
+    main.c
+    device_webgpu.c
+)
+set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "web-sample-webgpu")
+
+target_compile_options(${_NAME} PRIVATE ${IREE_DEFAULT_COPTS})
+
+# Note: we have to be very careful about dependencies here.
+#
+# The general purpose libraries link in multiple executable loaders and HAL
+# drivers/devices, which include code not compatible with Emscripten.
+target_link_libraries(${_NAME} PRIVATE
+  iree_runtime_runtime
+  iree_experimental_webgpu_webgpu
+  iree_experimental_webgpu_platform_emscripten_emscripten
+)
+
+target_link_options(${_NAME} PRIVATE
+  # https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html#interacting-with-code-ccall-cwrap
+  "-sEXPORTED_FUNCTIONS=['_setup_sample', '_cleanup_sample', '_load_program', '_inspect_program', '_unload_program', '_call_function', '_malloc', '_free']"
+  "-sEXPORTED_RUNTIME_METHODS=['ccall','cwrap','UTF8ToString']"
+  #
+  "-sASSERTIONS=1"
+  #
+  # Programs loaded dynamically can require additional memory, so allow growth.
+  "-sALLOW_MEMORY_GROWTH"
+  #
+  # For https://emscripten.org/docs/debugging/Sanitizers.html#address-sanitizer
+  # "-fsanitize=address"
+  # "-sALLOW_MEMORY_GROWTH"
+  #
+  # https://developer.chrome.com/blog/wasm-debugging-2020/
+  "-g"
+  "-gseparate-dwarf"
+)
diff --git a/experimental/web/sample_webgpu/README.md b/experimental/web/sample_webgpu/README.md
new file mode 100644
index 0000000..43eccf4
--- /dev/null
+++ b/experimental/web/sample_webgpu/README.md
@@ -0,0 +1,42 @@
+# WebGPU Sample
+
+This experimental sample demonstrates one way to target the web platform with
+IREE, using WebGPU. The output artifact is a web page that loads separately
+provided IREE `.vmfb` (compiled ML model) files and allows for calling
+functions on them.
+
+## Quickstart
+
+**Note**: you will need a WebGPU-compatible browser. Chrome Canary with the
+`#enable-unsafe-webgpu` flag is a good choice (you may need the flag or an
+origin trial token for `localhost`).
+
+1. Install IREE's host tools (e.g. by building the `install` target with CMake)
+2. Install the Emscripten SDK by
+   [following these directions](https://emscripten.org/docs/getting_started/downloads.html)
+3. Initialize your Emscripten environment (e.g. run `emsdk_env.bat`)
+4. From this directory, run `bash ./build_sample.sh [path to install] && bash ./serve_sample.sh`
+5. Open the localhost address linked in the script output
+
+To rebuild most parts of the sample (C runtime, sample HTML, CMake config,
+etc.), just `control + C` to stop the local webserver and rerun the script.
+
+## How it works
+
+[Emscripten](https://emscripten.org/) is used (via the `emcmake` CMake wrapper)
+to compile the runtime into WebAssembly and JavaScript files.
+
+Any supported IREE program, such as
+[simple_abs.mlir](../../../samples/models/simple_abs.mlir), is compiled using
+the WebGPU compiler target. This generates WGSL shader code and IREE VM
+bytecode, which the IREE runtime is able to load and run using the browser's
+WebGPU APIs.
+
+### Asynchronous API
+
+[`iree_api_webgpu.js`](./iree_api_webgpu.js)
+
+* exposes a Promise-based API to the hosting application in
+  [`index.html`](./index.html)
+* preinitializes a WebGPU adapter and device
+* includes Emscripten's JS code and instantiates the WebAssembly module
diff --git a/experimental/web/sample_webgpu/build_sample.sh b/experimental/web/sample_webgpu/build_sample.sh
new file mode 100755
index 0000000..9360374
--- /dev/null
+++ b/experimental/web/sample_webgpu/build_sample.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Builds the sample, running host tools, Emscripten, and CMake as needed.
+#
+# Prerequisites:
+#   * Environment must be configured for Emscripten
+#   * Host tools must be built (default at IREE_SOURCE_DIR/build-host/install).
+#     The build_tools/cmake/build_host_tools.sh script can do this for you.
+#
+# Usage:
+#   build_sample.sh (optional install path) && serve_sample.sh
+
+set -e
+
+###############################################################################
+# Setup and checking for dependencies                                         #
+###############################################################################
+
+if ! command -v emcmake &> /dev/null
+then
+  echo "'emcmake' not found, setup environment according to https://emscripten.org/docs/getting_started/downloads.html"
+  exit 1
+fi
+
+CMAKE_BIN=${CMAKE_BIN:-$(which cmake)}
+ROOT_DIR=$(git rev-parse --show-toplevel)
+SOURCE_DIR=${ROOT_DIR}/experimental/web/sample_webgpu
+
+BUILD_DIR="${ROOT_DIR?}/build-emscripten"
+mkdir -p "${BUILD_DIR}"
+
+BINARY_DIR="${BUILD_DIR}/experimental/web/sample_webgpu"
+mkdir -p "${BINARY_DIR}"
+
+INSTALL_ROOT="${1:-${ROOT_DIR}/build-host/install}"
+
+###############################################################################
+# Compile from .mlir input to portable .vmfb file using host tools            #
+###############################################################################
+
+echo "=== Compiling sample MLIR files to VM FlatBuffer outputs (.vmfb) ==="
+COMPILE_TOOL="${INSTALL_ROOT?}/bin/iree-compile"
+
+# TODO(#11321): Enable iree-codegen-gpu-native-math-precision by default?
+compile_sample() {
+  echo "  Compiling '$1' sample for WebGPU..."
+  ${COMPILE_TOOL?} $3 \
+    --iree-input-type=$2 \
+    --iree-hal-target-backends=webgpu \
+    --iree-codegen-gpu-native-math-precision=true \
+    --o ${BINARY_DIR}/$1_webgpu.vmfb
+}
+
+compile_sample "simple_abs"     "none" "${ROOT_DIR?}/samples/models/simple_abs.mlir"
+compile_sample "fullyconnected" "mhlo" "${ROOT_DIR?}/tests/e2e/models/fullyconnected.mlir"
+
+# Does not run yet (uses internal readback, which needs async buffer mapping?)
+# compile_sample "collatz"        "${ROOT_DIR?}/tests/e2e/models/collatz.mlir"
+
+# Slow, so just run on demand
+# compile_sample "mobilebert" "tosa" "D:/dev/projects/iree-data/models/2022_10_28/mobilebertsquad.tflite.mlir"
+# compile_sample "posenet"    "tosa" "D:/dev/projects/iree-data/models/2022_10_28/posenet.tflite.mlir"
+# compile_sample "mobilessd"  "tosa" "D:/dev/projects/iree-data/models/2022_10_28/mobile_ssd_v2_float_coco.tflite.mlir"
+
+###############################################################################
+# Build the web artifacts using Emscripten                                    #
+###############################################################################
+
+echo "=== Building web artifacts using Emscripten ==="
+
+pushd "${BUILD_DIR}"
+
+# Configure using Emscripten's CMake wrapper, then build.
+# Note: The sample creates a device directly, so no drivers are required.
+emcmake "${CMAKE_BIN?}" -G Ninja .. \
+  -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+  -DIREE_HOST_BIN_DIR="${INSTALL_ROOT}/bin" \
+  -DIREE_BUILD_EXPERIMENTAL_WEB_SAMPLES=ON \
+  -DIREE_ENABLE_THREADING=OFF \
+  -DIREE_HAL_DRIVER_DEFAULTS=OFF \
+  -DIREE_HAL_DRIVER_LOCAL_SYNC=OFF \
+  -DIREE_HAL_DRIVER_LOCAL_TASK=OFF \
+  -DIREE_EXTERNAL_HAL_DRIVERS=webgpu \
+  -DIREE_ENABLE_ASAN=OFF \
+  -DIREE_BUILD_COMPILER=OFF \
+  -DIREE_BUILD_TESTS=OFF
+
+"${CMAKE_BIN?}" --build . --target \
+  iree_experimental_web_sample_webgpu
+
+popd
+
+echo "=== Copying static files (.html, .js) to the build directory ==="
+
+cp "${SOURCE_DIR?}/index.html" "${BINARY_DIR}"
+cp "${ROOT_DIR}/docs/website/overrides/.icons/iree/ghost.svg" "${BINARY_DIR}"
+cp "${SOURCE_DIR?}/iree_api_webgpu.js" "${BINARY_DIR}"
diff --git a/experimental/web/sample_webgpu/device_webgpu.c b/experimental/web/sample_webgpu/device_webgpu.c
new file mode 100644
index 0000000..41b1c8a
--- /dev/null
+++ b/experimental/web/sample_webgpu/device_webgpu.c
@@ -0,0 +1,30 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <emscripten/html5.h>
+#include <emscripten/html5_webgpu.h>
+
+#include "experimental/webgpu/api.h"
+#include "experimental/webgpu/platform/webgpu.h"
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+
+iree_status_t create_device(iree_allocator_t host_allocator,
+                            iree_hal_device_t** out_device) {
+  WGPUDevice wgpu_device = emscripten_webgpu_get_device();
+  if (!wgpu_device) {
+    return iree_make_status(
+        IREE_STATUS_UNAVAILABLE,
+        "emscripten_webgpu_get_device() failed to return a WGPUDevice");
+  }
+  // Wrap the Emscripten-provided device using default HAL device options.
+  iree_hal_webgpu_device_options_t default_options;
+  iree_hal_webgpu_device_options_initialize(&default_options);
+  // The status of the wrap call is returned to the caller as-is.
+  return iree_hal_webgpu_wrap_device(IREE_SV("webgpu-emscripten"),
+                                     &default_options, wgpu_device,
+                                     host_allocator, out_device);
+}
diff --git a/experimental/web/sample_webgpu/index.html b/experimental/web/sample_webgpu/index.html
new file mode 100644
index 0000000..1196e72
--- /dev/null
+++ b/experimental/web/sample_webgpu/index.html
@@ -0,0 +1,392 @@
+<!DOCTYPE html>
+<html>
+
+<!--
+Copyright 2022 The IREE Authors
+
+Licensed under the Apache License v2.0 with LLVM Exceptions.
+See https://llvm.org/LICENSE.txt for license information.
+SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+-->
+
+<head>
+  <meta charset="utf-8" />
+  <title>IREE WebGPU Sample</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <link rel="icon" href="./ghost.svg" type="image/svg+xml">
+
+  <style>
+    body {
+      padding: 16px;
+    }
+
+    .drop-target {
+      border: 3px solid #2244CC;
+      background-color: #c0c0c0;
+      color: #222222;
+      width:  300px;
+      height: 140px;
+      margin: 20px;
+      padding: 8px;
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      user-select: none;
+    }
+
+    .drop-target p {
+      pointer-events: none;
+    }
+  </style>
+
+  <!-- https://getbootstrap.com/ for some webpage styling-->
+  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
+  <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-ka7Sk0Gln4gmtz2MlQnikT1wXgYsOg+OMhuP+IlRH9sENBO0LRn5q+8nbTov4+1p" crossorigin="anonymous"></script>
+
+  <script src="./iree_api_webgpu.js"></script>
+</head>
+
+<body>
+  <div class="container">
+    <h1>IREE WebGPU Sample</h1>
+
+    <p>
+      This tool works similarly to
+      <a href="https://github.com/iree-org/iree/blob/main/tools/iree-run-module-main.cc"><code>iree-run-module</code></a>
+      (<a href="https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/developer_overview.md#iree-run-module">docs</a>).
+      <br>It loads a compiled IREE program then lets you call exported functions.
+      <br><b>Note:</b> Some outputs are logged to the console.
+    </p>
+
+    <h2>1. Load a program</h2>
+
+    <div id="drop-zone" class="drop-target">
+      <p style="margin:0px">Drag a compiled IREE program<br>(.vmfb file) here to load it</p>
+    </div>
+    <p>
+      Currently loaded program:
+      <b><span id="loaded-program-name" style="display: inline;">(None)</span></b>
+    </p>
+
+    <h2>2. Call functions on a loaded program</h2>
+
+    <form>
+      <p>
+        <label for="function-name-input" class="form-label">Function name:</label>
+        <input type="text" id="function-name-input" class="form-control"
+               style="width:400px; font-family: monospace;" value="main"></input>
+      </p>
+
+      <p>
+        <label for="function-arguments-input" class="form-label">Function arguments:</label>
+        <br><span class="form-text">In the form <code>dim1xdim2xtype=val1,val2,...</code>, one per line</span>
+        <textarea type="text" id="function-arguments-input" spellcheck="false" class="form-control"
+                  style="min-width:400px; width:initial; min-height:100px; resize:both; font-family: monospace;"></textarea>
+      </p>
+
+      <p>
+        <label for="benchmark-iterations-input" class="form-label">
+          Benchmark iterations (inner invoke call):</label>
+        <input type="number" id="benchmark-iterations-input" class="form-control"
+               style="width:400px; font-family: monospace;" value="1" min="1"></input>
+      </p>
+
+      <button id="call-function" class="btn btn-primary" type="button"
+              onclick="callFunctionWithFormInputs()" disabled>Call function</button>
+      <button id="update-url" class="btn btn-secondary" type="button"
+              onclick="updateUrlWithFormValues()">Update URL</button>
+      <button id="clear-url" class="btn btn-secondary" type="button"
+              onclick="clearUrl()">Clear URL</button>
+    </form>
+
+    <p>
+      <h4><label for="function-outputs" class="form-label">Function outputs:</label></h4>
+      <textarea type="text" id="function-outputs" readonly spellcheck="false" class="form-control"
+                style="min-width:400px; width:initial; height:100px; resize:both; font-family: monospace;"></textarea>
+    </p>
+
+    <p>Total time (including overheads):
+      <code id="benchmark-time-js-output" style="font-family: monospace;"></code></p>
+    <p>Mean inference time (Wasm only):
+      <code id="benchmark-time-wasm-output" style="font-family: monospace;"></code></p>
+
+    <hr>
+    <h2>Samples</h2>
+
+    <p>
+      Click to load a sample program, function, and arguments list.
+      <br>These links will automatically update the URL.
+    </p>
+
+    <div class="container" style="width:fit-content; margin-left:0px">
+      <div class="row" style="padding:4px">
+        <div class="col-sm">
+          simple_abs
+          (<a href="https://github.com/iree-org/iree/blob/main/samples/models/simple_abs.mlir">source</a>)
+        </div>
+        <div class="col-sm-auto">
+          <button class="btn btn-secondary" onclick="loadSample('simple_abs')">Load sample</button>
+        </div>
+      </div>
+      <div class="row" style="padding:4px">
+        <div class="col-sm">
+          fullyconnected
+          (<a href="https://github.com/iree-org/iree/blob/main/tests/e2e/models/fullyconnected.mlir">source</a>)
+        </div>
+        <div class="col-sm-auto">
+          <button class="btn btn-secondary" onclick="loadSample('fullyconnected')">Load sample</button>
+        </div>
+      </div>
+      <div class="row" style="padding:4px">
+        <div class="col-sm">
+          mobilebert
+          (<a href="https://tfhub.dev/iree/lite-model/mobilebert/fp32/1">source</a>)
+        </div>
+        <div class="col-sm-auto">
+          <button class="btn btn-secondary" onclick="loadSample('mobilebert')">Load sample</button>
+        </div>
+      </div>
+      <div class="row" style="padding:4px">
+        <div class="col-sm">
+          posenet
+          (<a href="https://tfhub.dev/tensorflow/lite-model/posenet/mobilenet/float/075/1/default/1">source</a>)
+        </div>
+        <div class="col-sm-auto">
+          <button class="btn btn-secondary" onclick="loadSample('posenet')">Load sample</button>
+        </div>
+      </div>
+      <div class="row" style="padding:4px">
+        <div class="col-sm">
+          mobilessd
+          (<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/gpu/mobile_ssd_v2_float_coco.tflite">source</a>)
+        </div>
+        <div class="col-sm-auto">
+          <button class="btn btn-secondary" onclick="loadSample('mobilessd')">Load sample</button>
+        </div>
+      </div>
+    </div>
+
+    <hr>
+    <h2>Compile your own program</h2>
+
+    <p>
+      Programs must be compiled for WebGPU to run on this page, using options
+      to <code>iree-compile</code> such as:
+    </p>
+
+    <textarea type="text" readonly spellcheck="false"
+    class="form-control" style="width:610px; height:90px; resize:none; font-family: monospace;">
+--iree-hal-target-backends=webgpu \
+--iree-codegen-gpu-native-math-precision=true \</textarea>
+
+  </div>
+
+  <script>
+    const initializePromise = ireeInitialize();
+    initializePromise.then(() => {
+      console.log("IREE initialized, ready to load programs.");
+    }).catch((error) => {
+      console.error("Failed to initialize IREE, error:");
+      console.error(error);
+    });
+
+    let loadedProgram = null;
+    const programNameElement = document.getElementById("loaded-program-name");
+    const callFunctionButton = document.getElementById("call-function");
+    const functionNameInput = document.getElementById("function-name-input");
+    const functionArgumentsInput = document.getElementById("function-arguments-input");
+    const benchmarkIterationsInput = document.getElementById("benchmark-iterations-input");
+    const functionOutputsElement = document.getElementById("function-outputs");
+    const timeJsOutputElement = document.getElementById("benchmark-time-js-output");
+    const timeWasmOutputElement = document.getElementById("benchmark-time-wasm-output");
+
+    async function finishLoadingProgram(newProgram, newProgramName) {
+      if (loadedProgram !== null) {
+        // Unload the previous program. We could keep a list of loaded programs
+        // and let users select between them.
+        await ireeUnloadProgram(loadedProgram);
+      }
+
+      await ireeInspectProgram(newProgram);
+
+      loadedProgram = newProgram;
+      programNameElement.innerText = newProgramName;
+      callFunctionButton.disabled = false;
+    }
+
+    async function tryLoadFromUrlParams() {
+      // Fetch IREE program from ?program=[file.vmfb] URL query parameter.
+      const searchParams = new URLSearchParams(window.location.search);
+
+      if (searchParams.has("function")) {
+        functionNameInput.value = searchParams.get("function");
+      }
+
+      if (searchParams.has("arguments")) {
+        functionArgumentsInput.value = searchParams.get("arguments");
+      }
+
+      if (searchParams.has("iterations")) {
+        benchmarkIterationsInput.value = searchParams.get("iterations");
+      }
+
+      if (searchParams.has("program")) {
+        const programPath = searchParams.get("program");
+
+        await initializePromise;
+        const program = await ireeLoadProgram(programPath);
+
+        // Set name to what is hopefully the file component of the path.
+        finishLoadingProgram(program, programPath.split("/").pop());
+      }
+    }
+
+    async function tryLoadFromBuffer(programDataBuffer, programName) {
+      // Clear 'program' from the URL.
+      const searchParams = new URLSearchParams(window.location.search);
+      searchParams.delete("program");
+      replaceUrlWithSearchParams(searchParams);
+
+      await initializePromise;
+      const program = await ireeLoadProgram(programDataBuffer);
+
+      finishLoadingProgram(program, programName);
+    }
+
+    // ------------------------------------------------------------------------
+    // Drag-and-drop to load from your local filesystem.
+    const dropZone = document.getElementById("drop-zone");
+    dropZone.addEventListener("drop", (dropEvent) => {
+      dropEvent.preventDefault();
+      dropEvent.target.style.border = "";
+
+      // Assume exactly one file was dropped.
+      const uploadedFile = dropEvent.dataTransfer.items[0].getAsFile();
+      const fileReader = new FileReader();
+      fileReader.onload = (fileLoadEvent) => {
+        tryLoadFromBuffer(fileLoadEvent.target.result, uploadedFile.name)
+          .catch((error) => {
+            console.error("Error loading program from drop: '" + error + "'");
+          });
+      };
+      fileReader.readAsArrayBuffer(uploadedFile);
+    });
+    dropZone.addEventListener("dragover", (event) => {
+      event.preventDefault();
+    });
+    dropZone.addEventListener("dragenter", (event) => {
+      if (event.target !== dropZone) return;
+      event.target.style.border = "3px dotted red";
+    });
+    dropZone.addEventListener("dragleave", (event) => {
+      if (event.target !== dropZone) return;
+      event.target.style.border = "";
+    });
+    // ------------------------------------------------------------------------
+
+    // ------------------------------------------------------------------------
+    // Form inputs.
+    function callFunctionWithFormInputs() {
+      if (loadedProgram === null) {
+        console.error("Can't call a function with no loaded program");
+        return;
+      }
+
+      const functionName = functionNameInput.value;
+      const inputs = functionArgumentsInput.value.split("\n");
+      const iterations = benchmarkIterationsInput.value;
+      const startJsTime = performance.now();
+
+      ireeCallFunction(loadedProgram, functionName, inputs, iterations)
+          .then((resultObject) => {
+            functionOutputsElement.value =
+                resultObject['outputs'].replace(";", "\n");
+
+            const endJsTime = performance.now();
+            const totalJsTime = endJsTime - startJsTime;
+            timeJsOutputElement.textContent = totalJsTime.toFixed(3) + "ms";
+
+            const totalWasmTimeMs = resultObject['total_invoke_time_ms'];
+            const meanWasmTimeMs = totalWasmTimeMs / iterations;
+            timeWasmOutputElement.textContent = meanWasmTimeMs.toFixed(3) +
+                "ms / iteration over " + iterations + " iteration(s)";
+          })
+          .catch((error) => {
+            console.error("Function call error: '" + error + "'");
+          });
+    }
+
+    function replaceUrlWithSearchParams(searchParams) {
+      let newUrl = window.location.protocol + "//" + window.location.host +
+          window.location.pathname;
+      const searchString = searchParams.toString();
+      if (searchString !== "") newUrl += "?" + searchParams;
+      window.history.replaceState({path: newUrl}, "", newUrl);
+    }
+
+    function updateUrlWithFormValues() {
+      const searchParams = new URLSearchParams(window.location.search);
+      searchParams.set("function", functionNameInput.value);
+      searchParams.set("arguments", functionArgumentsInput.value);
+      searchParams.set("iterations", benchmarkIterationsInput.value);
+      replaceUrlWithSearchParams(searchParams);
+    }
+
+    function clearUrl() {
+      const searchParams = new URLSearchParams(window.location.search);
+      searchParams.delete("program");
+      searchParams.delete("function");
+      searchParams.delete("arguments");
+      searchParams.delete("iterations");
+      replaceUrlWithSearchParams(searchParams);
+    }
+    // ------------------------------------------------------------------------
+
+    // ------------------------------------------------------------------------
+    // Load samples programs / inputs.
+    function loadSample(sampleName) {
+      const searchParams = new URLSearchParams(window.location.search);
+      searchParams.set("program", sampleName + "_webgpu.vmfb");
+      replaceUrlWithSearchParams(searchParams);
+
+      if (sampleName === "simple_abs") {
+        functionNameInput.value = "abs";
+        functionArgumentsInput.value = "f32=-1.23";
+      } else if (sampleName === "fullyconnected") {
+        functionNameInput.value = "main";
+        functionArgumentsInput.value = [
+          "1x5xf32=1,-2,-3,4,-5",
+          "1x5x3x1xf32=15,14,13,12,11,10,9,8,7,6,5,4,3,2,1",
+        ].join("\n");
+      } else if (sampleName === "mobilebert") {
+        functionNameInput.value = "main";
+        functionArgumentsInput.value = [
+          "1x384xi32",
+          "1x384xi32",
+          "1x384xi32",
+        ].join("\n");
+      } else if (sampleName === "posenet") {
+        functionNameInput.value = "main";
+        functionArgumentsInput.value = "1x353x257x3xf32";
+      } else if (sampleName === "mobilessd") {
+        functionNameInput.value = "main";
+        functionArgumentsInput.value = "1x320x320x3xf32";
+      }
+
+      updateUrlWithFormValues();
+
+      tryLoadFromUrlParams().catch((error) => {
+        console.error("Error loading sample program: '" + error + "'");
+      });
+    }
+    // ------------------------------------------------------------------------
+
+    window.addEventListener("load", () => {
+      tryLoadFromUrlParams().catch((error) => {
+        console.error("Error loading program from URL: '" + error + "'");
+      });
+    });
+  </script>
+</body>
+
+</html>
diff --git a/experimental/web/sample_webgpu/iree_api_webgpu.js b/experimental/web/sample_webgpu/iree_api_webgpu.js
new file mode 100644
index 0000000..e97afef
--- /dev/null
+++ b/experimental/web/sample_webgpu/iree_api_webgpu.js
@@ -0,0 +1,219 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Promise-based API for interacting with the IREE runtime.
+
+const EMSCRIPTEN_SCRIPT_URL = 'web-sample-webgpu.js';
+
+// ------------------------------------------------------------------------- //
+// - API                                                                   - //
+// ------------------------------------------------------------------------- //
+
+// Initializes IREE's runtime.
+//
+// Requests a WebGPU adapter and device, injects the Emscripten-generated
+// script (EMSCRIPTEN_SCRIPT_URL), and settles once `Module.onRuntimeInitialized`
+// has run `setup_sample`. Calling it again returns the promise created by the
+// first call.
+//
+// Resolves with no value on success. Rejects when `navigator.gpu` is missing
+// or when `setup_sample` returns a null state.
+async function ireeInitialize() {
+  return _ireeInitialize();
+}
+
+// Loads an IREE program stored in a .vmfb file.
+//
+// Accepts either a string path to a file (XMLHttpRequest compatible) or an
+// ArrayBuffer containing an already loaded file. The file contents are copied
+// into the Wasm heap; the C side takes ownership of that copy.
+//
+// In order to call functions on the program it must be compiled in a supported
+// configuration, such as with these flags:
+//     --iree-hal-target-backends=webgpu
+//
+// Resolves with an opaque pointer to the program state on success.
+// Rejects with 'Wasm module error loading program' when the Wasm module
+// returns a null program state.
+async function ireeLoadProgram(vmfbPathOrBuffer) {
+  return _ireeLoadProgram(vmfbPathOrBuffer);
+}
+
+// Inspects a program.
+//
+// |programState| is the pointer resolved by ireeLoadProgram. The Wasm module
+// prints the program's metadata (module name, function counts, exported
+// functions) to its stdout; nothing is returned to JavaScript.
+//
+// Resolves with no value.
+async function ireeInspectProgram(programState) {
+  return _ireeInspectProgram(programState);
+}
+
+// Unloads a program.
+//
+// |programState| is the pointer resolved by ireeLoadProgram. The Wasm module
+// frees the state, so the pointer must not be used after this call.
+//
+// Resolves with no value.
+async function ireeUnloadProgram(programState) {
+  return _ireeUnloadProgram(programState);
+}
+
+// Calls a function on a loaded program.
+//
+// * |programState| is the pointer resolved by ireeLoadProgram.
+// * |functionName| is the function's name without the module prefix (the C
+//   side prepends the module name).
+// * |inputs| is either a single string or an array of strings; arrays are
+//   joined with ';' before being passed to the Wasm module.
+// * |iterations| defaults to 1 when undefined. NOTE: the C implementation in
+//   main.c currently carries a TODO for benchmark iterations and does not use
+//   this value.
+//
+// Resolves with a parsed JSON object on success:
+// {
+//   "total_invoke_time_ms": [number],
+//   "outputs": [semicolon delimited list of formatted outputs]
+// }
+//
+// Rejects when |inputs| has an unsupported type or when the Wasm module
+// returns an empty string.
+async function ireeCallFunction(
+    programState, functionName, inputs, iterations) {
+  return _ireeCallFunction(programState, functionName, inputs, iterations);
+}
+
+// ------------------------------------------------------------------------- //
+// - Implementation                                                        - //
+// ------------------------------------------------------------------------- //
+
+// TODO(scotttodd): namespace / scope these (don't pollute window object)
+// Wrappers around the exported C functions, created with Module.cwrap() in
+// Module.onRuntimeInitialized. They are undefined until that callback runs.
+let wasmSetupSampleFn;
+let wasmCleanupSampleFn;
+let wasmLoadProgramFn;
+let wasmInspectProgramFn;
+let wasmUnloadProgramFn;
+let wasmCallFunctionFn;
+
+// Initialization promise shared by all callers of _ireeInitialize(), plus its
+// resolve/reject functions so Module.onRuntimeInitialized can settle it.
+let initializedPromise, initializePromiseResolve, initializePromiseReject;
+// Pointer returned by the C `setup_sample` function (0 on failure).
+let sampleState;
+
+// Emscripten module configuration object. It is declared before the
+// Emscripten-generated script is injected (see _ireeInitialize) so that script
+// can pick up these hooks.
+var Module = {
+  // Forward text printed by the C program to the console, tagged with '(C)'.
+  print: function(text) {
+    console.log('(C)', text);
+  },
+  printErr: function(text) {
+    console.error('(C)', text);
+  },
+  // Binds the exported C functions and runs `setup_sample`, then settles the
+  // promise created by _ireeInitialize().
+  onRuntimeInitialized: function() {
+    // cwrap(name, returnType, argTypes): pointers are passed as 'number'.
+    wasmSetupSampleFn = Module.cwrap('setup_sample', 'number', []);
+    wasmCleanupSampleFn = Module.cwrap('cleanup_sample', null, ['number']);
+    wasmLoadProgramFn = Module.cwrap(
+        'load_program',
+        'number',
+        ['number', 'number', 'number'],
+    );
+    wasmInspectProgramFn = Module.cwrap('inspect_program', null, ['number']);
+    wasmUnloadProgramFn = Module.cwrap('unload_program', null, ['number']);
+    // The return type is 'number' (a raw pointer) rather than 'string' so the
+    // caller can free the returned buffer; see _ireeCallFunction.
+    wasmCallFunctionFn = Module.cwrap(
+        'call_function',
+        'number',
+        ['number', 'string', 'string', 'number'],
+    );
+
+    // setup_sample returns NULL (0) when runtime initialization fails.
+    sampleState = wasmSetupSampleFn();
+    if (!sampleState) {
+      initializePromiseReject('Runtime initialization failed');
+      return;
+    }
+    initializePromiseResolve();
+  },
+  // Do not run main() automatically; the exported functions are called
+  // explicitly instead.
+  noInitialRun: true,
+};
+
+async function _ireeInitialize() {
+  if (initializedPromise) return initializedPromise;
+
+  initializedPromise = new Promise((resolve, reject) => {
+    initializePromiseResolve = resolve;
+    initializePromiseReject = reject;
+  });
+
+  // Preinitialize a WebGPU device here. We could let the C program request the
+  // adapter and device itself, but that would jump through layers of Emscripten
+  // binding code and C/JS callbacks. This is much more concise.
+  // const instance = -1; // No wgpuCreateInstance function in JS (yet?).
+  if (navigator['gpu'] === undefined) {
+    throw 'No \'gpu\' property on navigator, can\'t initialize WebGPU (missing #enable-unsafe-webgpu or an origin trial?)';
+  }
+  const adapter = await navigator['gpu']['requestAdapter']();
+  const deviceDescriptor = {
+    'label': 'IREE WebGPU device',
+    'requiredFeatures': [],
+    'requiredLimits': {
+      'maxBindGroups': 4,
+      'maxStorageBuffersPerShaderStage': 8,
+    },
+    'defaultQueue': {},
+  };
+  const device = await adapter['requestDevice'](deviceDescriptor);
+  // Emscripten makes this available via emscripten_webgpu_get_device() in C.
+  Module['preinitializedWebGPUDevice'] = device;
+
+  const mainScript = document.createElement('script');
+  mainScript.setAttribute('src', EMSCRIPTEN_SCRIPT_URL);
+  document.body.appendChild(mainScript);
+
+  return initializedPromise;
+}
+
+function _ireeLoadProgramBuffer(programDataBuffer) {
+  const programDataView = new Int8Array(programDataBuffer);
+
+  const programDataWasmBuffer = Module._malloc(
+      programDataView.length * programDataView.BYTES_PER_ELEMENT);
+  Module.HEAP8.set(programDataView, programDataWasmBuffer);
+
+  // Note: we transfer ownership of the FlatBuffer data here, so there is
+  // no need to call `Module._free(programDataWasmBuffer)` later.
+  const programState = wasmLoadProgramFn(
+      sampleState, programDataWasmBuffer, programDataBuffer.byteLength);
+  return programState;
+}
+
+function _ireeLoadProgram(vmfbPathOrBuffer) {
+  if (vmfbPathOrBuffer instanceof ArrayBuffer) {
+    const programState = _ireeLoadProgramBuffer(vmfbPathOrBuffer);
+    if (programState !== 0) {
+      return Promise.resolve(programState);
+    } else {
+      return Promise.reject('Wasm module error loading program');
+    }
+  }
+
+  return new Promise((resolve, reject) => {
+    const fetchRequest = new XMLHttpRequest();
+    fetchRequest.onload = function(progressEvent) {
+      const programState =
+          _ireeLoadProgramBuffer(progressEvent.target.response);
+      if (programState !== 0) {
+        resolve(programState);
+      } else {
+        reject('Wasm module error loading program');
+      }
+    };
+    fetchRequest.onerror = function(progressEvent) {
+      reject(progressEvent.error);
+    };
+    fetchRequest.open('GET', vmfbPathOrBuffer);
+    fetchRequest.responseType = 'arraybuffer';
+    fetchRequest.send();
+  });
+}
+
+// Calls the C `inspect_program` function synchronously, then returns an
+// already-resolved promise (with no value).
+function _ireeInspectProgram(programState) {
+  wasmInspectProgramFn(programState);
+  return Promise.resolve();
+}
+
+// Calls the C `unload_program` function synchronously, then returns an
+// already-resolved promise (with no value). |programState| is freed by the C
+// side and must not be reused.
+function _ireeUnloadProgram(programState) {
+  wasmUnloadProgramFn(programState);
+  return Promise.resolve();
+}
+
+function _ireeCallFunction(programState, functionName, inputs, iterations) {
+  iterations = iterations !== undefined ? iterations : 1;
+
+  let inputsJoined;
+  if (Array.isArray(inputs)) {
+    inputsJoined = inputs.join(';');
+  } else if (typeof (inputs) === 'string') {
+    inputsJoined = inputs;
+  } else {
+    return Promise.reject(
+        'Expected \'inputs\' to be a String or an array of Strings');
+  }
+
+  // Receive as a pointer, convert, then free. This avoids a memory leak, see
+  // https://github.com/emscripten-core/emscripten/issues/6484
+  const returnValuePtr =
+      wasmCallFunctionFn(programState, functionName, inputsJoined, iterations);
+  const returnValue = Module.UTF8ToString(returnValuePtr);
+
+  if (returnValue === '') {
+    return Promise.reject('Wasm module error calling function');
+  } else {
+    Module._free(returnValuePtr);
+    return Promise.resolve(JSON.parse(returnValue));
+  }
+}
diff --git a/experimental/web/sample_webgpu/main.c b/experimental/web/sample_webgpu/main.c
new file mode 100644
index 0000000..0e2ac66
--- /dev/null
+++ b/experimental/web/sample_webgpu/main.c
@@ -0,0 +1,691 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+// Must be first.
+#include "experimental/webgpu/platform/webgpu.h"
+
+// NOTE: include order matters.
+#include "experimental/webgpu/buffer.h"
+#include "experimental/webgpu/webgpu_device.h"
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/modules/hal/module.h"
+#include "iree/runtime/api.h"
+#include "iree/vm/bytecode/module.h"
+
+//===----------------------------------------------------------------------===//
+// Public API
+//===----------------------------------------------------------------------===//
+
+// Opaque state for the sample, shared between multiple loaded programs.
+typedef struct iree_sample_state_t iree_sample_state_t;
+
+// Initializes the sample and returns its state.
+iree_sample_state_t* setup_sample();
+
+// Shuts down the sample and frees its state.
+// Requires that all programs first be unloaded with |unload_program|.
+void cleanup_sample(iree_sample_state_t* sample_state);
+
+// Opaque state for an individual loaded program.
+typedef struct iree_program_state_t iree_program_state_t;
+
+// Loads a program into the sample from the provided data.
+// Note: this takes ownership of |vmfb_data|.
+iree_program_state_t* load_program(iree_sample_state_t* sample_state,
+                                   uint8_t* vmfb_data, size_t length);
+
+// Inspects metadata about a loaded program, printing to stdout.
+void inspect_program(iree_program_state_t* program_state);
+
+// Unloads a program and frees its state.
+void unload_program(iree_program_state_t* program_state);
+
+// Calls a function synchronously.
+//
+// Returns a JSON object serialized as a string on success:
+//   { "total_invoke_time_ms": [number], "outputs": "[formatted outputs]" }
+// where "outputs" is a semicolon-delimited list of formatted outputs. The
+// string is heap-allocated and must be freed by the caller.
+// Returns the empty string (a string literal that must NOT be freed) on
+// failure. Note: This is in need of some real API bindings that marshal
+// structured data between C <-> JS.
+//
+// * |function_name| is the function name without the module prefix, like
+//   'abs'. The loaded module's name is prepended internally to form the fully
+//   qualified name.
+// * |inputs| is a semicolon delimited list of VM scalars and buffers, as
+//   described in iree/tooling/vm_util and used in IREE's CLI tools.
+//   For example, the CLI `--function_input=f32=1 --function_input=f32=2`
+//   should be passed here as `f32=1;f32=2`.
+// * |iterations| is the number of times to call the function, for benchmarking
+//   (not yet used by the implementation, which has a TODO for it).
+const char* call_function(iree_program_state_t* program_state,
+                          const char* function_name, const char* inputs,
+                          int iterations);
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+// State shared by every program loaded into the sample.
+typedef struct iree_sample_state_t {
+  // Runtime instance created in |setup_sample|.
+  iree_runtime_instance_t* instance;
+  // HAL device obtained from |create_device| in |setup_sample|.
+  iree_hal_device_t* device;
+} iree_sample_state_t;
+
+// State for one loaded program; created by |load_program| and destroyed by
+// |unload_program|.
+typedef struct iree_program_state_t {
+  // Session created on the sample's shared instance and device.
+  iree_runtime_session_t* session;
+  // Bytecode module created from the program's FlatBuffer data and appended to
+  // |session|.
+  iree_vm_module_t* module;
+} iree_program_state_t;
+
+// Creates the HAL device used by the sample. Defined outside of this file;
+// |setup_sample| calls it with the system allocator and stores the result in
+// the sample state.
+extern iree_status_t create_device(iree_allocator_t host_allocator,
+                                   iree_hal_device_t** out_device);
+
+// Allocates the sample state, then creates the runtime instance and the HAL
+// device. Returns NULL (after printing the error to stderr) if any step fails.
+iree_sample_state_t* setup_sample() {
+  iree_sample_state_t* sample_state = NULL;
+  iree_status_t status =
+      iree_allocator_malloc(iree_allocator_system(),
+                            sizeof(iree_sample_state_t), (void**)&sample_state);
+
+  iree_runtime_instance_options_t instance_options;
+  iree_runtime_instance_options_initialize(&instance_options);
+  // Note: no call to iree_runtime_instance_options_use_all_available_drivers().
+
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_instance_create(
+        &instance_options, iree_allocator_system(), &sample_state->instance);
+  }
+
+  if (iree_status_is_ok(status)) {
+    status = create_device(iree_allocator_system(), &sample_state->device);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+    // |sample_state| is still NULL when its allocation failed, and
+    // cleanup_sample dereferences its argument, so only clean up a real state.
+    // NOTE(review): cleaning up a partially initialized state assumes
+    // iree_allocator_malloc returns zeroed memory - confirm.
+    if (sample_state) cleanup_sample(sample_state);
+    return NULL;
+  }
+
+  return sample_state;
+}
+
+// Releases the device and instance held by |sample_state| and frees the state
+// itself. Passing NULL is a no-op.
+void cleanup_sample(iree_sample_state_t* sample_state) {
+  if (!sample_state) return;
+  iree_hal_device_release(sample_state->device);
+  iree_runtime_instance_release(sample_state->instance);
+  // The state was allocated with iree_allocator_malloc on the system
+  // allocator, so free it through the same allocator.
+  iree_allocator_free(iree_allocator_system(), sample_state);
+}
+
+// Creates a session on the sample's instance/device, builds a bytecode module
+// from |vmfb_data| (|length| bytes), and appends the module to the session.
+//
+// Takes ownership of |vmfb_data|. Returns the new program state, or NULL
+// (after printing the error to stderr) if any step fails.
+iree_program_state_t* load_program(iree_sample_state_t* sample_state,
+                                   uint8_t* vmfb_data, size_t length) {
+  iree_program_state_t* program_state = NULL;
+  iree_status_t status = iree_allocator_malloc(iree_allocator_system(),
+                                               sizeof(iree_program_state_t),
+                                               (void**)&program_state);
+
+  iree_runtime_session_options_t session_options;
+  iree_runtime_session_options_initialize(&session_options);
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_session_create_with_device(
+        sample_state->instance, &session_options, sample_state->device,
+        iree_runtime_instance_host_allocator(sample_state->instance),
+        &program_state->session);
+  }
+
+  if (iree_status_is_ok(status)) {
+    // Take ownership of the FlatBuffer data so JavaScript doesn't need to
+    // explicitly call `Module._free()`.
+    // NOTE(review): whether the data is freed when module creation itself
+    // fails depends on iree_vm_bytecode_module_create - confirm.
+    status = iree_vm_bytecode_module_create(
+        iree_runtime_instance_vm_instance(sample_state->instance),
+        iree_make_const_byte_span(vmfb_data, length),
+        /*flatbuffer_allocator=*/iree_allocator_system(),
+        iree_allocator_system(), &program_state->module);
+  } else {
+    // Must clean up the FlatBuffer data directly.
+    iree_allocator_free(iree_allocator_system(), (void*)vmfb_data);
+  }
+
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_session_append_module(program_state->session,
+                                                program_state->module);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+    // |program_state| is still NULL when its allocation failed, and
+    // unload_program dereferences its argument, so only unload a real state.
+    if (program_state) unload_program(program_state);
+    return NULL;
+  }
+
+  return program_state;
+}
+
+// Prints metadata about the loaded program to stdout: the module name, the
+// number of imported/exported/internal functions, and the name and calling
+// convention of each exported function. Lookup failures for individual
+// functions are printed to stderr and skipped.
+void inspect_program(iree_program_state_t* program_state) {
+  fprintf(stdout, "=== program properties ===\n");
+
+  iree_vm_module_t* module = program_state->module;
+  iree_string_view_t module_name = iree_vm_module_name(module);
+  // String views are not NUL-terminated, hence the explicit "%.*s" lengths.
+  fprintf(stdout, "  module name: '%.*s'\n", (int)module_name.size,
+          module_name.data);
+
+  iree_vm_module_signature_t module_signature =
+      iree_vm_module_signature(module);
+  fprintf(stdout, "  module signature:\n");
+  fprintf(stdout, "    %" PRIhsz " imported functions\n",
+          module_signature.import_function_count);
+  fprintf(stdout, "    %" PRIhsz " exported functions\n",
+          module_signature.export_function_count);
+  fprintf(stdout, "    %" PRIhsz " internal functions\n",
+          module_signature.internal_function_count);
+
+  fprintf(stdout, "  exported functions:\n");
+  for (iree_host_size_t i = 0; i < module_signature.export_function_count;
+       ++i) {
+    iree_vm_function_t function;
+    iree_status_t status = iree_vm_module_lookup_function_by_ordinal(
+        module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function);
+    if (!iree_status_is_ok(status)) {
+      iree_status_fprint(stderr, status);
+      iree_status_free(status);
+      continue;
+    }
+
+    iree_string_view_t function_name = iree_vm_function_name(&function);
+    iree_vm_function_signature_t function_signature =
+        iree_vm_function_signature(&function);
+    iree_string_view_t calling_convention =
+        function_signature.calling_convention;
+    // Both values are quoted; the original format was missing the opening
+    // quote before the calling convention.
+    fprintf(stdout, "    function name: '%.*s', calling convention: '%.*s'\n",
+            (int)function_name.size, function_name.data,
+            (int)calling_convention.size, calling_convention.data);
+  }
+}
+
+// Releases the module and session held by |program_state| and frees the state
+// itself. Passing NULL is a no-op.
+void unload_program(iree_program_state_t* program_state) {
+  if (!program_state) return;
+  iree_vm_module_release(program_state->module);
+  iree_runtime_session_release(program_state->session);
+  // The state was allocated with iree_allocator_malloc on the system
+  // allocator, so free it through the same allocator.
+  iree_allocator_free(iree_allocator_system(), program_state);
+}
+
+// Parses one textual |input| and appends it to |call|'s input list.
+//
+// Inputs containing '=' or 'x' are parsed as buffer views (allocated from
+// |device_allocator|); a leading '&' pushes the underlying buffer instead of
+// the buffer view. Anything else is parsed as a scalar: f32 when it contains
+// a '.', i32 otherwise.
+//
+// NOTE(review): because any 'x' selects the buffer view path, a hexadecimal
+// scalar such as '0x10' would not reach the integer parser - confirm whether
+// that matters for callers.
+static iree_status_t parse_input_into_call(
+    iree_runtime_call_t* call, iree_hal_allocator_t* device_allocator,
+    iree_string_view_t input) {
+  bool has_equal =
+      iree_string_view_find_char(input, '=', 0) != IREE_STRING_VIEW_NPOS;
+  bool has_x =
+      iree_string_view_find_char(input, 'x', 0) != IREE_STRING_VIEW_NPOS;
+  if (has_equal || has_x) {
+    // Buffer view (either just a shape or a shape=value) or buffer.
+    bool is_storage_reference =
+        iree_string_view_consume_prefix(&input, iree_make_cstring_view("&"));
+    iree_hal_buffer_view_t* buffer_view = NULL;
+    IREE_RETURN_IF_ERROR(
+        iree_hal_buffer_view_parse(input, device_allocator, &buffer_view),
+        "parsing value '%.*s'", (int)input.size, input.data);
+    if (is_storage_reference) {
+      // Storage buffer reference; just take the storage for the buffer view -
+      // it'll still have whatever contents were specified (or 0) but we'll
+      // discard the metadata.
+      iree_vm_ref_t buffer_ref =
+          iree_hal_buffer_retain_ref(iree_hal_buffer_view_buffer(buffer_view));
+      // The buffer was retained above, so the view itself can be dropped.
+      iree_hal_buffer_view_release(buffer_view);
+      return iree_vm_list_push_ref_move(call->inputs, &buffer_ref);
+    } else {
+      // Hand the parsed view's reference over to the input list.
+      iree_vm_ref_t buffer_view_ref =
+          iree_hal_buffer_view_move_ref(buffer_view);
+      return iree_vm_list_push_ref_move(call->inputs, &buffer_view_ref);
+    }
+  } else {
+    // Scalar.
+    bool has_dot =
+        iree_string_view_find_char(input, '.', 0) != IREE_STRING_VIEW_NPOS;
+    iree_vm_value_t val;
+    if (has_dot) {
+      // Float.
+      val = iree_vm_value_make_f32(0.0f);
+      if (!iree_string_view_atof(input, &val.f32)) {
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "parsing value '%.*s' as f32", (int)input.size,
+                                input.data);
+      }
+    } else {
+      // Integer.
+      val = iree_vm_value_make_i32(0);
+      if (!iree_string_view_atoi_int32(input, &val.i32)) {
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "parsing value '%.*s' as i32", (int)input.size,
+                                input.data);
+      }
+    }
+    return iree_vm_list_push_value(call->inputs, &val);
+  }
+
+  // Both branches above return, so this statement is not reached.
+  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                          "Unhandled function input (unreachable?)");
+}
+
+// Splits the semicolon-delimited |inputs| list and appends each entry to
+// |call| via parse_input_into_call. An empty list is accepted and adds
+// nothing; the first entry that fails to parse aborts with its status.
+static iree_status_t parse_inputs_into_call(
+    iree_runtime_call_t* call, iree_hal_allocator_t* device_allocator,
+    iree_string_view_t inputs) {
+  if (inputs.size == 0) return iree_ok_status();
+
+  // Peel one entry off the front of |unparsed| per pass; a split index of -1
+  // means no semicolon was left, i.e. the entry just parsed was the last one.
+  iree_string_view_t unparsed = inputs;
+  for (;;) {
+    iree_string_view_t entry;
+    intptr_t semicolon_index =
+        iree_string_view_split(unparsed, ';', &entry, &unparsed);
+    IREE_RETURN_IF_ERROR(parse_input_into_call(call, device_allocator, entry));
+    if (semicolon_index == -1) break;
+  }
+
+  return iree_ok_status();
+}
+
+// State passed from |print_buffer_view| to |buffer_map_sync_callback|. The
+// callback releases both members and frees the struct.
+typedef struct iree_buffer_map_userdata_t {
+  // Buffer view being printed; retained until the callback runs.
+  iree_hal_buffer_view_t* source_buffer_view;
+  // Mappable buffer that the source contents were copied into.
+  iree_hal_buffer_t* readback_buffer;
+} iree_buffer_map_userdata_t;
+
+// Release callback for the heap buffer that wraps mapped WebGPU memory.
+// |user_data| carries the WGPUBuffer handle, which is unmapped here; |buffer|
+// is not used.
+static void iree_webgpu_mapped_buffer_release(void* user_data,
+                                              iree_hal_buffer_t* buffer) {
+  wgpuBufferUnmap((WGPUBuffer)user_data);
+}
+
+// TODO(scotttodd): move async mapping into webgpu/buffer.h/.c?
+//
+// Completion callback for the wgpuBufferMapAsync call made in
+// |print_buffer_view|. On success it prints the mapped readback contents to
+// stdout, formatted like the source buffer view; on failure it logs to stderr.
+// In every case it releases the buffer view and readback buffer carried by
+// |userdata_ptr| (an iree_buffer_map_userdata_t*) and frees the userdata.
+static void buffer_map_sync_callback(WGPUBufferMapAsyncStatus map_status,
+                                     void* userdata_ptr) {
+  iree_buffer_map_userdata_t* userdata =
+      (iree_buffer_map_userdata_t*)userdata_ptr;
+  switch (map_status) {
+    case WGPUBufferMapAsyncStatus_Success:
+      break;
+    case WGPUBufferMapAsyncStatus_Error:
+      fprintf(stderr, "  buffer_map_sync_callback status: Error\n");
+      break;
+    case WGPUBufferMapAsyncStatus_DeviceLost:
+      fprintf(stderr, "  buffer_map_sync_callback status: DeviceLost\n");
+      break;
+    case WGPUBufferMapAsyncStatus_Unknown:
+    default:
+      fprintf(stderr, "  buffer_map_sync_callback status: Unknown\n");
+      break;
+  }
+
+  if (map_status != WGPUBufferMapAsyncStatus_Success) {
+    iree_hal_buffer_view_release(userdata->source_buffer_view);
+    iree_hal_buffer_release(userdata->readback_buffer);
+    iree_allocator_free(iree_allocator_system(), userdata);
+    return;
+  }
+
+  iree_status_t status = iree_ok_status();
+
+  // TODO(scotttodd): bubble result(s) up to the caller (async + callback API)
+
+  iree_device_size_t data_length =
+      iree_hal_buffer_view_byte_length(userdata->source_buffer_view);
+  WGPUBuffer buffer_handle =
+      iree_hal_webgpu_buffer_handle(userdata->readback_buffer);
+
+  // For this sample we want to print arbitrary buffers, which is easiest
+  // using the |iree_hal_buffer_view_format| function. Internally, that
+  // function requires synchronous buffer mapping, so we'll first wrap the
+  // already (async) mapped GPU memory into a heap buffer. In a less general
+  // application (or one not requiring pretty logging like this), we could
+  // skip a few buffer copies and other data transformations here.
+
+  // The readback buffer was filled starting at offset 0 and mapped over
+  // [0, data_length), so the mapped range must be requested at offset 0 - not
+  // at the source buffer's byte offset, which would fall outside the mapped
+  // region whenever the source is not at offset 0.
+  const void* data_ptr = wgpuBufferGetConstMappedRange(
+      buffer_handle, /*offset=*/0, data_length);
+  if (!data_ptr) {
+    status = iree_make_status(
+        IREE_STATUS_INTERNAL,
+        "unable to get the mapped range of the readback buffer");
+  }
+
+  iree_hal_buffer_t* heap_buffer = NULL;
+  if (iree_status_is_ok(status)) {
+    // The buffer we get from WebGPU may not be aligned to 64.
+    iree_hal_memory_access_t memory_access =
+        IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_UNALIGNED;
+    // The release callback unmaps the WebGPU buffer once the heap buffer
+    // wrapping it is destroyed.
+    status = iree_hal_heap_buffer_wrap(
+        userdata->readback_buffer->device_allocator,
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL, memory_access,
+        IREE_HAL_BUFFER_USAGE_MAPPING, data_length,
+        iree_make_byte_span((void*)data_ptr, data_length),
+        (iree_hal_buffer_release_callback_t){
+            .fn = iree_webgpu_mapped_buffer_release,
+            .user_data = buffer_handle,
+        },
+        &heap_buffer);
+  }
+
+  // Copy the original buffer_view, backed by the mapped heap buffer instead.
+  iree_hal_buffer_view_t* heap_buffer_view = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_buffer_view_create_like(
+        heap_buffer, userdata->source_buffer_view, iree_allocator_system(),
+        &heap_buffer_view);
+  }
+
+  if (iree_status_is_ok(status)) {
+    fprintf(stdout, "Call output:\n");
+    status = iree_hal_buffer_view_fprint(stdout, heap_buffer_view,
+                                         /*max_element_count=*/4096,
+                                         iree_allocator_system());
+    fprintf(stdout, "\n");
+  }
+  // Either pointer may still be NULL here if an earlier step failed.
+  iree_hal_buffer_view_release(heap_buffer_view);
+  iree_hal_buffer_release(heap_buffer);
+
+  if (!iree_status_is_ok(status)) {
+    fprintf(stderr, "buffer_map_sync_callback error:\n");
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+  }
+
+  iree_hal_buffer_view_release(userdata->source_buffer_view);
+  iree_hal_buffer_release(userdata->readback_buffer);
+  iree_allocator_free(iree_allocator_system(), userdata);
+}
+
+// Starts printing |buffer_view|'s contents to stdout.
+//
+// Creates a mappable readback buffer, copies the view's data into it with a
+// one-shot transfer command buffer, waits for the copy, and then requests an
+// asynchronous map. The actual printing happens later in
+// |buffer_map_sync_callback|, which also owns the cleanup of the readback
+// buffer and of the extra |buffer_view| reference taken here.
+//
+// Returns the first failure encountered while setting this up; on failure the
+// readback buffer is released and no callback is scheduled.
+static iree_status_t print_buffer_view(iree_hal_device_t* device,
+                                       iree_hal_buffer_view_t* buffer_view) {
+  iree_status_t status = iree_ok_status();
+
+  iree_hal_buffer_t* buffer = iree_hal_buffer_view_buffer(buffer_view);
+  iree_device_size_t data_offset = iree_hal_buffer_byte_offset(buffer);
+  iree_device_size_t data_length =
+      iree_hal_buffer_view_byte_length(buffer_view);
+
+  // ----------------------------------------------
+  // Allocate mappable host memory.
+  // Note: iree_hal_webgpu_simple_allocator_allocate_buffer only supports
+  // CopySrc today, so we'll create the buffer directly with
+  // wgpuDeviceCreateBuffer and then wrap it using iree_hal_webgpu_buffer_wrap.
+  WGPUBufferDescriptor descriptor = {
+      .nextInChain = NULL,
+      .label = "IREE_readback",
+      .usage = WGPUBufferUsage_MapRead | WGPUBufferUsage_CopyDst,
+      .size = data_length,
+      .mappedAtCreation = false,
+  };
+  WGPUBuffer readback_buffer_handle = NULL;
+  if (iree_status_is_ok(status)) {
+    readback_buffer_handle = wgpuDeviceCreateBuffer(
+        iree_hal_webgpu_device_handle(device), &descriptor);
+    if (!readback_buffer_handle) {
+      status = iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                                "unable to allocate buffer of size %" PRIdsz,
+                                data_length);
+    }
+  }
+  iree_device_size_t target_offset = 0;
+  const iree_hal_buffer_params_t target_params = {
+      .usage = IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING,
+      .type =
+          IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+      .access = IREE_HAL_MEMORY_ACCESS_ALL,
+  };
+  iree_hal_buffer_t* readback_buffer = NULL;
+  if (iree_status_is_ok(status)) {
+    // NOTE(review): if wrapping fails, the raw |readback_buffer_handle| is not
+    // released here - confirm who owns the handle in that case.
+    status = iree_hal_webgpu_buffer_wrap(
+        device, iree_hal_device_allocator(device), target_params.type,
+        target_params.access, target_params.usage, data_length,
+        /*byte_offset=*/0,
+        /*byte_length=*/data_length, readback_buffer_handle,
+        iree_allocator_system(), &readback_buffer);
+  }
+  // ----------------------------------------------
+
+  // ----------------------------------------------
+  // Transfer from device memory to mappable host memory.
+  const iree_hal_transfer_command_t transfer_command = {
+      .type = IREE_HAL_TRANSFER_COMMAND_TYPE_COPY,
+      .copy =
+          {
+              .source_buffer = buffer,
+              .source_offset = data_offset,
+              .target_buffer = readback_buffer,
+              .target_offset = target_offset,
+              .length = data_length,
+          },
+  };
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_create_transfer_command_buffer(
+        device, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+        IREE_HAL_QUEUE_AFFINITY_ANY, /*transfer_count=*/1, &transfer_command,
+        &command_buffer);
+  }
+  // The semaphore goes from 0 to |signal_value| when the copy has completed.
+  iree_hal_semaphore_t* fence_semaphore = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_semaphore_create(device, 0ull, &fence_semaphore);
+  }
+  uint64_t signal_value = 1ull;
+  if (iree_status_is_ok(status)) {
+    iree_hal_semaphore_list_t signal_semaphores = {
+        .count = 1,
+        .semaphores = &fence_semaphore,
+        .payload_values = &signal_value,
+    };
+    status = iree_hal_device_queue_execute(
+        device, IREE_HAL_QUEUE_AFFINITY_ANY, iree_hal_semaphore_list_empty(),
+        signal_semaphores, 1, &command_buffer);
+  }
+  // TODO(scotttodd): Make this async - pass a wait source to iree_loop_wait_one
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_semaphore_wait(fence_semaphore, signal_value,
+                                     iree_infinite_timeout());
+  }
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(fence_semaphore);
+  // ----------------------------------------------
+
+  iree_buffer_map_userdata_t* userdata = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_allocator_malloc(iree_allocator_system(),
+                                   sizeof(iree_buffer_map_userdata_t),
+                                   (void**)&userdata);
+  }
+
+  if (iree_status_is_ok(status)) {
+    // Only touch |userdata| once its allocation is known to have succeeded.
+    iree_hal_buffer_view_retain(buffer_view);  // Released in the callback.
+    userdata->source_buffer_view = buffer_view;
+    userdata->readback_buffer = readback_buffer;
+    wgpuBufferMapAsync(readback_buffer_handle, WGPUMapMode_Read, /*offset=*/0,
+                       /*size=*/data_length, buffer_map_sync_callback,
+                       /*userdata=*/userdata);
+  } else {
+    // No callback will run, so nothing else would release the readback
+    // buffer. It is NULL (and the release a no-op) if wrapping never happened.
+    iree_hal_buffer_release(readback_buffer);
+  }
+
+  return status;
+}
+
+// Formats the outputs of |call| into |outputs_builder| as a list separated by
+// ';', then empties the call's output list.
+//
+// Scalar values are appended as "<type>=<value>" ("?" for unhandled value
+// types). Buffer views are NOT appended to the builder: they are handed to
+// |print_buffer_view|, which prints them to stdout asynchronously. Other refs
+// append "(no printer)" and empty variants append "(null)".
+//
+// Returns early with the failing status if any step fails; in that case the
+// output list is not emptied.
+static iree_status_t print_outputs_from_call(
+    iree_runtime_call_t* call, iree_string_builder_t* outputs_builder) {
+  iree_vm_list_t* variants_list = iree_runtime_call_outputs(call);
+  for (iree_host_size_t i = 0; i < iree_vm_list_size(variants_list); ++i) {
+    iree_vm_variant_t variant = iree_vm_variant_empty();
+    IREE_RETURN_IF_ERROR(
+        iree_vm_list_get_variant_assign(variants_list, i, &variant),
+        "variant %" PRIhsz " not present", i);
+
+    if (iree_vm_variant_is_value(variant)) {
+      switch (iree_vm_type_def_as_value(variant.type)) {
+        case IREE_VM_VALUE_TYPE_I8: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i8=%" PRIi8, variant.i8));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_I16: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i16=%" PRIi16, variant.i16));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_I32: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i32=%" PRIi32, variant.i32));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_I64: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "i64=%" PRIi64, variant.i64));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_F32: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "f32=%f", variant.f32));
+          break;
+        }
+        case IREE_VM_VALUE_TYPE_F64: {
+          IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
+              outputs_builder, "f64=%lf", variant.f64));
+          break;
+        }
+        default: {
+          IREE_RETURN_IF_ERROR(
+              iree_string_builder_append_cstring(outputs_builder, "?"));
+          break;
+        }
+      }
+    } else if (iree_vm_variant_is_ref(variant)) {
+      if (iree_hal_buffer_view_isa(variant.ref)) {
+        iree_hal_buffer_view_t* buffer_view =
+            iree_hal_buffer_view_deref(variant.ref);
+        // TODO(scotttodd): join async outputs together and return to caller
+        // Nothing is appended for this output; it is printed to stdout later.
+        iree_hal_device_t* device = iree_runtime_session_device(call->session);
+        IREE_RETURN_IF_ERROR(print_buffer_view(device, buffer_view));
+      } else {
+        IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(
+            outputs_builder, "(no printer)"));
+      }
+    } else {
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(outputs_builder, "(null)"));
+    }
+
+    // Separate entries with ';' (no trailing separator after the last one).
+    if (i < iree_vm_list_size(variants_list) - 1) {
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_cstring(outputs_builder, ";"));
+    }
+  }
+
+  // Drop the outputs now that they have been formatted / handed off.
+  iree_vm_list_resize(variants_list, 0);
+
+  return iree_ok_status();
+}
+
+// Completion callback passed to iree_vm_async_invoke by |call_function|.
+//
+// A failed |status| is printed to stderr and freed. The |outputs| list is
+// released and the invoke state carried in |user_data| is freed. |loop| is not
+// used. Always returns OK.
+iree_status_t invoke_callback(void* user_data, iree_loop_t loop,
+                              iree_status_t status, iree_vm_list_t* outputs) {
+  const bool invoke_failed = !iree_status_is_ok(status);
+  if (invoke_failed) {
+    fprintf(stderr, "iree_vm_async_invoke_callback_fn_t error:\n");
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+  }
+
+  iree_vm_list_release(outputs);
+
+  // |user_data| is the iree_vm_async_invoke_state_t allocated by the caller.
+  iree_allocator_free(iree_allocator_system(), user_data);
+
+  return iree_ok_status();
+}
+
+// Invokes |function_name| (without module prefix) on the loaded program with
+// the semicolon-delimited |inputs| and returns a JSON string:
+//   { "total_invoke_time_ms": [number], "outputs": "[formatted outputs]" }
+//
+// On success the returned string is heap-allocated and must be freed by the
+// caller. On failure the error is printed to stderr and the empty string
+// literal is returned (which must not be freed). |iterations| is not used yet.
+const char* call_function(iree_program_state_t* program_state,
+                          const char* function_name, const char* inputs,
+                          int iterations) {
+  iree_status_t status = iree_ok_status();
+
+  // Fully qualify the function name. This sample only supports loading one
+  // module (i.e. 'program') per session, so we can do this.
+  iree_string_builder_t name_builder;
+  iree_string_builder_initialize(iree_allocator_system(), &name_builder);
+  if (iree_status_is_ok(status)) {
+    iree_string_view_t module_name = iree_vm_module_name(program_state->module);
+    status = iree_string_builder_append_format(&name_builder, "%.*s.%s",
+                                               (int)module_name.size,
+                                               module_name.data, function_name);
+  }
+
+  iree_runtime_call_t call;
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_call_initialize_by_name(
+        program_state->session, iree_string_builder_view(&name_builder), &call);
+  }
+  iree_string_builder_deinitialize(&name_builder);
+
+  if (iree_status_is_ok(status)) {
+    status = parse_inputs_into_call(
+        &call, iree_runtime_session_device_allocator(program_state->session),
+        iree_make_cstring_view(inputs));
+  }
+
+  // Note: Timing has ~millisecond precision on the web to mitigate timing /
+  // side-channel security threats.
+  // https://developer.mozilla.org/en-US/docs/Web/API/Performance/now#reduced_time_precision
+  iree_time_t start_time = iree_time_now();
+
+  // TODO(scotttodd): benchmark iterations (somehow with async)
+
+  // NOTE(review): |invoke_state| is freed by invoke_callback; whether that
+  // callback still runs when iree_vm_async_invoke itself returns a failure is
+  // not visible here - confirm to rule out a leak on that path.
+  iree_vm_async_invoke_state_t* invoke_state = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_allocator_malloc(iree_allocator_system(),
+                                   sizeof(iree_vm_async_invoke_state_t),
+                                   (void**)&invoke_state);
+  }
+  // TODO(scotttodd): emscripten / browser loop here
+  // The inline loop reports failures of the work it runs through
+  // |loop_status| rather than through the return value of the invoke call.
+  iree_status_t loop_status = iree_ok_status();
+  iree_loop_t loop = iree_loop_inline(&loop_status);
+  if (iree_status_is_ok(status)) {
+    // Named distinctly so they don't shadow the |inputs| string parameter.
+    iree_vm_context_t* vm_context = iree_runtime_session_context(call.session);
+    iree_vm_function_t vm_function = call.function;
+    iree_vm_list_t* call_inputs = call.inputs;
+    iree_vm_list_t* call_outputs = call.outputs;
+
+    status = iree_vm_async_invoke(loop, invoke_state, vm_context, vm_function,
+                                  IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL,
+                                  call_inputs, call_outputs,
+                                  iree_allocator_system(), invoke_callback,
+                                  /*user_data=*/invoke_state);
+  }
+
+  // Surface any failure recorded by the loop instead of silently dropping
+  // (and leaking) it. If an earlier failure is already being reported, that
+  // one wins and the loop status is just freed.
+  if (iree_status_is_ok(status)) {
+    status = loop_status;
+  } else if (!iree_status_is_ok(loop_status)) {
+    iree_status_free(loop_status);
+  }
+
+  // TODO(scotttodd): record end time in async callback instead of here
+  // TODO(scotttodd): print outputs in async callback instead of here
+
+  iree_time_t end_time = iree_time_now();
+  iree_time_t time_elapsed = end_time - start_time;
+
+  iree_string_builder_t outputs_builder;
+  iree_string_builder_initialize(iree_allocator_system(), &outputs_builder);
+
+  // Output a JSON object as a string:
+  // {
+  //   "total_invoke_time_ms": [number],
+  //   "outputs": [semicolon delimited list of formatted outputs]
+  // }
+  if (iree_status_is_ok(status)) {
+    // |time_elapsed| is divided by 1000000 to report milliseconds.
+    status = iree_string_builder_append_format(
+        &outputs_builder,
+        "{ \"total_invoke_time_ms\": %" PRId64 ", \"outputs\": \"",
+        time_elapsed / 1000000);
+  }
+  if (iree_status_is_ok(status)) {
+    status = print_outputs_from_call(&call, &outputs_builder);
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_string_builder_append_cstring(&outputs_builder, "\"}");
+  }
+
+  // NOTE(review): |call| is never passed to iree_runtime_call_deinitialize in
+  // this function - confirm whether its lists are released elsewhere.
+
+  if (!iree_status_is_ok(status)) {
+    iree_string_builder_deinitialize(&outputs_builder);
+    iree_status_fprint(stderr, status);
+    iree_status_free(status);
+    return "";
+  }
+
+  // Note: this leaks the buffer. It's up to the caller to free it after use.
+  char* outputs_string = strdup(iree_string_builder_buffer(&outputs_builder));
+  iree_string_builder_deinitialize(&outputs_builder);
+  return outputs_string;
+}
diff --git a/experimental/web/sample_webgpu/serve_sample.sh b/experimental/web/sample_webgpu/serve_sample.sh
new file mode 100755
index 0000000..7f43439
--- /dev/null
+++ b/experimental/web/sample_webgpu/serve_sample.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+ROOT_DIR=$(git rev-parse --show-toplevel)
+BUILD_DIR=${ROOT_DIR?}/build-emscripten
+BINARY_DIR=${BUILD_DIR}/experimental/web/sample_webgpu
+
+echo "=== Running local webserver, open at http://localhost:8000/ ==="
+
+python3 ${ROOT_DIR?}/build_tools/scripts/local_web_server.py --directory ${BINARY_DIR}
diff --git a/experimental/web/testing/build_tests.sh b/experimental/web/testing/build_tests.sh
old mode 100644
new mode 100755
index 692804d..280f89d
--- a/experimental/web/testing/build_tests.sh
+++ b/experimental/web/testing/build_tests.sh
@@ -53,12 +53,16 @@
     -DIREE_HOST_BIN_DIR="${INSTALL_ROOT}/bin" \
     -DIREE_BUILD_COMPILER=OFF \
     -DIREE_HAL_DRIVER_DEFAULTS=OFF \
+    -DIREE_EXTERNAL_HAL_DRIVERS=webgpu \
     -DIREE_HAL_DRIVER_LOCAL_SYNC=ON \
     -DIREE_HAL_DRIVER_LOCAL_TASK=ON \
     -DIREE_HAL_EXECUTABLE_LOADER_DEFAULTS=OFF \
     -DIREE_HAL_EXECUTABLE_LOADER_VMVX_MODULE=ON \
     -DIREE_HAL_EXECUTABLE_PLUGIN_DEFAULTS=OFF \
+    -DIREE_HAL_EXECUTABLE_LOADER_SYSTEM_LIBRARY=ON \
+    -DIREE_BUILD_EXPERIMENTAL_WEB_SAMPLES=OFF \
     -DIREE_BUILD_SAMPLES=OFF \
+    -DIREE_ENABLE_THREADING=ON \
     -DIREE_ENABLE_CPUINFO=OFF \
     -DIREE_ENABLE_ASAN=OFF \
     -DIREE_BUILD_TESTS=ON
diff --git a/experimental/web/testing/serve_tests.sh b/experimental/web/testing/serve_tests.sh
old mode 100644
new mode 100755