Rollup of HAL/runtime/infra changes for WebGPU HAL. (#13953)

This includes most of the changes outside of `experimental/` from
https://github.com/openxla/iree/pull/13952.

* Organize Tint and spirv-tools dependency versioning in CMake
* Add a `cc_library` for `webgpu_headers`
* Promote `iree_hal_heap_buffer_wrap` to a public API
* Add `IREE_HAL_MEMORY_ACCESS_UNALIGNED` to bypass the alignment check in
  `iree_hal_heap_buffer_wrap` when needed

Co-authored-by: Ben Vanik <benvanik@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 034a6d2..984c787 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -866,14 +866,23 @@
 endif()
 
 if(IREE_TARGET_BACKEND_METAL_SPIRV)
-  iree_set_spirv_cross_cmake_options()
   # SPIRV-Cross is needed to cross compile SPIR-V into MSL source code.
+  iree_set_spirv_cross_cmake_options()
   add_subdirectory(third_party/spirv_cross EXCLUDE_FROM_ALL)
 endif()
 
 if(IREE_TARGET_BACKEND_WEBGPU)
   # Tint is needed to compile SPIR-V into WGSL source code.
   # Tint also requires SPIRV-Tools, which requires SPIRV-Headers.
+
+  # NOTE: these can be synced by referencing one of these repositories:
+  #   * https://dawn.googlesource.com/dawn/
+  #   * https://dawn.googlesource.com/tint/
+  #   * https://chromium.googlesource.com/vulkan-deps/+/refs/heads/main/DEPS
+  # or they can be updated independently
+  set(IREE_TINT_TAG         "fdb8787e9c1b79770bd98a8faf37fbe48a3077a4")  # 2023-03-06
+  set(IREE_SPIRV_TOOLS_TAG  "95f93810bbae12e1a601a3a5a5d975e5558a2994")  # 2023-02-15
+
   iree_set_spirv_headers_cmake_options()
   add_subdirectory(third_party/spirv_headers EXCLUDE_FROM_ALL)
   add_subdirectory(build_tools/third_party/spirv-tools EXCLUDE_FROM_ALL)
diff --git a/build_tools/bazel/workspace.bzl b/build_tools/bazel/workspace.bzl
index 56c841c..f54c920 100644
--- a/build_tools/bazel/workspace.bzl
+++ b/build_tools/bazel/workspace.bzl
@@ -172,3 +172,10 @@
         build_file = iree_repo_alias + "//:build_tools/third_party/nccl/BUILD.overlay",
         path = paths.join(iree_path, "third_party/nccl"),
     )
+
+    maybe(
+        native.new_local_repository,
+        name = "webgpu_headers",
+        build_file = iree_repo_alias + "//:build_tools/third_party/webgpu-headers/BUILD.overlay",
+        path = paths.join(iree_path, "third_party/webgpu-headers"),
+    )
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py b/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
index d2f24be..36d668f 100644
--- a/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
@@ -145,6 +145,7 @@
         "@vulkan_memory_allocator//:impl_header_only": [
             "vulkan_memory_allocator"
         ],
+        "@webgpu_headers": [],
     })
 
     self._initialize()
diff --git a/build_tools/third_party/spirv-tools/CMakeLists.txt b/build_tools/third_party/spirv-tools/CMakeLists.txt
index aa7bfd5..1bc6e6d 100644
--- a/build_tools/third_party/spirv-tools/CMakeLists.txt
+++ b/build_tools/third_party/spirv-tools/CMakeLists.txt
@@ -9,7 +9,7 @@
 FetchContent_Declare(
   spirv-tools
   GIT_REPOSITORY https://github.com/KhronosGroup/SPIRV-Tools.git
-  GIT_TAG 95f93810bbae12e1a601a3a5a5d975e5558a2994 # 2023-02-15
+  GIT_TAG ${IREE_SPIRV_TOOLS_TAG}
 )
 
 set(SKIP_SPIRV_TOOLS_INSTALL OFF CACHE BOOL "" FORCE)
diff --git a/build_tools/third_party/tint/CMakeLists.txt b/build_tools/third_party/tint/CMakeLists.txt
index 73c30a6..c0e3e4c 100644
--- a/build_tools/third_party/tint/CMakeLists.txt
+++ b/build_tools/third_party/tint/CMakeLists.txt
@@ -9,22 +9,23 @@
 FetchContent_Declare(
   tint
   GIT_REPOSITORY https://dawn.googlesource.com/tint
-  GIT_TAG fdb8787e9c1b79770bd98a8faf37fbe48a3077a4 # 2023-03-06
+  GIT_TAG ${IREE_TINT_TAG}
 )
 
 set(TINT_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_DOCS  OFF CACHE BOOL "" FORCE)
 set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
 
-# Our usage at compile time primarily needs the SPIR-V reader and WGSL writer,
-# but usage at runtime through Dawn needs a broader set.
+# Our usage at compile time primarily needs the SPIR-V reader and WGSL writer.
+# Other readers and writers could be enabled, but we want to keep this small.
 
 set(TINT_BUILD_SPV_READER ON CACHE BOOL "" FORCE)
-set(TINT_BUILD_WGSL_READER ON CACHE BOOL "" FORCE)
+set(TINT_BUILD_WGSL_READER OFF CACHE BOOL "" FORCE)
 
 set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE)
-set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE)
-set(TINT_BUILD_MSL_WRITER ON CACHE BOOL "" FORCE)
-set(TINT_BUILD_SPV_WRITER ON CACHE BOOL "" FORCE)
+set(TINT_BUILD_HLSL_WRITER OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_MSL_WRITER OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_SPV_WRITER OFF CACHE BOOL "" FORCE)
 set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE)
 
 FetchContent_MakeAvailable(tint)
diff --git a/build_tools/third_party/webgpu-headers/BUILD.overlay b/build_tools/third_party/webgpu-headers/BUILD.overlay
new file mode 100644
index 0000000..8feff9c
--- /dev/null
+++ b/build_tools/third_party/webgpu-headers/BUILD.overlay
@@ -0,0 +1,13 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "webgpu_headers",
+    hdrs = ["webgpu.h"],
+    include_prefix = "third_party/webgpu-headers",
+)
diff --git a/build_tools/third_party/webgpu-headers/CMakeLists.txt b/build_tools/third_party/webgpu-headers/CMakeLists.txt
new file mode 100644
index 0000000..aab0cc4
--- /dev/null
+++ b/build_tools/third_party/webgpu-headers/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+set(WEBGPU_HEADERS_ROOT "${IREE_ROOT_DIR}/third_party/webgpu-headers/")
+
+external_cc_library(
+  PACKAGE
+    webgpu_headers
+  NAME
+    webgpu_headers
+  ROOT
+    ${WEBGPU_HEADERS_ROOT}
+  HDRS
+    "webgpu.h"
+)
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp
index b03b43d..0aee9c3 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp
@@ -64,9 +64,13 @@
   // NOTE: we could vary this based on the options such as 'webgpu-v2'.
   std::string name() const override { return "webgpu"; }
 
+  // TODO(scotttodd): Prune FlowDialect dep when WGSLReplacePushConstantsPass
+  //     does not use the Flow dialect (TranslateExecutables calls this
+  //     function and _does not_ query which passes are used by the dynamic
+  //     pipeline created by buildTranslationPassPipeline)
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::Codegen::IREECodegenDialect, spirv::SPIRVDialect,
-                    gpu::GPUDialect>();
+    registry.insert<IREE::Codegen::IREECodegenDialect, IREE::Flow::FlowDialect,
+                    spirv::SPIRVDialect, gpu::GPUDialect>();
   }
 
   IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
diff --git a/runtime/src/iree/base/CMakeLists.txt b/runtime/src/iree/base/CMakeLists.txt
index 7418db3..0a4231b 100644
--- a/runtime/src/iree/base/CMakeLists.txt
+++ b/runtime/src/iree/base/CMakeLists.txt
@@ -209,5 +209,7 @@
       ::loop_test_hdrs
       iree::testing::gtest
       iree::testing::gtest_main
+    LINKOPTS
+      "-sEXPORTED_RUNTIME_METHODS=['dynCall']"
   )
 endif()
diff --git a/runtime/src/iree/hal/allocator.h b/runtime/src/iree/hal/allocator.h
index 95366b3..fb2bcfd 100644
--- a/runtime/src/iree/hal/allocator.h
+++ b/runtime/src/iree/hal/allocator.h
@@ -317,25 +317,6 @@
   } handle;
 } iree_hal_external_buffer_t;
 
-typedef void(IREE_API_PTR* iree_hal_buffer_release_fn_t)(
-    void* user_data, iree_hal_buffer_t* buffer);
-
-// A callback issued when a buffer is released.
-typedef struct {
-  // Callback function pointer.
-  iree_hal_buffer_release_fn_t fn;
-  // User data passed to the callback function. Unowned.
-  void* user_data;
-} iree_hal_buffer_release_callback_t;
-
-// Returns a no-op buffer release callback that implies that no cleanup is
-// required.
-static inline iree_hal_buffer_release_callback_t
-iree_hal_buffer_release_callback_null(void) {
-  iree_hal_buffer_release_callback_t callback = {NULL, NULL};
-  return callback;
-}
-
 //===----------------------------------------------------------------------===//
 // Statistics/reporting
 //===----------------------------------------------------------------------===//
diff --git a/runtime/src/iree/hal/allocator_heap.c b/runtime/src/iree/hal/allocator_heap.c
index f14e66c..97cb85d 100644
--- a/runtime/src/iree/hal/allocator_heap.c
+++ b/runtime/src/iree/hal/allocator_heap.c
@@ -32,8 +32,8 @@
     iree_string_view_t identifier, iree_allocator_t data_allocator,
     iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator) {
   IREE_ASSERT_ARGUMENT(out_allocator);
-  IREE_TRACE_ZONE_BEGIN(z0);
   *out_allocator = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
 
   iree_hal_heap_allocator_t* allocator = NULL;
   iree_host_size_t total_size =
diff --git a/runtime/src/iree/hal/buffer.h b/runtime/src/iree/hal/buffer.h
index 8009920..3a67084 100644
--- a/runtime/src/iree/hal/buffer.h
+++ b/runtime/src/iree/hal/buffer.h
@@ -134,12 +134,15 @@
   // within or across operations. The lack of the flag indicates that the access
   // is guaranteed not to alias (ala C's `restrict` keyword).
   IREE_HAL_MEMORY_ACCESS_MAY_ALIAS = 1u << 3,
+  // A flag that can be applied to any access type to indicate that the buffer
+  // storage is not guaranteed to be aligned.
+  IREE_HAL_MEMORY_ACCESS_UNALIGNED = 1u << 4,
   // Memory access may perform any operation and should not be validated.
   // Used upon access to bypass access verification at the API boundary and
   // effectively provides a `void*`.
   // This should only be used by device-side code where it is known-safe to
   // bypass the access verification.
-  IREE_HAL_MEMORY_ACCESS_ANY = 1u << 4,
+  IREE_HAL_MEMORY_ACCESS_ANY = 1u << 5,
   // Memory may have any operation performed on it.
   IREE_HAL_MEMORY_ACCESS_ALL = IREE_HAL_MEMORY_ACCESS_READ |
                                IREE_HAL_MEMORY_ACCESS_WRITE |
@@ -486,6 +489,25 @@
 IREE_API_EXPORT iree_string_view_t iree_hal_buffer_usage_format(
     iree_hal_buffer_usage_t value, iree_bitfield_string_temp_t* out_temp);
 
+typedef void(IREE_API_PTR* iree_hal_buffer_release_fn_t)(
+    void* user_data, struct iree_hal_buffer_t* buffer);
+
+// A callback issued when a buffer is released.
+typedef struct {
+  // Callback function pointer.
+  iree_hal_buffer_release_fn_t fn;
+  // User data passed to the callback function. Unowned.
+  void* user_data;
+} iree_hal_buffer_release_callback_t;
+
+// Returns a no-op buffer release callback that implies that no cleanup is
+// required.
+static inline iree_hal_buffer_release_callback_t
+iree_hal_buffer_release_callback_null(void) {
+  iree_hal_buffer_release_callback_t callback = {NULL, NULL};
+  return callback;
+}
+
 //===----------------------------------------------------------------------===//
 // iree_hal_buffer_t
 //===----------------------------------------------------------------------===//
@@ -774,6 +796,25 @@
     iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
 
 //===----------------------------------------------------------------------===//
+// iree_hal_heap_buffer_t
+//===----------------------------------------------------------------------===//
+
+// Wraps an existing host allocation in a buffer and calls |release_callback|
+// when the buffer is destroyed.
+//
+// Unless |allowed_access| has IREE_HAL_MEMORY_ACCESS_UNALIGNED set, |data| must
+// be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT or the call will fail
+// with IREE_STATUS_OUT_OF_RANGE.
+//
+// |out_buffer| must be released by the caller; |data| must outlive the buffer.
+iree_status_t iree_hal_heap_buffer_wrap(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** out_buffer);
+
+//===----------------------------------------------------------------------===//
 // iree_hal_buffer_t implementation details
 //===----------------------------------------------------------------------===//
 
diff --git a/runtime/src/iree/hal/buffer_heap.c b/runtime/src/iree/hal/buffer_heap.c
index 47ec037..b3f481c 100644
--- a/runtime/src/iree/hal/buffer_heap.c
+++ b/runtime/src/iree/hal/buffer_heap.c
@@ -185,7 +185,8 @@
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  if (!iree_host_size_has_alignment((uintptr_t)data.data,
+  if (!iree_any_bit_set(allowed_access, IREE_HAL_MEMORY_ACCESS_UNALIGNED) &&
+      !iree_host_size_has_alignment((uintptr_t)data.data,
                                     IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
     IREE_TRACE_ZONE_END(z0);
     return iree_make_status(
diff --git a/runtime/src/iree/hal/buffer_heap_impl.h b/runtime/src/iree/hal/buffer_heap_impl.h
index 9481a3d..e0ab0c1 100644
--- a/runtime/src/iree/hal/buffer_heap_impl.h
+++ b/runtime/src/iree/hal/buffer_heap_impl.h
@@ -37,21 +37,6 @@
     iree_const_byte_span_t initial_data, iree_allocator_t data_allocator,
     iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
 
-// Wraps an existing host allocation in a buffer.
-// When the buffer is destroyed the provided |release_callback| will be called.
-//
-// The buffer must be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT and if
-// it is not the call will fail with IREE_STATUS_OUT_OF_RANGE.
-//
-// |out_buffer| must be released by the caller. |data| must be kept live for the
-// lifetime of the wrapping buffer.
-iree_status_t iree_hal_heap_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
-    iree_hal_memory_access_t allowed_access,
-    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
-    iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
-    iree_hal_buffer_t** out_buffer);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/runtime/src/iree/schemas/wgsl_executable_def.fbs b/runtime/src/iree/schemas/wgsl_executable_def.fbs
index 0d41449..79c821f 100644
--- a/runtime/src/iree/schemas/wgsl_executable_def.fbs
+++ b/runtime/src/iree/schemas/wgsl_executable_def.fbs
@@ -27,7 +27,7 @@
 
   // A mapping of executable entry point ordinals to the shader module in which
   // they reside.
-  entry_points:[int];
+  entry_points:[uint];
 }
 
 root_type ExecutableDef;