Add dep for ukernel internal headers (#15562)

The Bazel to CMake converter currently ignores Bazel dependencies on
internal headers. This means that incremental builds do not rebuild
ukernels upon changes to a header file, which can lead to subtle ABI
issues.

The patch now uses ${CMAKE_SOURCE_DIR} or ${CMAKE_BINARY_DIR} when
converting a root-relative (`//`) label to a path, because DEPENDS
entries in an add_custom_command() are resolved relative to either
${CMAKE_CURRENT_BINARY_DIR} or ${CMAKE_CURRENT_SOURCE_DIR}, whereas
references to other Bazel packages are relative to the root of the tree.

This is the next attempt after #15462 was reverted in #15552. The
difference is `cmake -E touch` instead of `touch` in the custom command.
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake.py b/build_tools/bazel_to_cmake/bazel_to_cmake.py
index efb9529..c43735e 100755
--- a/build_tools/bazel_to_cmake/bazel_to_cmake.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake.py
@@ -290,6 +290,7 @@
         converted_build_file = bazel_to_cmake_converter.convert_build_file(
             build_file_code,
             repo_cfg=repo_cfg,
+            build_dir=directory_path,
             allow_partial_conversion=allow_partial_conversion,
         )
     except (NameError, NotImplementedError) as e:
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
index 5b6e437..f9a90af 100644
--- a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
@@ -15,6 +15,7 @@
 
 import itertools
 import re
+import os
 
 import bazel_to_cmake_targets
 
@@ -23,10 +24,15 @@
     """Object passed to `exec` that has handlers for BUILD file functions."""
 
     def __init__(
-        self, *, converter: "Converter", targets: bazel_to_cmake_targets.TargetConverter
+        self,
+        *,
+        converter: "Converter",
+        targets: bazel_to_cmake_targets.TargetConverter,
+        build_dir: str,
     ):
         self._converter = converter
         self._targets = targets
+        self._build_dir = build_dir
         self._custom_initialize()
 
     def _custom_initialize(self):
@@ -112,18 +118,45 @@
         target = target.replace("::", "_")
         return self._convert_string_arg_block(name, target, quote=False)
 
-    def _convert_srcs_block(self, srcs):
-        if not srcs:
-            return ""
+    def _filegroup_dep_filename(self, src):
+        return f"{src}.stamp"
+
+    def _normalize_label(self, src):
+        """
+        Convert label to file path suitable for CMake to use as a dependency.
+        """
+
         # Bazel allows srcs to reference targets in the current package (leading
         # ':') or in other packages (leading '//'). We map that to paths by:
         # - dropping any leading ':' as in:
         #      ':generated.c' -> 'generated.c'
-        # - dropping any leading '//', and internal ':' by '/', as in:
-        #      '//path/to/package:generated.c' ->  'path/to/package/generated.c'
-        srcs = [s.lstrip("//").lstrip(":").replace(":", "/") for s in srcs]
+        # - replacing any leading '//' by '${CMAKE_SOURCE_DIR}/' or
+        #   '${CMAKE_BINARY_DIR}/' and any internal ':' by '/', as in:
+        #      '//path/to/package:source.c'
+        #      -> '${CMAKE_SOURCE_DIR}/path/to/package/source.c'
+        #      '//path/to/package:generated.c'
+        #      -> '${CMAKE_BINARY_DIR}/path/to/package/generated.c'
+        pkg_root_relative_label = src.startswith("//")
+        src = src.lstrip("/").lstrip(":").replace(":", "/")
+        if not pkg_root_relative_label:
+            return src
+        elif os.path.exists(os.path.join(self._build_dir, src)):
+            return f"${{CMAKE_SOURCE_DIR}}/{src}"
+        else:
+            return f"${{CMAKE_BINARY_DIR}}/{src}"
 
-        return self._convert_string_list_block("SRCS", srcs, sort=True)
+    def _convert_srcs_block(self, srcs, is_generated=False, block_name="SRCS"):
+        if not srcs:
+            return ""
+
+        srcs = [
+            self._normalize_label(s)
+            if s.startswith("$") or os.path.splitext(s)[1]
+            else self._filegroup_dep_filename(self._normalize_label(s))
+            for s in srcs
+        ]
+
+        return self._convert_string_list_block(block_name, srcs, sort=True)
 
     def _convert_td_file_block(self, td_file):
         if td_file.startswith("//iree"):
@@ -253,14 +286,42 @@
     def py_binary(self, *args, **kwargs):
         pass
 
-    def filegroup(self, name, **kwargs):
-        # Not implemented, but allowed for Bazel-only uses, such as declaring internal
-        # headers and other kinds of files that Bazel enforces but CMake doesn't care
-        # about. If we ever need to implement this, this might be a no-op, or may
-        # want to evaluate the srcs attribute and pass them along to any targets
-        # that depend on the filegroup.
-        # Cross-package dependencies and complicated globs could be hard to handle.
-        pass
+    def filegroup(self, name, srcs, **kwargs):
+        if not srcs:
+            return
+
+        # Converting a dependency on a filegroup requires either using the
+        # transitive dependency to the actual file or creating a similar
+        # abstraction in CMake.
+        #
+        # One way of doing the transitive dependency is peeking in the build
+        # file that defines a given filegroup but goes against the current
+        # design where each build file is processed independently.
+        #
+        # Alternatively, the build file that defines a filegroup could set a
+        # variable with the list of all the files in the filegroup which the
+        # CMakeLists.txt corresponding to the using build file would use.
+        # However that requires the variable to be defined before the
+        # add_subdirectory() for the corresponding using CMakeLists.txt which is
+        # not a given.
+        #
+        # Instead, we generate a custom command that creates a stamp file that
+        # acts as an abstraction to the filegroup. The using CMakeLists.txt
+        # then creates a file dependency on that stamp file. We also need a
+        # custom target in the same CMakeLists.txt to ensure a rule for the
+        # custom command is actually created as per add_custom_command
+        # documentation.
+        depends_block = self._convert_srcs_block(srcs, block_name="DEPENDS")
+        stamp_file = self._filegroup_dep_filename(name)
+        self._converter.body += (
+            f"add_custom_command(OUTPUT {stamp_file}\n"
+            f"    COMMAND ${{CMAKE_COMMAND}} -E touch {stamp_file}\n"
+            f"{depends_block}"
+            f")\n\n"
+            f"add_custom_target({name}\n"
+            f"    DEPENDS {stamp_file}\n"
+            f")\n\n"
+        )
 
     def sh_binary(self, name, **kwargs):
         if self._should_skip_target(**kwargs):
@@ -507,6 +568,7 @@
     def iree_bitcode_library(self, name, arch, srcs, internal_hdrs=None, copts=None):
         name_block = self._convert_string_arg_block("NAME", name, quote=False)
         arch_block = self._convert_string_arg_block("ARCH", arch, quote=False)
+        hdrs_block = self._convert_srcs_block(internal_hdrs, block_name="INTERNAL_HDRS")
         srcs_block = self._convert_srcs_block(srcs)
         copts_block = self._convert_string_list_block("COPTS", copts, sort=False)
 
@@ -514,6 +576,7 @@
             f"iree_bitcode_library(\n"
             f"{name_block}"
             f"{arch_block}"
+            f"{hdrs_block}"
             f"{srcs_block}"
             f"{copts_block}"
             f")\n\n"
@@ -944,7 +1007,9 @@
     return ret
 
 
-def convert_build_file(build_file_code, repo_cfg, allow_partial_conversion=False):
+def convert_build_file(
+    build_file_code, repo_cfg, build_dir, allow_partial_conversion=False
+):
     converter = Converter()
     # Allow overrides of TargetConverter and BuildFileFunctions from repo cfg.
     repo_map = getattr(repo_cfg, "REPO_MAP", {})
@@ -953,7 +1018,7 @@
     )(repo_map=repo_map)
     build_file_functions = getattr(
         repo_cfg, "CustomBuildFileFunctions", BuildFileFunctions
-    )(converter=converter, targets=target_converter)
+    )(converter=converter, targets=target_converter, build_dir=build_dir)
 
     exec(build_file_code, GetDict(build_file_functions))
     converted_text = converter.convert()
diff --git a/build_tools/cmake/iree_bitcode_library.cmake b/build_tools/cmake/iree_bitcode_library.cmake
index 72c9bb2..7aae3cd 100644
--- a/build_tools/cmake/iree_bitcode_library.cmake
+++ b/build_tools/cmake/iree_bitcode_library.cmake
@@ -21,7 +21,7 @@
     _RULE
     ""
     "NAME;OUT;ARCH"
-    "SRCS;COPTS"
+    "INTERNAL_HDRS;SRCS;COPTS"
     ${ARGN}
   )
 
@@ -86,6 +86,7 @@
       DEPENDS
         "${IREE_CLANG_BINARY}"
         "${_SRC}"
+        "${_RULE_INTERNAL_HDRS}"
       COMMENT
         "Compiling ${_SRC} to ${_BITCODE_FILE}"
       VERBATIM
diff --git a/runtime/src/iree/builtins/device/CMakeLists.txt b/runtime/src/iree/builtins/device/CMakeLists.txt
index 25b5421..c3e91ed 100644
--- a/runtime/src/iree/builtins/device/CMakeLists.txt
+++ b/runtime/src/iree/builtins/device/CMakeLists.txt
@@ -31,6 +31,8 @@
     libdevice_wasm32_generic
   ARCH
     wasm_32
+  INTERNAL_HDRS
+    "device.h"
   SRCS
     "device_generic.c"
 )
@@ -40,6 +42,8 @@
     libdevice_wasm64_generic
   ARCH
     wasm_64
+  INTERNAL_HDRS
+    "device.h"
   SRCS
     "device_generic.c"
 )
diff --git a/runtime/src/iree/builtins/ukernel/CMakeLists.txt b/runtime/src/iree/builtins/ukernel/CMakeLists.txt
index fbea314..6224e34 100644
--- a/runtime/src/iree/builtins/ukernel/CMakeLists.txt
+++ b/runtime/src/iree/builtins/ukernel/CMakeLists.txt
@@ -30,6 +30,26 @@
   PUBLIC
 )
 
+add_custom_command(OUTPUT internal_headers_filegroup.stamp
+    COMMAND ${CMAKE_COMMAND} -E touch internal_headers_filegroup.stamp
+  DEPENDS
+    "common.h"
+    "exported_bits.h"
+    "mmt4d.h"
+    "mmt4d_internal.h"
+    "pack.h"
+    "pack_internal.h"
+    "query_tile_sizes.h"
+    "query_tile_sizes_internal.h"
+    "static_assert.h"
+    "unpack.h"
+    "unpack_internal.h"
+)
+
+add_custom_target(internal_headers_filegroup
+    DEPENDS internal_headers_filegroup.stamp
+)
+
 iree_cc_library(
   NAME
     internal_headers
@@ -110,6 +130,9 @@
     ukernel_bitcode_32bit_base
   ARCH
     wasm_32
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "internal_headers_filegroup.stamp"
   SRCS
     "mmt4d.c"
     "mmt4d_tile.c"
@@ -125,6 +148,9 @@
     ukernel_bitcode_64bit_base
   ARCH
     wasm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "internal_headers_filegroup.stamp"
   SRCS
     "mmt4d.c"
     "mmt4d_tile.c"
@@ -139,10 +165,10 @@
   NAME
     embed_ukernel_bitcode
   SRCS
-    "runtime/src/iree/builtins/ukernel/arch/arm_64/ukernel_bitcode_arm_64.bc"
-    "runtime/src/iree/builtins/ukernel/arch/arm_64/ukernel_bitcode_arm_64_entry_points.bc"
-    "runtime/src/iree/builtins/ukernel/arch/x86_64/ukernel_bitcode_x86_64.bc"
-    "runtime/src/iree/builtins/ukernel/arch/x86_64/ukernel_bitcode_x86_64_entry_points.bc"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/arch/arm_64/ukernel_bitcode_arm_64.bc"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/arch/arm_64/ukernel_bitcode_arm_64_entry_points.bc"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/arch/x86_64/ukernel_bitcode_x86_64.bc"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/arch/x86_64/ukernel_bitcode_x86_64_entry_points.bc"
     "ukernel_bitcode_32bit_base.bc"
     "ukernel_bitcode_64bit_base.bc"
   DEPS
diff --git a/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt b/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt
index b5b765c..ed3dd84 100644
--- a/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt
+++ b/runtime/src/iree/builtins/ukernel/arch/arm_64/CMakeLists.txt
@@ -18,6 +18,14 @@
     ukernel_bitcode_arm_64_entry_points
   ARCH
     wasm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_arm_64.h"
+    "common_arm_64_entry_point.h"
+    "mmt4d_arm_64_internal.h"
+    "pack_arm_64_internal.h"
+    "unpack_arm_64_internal.h"
   SRCS
     "mmt4d_arm_64_entry_point.c"
     "pack_arm_64_entry_point.c"
@@ -30,6 +38,14 @@
     ukernel_bitcode_arm_64_base
   ARCH
     arm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_arm_64.h"
+    "common_arm_64_entry_point.h"
+    "mmt4d_arm_64_internal.h"
+    "pack_arm_64_internal.h"
+    "unpack_arm_64_internal.h"
   SRCS
     "mmt4d_arm_64.c"
     "pack_arm_64.c"
@@ -41,6 +57,14 @@
     ukernel_bitcode_arm_64_fullfp16
   ARCH
     arm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_arm_64.h"
+    "common_arm_64_entry_point.h"
+    "mmt4d_arm_64_internal.h"
+    "pack_arm_64_internal.h"
+    "unpack_arm_64_internal.h"
   SRCS
     "mmt4d_arm_64_fullfp16.c"
   COPTS
@@ -52,6 +76,14 @@
     ukernel_bitcode_arm_64_fp16fml
   ARCH
     arm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_arm_64.h"
+    "common_arm_64_entry_point.h"
+    "mmt4d_arm_64_internal.h"
+    "pack_arm_64_internal.h"
+    "unpack_arm_64_internal.h"
   SRCS
     "mmt4d_arm_64_fp16fml.c"
   COPTS
@@ -63,6 +95,14 @@
     ukernel_bitcode_arm_64_bf16
   ARCH
     arm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_arm_64.h"
+    "common_arm_64_entry_point.h"
+    "mmt4d_arm_64_internal.h"
+    "pack_arm_64_internal.h"
+    "unpack_arm_64_internal.h"
   SRCS
     "mmt4d_arm_64_bf16.c"
   COPTS
@@ -74,6 +114,14 @@
     ukernel_bitcode_arm_64_dotprod
   ARCH
     arm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_arm_64.h"
+    "common_arm_64_entry_point.h"
+    "mmt4d_arm_64_internal.h"
+    "pack_arm_64_internal.h"
+    "unpack_arm_64_internal.h"
   SRCS
     "mmt4d_arm_64_dotprod.c"
   COPTS
@@ -85,6 +133,14 @@
     ukernel_bitcode_arm_64_i8mm
   ARCH
     arm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_arm_64.h"
+    "common_arm_64_entry_point.h"
+    "mmt4d_arm_64_internal.h"
+    "pack_arm_64_internal.h"
+    "unpack_arm_64_internal.h"
   SRCS
     "mmt4d_arm_64_i8mm.c"
   COPTS
diff --git a/runtime/src/iree/builtins/ukernel/arch/x86_64/CMakeLists.txt b/runtime/src/iree/builtins/ukernel/arch/x86_64/CMakeLists.txt
index 8da4a7e..ad67faa 100644
--- a/runtime/src/iree/builtins/ukernel/arch/x86_64/CMakeLists.txt
+++ b/runtime/src/iree/builtins/ukernel/arch/x86_64/CMakeLists.txt
@@ -18,6 +18,14 @@
     ukernel_bitcode_x86_64_entry_points
   ARCH
     wasm_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_x86_64.h"
+    "common_x86_64_entry_point.h"
+    "mmt4d_x86_64_internal.h"
+    "pack_x86_64_internal.h"
+    "unpack_x86_64_internal.h"
   SRCS
     "mmt4d_x86_64_entry_point.c"
     "pack_x86_64_entry_point.c"
@@ -30,6 +38,14 @@
     ukernel_bitcode_x86_64_avx2_fma
   ARCH
     x86_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_x86_64.h"
+    "common_x86_64_entry_point.h"
+    "mmt4d_x86_64_internal.h"
+    "pack_x86_64_internal.h"
+    "unpack_x86_64_internal.h"
   SRCS
     "mmt4d_x86_64_avx2_fma.c"
     "pack_x86_64_avx2_fma.c"
@@ -46,6 +62,14 @@
     ukernel_bitcode_x86_64_avx512_base
   ARCH
     x86_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_x86_64.h"
+    "common_x86_64_entry_point.h"
+    "mmt4d_x86_64_internal.h"
+    "pack_x86_64_internal.h"
+    "unpack_x86_64_internal.h"
   SRCS
     "mmt4d_x86_64_avx512_base.c"
     "pack_x86_64_avx512_base.c"
@@ -67,6 +91,14 @@
     ukernel_bitcode_x86_64_avx512_vnni
   ARCH
     x86_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_x86_64.h"
+    "common_x86_64_entry_point.h"
+    "mmt4d_x86_64_internal.h"
+    "pack_x86_64_internal.h"
+    "unpack_x86_64_internal.h"
   SRCS
     "mmt4d_x86_64_avx512_vnni.c"
   COPTS
@@ -87,6 +119,14 @@
     ukernel_bitcode_x86_64_avx512_bf16
   ARCH
     x86_64
+  INTERNAL_HDRS
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
+    "${CMAKE_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
+    "common_x86_64.h"
+    "common_x86_64_entry_point.h"
+    "mmt4d_x86_64_internal.h"
+    "pack_x86_64_internal.h"
+    "unpack_x86_64_internal.h"
   SRCS
     "mmt4d_x86_64_avx512_bf16.c"
   COPTS
diff --git a/runtime/src/iree/schemas/CMakeLists.txt b/runtime/src/iree/schemas/CMakeLists.txt
index ef93a2b..4e959fd 100644
--- a/runtime/src/iree/schemas/CMakeLists.txt
+++ b/runtime/src/iree/schemas/CMakeLists.txt
@@ -88,6 +88,17 @@
   PUBLIC
 )
 
+add_custom_command(OUTPUT cpu_data_headers_filegroup.stamp
+    COMMAND ${CMAKE_COMMAND} -E touch cpu_data_headers_filegroup.stamp
+  DEPENDS
+    "cpu_data.h"
+    "cpu_feature_bits.inl"
+)
+
+add_custom_target(cpu_data_headers_filegroup
+    DEPENDS cpu_data_headers_filegroup.stamp
+)
+
 iree_cc_library(
   NAME
     cpu_data