`iree_c_embed_data` improvements (#13814)

* Reconcile CMake with Bazel by dropping the CMake-specific
`GENERATED_SRCS` argument and instead just putting everything in `SRCS`,
like in Bazel.
* To bridge that difference, `bazel_to_cmake_converter.py` was facing
the impossible task of choosing, for each `srcs` entry, whether to put
it in `SRCS` or `GENERATED_SRCS`. It was simply deciding that based on
whether the leading character was a `:`, offering a way to distinguish
generated files in the case where they were generated within the current
package. But that didn't generalize to generated files from other
packages.
* In `SRCS`, support various syntaxes for files in the source directory or
in the build directory (generated files), and automatically do the right
thing to match Bazel. In particular, make it simple to reference generated
files in a subdirectory.
* Concretely, to address #13804, ukernel bitcode will need to reference
generated bitcode in architecture-specific subdirectories.
* In `generate_embed_data_main.cc`, generate clear errors on failure to
open an input/output file (would've saved some head scratching).

For some reason, this exposed an issue in `hal/cts`. The filenames were
missing a `"${TARGET_BACKEND}_"` prefix and somehow that was still
working; with my changes it no longer worked. Fixed by adding the
missing prefixes.

(This commit is one in a chain of commits working toward completing
https://github.com/openxla/iree/issues/13804).
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
index d10af72..91de990 100644
--- a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
@@ -111,19 +111,17 @@
     return self._convert_string_arg_block(name, target, quote=False)
 
   def _convert_srcs_block(self, srcs):
-    if srcs is None:
+    if not srcs:
       return ""
-    generated_srcs = [src for src in srcs if src.startswith(":")]
-    srcs = [src for src in srcs if src not in generated_srcs]
-    sets = []
-    if srcs:
-      sets.append(self._convert_string_list_block("SRCS", srcs, sort=True))
-    if generated_srcs:
-      sets.append(
-          self._convert_string_list_block("GENERATED_SRCS",
-                                          [src[1:] for src in generated_srcs],
-                                          sort=True))
-    return "\n".join(sets)
+    # Bazel allows srcs to reference targets in the current package (leading
+    # ':') or in other packages (leading '//'). We map that to paths by:
+    # - dropping any leading ':' as in:
+    #      ':generated.c' -> 'generated.c'
+    # - dropping any leading '//', and replacing internal ':' by '/', as in:
+    #      '//path/to/package:generated.c' ->  'path/to/package/generated.c'
+    srcs = [s.lstrip('//').lstrip(':').replace(':', '/') for s in srcs]
+
+    return self._convert_string_list_block("SRCS", srcs, sort=True)
 
   def _convert_td_file_block(self, td_file):
     if td_file.startswith("//iree"):
diff --git a/build_tools/cmake/iree_bytecode_module.cmake b/build_tools/cmake/iree_bytecode_module.cmake
index 549c509..b82721c 100644
--- a/build_tools/cmake/iree_bytecode_module.cmake
+++ b/build_tools/cmake/iree_bytecode_module.cmake
@@ -158,7 +158,7 @@
         "${_RULE_NAME}_c"
       IDENTIFIER
         "${_RULE_C_IDENTIFIER}"
-      GENERATED_SRCS
+      SRCS
         "${_RULE_NAME}.vmfb"
       C_FILE_OUTPUT
         "${_RULE_NAME}_c.c"
diff --git a/build_tools/cmake/iree_c_embed_data.cmake b/build_tools/cmake/iree_c_embed_data.cmake
index adcf8d0..403acd7 100644
--- a/build_tools/cmake/iree_c_embed_data.cmake
+++ b/build_tools/cmake/iree_c_embed_data.cmake
@@ -13,10 +13,8 @@
 # Parameters:
 # PACKAGE: Name of the package (overrides actual path)
 # NAME: Name of target (see Note).
-# SRCS: List of source files to embed (non-absolute paths will be resolved
-#     relative to CMAKE_CURRENT_SRC_DIR).
+# SRCS: List of files to embed (in source or build directory).
 # INCLUDES: Include directories to add to dependencies
-# GENERATED_SRCS: List of generated source files to embed.
 # C_FILE_OUTPUT: The C implementation file to output.
 # H_FILE_OUTPUT: The H header file to output.
 # STRIP_PREFIX: Strips this verbatim prefix from filenames (in the TOC).
@@ -34,7 +32,7 @@
     _RULE
     "PUBLIC;TESTONLY;FLATTEN"
     "PACKAGE;NAME;IDENTIFIER;STRIP_PREFIX;C_FILE_OUTPUT;H_FILE_OUTPUT"
-    "DEPS;SRCS;GENERATED_SRCS;INCLUDES"
+    "DEPS;SRCS;INCLUDES"
     ${ARGN}
   )
 
@@ -59,21 +57,48 @@
     list(APPEND _ARGS "--flatten")
   endif()
 
+  set(_RELATIVE_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+  cmake_path(RELATIVE_PATH _RELATIVE_BINARY_DIR BASE_DIRECTORY "${IREE_BINARY_DIR}")
+
   foreach(_SRC ${_RULE_SRCS})
-    if(IS_ABSOLUTE "${_SRC}")
-      list(APPEND _ARGS "${_SRC}")
+    if(_SRC MATCHES "^/")
+      # _SRC is an absolute path (starts with `/`).
+      list(APPEND _RESOLVED_SRCS "${_SRC}")
+    elseif(_SRC MATCHES "^[$]<")
+      # _SRC is a CMake generator expression (starts with `$<`).
+      list(APPEND _RESOLVED_SRCS "${_SRC}")
+    elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_SRC}")
+      # _SRC path exists relative to the current source dir.
+      list(APPEND _RESOLVED_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/${_SRC}")
+    elseif(EXISTS "${IREE_SOURCE_DIR}/${_SRC}")
+      # _SRC path exists relative to the root source dir.
+      list(APPEND _RESOLVED_SRCS "${IREE_SOURCE_DIR}/${_SRC}")
     else()
-      list(APPEND _ARGS "${CMAKE_CURRENT_SOURCE_DIR}/${_SRC}")
+      # All else having failed, interpret _SRC as a path in the binary dir,
+      # that is, a generated file. As the present logic executes before
+      # that file would be generated, we can't perform an if(EXISTS) test to
+      # validate that hypothesis.
+      #
+      # Additional difficulty: _SRC could be a path relative to either the
+      # current binary dir, or the root binary dir. Again, it's too early here
+      # to use an if(EXISTS) test to determine that. Instead, the following
+      # regex replacement strips the current binary dir as a prefix from _SRC.
+      # So if _SRC was relative to the root binary dir, now it is relative to
+      # the current binary dir. And if it was already relative to the current
+      # binary dir, then the regex should fail to match (unless we're very
+      # unlucky and we have paths of the form root/a/b/a/b, but that's not a
+      # problem that we have today. And if that ever happens, people will still
+      # be able to get the correct behavior by passing a path relative to the
+      # root dir).
+      string(REGEX REPLACE "^${_RELATIVE_BINARY_DIR}/" "" _SRC_RELATIVE "${_SRC}")
+      list(APPEND _RESOLVED_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${_SRC_RELATIVE}")
     endif()
   endforeach(_SRC)
-  foreach(_SRC ${_RULE_GENERATED_SRCS})
-    list(APPEND _ARGS "${_SRC}")
-  endforeach(_SRC)
 
   add_custom_command(
     OUTPUT "${_RULE_H_FILE_OUTPUT}" "${_RULE_C_FILE_OUTPUT}"
-    COMMAND generate_embed_data ${_ARGS}
-    DEPENDS generate_embed_data ${_RULE_SRCS} ${_RULE_GENERATED_SRCS}
+    COMMAND generate_embed_data ${_ARGS} ${_RESOLVED_SRCS}
+    DEPENDS generate_embed_data ${_RESOLVED_SRCS}
   )
 
   if(_RULE_TESTONLY)
diff --git a/build_tools/cmake/iree_hal_cts_test_suite.cmake b/build_tools/cmake/iree_hal_cts_test_suite.cmake
index 691b76b..417cc45 100644
--- a/build_tools/cmake/iree_hal_cts_test_suite.cmake
+++ b/build_tools/cmake/iree_hal_cts_test_suite.cmake
@@ -111,7 +111,7 @@
       iree_c_embed_data(
         NAME
           ${_EXECUTABLES_TESTDATA_NAME}_c
-        GENERATED_SRCS
+        SRCS
           ${_EMBED_DATA_SOURCES}
         C_FILE_OUTPUT
           "${_EXECUTABLES_TESTDATA_NAME}_c.c"
@@ -180,6 +180,7 @@
     set(IREE_CTS_DRIVER_REGISTRATION_FN "${_RULE_DRIVER_REGISTRATION_FN}")
     set(IREE_CTS_TEST_CLASS_NAME "${_TEST_NAME}_test")
     set(IREE_CTS_DRIVER_NAME "${_RULE_DRIVER_NAME}")
+    set(IREE_CTS_TARGET_BACKEND "${_RULE_COMPILER_TARGET_BACKEND}")
 
     configure_file(
       "${IREE_ROOT_DIR}/runtime/src/iree/hal/cts/cts_test_template.cc.in"
diff --git a/build_tools/embed_data/generate_embed_data_main.cc b/build_tools/embed_data/generate_embed_data_main.cc
index a851cf5..298000b 100644
--- a/build_tools/embed_data/generate_embed_data_main.cc
+++ b/build_tools/embed_data/generate_embed_data_main.cc
@@ -90,6 +90,11 @@
                            const std::string& header_file,
                            const std::vector<std::string>& toc_files) {
   std::ofstream f(header_file, std::ios::out | std::ios::trunc);
+  if (!f) {
+    fprintf(stderr, "Failed to open '%s' for write.\n", header_file.c_str());
+    exit(EXIT_FAILURE);
+  }
+
   f << "#pragma once\n";  // Pragma once isn't great but is the best we can do.
   f << "#include <stddef.h>\n";
   GenerateTocStruct(f);
@@ -106,6 +111,10 @@
 static bool SlurpFile(const std::string& file_name, std::string* contents) {
   constexpr std::streamoff kMaxSize = 100000000;
   std::ifstream f(file_name, std::ios::in | std::ios::binary);
+  if (!f) {
+    fprintf(stderr, "Failed to open '%s' for read.\n", file_name.c_str());
+    exit(EXIT_FAILURE);
+  }
   // get length of file:
   f.seekg(0, f.end);
   std::streamoff length = f.tellg();
@@ -133,6 +142,11 @@
                          const std::vector<std::string>& input_files,
                          const std::vector<std::string>& toc_files) {
   std::ofstream f(impl_file, std::ios::out | std::ios::trunc);
+  if (!f) {
+    fprintf(stderr, "Failed to open '%s' for write.\n", impl_file.c_str());
+    exit(EXIT_FAILURE);
+  }
+
   f << "#include <stddef.h>\n";
   f << "#include <stdint.h>\n";
   f << R"(
diff --git a/runtime/src/iree/base/testing/CMakeLists.txt b/runtime/src/iree/base/testing/CMakeLists.txt
index 5f3f797..1b3b5c3 100644
--- a/runtime/src/iree/base/testing/CMakeLists.txt
+++ b/runtime/src/iree/base/testing/CMakeLists.txt
@@ -19,7 +19,7 @@
 iree_c_embed_data(
   NAME
     dynamic_library_test_library
-  GENERATED_SRCS
+  SRCS
     "$<TARGET_FILE:iree::base::testing::dynamic_library_test_library.so>"
   C_FILE_OUTPUT
     "dynamic_library_test_library_embed.c"
diff --git a/runtime/src/iree/builtins/device/CMakeLists.txt b/runtime/src/iree/builtins/device/CMakeLists.txt
index b71b7f7..19c83d3 100644
--- a/runtime/src/iree/builtins/device/CMakeLists.txt
+++ b/runtime/src/iree/builtins/device/CMakeLists.txt
@@ -71,7 +71,7 @@
 iree_c_embed_data(
   NAME
     libdevice_bitcode
-  GENERATED_SRCS
+  SRCS
     "libdevice_wasm32_generic.bc"
     "libdevice_wasm64_generic.bc"
   DEPS
diff --git a/runtime/src/iree/builtins/ukernel/CMakeLists.txt b/runtime/src/iree/builtins/ukernel/CMakeLists.txt
index 3da5fba..554d6a9 100644
--- a/runtime/src/iree/builtins/ukernel/CMakeLists.txt
+++ b/runtime/src/iree/builtins/ukernel/CMakeLists.txt
@@ -172,7 +172,7 @@
 iree_c_embed_data(
   NAME
     libukernel_bitcode
-  GENERATED_SRCS
+  SRCS
     "ukernel_bitcode.bc"
   DEPS
 
diff --git a/runtime/src/iree/hal/cts/cts_test_template.cc.in b/runtime/src/iree/hal/cts/cts_test_template.cc.in
index 7783f5e..33b0eef 100644
--- a/runtime/src/iree/hal/cts/cts_test_template.cc.in
+++ b/runtime/src/iree/hal/cts/cts_test_template.cc.in
@@ -12,6 +12,8 @@
 #cmakedefine IREE_CTS_DRIVER_NAME "@IREE_CTS_DRIVER_NAME@"
 #cmakedefine IREE_CTS_EXECUTABLE_FORMAT @IREE_CTS_EXECUTABLE_FORMAT@
 #cmakedefine IREE_CTS_EXECUTABLES_TESTDATA_HDR "@IREE_CTS_EXECUTABLES_TESTDATA_HDR@"
+#cmakedefine IREE_CTS_TARGET_BACKEND "@IREE_CTS_TARGET_BACKEND@"
+
 // clang-format on
 
 #include IREE_CTS_TEST_FILE_PATH
@@ -43,9 +45,15 @@
 iree_const_byte_span_t get_test_executable_data(iree_string_view_t file_name) {
 #ifdef IREE_CTS_EXECUTABLES_TESTDATA_HDR
   const struct iree_file_toc_t* toc = iree_cts_testdata_executables_create();
+  char file_name_with_target_backend[256];
+  snprintf(file_name_with_target_backend, sizeof file_name_with_target_backend,
+    "%s_%.*s", IREE_CTS_TARGET_BACKEND, static_cast<int>(file_name.size), file_name.data);
   for (size_t i = 0; i < iree_cts_testdata_executables_size(); ++i) {
     const auto& file = toc[i];
-    if (iree_string_view_equal(file_name, iree_make_cstring_view(file.name))) {
+    if (iree_string_view_equal(
+      iree_make_cstring_view(file_name_with_target_backend),
+      iree_make_cstring_view(file.name)))
+    {
       return iree_make_const_byte_span(file.data, file.size);
     }
   }
diff --git a/runtime/src/iree/vm/test/CMakeLists.txt b/runtime/src/iree/vm/test/CMakeLists.txt
index 8a55244..9ada5ef 100644
--- a/runtime/src/iree/vm/test/CMakeLists.txt
+++ b/runtime/src/iree/vm/test/CMakeLists.txt
@@ -17,7 +17,7 @@
 iree_c_embed_data(
   NAME
     all_bytecode_modules_c
-  GENERATED_SRCS
+  SRCS
     "arithmetic_ops.vmfb"
     "arithmetic_ops_f32.vmfb"
     "arithmetic_ops_i64.vmfb"
@@ -293,7 +293,7 @@
 iree_c_embed_data(
   NAME
     async_bytecode_modules_c
-  GENERATED_SRCS
+  SRCS
     "async_ops.vmfb"
   C_FILE_OUTPUT
     "async_bytecode_modules.c"
diff --git a/samples/static_library/CMakeLists.txt b/samples/static_library/CMakeLists.txt
index 6d60b11..7727bd8 100644
--- a/samples/static_library/CMakeLists.txt
+++ b/samples/static_library/CMakeLists.txt
@@ -55,7 +55,7 @@
     simple_mul_c
   IDENTIFIER
     iree_samples_static_library_simple_mul
-  GENERATED_SRCS
+  SRCS
     simple_mul.vmfb
   C_FILE_OUTPUT
     simple_mul_c.c