Introduce OSS support for embedding data into binaries and use it for SPIRV kernels in Bazel builds.

This doesn't fix flatbuffer reflection embedding (yet) since that is blocked on an upstream flatbuffers bug.

With this, more of the compiler builds (seemingly modulo some compiler/std version related errors).

PiperOrigin-RevId: 274185920
diff --git a/build_tools/embed_data/BUILD b/build_tools/embed_data/BUILD
new file mode 100644
index 0000000..e4153fe
--- /dev/null
+++ b/build_tools/embed_data/BUILD
@@ -0,0 +1,54 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Generates source files with embedded file contents.
+
+load(":build_defs.bzl", "cc_embed_data")
+
+package(
+    default_visibility = ["//visibility:public"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+cc_binary(
+    name = "generate_cc_embed_data",  # Tool run by the cc_embed_data() genrule.
+    srcs = ["generate_cc_embed_data.cc"],
+    deps = [
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/flags:parse",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+    ],
+)
+
+cc_embed_data(
+    name = "testembed1",  # Fixture library consumed by :testembed1_test.
+    srcs = [
+        "file1.txt",
+        "data/file2.txt",
+    ],
+    cc_file_output = "testembed1.cc",
+    cpp_namespace = "foobar",
+    flatten = True,  # TOC names lose their directory part ("file2.txt").
+    h_file_output = "testembed1.h",
+)
+
+cc_test(
+    name = "testembed1_test",  # Checks the names, contents and sizes embedded above.
+    srcs = ["testembed1_test.cc"],
+    deps = [
+        ":testembed1",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/build_tools/embed_data/build_defs.bzl b/build_tools/embed_data/build_defs.bzl
new file mode 100644
index 0000000..1695b7e
--- /dev/null
+++ b/build_tools/embed_data/build_defs.bzl
@@ -0,0 +1,80 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Embeds data files into a C++ module."""
+
+def cc_embed_data(
+        name,
+        srcs,
+        cc_file_output,
+        h_file_output,
+        cpp_namespace = None,
+        strip_prefix = None,
+        flatten = False):
+    """Embeds 'srcs' into a C++ library target called 'name'.
+
+    The generated header looks like:
+      namespace foo {
+      struct FileToc {
+        const char* name;             // the file's original name
+        const char* data;             // beginning of the file
+        std::size_t size;             // length of the file
+      };
+      extern const struct FileToc* this_rule_name_create();
+      std::size_t this_rule_name_size();  // number of embedded files
+      }
+
+    'this_rule_name_create()' returns an array of FileToc structs that is
+    terminated by an entry whose 'name' and 'data' fields are nullptr.
+    Every 'data' buffer carries an extra null terminator at the end, which
+    is not counted in 'size'.
+
+    Args:
+      name: The rule name, which is also used as the prefix of the generated
+        '_create' and '_size' symbols.
+      srcs: List of files to embed.
+      cc_file_output: The CC implementation file to output.
+      h_file_output: The H header file to output.
+      cpp_namespace: C++ namespace ('::'-separated) wrapping the generated code.
+      strip_prefix: Strips this verbatim prefix from filenames (in the TOC).
+      flatten: Removes all directory components from filenames (in the TOC).
+    """
+    generator = "//build_tools/embed_data:generate_cc_embed_data"
+
+    # Command line layout: <generator> <input files...> <flags...>.
+    args = [
+        "$(location %s)" % generator,
+        "$(SRCS)",
+        "--output_header='$(location %s)'" % h_file_output,
+        "--output_impl='$(location %s)'" % cc_file_output,
+        "--identifier='%s'" % name,
+    ]
+    if cpp_namespace != None:
+        args.append("--cpp_namespace='%s'" % cpp_namespace)
+    if strip_prefix != None:
+        args.append("--strip_prefix='%s'" % strip_prefix)
+    if flatten:
+        args.append("--flatten")
+    native.genrule(
+        name = name + "__generator",
+        srcs = srcs,
+        outs = [cc_file_output, h_file_output],
+        tools = [generator],
+        cmd = " ".join(args),
+    )
+    native.cc_library(
+        name = name,
+        hdrs = [h_file_output],
+        srcs = [cc_file_output],
+    )
diff --git a/build_tools/embed_data/data/file2.txt b/build_tools/embed_data/data/file2.txt
new file mode 100644
index 0000000..8de3113
--- /dev/null
+++ b/build_tools/embed_data/data/file2.txt
@@ -0,0 +1 @@
+¯\_(ツ)_/¯
diff --git a/build_tools/embed_data/file1.txt b/build_tools/embed_data/file1.txt
new file mode 100644
index 0000000..77c80ee
--- /dev/null
+++ b/build_tools/embed_data/file1.txt
@@ -0,0 +1 @@
+Are you '"Still"' here?
diff --git a/build_tools/embed_data/generate_cc_embed_data.cc b/build_tools/embed_data/generate_cc_embed_data.cc
new file mode 100644
index 0000000..8377b00
--- /dev/null
+++ b/build_tools/embed_data/generate_cc_embed_data.cc
@@ -0,0 +1,179 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/strip.h"
+#include "absl/time/time.h"
+
+ABSL_FLAG(std::string, identifier, "resources",
+          "name of the resources function");  // Prefix of _create()/_size().
+ABSL_FLAG(std::string, output_header, "", "output header file");  // "" = skip.
+ABSL_FLAG(std::string, output_impl, "", "output cc impl file");  // "" = skip.
+ABSL_FLAG(std::string, cpp_namespace, "", "generate in a c++ namespace");
+ABSL_FLAG(std::string, strip_prefix, "", "strip prefix from filenames");
+ABSL_FLAG(bool, flatten, false,
+          "whether to flatten the directory structure (only include basename)");
+
+// Emits one `namespace <part> {` line per "::"-separated component of
+// --cpp_namespace; emits nothing when the flag is empty.
+void GenerateNamespaceOpen(std::ofstream& f) {
+  const std::string full_namespace = absl::GetFlag(FLAGS_cpp_namespace);
+  if (full_namespace.empty()) return;
+
+  const std::vector<std::string> parts =
+      absl::StrSplit(full_namespace, absl::ByString("::"));
+  for (const std::string& part : parts) f << "namespace " << part << " {\n";
+}
+
+// Emits one closing brace line per "::"-separated component of
+// --cpp_namespace; emits nothing when the flag is empty.
+void GenerateNamespaceClose(std::ofstream& f) {
+  const std::string full_namespace = absl::GetFlag(FLAGS_cpp_namespace);
+  if (full_namespace.empty()) return;
+
+  const std::vector<std::string> parts =
+      absl::StrSplit(full_namespace, absl::ByString("::"));
+  for (size_t remaining = parts.size(); remaining > 0; --remaining) f << "}\n";
+}
+
+void GenerateTocStruct(std::ofstream& f) {  // Emits the FileToc definition.
+  f << "struct FileToc {\n"
+       "  const char* name;             // the file's original name\n"
+       "  const char* data;             // beginning of the file\n"
+       "  std::size_t size;             // length of the file\n"
+       "};\n";
+}
+
+// Writes the header (FileToc, <identifier>_create(), <identifier>_size()) to
+// `header_file`; returns false if the stream is not good after closing.
+bool GenerateHeader(const std::string& header_file,
+                    const std::vector<std::string>& toc_files) {
+  const std::string identifier = absl::GetFlag(FLAGS_identifier);
+  std::ofstream f(header_file, std::ios::out | std::ios::trunc);
+  f << "#pragma once\n#include <cstddef>\n";  // Pragma once: best we can do.
+  GenerateNamespaceOpen(f);
+  GenerateTocStruct(f);
+  f << "extern const struct FileToc* " << identifier << "_create();\n";
+  // inline, not static: avoids -Wunused-function in includers that skip it.
+  f << "inline std::size_t " << identifier << "_size() {\n";
+  f << "  return " << toc_files.size() << ";\n}\n";
+  GenerateNamespaceClose(f);
+  f.close();
+  return f.good();
+}
+
+// Reads all of `file_name` into `*contents`; false on I/O error or if too big.
+bool SlurpFile(const std::string& file_name, std::string* contents) {
+  constexpr std::streamoff kMaxSize = 100000000;
+  std::ifstream input(file_name, std::ios::in | std::ios::binary);
+  // Measure the file by seeking to its end, then rewind for the read.
+  input.seekg(0, std::ios::end);
+  const std::streamoff length = input.tellg();
+  input.seekg(0, std::ios::beg);
+  if (!input.good()) return false;
+
+  if (length > kMaxSize) {
+    std::cerr << "File " << file_name << " is too large\n";
+    return false;
+  }
+
+  contents->resize(static_cast<size_t>(length));
+  input.read(&(*contents)[0], contents->size());
+  input.close();
+  return input.good();
+}
+
+// Writes the generated implementation to `impl_file`: a FileToc table with one
+// entry per input (TOC name, hex-escaped contents, size), a nullptr sentinel
+// and the <identifier>_create() accessor. Returns false if the two lists differ
+// in length, an input cannot be read, or the stream is not good after closing.
+bool GenerateImpl(const std::string& impl_file,
+                  const std::vector<std::string>& input_files,
+                  const std::vector<std::string>& toc_files) {
+  if (input_files.size() != toc_files.size()) return false;  // Also in NDEBUG.
+  std::ofstream f(impl_file, std::ios::out | std::ios::trunc);
+  f << "#include <cstddef>\n";
+  GenerateNamespaceOpen(f);
+  GenerateTocStruct(f);
+  f << "static const struct FileToc toc[] = {\n";
+  for (size_t i = 0, e = input_files.size(); i < e; ++i) {
+    std::string contents;
+    if (!SlurpFile(input_files[i], &contents)) {
+      std::cerr << "Error reading file " << input_files[i] << "\n";
+      return false;
+    }
+    // The escaped "\0" appends a terminator that `size` does not count.
+    f << "  {\"" << absl::CEscape(toc_files[i]) << "\", \""
+      << absl::CHexEscape(contents) << "\\0\", " << contents.size() << "},\n";
+  }
+  f << "  {nullptr, nullptr, 0},\n};\n";
+  f << "const struct FileToc* " << absl::GetFlag(FLAGS_identifier)
+    << "_create() {\n  return &toc[0];\n}\n";
+
+  GenerateNamespaceClose(f);
+  f.close();
+  return f.good();
+}
+
+// Entry point. Positional arguments are the files to embed; flags pick the
+// outputs. Exits 0 on success, 1 if header generation failed, 2 if impl
+// generation failed.
+int main(int argc, char** argv) {
+  // ParseCommandLine returns the non-flag arguments, program name first.
+  const std::vector<char*> positional = absl::ParseCommandLine(argc, argv);
+  std::vector<std::string> input_files;
+  input_files.reserve(positional.size() - 1);
+  for (size_t i = 1; i < positional.size(); ++i) {
+    input_files.emplace_back(positional[i]);
+  }
+
+  // Derive each TOC name: drop --strip_prefix, then (with --flatten) keep only
+  // what follows the last '/' or backslash.
+  const std::string strip_prefix = absl::GetFlag(FLAGS_strip_prefix);
+  const bool flatten = absl::GetFlag(FLAGS_flatten);
+  std::vector<std::string> toc_files;
+  toc_files.reserve(input_files.size());
+  for (const std::string& input_file : input_files) {
+    std::string toc_file(absl::StripPrefix(input_file, strip_prefix));
+    if (flatten) {
+      // npos + 1 wraps to 0, so a name without separators is kept whole.
+      toc_file = toc_file.substr(toc_file.find_last_of("/\\") + 1);
+    }
+    toc_files.push_back(toc_file);
+  }
+
+  // Each output is optional and is generated only when its flag is non-empty.
+  const std::string output_header = absl::GetFlag(FLAGS_output_header);
+  if (!output_header.empty() && !GenerateHeader(output_header, toc_files)) {
+    std::cerr << "Error generating headers.\n";
+    return 1;
+  }
+
+  // The impl reads the inputs by their real paths and labels them by TOC name.
+  const std::string output_impl = absl::GetFlag(FLAGS_output_impl);
+  if (!output_impl.empty() &&
+      !GenerateImpl(output_impl, input_files, toc_files)) {
+    std::cerr << "Error generating impl.\n";
+    return 2;
+  }
+
+  return 0;
+}
diff --git a/build_tools/embed_data/testembed1_test.cc b/build_tools/embed_data/testembed1_test.cc
new file mode 100644
index 0000000..f34d19b
--- /dev/null
+++ b/build_tools/embed_data/testembed1_test.cc
@@ -0,0 +1,44 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/build_tools/embed_data/testembed1.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+// Walks the TOC generated for :testembed1 and checks each entry's flattened
+// name, contents, byte size and trailing null, then the nullptr sentinel.
+TEST(Generator, TestContents) {
+  // Entries appear in the order the srcs were listed.
+  auto* toc = ::foobar::testembed1_create();
+  ASSERT_EQ("file1.txt", std::string(toc->name));
+  ASSERT_EQ(R"(Are you '"Still"' here?)" "\n", std::string(toc->data));
+  ASSERT_EQ(24u, toc->size);
+  ASSERT_EQ(0, *(toc->data + toc->size));
+
+  ++toc;
+  ASSERT_EQ("file2.txt", std::string(toc->name));
+  ASSERT_EQ(R"(¯\_(ツ)_/¯)" "\n", std::string(toc->data));
+  ASSERT_EQ(14u, toc->size);  // UTF-8 byte count, not character count.
+  ASSERT_EQ(0, *(toc->data + toc->size));
+
+  // The table ends with a sentinel entry.
+  ++toc;
+  ASSERT_EQ(nullptr, toc->name);
+  ASSERT_EQ(nullptr, toc->data);
+}
+
+}  // namespace
diff --git a/iree/compiler/Translation/SPIRV/Kernels/spirv_utils.bzl b/iree/compiler/Translation/SPIRV/Kernels/spirv_utils.bzl
index 2d65a0e..49f03aa 100644
--- a/iree/compiler/Translation/SPIRV/Kernels/spirv_utils.bzl
+++ b/iree/compiler/Translation/SPIRV/Kernels/spirv_utils.bzl
@@ -1,6 +1,7 @@
 """Utilities for handling hand-written SPIR-V files."""
 
-load("//iree:build_defs.bzl", "iree_cc_embed_data", "iree_glsl_vulkan")
+load("//iree:build_defs.bzl", "iree_glsl_vulkan")
+load("//build_tools/embed_data:build_defs.bzl", "cc_embed_data")
 
 def spirv_kernel_cc_library(name, srcs):
     """Compiles GLSL files into SPIR-V binaries and embeds them in a cc_library.
@@ -21,14 +22,11 @@
         name = name + "_files",
         srcs = spv_files,
     )
-    iree_cc_embed_data(
+    cc_embed_data(
         name = name,
         srcs = spv_files,
-        outs = [
-            name + ".cc",
-            name + ".h",
-        ],
-        embedopts = [
-            "--namespace=mlir::iree_compiler::spirv_kernels",
-        ],
+        cc_file_output = name + ".cc",
+        h_file_output = name + ".h",
+        cpp_namespace = "mlir::iree_compiler::spirv_kernels",
+        flatten = True,
     )