Correct 32bit/64bit separation in ukernel code. (#13878)

* Rename `iree_uk_ssize_t` to `iree_uk_index_t` to make it clear that
the primary requirement on this type is to match the compiler's `index`
type.
* Build non-arch/ ukernel code twice, for 32bit and 64bit architectures.
As in libdevice, wasm_32 / wasm_64 are chosen as sane 32bit/64bit
architectures to pick when the only thing that matters is the bitness
(which is only needed to choose the right definition of
`iree_uk_index_t`).
* Drop `strip_target_info.py`; instead, adopt `Device.cpp`'s approach of
dropping function attributes inside the compiler after loading the
bitcode.
diff --git a/build_tools/bazel/iree_bitcode_library.bzl b/build_tools/bazel/iree_bitcode_library.bzl
index 09282f3..0a01299 100644
--- a/build_tools/bazel/iree_bitcode_library.bzl
+++ b/build_tools/bazel/iree_bitcode_library.bzl
@@ -41,20 +41,17 @@
 
 def iree_bitcode_library(
         name,
+        arch,
         srcs,
         internal_hdrs = [],
         copts = [],
         out = None,
-        arch = None,
         **kwargs):
     """Builds an LLVM bitcode library from an input file via clang.
 
     Args:
         name: Name of the target.
-        arch: Target architecture to compile for, in IREE_ARCH format. If left
-              empty, will produce architecture-independent bitcode by stripping
-              target triple and target attributes; that only makes sense if the
-              sources being compiled are truly architecture-independent.
+        arch: Target architecture to compile for, in IREE_ARCH format.
         srcs: source files to pass to clang.
         internal_hdrs: all headers transitively included by the source files.
                        Unlike typical Bazel `hdrs`, these are not exposed as
@@ -73,6 +70,10 @@
     builtin_headers_path = "external/llvm-project/clang/staging/include/"
 
     base_copts = [
+        # Target architecture
+        "-target",
+        iree_arch_to_llvm_arch(arch),
+
         # C17 with no system deps.
         "-std=c17",
         "-nostdinc",
@@ -97,20 +98,10 @@
         "-DIREE_DEVICE_STANDALONE=1",
     ]
 
-    llvmir_processing_tool = None
-    if arch:
-        # Compile to the specified target architecture.
-        base_copts.extend(["-target", iree_arch_to_llvm_arch(arch)])
-    else:
-        # Output text rather than binary serialization of LLVM IR for processing
-        base_copts.append("-S")
-
-        # Strip target information from generated LLVM IR.
-        llvmir_processing_tool = "//build_tools/scripts:strip_target_info"
-
     bitcode_files = []
     for src in srcs:
         bitcode_out = "%s_%s.bc" % (name, src)
+        bitcode_files.append(bitcode_out)
         native.genrule(
             name = "gen_%s" % (bitcode_out),
             srcs = [src, builtin_headers_dep] + internal_hdrs,
@@ -134,28 +125,6 @@
             **kwargs
         )
 
-        if llvmir_processing_tool:
-            processed_bitcode_out = "%s_%s.processed.bc" % (name, src)
-            native.genrule(
-                name = "gen_%s" % (processed_bitcode_out),
-                srcs = [bitcode_out],
-                outs = [processed_bitcode_out],
-                cmd = " ".join([
-                    "$(location %s)" % (llvmir_processing_tool),
-                    "< $(location %s)" % bitcode_out,
-                    "> $(location %s)" % processed_bitcode_out,
-                ]),
-                tools = [
-                    llvmir_processing_tool,
-                ],
-                message = "Processing %s into %s using %s..." % (bitcode_out, processed_bitcode_out, llvmir_processing_tool),
-                output_to_bindir = 1,
-                **kwargs
-            )
-            bitcode_files.append(processed_bitcode_out)
-        else:
-            bitcode_files.append(bitcode_out)
-
     if not out:
         out = "%s.bc" % (name)
     native.genrule(