Remove the ukernels standalone plugin (#14339)

The standalone plugin was introduced as a stepping stone towards
ukernels-as-bitcode. Now that we have that, we don't need the stepping
stone anymore. Like the bitcode build, the standalone plugin involved
custom invocations of our in-tree Clang to produce a multi-arch library;
this is expensive to maintain as it's a whole separate build of all
ukernel code for all target architectures.

The system plugin remains, as it's much simpler (it simply links the
native build of ukernels as a regular library) and it's much more likely
to still be used by some classes of users going forward (people who can
only use a system-provided toolchain and who don't care about the
multi-arch aspect).
diff --git a/experimental/cpu_ukernel/CMakeLists.txt b/experimental/cpu_ukernel/CMakeLists.txt
index 6e920f9..052f4db 100644
--- a/experimental/cpu_ukernel/CMakeLists.txt
+++ b/experimental/cpu_ukernel/CMakeLists.txt
@@ -11,7 +11,6 @@
 add_subdirectory(test)
 
 include(iree_experimental_system_plugin.cmake)
-include(iree_experimental_standalone_plugin.cmake)
 
 iree_experimental_system_plugin(
   NAME
@@ -21,81 +20,3 @@
   DEPS
     iree::builtins::ukernel
 )
-
-
-set(_STANDALONE_PLUGIN_ARCHS "")
-set(_STANDALONE_PLUGIN_ARCH_SRCS "")
-
-iree_compiler_targeting_iree_arch(_IREE_UKERNEL_BITCODE_BUILD_X86_64 "x86_64")
-if (_IREE_UKERNEL_BITCODE_BUILD_X86_64)
-  set(IREE_UK_X86_64_AVX2_FMA_COPTS
-    "-mavx2"
-    "-mfma"
-  )
-  set(IREE_UK_X86_64_AVX512_BASE_COPTS
-    ${IREE_UK_X86_64_AVX2_FMA_COPTS}
-    "-mavx512f"
-    "-mavx512vl"
-    "-mavx512cd"
-    "-mavx512bw"
-    "-mavx512dq"
-  )
-  set(IREE_UK_X86_64_AVX512_VNNI_COPTS
-    ${IREE_UK_X86_64_AVX512_BASE_COPTS}
-    "-mavx512vnni"
-  )
-  list(APPEND _STANDALONE_PLUGIN_ARCHS "x86_64")
-  list(APPEND _STANDALONE_PLUGIN_ARCH_SRCS
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/query_tile_sizes_x86_64.c"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/unpack_x86_64.c"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/pack_x86_64.c"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/mmt4d_x86_64.c"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/mmt4d_x86_64_avx2_fma.c:IREE_UK_X86_64_AVX2_FMA_COPTS"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/pack_x86_64_avx2_fma.c:IREE_UK_X86_64_AVX2_FMA_COPTS"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/unpack_x86_64_avx2_fma.c:IREE_UK_X86_64_AVX2_FMA_COPTS"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/unpack_x86_64_avx512_base.c:IREE_UK_X86_64_AVX512_BASE_COPTS"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/mmt4d_x86_64_avx512_base.c:IREE_UK_X86_64_AVX512_BASE_COPTS"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/pack_x86_64_avx512_base.c:IREE_UK_X86_64_AVX512_BASE_COPTS"
-    "x86_64:runtime/src/iree/builtins/ukernel/arch/x86_64/mmt4d_x86_64_avx512_vnni.c:IREE_UK_X86_64_AVX512_VNNI_COPTS"
-  )
-endif()  # _IREE_UKERNEL_BITCODE_BUILD_X86_64
-
-iree_compiler_targeting_iree_arch(_IREE_UKERNEL_BITCODE_BUILD_ARM_64 "arm_64")
-if (_IREE_UKERNEL_BITCODE_BUILD_ARM_64)
-  set(IREE_UK_ARM_64_DOTPROD_COPTS
-    "-march=armv8.2-a+dotprod"
-  )
-
-  set(IREE_UK_ARM_64_I8MM_COPTS
-    "-march=armv8.2-a+i8mm"
-  )
-  list(APPEND _STANDALONE_PLUGIN_ARCHS "arm_64")
-  list(APPEND _STANDALONE_PLUGIN_ARCH_SRCS
-    "arm_64:runtime/src/iree/builtins/ukernel/arch/arm_64/query_tile_sizes_arm_64.c"
-    "arm_64:runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_arm_64.c"
-    "arm_64:runtime/src/iree/builtins/ukernel/arch/arm_64/pack_arm_64.c"
-    "arm_64:runtime/src/iree/builtins/ukernel/arch/arm_64/unpack_arm_64.c"
-    "arm_64:runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_arm_64_dotprod.c:IREE_UK_ARM_64_DOTPROD_COPTS"
-    "arm_64:runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_arm_64_i8mm.c:IREE_UK_ARM_64_I8MM_COPTS"
-  )
-endif()
-
-iree_experimental_standalone_plugin(
-  NAME
-    builtin_ukernel_standalone_plugin
-  ARCHS
-    "${_STANDALONE_PLUGIN_ARCHS}"
-  SRCS
-    plugin.c
-    # ukernel/arch/ file come before ukernel/ (non-arch) files because they
-    # contains symbols that should override weak symbols in ukernel/weak.c.
-    "${_STANDALONE_PLUGIN_ARCH_SRCS}"
-    runtime/src/iree/builtins/ukernel/mmt4d.c
-    runtime/src/iree/builtins/ukernel/mmt4d_tile.c
-    runtime/src/iree/builtins/ukernel/unpack_tile.c
-    runtime/src/iree/builtins/ukernel/pack.c
-    runtime/src/iree/builtins/ukernel/query_tile_sizes.c
-    runtime/src/iree/builtins/ukernel/unpack.c
-    runtime/src/iree/builtins/ukernel/pack_tile.c
-    runtime/src/iree/builtins/ukernel/weak.c
-)
diff --git a/experimental/cpu_ukernel/iree_experimental_standalone_plugin.cmake b/experimental/cpu_ukernel/iree_experimental_standalone_plugin.cmake
deleted file mode 100644
index 3f78f7d..0000000
--- a/experimental/cpu_ukernel/iree_experimental_standalone_plugin.cmake
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright 2023 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-# iree_experimental_standalone_plugin_arch()
-#
-# Helper for iree_experimental_standalone_plugin, building one
-# architecture.
-#
-# Parameters:
-# NAME: Name of the system plugin to create.
-# ARCH: Name of architecture (as in IREE_ARCH) to build for.
-#       Example: "arm_64".
-# COPTS: List of compiler options to be applied to all source files.
-# SRCS: List of source files. Each list entry may be of one of two forms:
-#         * Each entry that does not contain a colon is interpreted as a source
-#           file path, to be built unconditionally, with the compiler options
-#           specified in `COPTS`.
-#         * Each entry that contains a colon is interpreted as a colon-separated
-#           list of length either 2 or 3. Format:
-#           `ARCH:FILE[:FILE_COPTS_VAR_NAME]`.
-#           Any entry whose `ARCH` does not match this rules's `ARCH` parameter
-#           is filtered out. Remaining files are compiled with the
-#           architecture-wide compiler options (see `COPTS`) and, if provided,
-#           with the file-specific compiler options from expanding the variable
-#           specified in `FILE_COPTS_VAR_NAME`.
-#           Example:  "x86_64:some_file_for_x86_64_using_avx512_instructions.c:NAME_OF_VARIABLE_CONTAINING_COPTS_FOR_X86_64_AVX512".
-function(iree_experimental_standalone_plugin_arch)
-  cmake_parse_arguments(
-    _RULE
-    ""
-    "NAME;ARCH"
-    "SRCS;COPTS"
-    ${ARGN}
-  )
-
-  iree_package_name(_PACKAGE_NAME)
-  set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}_${_RULE_ARCH}")
-  iree_arch_to_llvm_arch(LLVM_ARCH "${_RULE_ARCH}")
-
-  foreach(_SRC_ENTRY_COLON_SEPARATED IN LISTS _RULE_SRCS)
-    string(REPLACE ":" ";" _SRC_ENTRY_LIST "${_SRC_ENTRY_COLON_SEPARATED}")
-    list(LENGTH _SRC_ENTRY_LIST _SRC_ENTRY_LIST_LENGTH)
-    set(_SRC_COPTS_VAR_NAME "")
-    set(_SRC_FILE "")
-    if(_SRC_ENTRY_LIST_LENGTH EQUAL 1)
-      set(_SRC_FILE "${_SRC_ENTRY_LIST}")
-    else()  # NOT _SRC_ENTRY_LIST_LENGTH EQUAL 1
-      list(GET _SRC_ENTRY_LIST 0 _SRC_ARCH)
-      if(NOT _SRC_ARCH STREQUAL _RULE_ARCH)
-        continue()
-      endif()
-      list(GET _SRC_ENTRY_LIST 1 _SRC_FILE)
-      if(_SRC_ENTRY_LIST_LENGTH EQUAL 3)
-        list(GET _SRC_ENTRY_LIST 2 _SRC_COPTS_VAR_NAME)
-      endif()
-    endif()  # NOT _SRC_ENTRY_LIST_LENGTH EQUAL 1
-
-    set(_SRC_COPTS "${${_SRC_COPTS_VAR_NAME}}")
-
-    get_filename_component(_SRC_FILE_BASENAME "${_SRC_FILE}" NAME)
-
-    if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_SRC_FILE}")
-      set(_SRC_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${_SRC_FILE}")
-    endif()
-
-    if(EXISTS "${PROJECT_SOURCE_DIR}/${_SRC_FILE}")
-      set(_SRC_FILE "${PROJECT_SOURCE_DIR}/${_SRC_FILE}")
-    endif()
-
-    set(_OBJECT_FILE "${_SRC_FILE_BASENAME}.${_RULE_ARCH}.o")
-    list(APPEND _OBJECT_FILES "${CMAKE_CURRENT_BINARY_DIR}/${_OBJECT_FILE}")
-    add_custom_command(
-      OUTPUT
-        "${_OBJECT_FILE}"
-      DEPENDS
-        "${_SRC_FILE}"
-        "${IREE_CLANG_TARGET}"
-      COMMAND "${IREE_CLANG_BINARY}"
-        # Flags copied from
-        # compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/EmbeddedLinkerTool.cpp
-        -target "${LLVM_ARCH}-none-elf"
-        -isystem "${IREE_CLANG_BUILTIN_HEADERS_PATH}"
-        -std=c17
-        -fasm  # Added for inline-asm support.
-        -fPIC
-        -ffreestanding
-        -fvisibility=hidden
-        -fno-plt
-        -fno-rtti
-        -fno-exceptions
-        -fdata-sections
-        -ffunction-sections
-        -funique-section-names
-        -DIREE_DEVICE_STANDALONE
-        -I "${IREE_SOURCE_DIR}/runtime/src/"
-        -c "${_SRC_FILE}"
-        -o "${CMAKE_CURRENT_BINARY_DIR}/${_OBJECT_FILE}"
-        ${_RULE_COPTS}
-        ${_SRC_COPTS}
-      VERBATIM
-    )
-  endforeach()
-  set(_OUTPUT_SO_FILE "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}.${_RULE_ARCH}.so")
-  add_custom_command(
-    OUTPUT
-      ${_OUTPUT_SO_FILE}
-    DEPENDS
-      ${_OBJECT_FILES}
-      ${IREE_LLD_TARGET}
-    COMMAND ${IREE_LLD_BINARY}
-      -flavor gnu
-      --build-id=none
-      -nostdlib
-      -static
-      -shared
-      --no-undefined
-      --no-allow-shlib-undefined
-      --allow-multiple-definition
-      --gc-sections
-      -z now
-      -z relro
-      --discard-all
-      --icf=all
-      --ignore-data-address-equality
-      --ignore-function-address-equality
-      --hash-style=sysv
-      --strip-debug
-      ${_OBJECT_FILES}
-      -o "${_OUTPUT_SO_FILE}"
-    VERBATIM
-  )
-  add_custom_target(${_NAME} DEPENDS
-    "${_OUTPUT_SO_FILE}"
-  )
-endfunction()
-
-# iree_experimental_standalone_plugin()
-#
-# Creates a standalone plugin library, that is built using our in-tree Clang
-# toolchain for multiple target architectures, generating a fat embedded-elf,
-# and may be loaded with the embedded dynamic library loaded. 
-#
-# Contrast with: iree_experimental_system_plugin.
-#
-# Parameters:
-# NAME: Name of the system plugin to create.
-# ARCHS: List of architectures (as in IREE_ARCH) to build. Format:
-#        `ARCH[:ARCH_COPTS_VAR_NAME]`. If provided, `ARCH_COPTS_VAR_NAME` is
-#        interpreted as the name of a variable to be expanded into all compiler
-#        command lines used for architecture `ARCH`.
-#        Example: "arm_64:NAME_OF_VARIABLE_CONTAINING_COPTS_FOR_ARM_64".
-# SRCS: List of source files. Each list entry may be of one of two forms:
-#         * Each entry that does not contain a colon is interpreted as a source
-#           file path, to be built for all architectures with the
-#           architecture-wide compiler options provided for each architecture
-#           (see `ARCHS`).
-#         * Each entry that contains a colon is interpreted as a colon-separated
-#           list of length either 2 or 3. Format:
-#           `ARCH:FILE[:FILE_COPTS_VAR_NAME]`.
-#           The specified source `FILE` is compiled only for the specified
-#           architecture `ARCH` and is skipped on other architectures. It is
-#           compiled with the architecture-wide compiler options
-#           (see `ARCHS`) and, if provided, with the file-specific compiler
-#           options from expanding the variable specified in
-#           `FILE_COPTS_VAR_NAME`.
-#           Example:  "x86_64:some_file_for_x86_64_using_avx512_instructions.c:NAME_OF_VARIABLE_CONTAINING_COPTS_FOR_X86_64_AVX512".
-function(iree_experimental_standalone_plugin)
-  # Early return if we don't have our own build of Clang and LLD available.
-  if (NOT (IREE_CLANG_TARGET AND IREE_LLD_TARGET))
-    return()
-  endif()
-
-  cmake_parse_arguments(
-    _RULE
-    ""
-    "NAME"
-    "SRCS;ARCHS"
-    ${ARGN}
-  )
-
-  # Iterate over architectures. For each of them, build the architecture-specific
-  # shared library (iree_experimental_standalone_plugin_arch).
-  foreach(_ARCH_ENTRY_COLON_SEPARATED IN LISTS _RULE_ARCHS)
-    # Turn the colon-separated ARCH entry into a CMake list (semicolon-separated)
-    string(REPLACE ":" ";" _ARCH_ENTRY_LIST "${_ARCH_ENTRY_COLON_SEPARATED}")
-    list(GET _ARCH_ENTRY_LIST 0 _ARCH)
-    list(LENGTH _ARCH_ENTRY_LIST _ARCH_ENTRY_LIST_LENGTH)
-    # Get optional architecture-wide copts into _COPTS.
-    set(_COPTS_VAR_NAME "")
-    if(_ARCH_ENTRY_LIST_LENGTH EQUAL 2)
-      list(GET _ARCH_ENTRY_LIST 1 _COPTS_VAR_NAME)
-    endif()
-    set(_COPTS "${${_COPTS_VAR_NAME}}")
-    # Build the architecture-specific shared library.
-    iree_experimental_standalone_plugin_arch(
-      NAME
-        "${_RULE_NAME}"
-      ARCH
-        "${_ARCH}"
-      SRCS
-        ${_RULE_SRCS}
-      COPTS
-        ${_COPTS}
-    )
-    list(APPEND _ARCH_SO_FILES "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}.${_ARCH}.so")
-  endforeach()
-  # Generate the multi-architecture ELF file.
-  add_custom_command(
-    OUTPUT
-      "${_RULE_NAME}.sos"
-    DEPENDS
-      ${_ARCH_SO_FILES}
-      iree-fatelf
-    COMMAND iree-fatelf join
-      ${_ARCH_SO_FILES}
-      > ${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}.sos
-    VERBATIM
-  )
-  iree_package_name(_PACKAGE_NAME)
-  set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}")
-  add_custom_target("${_NAME}" DEPENDS
-    "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}.sos"
-  )
-  add_dependencies(iree-test-deps "${_NAME}")
-endfunction()
diff --git a/experimental/cpu_ukernel/plugin.c b/experimental/cpu_ukernel/plugin.c
index 22b7fbd..85876a8 100644
--- a/experimental/cpu_ukernel/plugin.c
+++ b/experimental/cpu_ukernel/plugin.c
@@ -9,12 +9,6 @@
 
 // Implementation of iree_uk_assert_fail failure is deferred to users code, i.e.
 // to us here, as core ukernel/ code can't use the standard library.
-#if defined(IREE_DEVICE_STANDALONE)  // Building a standalone plugin.
-void iree_uk_assert_fail(const char* file, int line, const char* function,
-                         const char* condition) {
-  // Doing nothing at the moment.
-}
-#else  // Building a system plugin.
 #include <stdio.h>
 #include <stdlib.h>
 void iree_uk_assert_fail(const char* file, int line, const char* function,
@@ -27,7 +21,6 @@
   fflush(stderr);
   abort();
 }
-#endif  // defined(IREE_DEVICE_STANDALONE)
 
 // Plugin entry points wrapping the actual ukernels.
 static int iree_uk_plugin_mmt4d(void* params_ptr, void* context,
@@ -122,23 +115,12 @@
     // Declares what library version is present: newer runtimes may support
     // loading older plugins but newer plugins cannot load on older runtimes.
     .version = IREE_HAL_EXECUTABLE_PLUGIN_VERSION_LATEST,
-#if defined(IREE_DEVICE_STANDALONE)  // Building a standalone plugin.
-    // Name and description are used for tracing/logging/diagnostics.
-    .name = "builtin_ukernel_standalone_plugin",
-    .description = "builtin ukernels as standalone plugin (" __FILE__ ")",
-    // Standalone plugins must declare that they are standalone so that the
-    // runtime can verify support.
-    .features = IREE_HAL_EXECUTABLE_PLUGIN_FEATURE_STANDALONE,
-    // Standalone plugins don't support sanitizers.
-    .sanitizer = IREE_HAL_EXECUTABLE_PLUGIN_SANITIZER_NONE,
-#else   // Building a system plugin.
     // Name and description are used for tracing/logging/diagnostics.
     .name = "builtin_ukernel_system_plugin",
     .description = "builtin ukernels as system plugin (" __FILE__ ")",
     .features = 0,
     // Let the runtime know what sanitizer this plugin was compiled with.
     .sanitizer = IREE_HAL_EXECUTABLE_PLUGIN_SANITIZER_KIND,
-#endif  // defined(IREE_DEVICE_STANDALONE)
   };
   static const iree_hal_executable_plugin_v0_t plugin = {
       .header = &header,
diff --git a/experimental/cpu_ukernel/test/CMakeLists.txt b/experimental/cpu_ukernel/test/CMakeLists.txt
index 53a81b4..42a8e61 100644
--- a/experimental/cpu_ukernel/test/CMakeLists.txt
+++ b/experimental/cpu_ukernel/test/CMakeLists.txt
@@ -20,20 +20,3 @@
   LABELS
     "hostonly"
 )
-
-iree_check_single_backend_test_suite(
-  NAME
-    builtin_ukernel_standalone_plugin_test
-  SRCS
-    "mmt4d.mlir"
-  TARGET_BACKEND
-    "llvm-cpu"
-  DRIVER
-    "local-sync"
-  COMPILER_FLAGS
-    "--iree-llvmcpu-enable-microkernels"
-  RUNNER_ARGS
-    "--executable_plugin=${PROJECT_BINARY_DIR}/experimental/cpu_ukernel/builtin_ukernel_standalone_plugin.sos"
-  LABELS
-    "hostonly"
-)