Reland #18804 (#18840)

Reland https://github.com/iree-org/iree/pull/18804 including
https://github.com/iree-org/iree/compare/main...ScottTodd:iree:ireegpu-api-fixes
but also with (currently) a hack for exposing symbols in
`compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp`.

TODO
- [x] decide/find the right way to expose symbols in `IREEGPUAttrs.cpp`
on windows (alternatively move those C APIs)

EDIT: moved C wrappers to
`compiler/src/iree/compiler/API/Internal/IREEGPUDialectCAPI.cpp`.

win CI job: https://github.com/iree-org/iree/actions/runs/11411731188

---------

Signed-off-by: Maksim Levental <maksim.levental@gmail.com>
diff --git a/compiler/bindings/c/BUILD.bazel b/compiler/bindings/c/BUILD.bazel
index 21857d0..01c939c 100644
--- a/compiler/bindings/c/BUILD.bazel
+++ b/compiler/bindings/c/BUILD.bazel
@@ -14,6 +14,7 @@
     name = "headers",
     hdrs = [
         "iree/compiler/api_support.h",
+        "iree/compiler/dialects/iree_gpu.h",
         "iree/compiler/embedding_api.h",
         "iree/compiler/loader.h",
         "iree/compiler/mlir_interop.h",
diff --git a/compiler/bindings/c/CMakeLists.txt b/compiler/bindings/c/CMakeLists.txt
index 2ac9f5e..f13bf42 100644
--- a/compiler/bindings/c/CMakeLists.txt
+++ b/compiler/bindings/c/CMakeLists.txt
@@ -22,6 +22,7 @@
     headers
   HDRS
     "iree/compiler/api_support.h"
+    "iree/compiler/dialects/iree_gpu.h"
     "iree/compiler/embedding_api.h"
     "iree/compiler/loader.h"
     "iree/compiler/mlir_interop.h"
diff --git a/compiler/bindings/c/iree/compiler/dialects/iree_gpu.h b/compiler/bindings/c/iree/compiler/dialects/iree_gpu.h
new file mode 100644
index 0000000..6798ed3
--- /dev/null
+++ b/compiler/bindings/c/iree/compiler/dialects/iree_gpu.h
@@ -0,0 +1,59 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_DIALECTS_IREE_GPU_H
+#define IREE_COMPILER_DIALECTS_IREE_GPU_H
+
+#include "mlir-c/IR.h"
+#include "mlir-c/Support.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum ireeGPUReorderWorkgroupsStrategyEnum {
+  ireeGPUReorderWorkgroupsStrategyEnumNone = 0,
+  ireeGPUReorderWorkgroupsStrategyEnumSwizzle = 1,
+  ireeGPUReorderWorkgroupsStrategyEnumTranspose = 2,
+};
+
+MLIR_CAPI_EXPORTED bool
+ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr(MlirAttribute attr);
+
+MLIR_CAPI_EXPORTED MlirTypeID
+ireeGPUReorderWorkgroupsStrategyAttrGetTypeID(void);
+
+MLIR_CAPI_EXPORTED MlirAttribute ireeGPUReorderWorkgroupsStrategyAttrGet(
+    MlirContext mlirCtx, ireeGPUReorderWorkgroupsStrategyEnum value);
+
+MLIR_CAPI_EXPORTED ireeGPUReorderWorkgroupsStrategyEnum
+ireeGPUReorderWorkgroupsStrategyAttrGetValue(MlirAttribute attr);
+
+MLIR_CAPI_EXPORTED
+bool ireeAttributeIsAGPUPipelineOptionsAttr(MlirAttribute attr);
+
+MLIR_CAPI_EXPORTED MlirAttribute
+ireeGPUPipelineOptionsAttrGet(MlirContext mlirCtx, bool *prefetchSharedMemory,
+                              bool *noReduceSharedMemoryBankConflicts,
+                              MlirAttribute *reorderWorkgroupsStrategy);
+
+MLIR_CAPI_EXPORTED MlirAttribute
+ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory(MlirAttribute attr);
+
+MLIR_CAPI_EXPORTED MlirAttribute
+ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts(
+    MlirAttribute attr);
+
+MLIR_CAPI_EXPORTED MlirAttribute
+ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy(MlirAttribute attr);
+
+MLIR_CAPI_EXPORTED MlirTypeID ireeGPUPipelineOptionsAttrGetTypeID(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // IREE_COMPILER_DIALECTS_IREE_GPU_H
diff --git a/compiler/bindings/python/CMakeLists.txt b/compiler/bindings/python/CMakeLists.txt
index e27e8e5..0a0b364 100644
--- a/compiler/bindings/python/CMakeLists.txt
+++ b/compiler/bindings/python/CMakeLists.txt
@@ -100,6 +100,15 @@
   DIALECT_NAME vm
 )
 
+declare_mlir_dialect_python_bindings(
+  ADD_TO_PARENT IREEPythonSources.Dialects
+  ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/iree/compiler"
+  TD_FILE dialects/IREEGPUBinding.td
+  GEN_ENUM_BINDINGS
+  SOURCES dialects/iree_gpu.py
+  DIALECT_NAME iree_gpu
+)
+
 declare_mlir_python_sources(IREECompilerAPIPythonCore
   ADD_TO_PARENT IREEPythonSources
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/iree/compiler"
@@ -165,6 +174,17 @@
     LLVMSupport
 )
 
+declare_mlir_python_extension(IREECompilerPythonExtensions.CompilerDialects
+  MODULE_NAME _ireeCompilerDialects
+  ADD_TO_PARENT IREECompilerPythonExtensions
+  SOURCES
+    IREECompilerDialectsModule.cpp
+  EMBED_CAPI_LINK_LIBS
+    iree_compiler_API_SharedImpl
+  PRIVATE_LINK_LIBS
+    LLVMSupport
+)
+
 ################################################################################
 # Generate packages and shared library
 ################################################################################
diff --git a/compiler/bindings/python/IREECompilerDialectsModule.cpp b/compiler/bindings/python/IREECompilerDialectsModule.cpp
new file mode 100644
index 0000000..18cca60
--- /dev/null
+++ b/compiler/bindings/python/IREECompilerDialectsModule.cpp
@@ -0,0 +1,123 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/dialects/iree_gpu.h"
+#include "mlir-c/BuiltinAttributes.h"
+#include "mlir-c/IR.h"
+#include "mlir/Bindings/Python/PybindAdaptors.h"
+
+namespace py = pybind11;
+using namespace mlir::python::adaptors;
+
+PYBIND11_MODULE(_ireeCompilerDialects, m) {
+  m.doc() = "iree-compiler dialects python extension";
+
+  auto iree_gpu_module =
+      m.def_submodule("iree_gpu", "iree_gpu dialect bindings");
+
+  //===-------------------------------------------------------------------===//
+  // GPUReorderWorkgroupsStrategyAttr
+  //===-------------------------------------------------------------------===//
+
+  auto strategyEnum =
+      py::enum_<ireeGPUReorderWorkgroupsStrategyEnum>(
+          iree_gpu_module, "ReorderWorkgroupsStrategy", py::module_local())
+          .value("None_", ireeGPUReorderWorkgroupsStrategyEnumNone)
+          .value("Swizzle", ireeGPUReorderWorkgroupsStrategyEnumSwizzle)
+          .value("Transpose", ireeGPUReorderWorkgroupsStrategyEnumTranspose)
+          .def(
+              "__str__",
+              [](ireeGPUReorderWorkgroupsStrategyEnum &self) {
+                switch (self) {
+                case ireeGPUReorderWorkgroupsStrategyEnumNone:
+                  return "None";
+                case ireeGPUReorderWorkgroupsStrategyEnumSwizzle:
+                  return "Swizzle";
+                case ireeGPUReorderWorkgroupsStrategyEnumTranspose:
+                  return "Transpose";
+                default:
+                  llvm::report_fatal_error(
+                      "unknown ReorderWorkgroupsStrategy variant");
+                }
+              },
+              // pybind overloads are tried in the order they were registered.
+              // As a result, enums used the default __str__ method instead of
+              // the custom one. Adding py::prepend() fixes this issue.
+              py::prepend());
+
+  mlir_attribute_subclass(iree_gpu_module, "ReorderWorkgroupsStrategyAttr",
+                          ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr,
+                          ireeGPUReorderWorkgroupsStrategyAttrGetTypeID)
+      .def_classmethod(
+          "get",
+          [](const py::object &, ireeGPUReorderWorkgroupsStrategyEnum value,
+             MlirContext ctx) {
+            return ireeGPUReorderWorkgroupsStrategyAttrGet(ctx, value);
+          },
+          "cls"_a, "value"_a, "ctx"_a = py::none(),
+          "Gets a gpu.reorder_workgroups_strategy from parameters.")
+      .def_property_readonly(
+          "value",
+          [](MlirAttribute self) -> ireeGPUReorderWorkgroupsStrategyEnum {
+            return ireeGPUReorderWorkgroupsStrategyAttrGetValue(self);
+          });
+
+  //===-------------------------------------------------------------------===//
+  // GPUPipelineOptionsAttr
+  //===-------------------------------------------------------------------===//
+
+  mlir_attribute_subclass(iree_gpu_module, "PipelineOptionsAttr",
+                          ireeAttributeIsAGPUPipelineOptionsAttr,
+                          ireeGPUPipelineOptionsAttrGetTypeID)
+      .def_classmethod(
+          "get",
+          [](const py::object &, std::optional<bool> prefetchSharedMemory,
+             std::optional<bool> noReduceSharedMemoryBankConflicts,
+             std::optional<MlirAttribute> reorderWorkgroupsStrategy,
+             MlirContext ctx) {
+            return ireeGPUPipelineOptionsAttrGet(
+                ctx,
+                prefetchSharedMemory.has_value() ? &*prefetchSharedMemory
+                                                 : nullptr,
+                noReduceSharedMemoryBankConflicts.has_value()
+                    ? &*noReduceSharedMemoryBankConflicts
+                    : nullptr,
+                reorderWorkgroupsStrategy.has_value()
+                    ? &*reorderWorkgroupsStrategy
+                    : nullptr);
+          },
+          "cls"_a, "prefetch_shared_memory"_a = py::none(),
+          "no_reduce_shared_memory_bank_conflicts"_a = py::none(),
+          "reorder_workgroups_strategy"_a = py::none(), py::kw_only(),
+          "ctx"_a = py::none(), "Gets a gpu.pipeline_options from parameters.")
+      .def_property_readonly(
+          "prefetch_shared_memory",
+          [](MlirAttribute self) -> std::optional<bool> {
+            auto attr = ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory(self);
+            if (!mlirAttributeIsNull(attr))
+              return mlirBoolAttrGetValue(attr);
+            return std::nullopt;
+          })
+      .def_property_readonly(
+          "no_reduce_shared_memory_bank_conflicts",
+          [](MlirAttribute self) -> std::optional<bool> {
+            auto attr =
+                ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts(
+                    self);
+            if (!mlirAttributeIsNull(attr))
+              return mlirBoolAttrGetValue(attr);
+            return std::nullopt;
+          })
+      .def_property_readonly(
+          "reorder_workgroups_strategy",
+          [](MlirAttribute self) -> std::optional<MlirAttribute> {
+            auto attr =
+                ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy(self);
+            if (!mlirAttributeIsNull(attr))
+              return attr;
+            return std::nullopt;
+          });
+}
diff --git a/compiler/bindings/python/iree/compiler/dialects/IREEGPUBinding.td b/compiler/bindings/python/iree/compiler/dialects/IREEGPUBinding.td
new file mode 100644
index 0000000..7674967
--- /dev/null
+++ b/compiler/bindings/python/iree/compiler/dialects/IREEGPUBinding.td
@@ -0,0 +1,12 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef PYTHON_BINDINGS_IREEGPU_OPS
+#define PYTHON_BINDINGS_IREEGPU_OPS
+
+include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUOps.td"
+
+#endif // PYTHON_BINDINGS_IREEGPU_OPS
diff --git a/compiler/bindings/python/iree/compiler/dialects/iree_gpu.py b/compiler/bindings/python/iree/compiler/dialects/iree_gpu.py
new file mode 100644
index 0000000..9058ec3
--- /dev/null
+++ b/compiler/bindings/python/iree/compiler/dialects/iree_gpu.py
@@ -0,0 +1,9 @@
+# Copyright 2024 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from ._iree_gpu_ops_gen import *
+from ._iree_gpu_enum_gen import *
+from .._mlir_libs._ireeCompilerDialects.iree_gpu import *
diff --git a/compiler/bindings/python/test/ir/dialects_test.py b/compiler/bindings/python/test/ir/dialects_test.py
index 63a9603..cabd13c 100644
--- a/compiler/bindings/python/test/ir/dialects_test.py
+++ b/compiler/bindings/python/test/ir/dialects_test.py
@@ -7,10 +7,69 @@
 from iree.compiler import ir
 
 # Make sure that our dialects import.
-from iree.compiler.dialects import (
-    flow,
-    hal,
-    stream,
-    vm,
-    util,
-)
+from iree.compiler.dialects import flow, hal, stream, vm, util, iree_gpu
+
+
+@lambda _: _()
+def gpu_pipeline_options_attr():
+    with ir.Context() as ctx, ir.Location.unknown():
+        module = ir.Module.create()
+        with ir.InsertionPoint(module.body):
+            reorder_attr = iree_gpu.ReorderWorkgroupsStrategyAttr.get(
+                iree_gpu.ReorderWorkgroupsStrategy.Swizzle, ctx
+            )
+            gpu_attr = iree_gpu.PipelineOptionsAttr.get(
+                True,
+                False,
+                reorder_attr,
+            )
+            assert type(gpu_attr) is iree_gpu.PipelineOptionsAttr
+            assert gpu_attr.prefetch_shared_memory
+            assert not gpu_attr.no_reduce_shared_memory_bank_conflicts
+
+            gpu_attr = iree_gpu.PipelineOptionsAttr.get(
+                False,
+                True,
+                iree_gpu.ReorderWorkgroupsStrategyAttr.get(
+                    iree_gpu.ReorderWorkgroupsStrategy.Transpose, ctx
+                ),
+            )
+            assert not gpu_attr.prefetch_shared_memory
+            assert gpu_attr.no_reduce_shared_memory_bank_conflicts
+
+            gpu_attr = iree_gpu.PipelineOptionsAttr.get()
+            assert (
+                gpu_attr.prefetch_shared_memory is None
+                and gpu_attr.no_reduce_shared_memory_bank_conflicts is None
+                and gpu_attr.reorder_workgroups_strategy is None
+            )
+
+            gpu_attr = iree_gpu.PipelineOptionsAttr.get(True)
+            assert gpu_attr.prefetch_shared_memory
+            assert (
+                gpu_attr.no_reduce_shared_memory_bank_conflicts is None
+                and gpu_attr.reorder_workgroups_strategy is None
+            )
+
+            gpu_attr = iree_gpu.PipelineOptionsAttr.get(True, False)
+            assert gpu_attr.reorder_workgroups_strategy is None
+
+            gpu_attr = iree_gpu.PipelineOptionsAttr.get(
+                no_reduce_shared_memory_bank_conflicts=False
+            )
+            assert (
+                gpu_attr.no_reduce_shared_memory_bank_conflicts is not None
+                and not gpu_attr.no_reduce_shared_memory_bank_conflicts
+            )
+            assert gpu_attr.prefetch_shared_memory is None
+            assert gpu_attr.reorder_workgroups_strategy is None
+
+            gpu_attr = iree_gpu.PipelineOptionsAttr.get(
+                reorder_workgroups_strategy=reorder_attr
+            )
+            assert gpu_attr.reorder_workgroups_strategy is not None
+            assert (
+                gpu_attr.reorder_workgroups_strategy.value
+                # unfortunately not `is`
+                == iree_gpu.ReorderWorkgroupsStrategy.Swizzle
+            )
diff --git a/compiler/src/iree/compiler/API/CMakeLists.txt b/compiler/src/iree/compiler/API/CMakeLists.txt
index beada0d..00a7e39 100644
--- a/compiler/src/iree/compiler/API/CMakeLists.txt
+++ b/compiler/src/iree/compiler/API/CMakeLists.txt
@@ -78,6 +78,7 @@
   obj.MLIRCAPITransformDialectTransforms
   iree_compiler_API_Internal_CompilerDriver.objects
   iree_compiler_API_Internal_IREECompileToolEntryPoint.objects
+  iree_compiler_API_Internal_IREEGPUDialectCAPI.objects
   iree_compiler_API_Internal_IREEMLIRLSPServerToolEntryPoint.objects
   iree_compiler_API_Internal_IREEOptToolEntryPoint.objects
   iree_compiler_API_Internal_IREEReduceToolEntryPoint.objects
diff --git a/compiler/src/iree/compiler/API/Internal/BUILD.bazel b/compiler/src/iree/compiler/API/Internal/BUILD.bazel
index 4f98de9..c8ac455 100644
--- a/compiler/src/iree/compiler/API/Internal/BUILD.bazel
+++ b/compiler/src/iree/compiler/API/Internal/BUILD.bazel
@@ -125,3 +125,16 @@
         "@llvm-project//mlir:Support",
     ],
 )
+
+iree_compiler_cc_library(
+    name = "IREEGPUDialectCAPI",
+    srcs = [
+        "IREEGPUDialectCAPI.cpp",
+    ],
+    deps = [
+        "//compiler/bindings/c:headers",
+        "//compiler/src/iree/compiler/Codegen/Dialect/GPU/IR:IREEGPUDialect",
+        "@llvm-project//mlir:CAPIIR",
+        "@llvm-project//mlir:CAPIIRHeaders",
+    ],
+)
diff --git a/compiler/src/iree/compiler/API/Internal/CMakeLists.txt b/compiler/src/iree/compiler/API/Internal/CMakeLists.txt
index c25dcb3..61631e1 100644
--- a/compiler/src/iree/compiler/API/Internal/CMakeLists.txt
+++ b/compiler/src/iree/compiler/API/Internal/CMakeLists.txt
@@ -103,6 +103,19 @@
   PUBLIC
 )
 
+iree_cc_library(
+  NAME
+    IREEGPUDialectCAPI
+  SRCS
+    "IREEGPUDialectCAPI.cpp"
+  DEPS
+    IREELLVMIncludeSetup
+    MLIRCAPIIR
+    iree::compiler::Codegen::Dialect::GPU::IR::IREEGPUDialect
+    iree::compiler::bindings::c::headers
+  PUBLIC
+)
+
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
 
 set(_lld_copts)
diff --git a/compiler/src/iree/compiler/API/Internal/IREEGPUDialectCAPI.cpp b/compiler/src/iree/compiler/API/Internal/IREEGPUDialectCAPI.cpp
new file mode 100644
index 0000000..9b4639b
--- /dev/null
+++ b/compiler/src/iree/compiler/API/Internal/IREEGPUDialectCAPI.cpp
@@ -0,0 +1,120 @@
+// Copyright 2024 The IREE Authors

+//

+// Licensed under the Apache License v2.0 with LLVM Exceptions.

+// See https://llvm.org/LICENSE.txt for license information.

+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

+

+#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h"

+#include "iree/compiler/dialects/iree_gpu.h"

+#include "mlir-c/IR.h"

+#include "mlir/CAPI/IR.h"

+#include "mlir/CAPI/Support.h"

+

+bool ireeAttributeIsAGPUPipelineOptionsAttr(MlirAttribute attr) {

+  return llvm::isa<mlir::iree_compiler::IREE::GPU::GPUPipelineOptionsAttr>(

+      unwrap(attr));

+}

+

+MlirAttribute

+ireeGPUPipelineOptionsAttrGet(MlirContext mlirCtx, bool *prefetchSharedMemory,

+                              bool *noReduceSharedMemoryBankConflicts,

+                              MlirAttribute *reorderWorkgroupsStrategy) {

+  mlir::MLIRContext *ctx = unwrap(mlirCtx);

+  mlir::Builder b(ctx);

+  auto prefetchSharedMemoryAttr = mlir::BoolAttr();

+  if (prefetchSharedMemory) {

+    prefetchSharedMemoryAttr = b.getBoolAttr(*prefetchSharedMemory);

+  }

+  auto noReduceSharedMemoryBankConflictsAttr = mlir::BoolAttr();

+  if (noReduceSharedMemoryBankConflicts) {

+    noReduceSharedMemoryBankConflictsAttr =

+        b.getBoolAttr(*noReduceSharedMemoryBankConflicts);

+  }

+  auto strategyAttr =

+      mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategyAttr();

+  if (reorderWorkgroupsStrategy) {

+    strategyAttr = llvm::dyn_cast<

+        mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategyAttr>(

+        unwrap(*reorderWorkgroupsStrategy));

+  }

+  return wrap(mlir::iree_compiler::IREE::GPU::GPUPipelineOptionsAttr::get(

+      ctx, prefetchSharedMemoryAttr, noReduceSharedMemoryBankConflictsAttr,

+      strategyAttr));

+}

+

+MlirAttribute

+ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory(MlirAttribute attr) {

+  auto gpuAttr =

+      llvm::cast<mlir::iree_compiler::IREE::GPU::GPUPipelineOptionsAttr>(

+          unwrap(attr));

+  return wrap(gpuAttr.getPrefetchSharedMemory());

+}

+

+MlirAttribute ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts(

+    MlirAttribute attr) {

+  auto gpuAttr =

+      llvm::cast<mlir::iree_compiler::IREE::GPU::GPUPipelineOptionsAttr>(

+          unwrap(attr));

+  return wrap(gpuAttr.getNoReduceSharedMemoryBankConflicts());

+}

+

+MlirAttribute

+ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy(MlirAttribute attr) {

+  auto gpuAttr =

+      llvm::cast<mlir::iree_compiler::IREE::GPU::GPUPipelineOptionsAttr>(

+          unwrap(attr));

+  return wrap(gpuAttr.getReorderWorkgroupsStrategy());

+}

+

+MlirTypeID ireeGPUPipelineOptionsAttrGetTypeID() {

+  return wrap(

+      mlir::iree_compiler::IREE::GPU::GPUPipelineOptionsAttr::getTypeID());

+}

+

+static_assert(

+    static_cast<uint32_t>(ireeGPUReorderWorkgroupsStrategyEnumNone) ==

+            static_cast<uint32_t>(mlir::iree_compiler::IREE::GPU::

+                                      ReorderWorkgroupsStrategy::None) &&

+        static_cast<uint32_t>(ireeGPUReorderWorkgroupsStrategyEnumSwizzle) ==

+            static_cast<uint32_t>(mlir::iree_compiler::IREE::GPU::

+                                      ReorderWorkgroupsStrategy::Swizzle) &&

+        static_cast<uint32_t>(ireeGPUReorderWorkgroupsStrategyEnumTranspose) ==

+            static_cast<uint32_t>(mlir::iree_compiler::IREE::GPU::

+                                      ReorderWorkgroupsStrategy::Transpose) &&

+        static_cast<uint32_t>(ireeGPUReorderWorkgroupsStrategyEnumTranspose) ==

+            mlir::iree_compiler::IREE::GPU::

+                getMaxEnumValForReorderWorkgroupsStrategy(),

+    "ireeGPUReorderWorkgroupsStrategyEnum and "

+    "mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategy definitions "

+    "have diverged");

+

+bool ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr(MlirAttribute attr) {

+  return llvm::isa<

+      mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategyAttr>(

+      unwrap(attr));

+}

+

+MlirTypeID ireeGPUReorderWorkgroupsStrategyAttrGetTypeID() {

+  return wrap(mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategyAttr::

+                  getTypeID());

+}

+

+MlirAttribute ireeGPUReorderWorkgroupsStrategyAttrGet(

+    MlirContext mlirCtx, ireeGPUReorderWorkgroupsStrategyEnum value) {

+  mlir::MLIRContext *ctx = unwrap(mlirCtx);

+  return wrap(

+      mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategyAttr::get(

+          ctx, static_cast<

+                   mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategy>(

+                   value)));

+}

+

+ireeGPUReorderWorkgroupsStrategyEnum

+ireeGPUReorderWorkgroupsStrategyAttrGetValue(MlirAttribute attr) {

+  assert(ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr(attr) &&

+         "attr is not a GPUReorderWorkgroupsStrategyAttr");

+  return static_cast<ireeGPUReorderWorkgroupsStrategyEnum>(

+      llvm::cast<mlir::iree_compiler::IREE::GPU::ReorderWorkgroupsStrategyAttr>(

+          unwrap(attr))

+          .getValue());

+}

diff --git a/compiler/src/iree/compiler/API/api_exports.c b/compiler/src/iree/compiler/API/api_exports.c
index 7f08a29..b39373f 100644
--- a/compiler/src/iree/compiler/API/api_exports.c
+++ b/compiler/src/iree/compiler/API/api_exports.c
@@ -6,8 +6,12 @@
 
 // Generated by generate_exports.py: Do not edit.
 
+// clang-format off
+
 #include <stdint.h>
 
+extern void ireeAttributeIsAGPUPipelineOptionsAttr();
+extern void ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr();
 extern void ireeCompilerEnumeratePlugins();
 extern void ireeCompilerEnumerateRegisteredHALTargetBackends();
 extern void ireeCompilerErrorDestroy();
@@ -59,6 +63,14 @@
 extern void ireeCompilerSourceOpenFile();
 extern void ireeCompilerSourceSplit();
 extern void ireeCompilerSourceWrapBuffer();
+extern void ireeGPUPipelineOptionsAttrGet();
+extern void ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts();
+extern void ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory();
+extern void ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy();
+extern void ireeGPUPipelineOptionsAttrGetTypeID();
+extern void ireeGPUReorderWorkgroupsStrategyAttrGet();
+extern void ireeGPUReorderWorkgroupsStrategyAttrGetTypeID();
+extern void ireeGPUReorderWorkgroupsStrategyAttrGetValue();
 extern void ireeMlirLspServerRunMain();
 extern void ireeOptRunMain();
 extern void ireeReduceRunMain();
@@ -835,6 +847,8 @@
 
 uintptr_t __iree_compiler_hidden_force_extern() {
   uintptr_t x = 0;
+  x += (uintptr_t)&ireeAttributeIsAGPUPipelineOptionsAttr;
+  x += (uintptr_t)&ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr;
   x += (uintptr_t)&ireeCompilerEnumeratePlugins;
   x += (uintptr_t)&ireeCompilerEnumerateRegisteredHALTargetBackends;
   x += (uintptr_t)&ireeCompilerErrorDestroy;
@@ -886,6 +900,14 @@
   x += (uintptr_t)&ireeCompilerSourceOpenFile;
   x += (uintptr_t)&ireeCompilerSourceSplit;
   x += (uintptr_t)&ireeCompilerSourceWrapBuffer;
+  x += (uintptr_t)&ireeGPUPipelineOptionsAttrGet;
+  x += (uintptr_t)&ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts;
+  x += (uintptr_t)&ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory;
+  x += (uintptr_t)&ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy;
+  x += (uintptr_t)&ireeGPUPipelineOptionsAttrGetTypeID;
+  x += (uintptr_t)&ireeGPUReorderWorkgroupsStrategyAttrGet;
+  x += (uintptr_t)&ireeGPUReorderWorkgroupsStrategyAttrGetTypeID;
+  x += (uintptr_t)&ireeGPUReorderWorkgroupsStrategyAttrGetValue;
   x += (uintptr_t)&ireeMlirLspServerRunMain;
   x += (uintptr_t)&ireeOptRunMain;
   x += (uintptr_t)&ireeReduceRunMain;
@@ -1661,3 +1683,5 @@
   x += (uintptr_t)&mlirVectorTypeIsScalable;
   return x;
 }
+
+// clang-format off
diff --git a/compiler/src/iree/compiler/API/api_exports.def b/compiler/src/iree/compiler/API/api_exports.def
index dd40de2..a6cc72a 100644
--- a/compiler/src/iree/compiler/API/api_exports.def
+++ b/compiler/src/iree/compiler/API/api_exports.def
@@ -1,5 +1,7 @@
 ; Generated by generate_exports.py: Do not edit.
 EXPORTS
+  ireeAttributeIsAGPUPipelineOptionsAttr
+  ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr
   ireeCompilerEnumeratePlugins
   ireeCompilerEnumerateRegisteredHALTargetBackends
   ireeCompilerErrorDestroy
@@ -51,6 +53,14 @@
   ireeCompilerSourceOpenFile
   ireeCompilerSourceSplit
   ireeCompilerSourceWrapBuffer
+  ireeGPUPipelineOptionsAttrGet
+  ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts
+  ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory
+  ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy
+  ireeGPUPipelineOptionsAttrGetTypeID
+  ireeGPUReorderWorkgroupsStrategyAttrGet
+  ireeGPUReorderWorkgroupsStrategyAttrGetTypeID
+  ireeGPUReorderWorkgroupsStrategyAttrGetValue
   ireeMlirLspServerRunMain
   ireeOptRunMain
   ireeReduceRunMain
diff --git a/compiler/src/iree/compiler/API/api_exports.ld b/compiler/src/iree/compiler/API/api_exports.ld
index 7589a8a..ff43d9a 100644
--- a/compiler/src/iree/compiler/API/api_exports.ld
+++ b/compiler/src/iree/compiler/API/api_exports.ld
@@ -1,6 +1,8 @@
 # Generated by generate_exports.py: Do not edit.
 VER_0 {
   global:
+    ireeAttributeIsAGPUPipelineOptionsAttr;
+    ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr;
     ireeCompilerEnumeratePlugins;
     ireeCompilerEnumerateRegisteredHALTargetBackends;
     ireeCompilerErrorDestroy;
@@ -52,6 +54,14 @@
     ireeCompilerSourceOpenFile;
     ireeCompilerSourceSplit;
     ireeCompilerSourceWrapBuffer;
+    ireeGPUPipelineOptionsAttrGet;
+    ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts;
+    ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory;
+    ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy;
+    ireeGPUPipelineOptionsAttrGetTypeID;
+    ireeGPUReorderWorkgroupsStrategyAttrGet;
+    ireeGPUReorderWorkgroupsStrategyAttrGetTypeID;
+    ireeGPUReorderWorkgroupsStrategyAttrGetValue;
     ireeMlirLspServerRunMain;
     ireeOptRunMain;
     ireeReduceRunMain;
diff --git a/compiler/src/iree/compiler/API/api_exports.macos.lst b/compiler/src/iree/compiler/API/api_exports.macos.lst
index 6e9c690..26289a3 100644
--- a/compiler/src/iree/compiler/API/api_exports.macos.lst
+++ b/compiler/src/iree/compiler/API/api_exports.macos.lst
@@ -1,4 +1,6 @@
 # Generated by generate_exports.py: Do not edit.
+_ireeAttributeIsAGPUPipelineOptionsAttr
+_ireeAttributeIsAGPUReorderWorkgroupsStrategyAttr
 _ireeCompilerEnumeratePlugins
 _ireeCompilerEnumerateRegisteredHALTargetBackends
 _ireeCompilerErrorDestroy
@@ -50,6 +52,14 @@
 _ireeCompilerSourceOpenFile
 _ireeCompilerSourceSplit
 _ireeCompilerSourceWrapBuffer
+_ireeGPUPipelineOptionsAttrGet
+_ireeGPUPipelineOptionsAttrGetNoReduceSharedMemoryBankConflicts
+_ireeGPUPipelineOptionsAttrGetPrefetchSharedMemory
+_ireeGPUPipelineOptionsAttrGetReorderWorkgroupsStrategy
+_ireeGPUPipelineOptionsAttrGetTypeID
+_ireeGPUReorderWorkgroupsStrategyAttrGet
+_ireeGPUReorderWorkgroupsStrategyAttrGetTypeID
+_ireeGPUReorderWorkgroupsStrategyAttrGetValue
 _ireeMlirLspServerRunMain
 _ireeOptRunMain
 _ireeReduceRunMain
diff --git a/compiler/src/iree/compiler/API/generate_exports.py b/compiler/src/iree/compiler/API/generate_exports.py
index d7d2cd5..9b2c767 100755
--- a/compiler/src/iree/compiler/API/generate_exports.py
+++ b/compiler/src/iree/compiler/API/generate_exports.py
@@ -71,6 +71,10 @@
     "Dialects.h",
 ]
 
+IREE_COMPILER_DIALECTS_HEADER_FILES = [
+    "iree_gpu.h",
+]
+
 EXPLICIT_EXPORTS = [
     # MLIR registration functions that are part of generated code.
     "mlirRegisterGPUPasses",
@@ -107,6 +111,14 @@
             )
         )
 
+    # Collect symbols from iree compiler dialect header files.
+    for local_name in IREE_COMPILER_DIALECTS_HEADER_FILES:
+        export_symbols.extend(
+            collect_header_exports(
+                repo_root / "compiler/bindings/c/iree/compiler/dialects" / local_name
+            )
+        )
+
     # Collect symbols from mlir-c header files.
     mlir_c_dir = repo_root / "third_party/llvm-project/mlir/include/mlir-c"
     for local_name in MLIR_C_HEADER_FILES:
@@ -179,6 +191,8 @@
         f.write("\n")
         f.write("// Generated by generate_exports.py: Do not edit.\n")
         f.write("\n")
+        f.write("// clang-format off\n")
+        f.write("\n")
         f.write("#include <stdint.h>\n")
         f.write("\n")
         for symbol in symbols:
@@ -190,6 +204,8 @@
             f.write(f"  x += (uintptr_t)&{symbol};\n")
         f.write("  return x;\n")
         f.write("}\n")
+        f.write("\n")
+        f.write("// clang-format off\n")
 
 
 if __name__ == "__main__":
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/BUILD.bazel
index 2e9d283..a28d668 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/BUILD.bazel
@@ -82,6 +82,7 @@
         ":IREEGPUEnums",
         ":IREEGPUInterfaces",
         ":IREEGPUOpsGen",
+        "//compiler/bindings/c:headers",
         "//compiler/src/iree/compiler/Codegen/Common:TileSwizzle",
         "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
         "//compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR:IREEVectorExtDialect",
@@ -92,6 +93,7 @@
         "@llvm-project//mlir:AffineDialect",
         "@llvm-project//mlir:ArithDialect",
         "@llvm-project//mlir:ArithUtils",
+        "@llvm-project//mlir:CAPIIR",
         "@llvm-project//mlir:ControlFlowInterfaces",
         "@llvm-project//mlir:DialectUtils",
         "@llvm-project//mlir:IR",
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/CMakeLists.txt
index 4c160b4..907f34b 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/CMakeLists.txt
@@ -51,6 +51,7 @@
     MLIRAffineDialect
     MLIRArithDialect
     MLIRArithUtils
+    MLIRCAPIIR
     MLIRControlFlowInterfaces
     MLIRIR
     MLIRLinalgDialect
@@ -67,6 +68,7 @@
     iree::compiler::Codegen::Dialect::VectorExt::IR::IREEVectorExtDialect
     iree::compiler::Codegen::Utils::VectorOpUtils
     iree::compiler::Dialect::LinalgExt::IR
+    iree::compiler::bindings::c::headers
   PUBLIC
 )
 
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp
index 471ba8b..45b3f4b 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.cpp
@@ -20,6 +20,9 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "mlir-c/IR.h"
+#include "mlir/CAPI/IR.h"
+#include "mlir/CAPI/Support.h"
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
@@ -33,7 +36,6 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/OpDefinition.h"
-#include "mlir/IR/TypeUtilities.h"
 
 #define DEBUG_TYPE "iree-gpu-attrs"
 #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
index 1381289..1f2cad7 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
@@ -481,8 +481,8 @@
 //===----------------------------------------------------------------------===//
 
 def IREEGPU_ReorderWorkgroupsStrategyAttr :
-    EnumAttr<IREEGPU_Dialect, IREEGPU_ReorderWorkgroupsStrategy, ""> {
-  let assemblyFormat = "``$value";
+    EnumAttr<IREEGPU_Dialect, IREEGPU_ReorderWorkgroupsStrategy, "reorder_workgroups_strategy"> {
+  let assemblyFormat = "`<` $value `>`";
   let cppNamespace = "::mlir::iree_compiler::IREE::GPU";
 }
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir
index 44eeb64..c439e04 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir
@@ -83,14 +83,14 @@
 
 // -----
 
-// Check that applying the `reorder_workgroups_strategy = Transpose` pipeline option attribute enables workgroup reordering.
+// Check that applying the `reorder_workgroups_strategy = <Transpose>` pipeline option attribute enables workgroup reordering.
 
 // OPT-OUT:       #[[$TRANSLATION:.+]] = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64
-// OPT-OUT-SAME:    gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>
+// OPT-OUT-SAME:    gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = <Transpose>>
 // OPT-OUT-SAME:    mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
 
 // OPT-IN:       #[[$TRANSLATION:.+]] = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64
-// OPT-IN-SAME:    gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>
+// OPT-IN-SAME:    gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = <Transpose>>
 // OPT-IN-SAME:    mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
 #config = #iree_gpu.lowering_config<{workgroup = [128, 128, 0], reduction = [0, 0, 32]}>
 #pipeline_layout = #hal.pipeline.layout<bindings = [
@@ -126,7 +126,7 @@
       func.func @main_0_dispatch_0_matmul_transpose_b_2048x10240x1280_f16xf16xf32()
         attributes {translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64, {
           mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, subgroup_m_count = 2, subgroup_n_count = 2>,
-          gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = Transpose>  // enable the 'reorderWorkgroups' pass.
+          gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = <Transpose>>  // enable the 'reorderWorkgroups' pass.
         }>} {
         %cst = arith.constant 0.000000e+00 : f16
         %c0 = arith.constant 0 : index
@@ -159,10 +159,10 @@
 }
 
 // -----
-// Check that applying the `reorder_workgroups_strategy = None` pipeline option disables workgroup reordering.
+// Check that applying the `reorder_workgroups_strategy = <None>` pipeline option disables workgroup reordering.
 
 // OPT-OUT:       #[[$TRANSLATION:.+]] = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64
-// OPT-OUT-SAME:    gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = None>
+// OPT-OUT-SAME:    gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = <None>>
 // OPT-OUT-SAME:    mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
 #config = #iree_gpu.lowering_config<{workgroup = [128, 128, 0], reduction = [0, 0, 32]}>
 #pipeline_layout = #hal.pipeline.layout<bindings = [
@@ -187,7 +187,7 @@
       func.func @main_0_dispatch_0_matmul_transpose_b_2048x10240x1280_f16xf16xf32()
         attributes {translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64, {
           mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, subgroup_m_count = 2, subgroup_n_count = 2>,
-          gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = None>  // Disable the 'reorderWorkgroups' pass.
+          gpu_pipeline_options = #iree_gpu.pipeline_options<reorder_workgroups_strategy = <None>>  // Disable the 'reorderWorkgroups' pass.
         }>} {
         %cst = arith.constant 0.000000e+00 : f16
         %c0 = arith.constant 0 : index