Switch compiler to LLVM/MLIR formatting style (#14181)

The high-level goal is to better align with the LLVM/MLIR coding style
and allow for easier prototyping and code movement between the two. The
`runtime/` and `tools/` directories still use the `Google` style, as the
LLVM config is not always preferred for C code, and we do not expect
much sharing/movement of this code with LLVM.

This adds a new `.clang-format` file local to the `compiler/` directory
with the same formatting settings as those used by MLIR. The only
divergence is not having a special case for include ordering that places
LLVM includes last. It also reformats this directory using the new
formatting config.

I used the following command to reformat the code:
```shell
  cd compiler
  fd -e h -e cpp -e cc -e c | xargs clang-format -i
```

I plan to add this commit to `.git-blame-ignore-revs` in a follow-up PR,
once the final commit SHA is known.

Issue: https://github.com/openxla/iree/issues/12866
diff --git a/.clang-format b/.clang-format
index 0c40907..8514ff8 100644
--- a/.clang-format
+++ b/.clang-format
@@ -4,6 +4,6 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# IREE's runtime follows Google style while its compiler follows mostly LLVM
-# style (for naming/etc.) but using Google formatting.
+# IREE's runtime follows Google style while its compiler follows the
+# LLVM/MLIR variable naming and formatting style.
 BasedOnStyle: Google
diff --git a/compiler/.clang-format b/compiler/.clang-format
new file mode 100644
index 0000000..f50fe3d
--- /dev/null
+++ b/compiler/.clang-format
@@ -0,0 +1,25 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# IREE's runtime follows Google style while its compiler follows the
+# LLVM/MLIR variable naming and formatting style. The only difference
+# compared to the MLIR style in the compiler code is more general include
+# ordering.
+BasedOnStyle: LLVM
+AlwaysBreakTemplateDeclarations: Yes
+IncludeCategories:
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '^<.*'
+    Priority:        2
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '.*'
+    Priority:        3
+    SortPriority:    0
+    CaseSensitive:   false
diff --git a/compiler/bindings/c/iree/compiler/api_support.h b/compiler/bindings/c/iree/compiler/api_support.h
index c504831..d5b4081 100644
--- a/compiler/bindings/c/iree/compiler/api_support.h
+++ b/compiler/bindings/c/iree/compiler/api_support.h
@@ -22,4 +22,4 @@
 #define IREE_EMBED_EXPORTED __attribute__((visibility("default")))
 #endif
 
-#endif  // IREE_COMPILER_API_SUPPORT_H
+#endif // IREE_COMPILER_API_SUPPORT_H
diff --git a/compiler/bindings/c/iree/compiler/embedding_api.h b/compiler/bindings/c/iree/compiler/embedding_api.h
index 62df77b..8fd1f91 100644
--- a/compiler/bindings/c/iree/compiler/embedding_api.h
+++ b/compiler/bindings/c/iree/compiler/embedding_api.h
@@ -49,8 +49,8 @@
 
 // Gets the message associated with the error as a C-string. The string will be
 // valid until the error is destroyed.
-IREE_EMBED_EXPORTED const char *ireeCompilerErrorGetMessage(
-    iree_compiler_error_t *error);
+IREE_EMBED_EXPORTED const char *
+ireeCompilerErrorGetMessage(iree_compiler_error_t *error);
 
 //===----------------------------------------------------------------------===//
 // Global initialization.
@@ -141,13 +141,14 @@
 IREE_EMBED_EXPORTED iree_compiler_session_t *ireeCompilerSessionCreate();
 
 // Destroys a session.
-IREE_EMBED_EXPORTED void ireeCompilerSessionDestroy(
-    iree_compiler_session_t *session);
+IREE_EMBED_EXPORTED void
+ireeCompilerSessionDestroy(iree_compiler_session_t *session);
 
 // Sets session-local flags. These are a subset of flags supported by CLI
 // tools and are privately scoped.
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerSessionSetFlags(
-    iree_compiler_session_t *session, int argc, const char *const *argv);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerSessionSetFlags(iree_compiler_session_t *session, int argc,
+                            const char *const *argv);
 
 // Gets textual flags actually in effect from any source. Optionally, only
 // calls back for non-default valued flags.
@@ -168,8 +169,8 @@
   IREE_COMPILER_DIAGNOSTIC_SEVERITY_REMARK = 3,
 };
 
-IREE_EMBED_EXPORTED iree_compiler_invocation_t *ireeCompilerInvocationCreate(
-    iree_compiler_session_t *session);
+IREE_EMBED_EXPORTED iree_compiler_invocation_t *
+ireeCompilerInvocationCreate(iree_compiler_session_t *session);
 
 // Enables a callback to receive diagnostics. This is targeted at API use of
 // the compiler, allowing fine grained collection of formatted diagnostic
@@ -194,12 +195,12 @@
 // Enables default, pretty-printed diagnostics to the console. This is usually
 // the right thing to do for command-line tools, but other mechanisms are
 // preferred for library use.
-IREE_EMBED_EXPORTED void ireeCompilerInvocationEnableConsoleDiagnostics(
-    iree_compiler_invocation_t *inv);
+IREE_EMBED_EXPORTED void
+ireeCompilerInvocationEnableConsoleDiagnostics(iree_compiler_invocation_t *inv);
 
 // Destroys a run.
-IREE_EMBED_EXPORTED void ireeCompilerInvocationDestroy(
-    iree_compiler_invocation_t *inv);
+IREE_EMBED_EXPORTED void
+ireeCompilerInvocationDestroy(iree_compiler_invocation_t *inv);
 
 // Sets a crash handler on the invocation. In the event of a crash, the callback
 // will be invoked to create an output which will receive the crash dump.
@@ -216,18 +217,20 @@
 // Parses a source into this instance in preparation for performing a
 // compilation action.
 // Returns false and emits diagnostics on failure.
-IREE_EMBED_EXPORTED bool ireeCompilerInvocationParseSource(
-    iree_compiler_invocation_t *inv, iree_compiler_source_t *source);
+IREE_EMBED_EXPORTED bool
+ireeCompilerInvocationParseSource(iree_compiler_invocation_t *inv,
+                                  iree_compiler_source_t *source);
 
 // Sets a mnemonic phase name to run compilation to. Default is "end".
 // The meaning of this is pipeline specific. See IREEVMPipelinePhase
 // for the standard pipeline.
-IREE_EMBED_EXPORTED void ireeCompilerInvocationSetCompileToPhase(
-    iree_compiler_invocation_t *inv, const char *phase);
+IREE_EMBED_EXPORTED void
+ireeCompilerInvocationSetCompileToPhase(iree_compiler_invocation_t *inv,
+                                        const char *phase);
 
 // Enables/disables verification of IR after each pass. Defaults to enabled.
-IREE_EMBED_EXPORTED void ireeCompilerInvocationSetVerifyIR(
-    iree_compiler_invocation_t *inv, bool enable);
+IREE_EMBED_EXPORTED void
+ireeCompilerInvocationSetVerifyIR(iree_compiler_invocation_t *inv, bool enable);
 
 // Runs a compilation pipeline.
 // Returns false and emits diagnostics on failure.
@@ -235,12 +238,14 @@
   IREE_COMPILER_PIPELINE_STD = 0,
   IREE_COMPILER_PIPELINE_HAL_EXECUTABLE = 1,
 };
-IREE_EMBED_EXPORTED bool ireeCompilerInvocationPipeline(
-    iree_compiler_invocation_t *inv, enum iree_compiler_pipeline_t pipeline);
+IREE_EMBED_EXPORTED bool
+ireeCompilerInvocationPipeline(iree_compiler_invocation_t *inv,
+                               enum iree_compiler_pipeline_t pipeline);
 
 // Outputs the current compiler state as textual IR to the output.
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerInvocationOutputIR(
-    iree_compiler_invocation_t *inv, iree_compiler_output_t *output);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerInvocationOutputIR(iree_compiler_invocation_t *inv,
+                               iree_compiler_output_t *output);
 
 // Assuming that the compiler has produced VM IR, converts it to bytecode
 // and outputs it. This is a valid next step after running the
@@ -275,25 +280,27 @@
 //===----------------------------------------------------------------------===//
 
 // Destroy source instances.
-IREE_EMBED_EXPORTED void ireeCompilerSourceDestroy(
-    iree_compiler_source_t *source);
+IREE_EMBED_EXPORTED void
+ireeCompilerSourceDestroy(iree_compiler_source_t *source);
 
 // Opens the source from a file. This is used for normal text assembly file
 // sources.
 // Must be destroyed with ireeCompilerSourceDestroy().
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerSourceOpenFile(
-    iree_compiler_session_t *session, const char *filePath,
-    iree_compiler_source_t **out_source);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerSourceOpenFile(iree_compiler_session_t *session,
+                           const char *filePath,
+                           iree_compiler_source_t **out_source);
 
 // Wraps an existing buffer in memory.
 // If |isNullTerminated| is true, then the null must be accounted for in the
 // length. This is required for text buffers and it is permitted for binary
 // buffers.
 // Must be destroyed with ireeCompilerSourceDestroy().
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerSourceWrapBuffer(
-    iree_compiler_session_t *session, const char *bufferName,
-    const char *buffer, size_t length, bool isNullTerminated,
-    iree_compiler_source_t **out_source);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerSourceWrapBuffer(iree_compiler_session_t *session,
+                             const char *bufferName, const char *buffer,
+                             size_t length, bool isNullTerminated,
+                             iree_compiler_source_t **out_source);
 
 // Splits the current source buffer, invoking a callback for each "split"
 // within it. This is per the usual MLIR split rules (see
@@ -318,32 +325,34 @@
 //===----------------------------------------------------------------------===//
 
 // Destroy output instances.
-IREE_EMBED_EXPORTED void ireeCompilerOutputDestroy(
-    iree_compiler_output_t *output);
+IREE_EMBED_EXPORTED void
+ireeCompilerOutputDestroy(iree_compiler_output_t *output);
 
 // Opens a file for the output.
 // Must be destroyed via ireeCompilerOutputDestroy().
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerOutputOpenFile(
-    const char *filePath, iree_compiler_output_t **out_output);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerOutputOpenFile(const char *filePath,
+                           iree_compiler_output_t **out_output);
 
 // Opens a file descriptor for output.
 // Must be destroyed via ireeCompilerOutputDestroy().
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerOutputOpenFD(
-    int fd, iree_compiler_output_t **out_output);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerOutputOpenFD(int fd, iree_compiler_output_t **out_output);
 
 // Opens an output to in-memory storage. Use the API
 // |ireeCompilerOutputMapMemory| to access the mapped contents once all
 // output has been written.
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerOutputOpenMembuffer(
-    iree_compiler_output_t **out_output);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerOutputOpenMembuffer(iree_compiler_output_t **out_output);
 
 // Maps the contents of a compiler output opened via
 // |ireeCompilerOutputOpenMembuffer|. This may be something obtained via
 // mmap or a more ordinary temporary buffer. This may fail in platform
 // specific ways unless if the output was created via
 // |ireeCompilerOutputOpenMembuffer|.
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerOutputMapMemory(
-    iree_compiler_output_t *output, void **contents, uint64_t *size);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerOutputMapMemory(iree_compiler_output_t *output, void **contents,
+                            uint64_t *size);
 
 // For file or other persistent outputs, by default they will be deleted on
 // |ireeCompilerOutputDestroy| (or exit). It is necessary to call
@@ -352,11 +361,12 @@
 IREE_EMBED_EXPORTED void ireeCompilerOutputKeep(iree_compiler_output_t *output);
 
 // Writes arbitrary data to the output.
-IREE_EMBED_EXPORTED iree_compiler_error_t *ireeCompilerOutputWrite(
-    iree_compiler_output_t *output, const void *data, size_t length);
+IREE_EMBED_EXPORTED iree_compiler_error_t *
+ireeCompilerOutputWrite(iree_compiler_output_t *output, const void *data,
+                        size_t length);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // IREE_COMPILER_EMBEDDING_API_H
+#endif // IREE_COMPILER_EMBEDDING_API_H
diff --git a/compiler/bindings/c/iree/compiler/loader.h b/compiler/bindings/c/iree/compiler/loader.h
index f531c28..7471fce 100644
--- a/compiler/bindings/c/iree/compiler/loader.h
+++ b/compiler/bindings/c/iree/compiler/loader.h
@@ -22,4 +22,4 @@
 }
 #endif
 
-#endif  // IREE_COMPILER_LOADER_H
+#endif // IREE_COMPILER_LOADER_H
diff --git a/compiler/bindings/c/iree/compiler/loader/loader.cpp b/compiler/bindings/c/iree/compiler/loader/loader.cpp
index de9f5f3..c563feb 100644
--- a/compiler/bindings/c/iree/compiler/loader/loader.cpp
+++ b/compiler/bindings/c/iree/compiler/loader/loader.cpp
@@ -19,7 +19,8 @@
 using DlHandle = HMODULE;
 DlHandle loadLibrary(const char *libraryPath) {
   HMODULE lib = LoadLibraryExA(libraryPath, nullptr, 0);
-  if (lib) return lib;
+  if (lib)
+    return lib;
   DWORD errorMessageID = GetLastError();
   LPSTR messageBuffer = nullptr;
   size_t size = FormatMessageA(
@@ -37,7 +38,7 @@
 void *lookupLibrarySymbol(DlHandle lib, const char *symbol) {
   return (void *)GetProcAddress(lib, symbol);
 }
-}  // namespace
+} // namespace
 #else
 // Posix impl
 #include <dlfcn.h>
@@ -47,7 +48,8 @@
   DlHandle lib = dlopen(libraryPath, RTLD_NOW | RTLD_LOCAL);
   if (!lib) {
     const char *reason = dlerror();
-    if (!reason) reason = "";
+    if (!reason)
+      reason = "";
     fprintf(stderr,
             "IREE COMPILER ERROR: Could not open compiler library %s : %s\n",
             libraryPath, reason);
@@ -58,7 +60,7 @@
 void *lookupLibrarySymbol(DlHandle lib, const char *symbol) {
   return dlsym(lib, symbol);
 }
-}  // namespace
+} // namespace
 #endif
 
 // Some operating systems have a prefix for cdecl exported symbols.
@@ -78,13 +80,14 @@
 #undef HANDLE_VERSIONED_SYMBOL
 
 void assertLoaded() {
-  if (libraryHandle) return;
+  if (libraryHandle)
+    return;
   fprintf(stderr,
           "FATAL ERROR: Attempt to call IREE compiler stub methods before "
           "library loaded\n");
   abort();
 }
-}  // namespace
+} // namespace
 
 bool ireeCompilerLoadLibrary(const char *libraryPath) {
   if (libraryHandle) {
@@ -100,27 +103,26 @@
   int (*apiVersionFn)() = (int (*)())lookupLibrarySymbol(
       localLibraryHandle, IREE_CDECL_SYMBOL_PREFIX "ireeCompilerGetAPIVersion");
   if (!apiVersionFn) {
-    fprintf(stderr,
-            "IREE COMPILER ERROR: Could not find symbol "
-            "'ireeCompilerGetAPIVersion'\n");
+    fprintf(stderr, "IREE COMPILER ERROR: Could not find symbol "
+                    "'ireeCompilerGetAPIVersion'\n");
     return false;
   }
   int packedApiVersion = apiVersionFn();
   int apiMinor = packedApiVersion & 0xffff;
   int apiMajor = packedApiVersion >> 16;
 
-#define HANDLE_SYMBOL(fn_name)                                           \
-  __##fn_name = (decltype(__##fn_name))lookupLibrarySymbol(              \
-      localLibraryHandle, IREE_CDECL_SYMBOL_PREFIX #fn_name);            \
-  if (!__##fn_name) {                                                    \
-    fprintf(stderr, "IREE COMPILER ERROR: Could not find symbol '%s'\n", \
-            IREE_CDECL_SYMBOL_PREFIX #fn_name);                          \
-    return false;                                                        \
+#define HANDLE_SYMBOL(fn_name)                                                 \
+  __##fn_name = (decltype(__##fn_name))lookupLibrarySymbol(                    \
+      localLibraryHandle, IREE_CDECL_SYMBOL_PREFIX #fn_name);                  \
+  if (!__##fn_name) {                                                          \
+    fprintf(stderr, "IREE COMPILER ERROR: Could not find symbol '%s'\n",       \
+            IREE_CDECL_SYMBOL_PREFIX #fn_name);                                \
+    return false;                                                              \
   }
-#define HANDLE_VERSIONED_SYMBOL(fn_name, availApiMajor, availApiMinor) \
-  if (apiMajor > availApiMajor ||                                      \
-      (apiMajor == availApiMajor && apiMinor >= availApiMinor)) {      \
-    HANDLE_SYMBOL(fn_name);                                            \
+#define HANDLE_VERSIONED_SYMBOL(fn_name, availApiMajor, availApiMinor)         \
+  if (apiMajor > availApiMajor ||                                              \
+      (apiMajor == availApiMajor && apiMinor >= availApiMinor)) {              \
+    HANDLE_SYMBOL(fn_name);                                                    \
   }
 #include "./handle_symbols.inc"
 #undef HANDLE_SYMBOL
@@ -201,8 +203,9 @@
   __ireeCompilerSessionDestroy(session);
 }
 
-iree_compiler_error_t *ireeCompilerSessionSetFlags(
-    iree_compiler_session_t *session, int argc, const char *const *argv) {
+iree_compiler_error_t *
+ireeCompilerSessionSetFlags(iree_compiler_session_t *session, int argc,
+                            const char *const *argv) {
   assertLoaded();
   return __ireeCompilerSessionSetFlags(session, argc, argv);
 }
@@ -215,8 +218,8 @@
                                        userData);
 }
 
-iree_compiler_invocation_t *ireeCompilerInvocationCreate(
-    iree_compiler_session_t *session) {
+iree_compiler_invocation_t *
+ireeCompilerInvocationCreate(iree_compiler_session_t *session) {
   return __ireeCompilerInvocationCreate(session);
 }
 
@@ -257,8 +260,9 @@
   __ireeCompilerInvocationSetCompileToPhase(run, phase);
 }
 
-IREE_EMBED_EXPORTED void ireeCompilerInvocationSetVerifyIR(
-    iree_compiler_invocation_t *run, bool enable) {
+IREE_EMBED_EXPORTED void
+ireeCompilerInvocationSetVerifyIR(iree_compiler_invocation_t *run,
+                                  bool enable) {
   __ireeCompilerInvocationSetVerifyIR(run, enable);
 }
 
@@ -267,23 +271,27 @@
   return __ireeCompilerInvocationPipeline(run, pipeline);
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputIR(
-    iree_compiler_invocation_t *run, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputIR(iree_compiler_invocation_t *run,
+                               iree_compiler_output_t *output) {
   return __ireeCompilerInvocationOutputIR(run, output);
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputVMBytecode(
-    iree_compiler_invocation_t *run, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputVMBytecode(iree_compiler_invocation_t *run,
+                                       iree_compiler_output_t *output) {
   return __ireeCompilerInvocationOutputVMBytecode(run, output);
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputVMCSource(
-    iree_compiler_invocation_t *run, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputVMCSource(iree_compiler_invocation_t *run,
+                                      iree_compiler_output_t *output) {
   return __ireeCompilerInvocationOutputVMCSource(run, output);
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputHALExecutable(
-    iree_compiler_invocation_t *run, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputHALExecutable(iree_compiler_invocation_t *run,
+                                          iree_compiler_output_t *output) {
   return __ireeCompilerInvocationOutputHALExecutable(run, output);
 }
 
@@ -291,16 +299,18 @@
   __ireeCompilerSourceDestroy(source);
 }
 
-iree_compiler_error_t *ireeCompilerSourceOpenFile(
-    iree_compiler_session_t *session, const char *filePath,
-    iree_compiler_source_t **out_source) {
+iree_compiler_error_t *
+ireeCompilerSourceOpenFile(iree_compiler_session_t *session,
+                           const char *filePath,
+                           iree_compiler_source_t **out_source) {
   return __ireeCompilerSourceOpenFile(session, filePath, out_source);
 }
 
-iree_compiler_error_t *ireeCompilerSourceWrapBuffer(
-    iree_compiler_session_t *session, const char *bufferName,
-    const char *buffer, size_t length, bool isNullTerminated,
-    iree_compiler_source_t **out_source) {
+iree_compiler_error_t *
+ireeCompilerSourceWrapBuffer(iree_compiler_session_t *session,
+                             const char *bufferName, const char *buffer,
+                             size_t length, bool isNullTerminated,
+                             iree_compiler_source_t **out_source) {
   return __ireeCompilerSourceWrapBuffer(session, bufferName, buffer, length,
                                         isNullTerminated, out_source);
 }
@@ -316,18 +326,19 @@
   __ireeCompilerOutputDestroy(output);
 }
 
-iree_compiler_error_t *ireeCompilerOutputOpenFile(
-    const char *filePath, iree_compiler_output_t **out_output) {
+iree_compiler_error_t *
+ireeCompilerOutputOpenFile(const char *filePath,
+                           iree_compiler_output_t **out_output) {
   return __ireeCompilerOutputOpenFile(filePath, out_output);
 }
 
-iree_compiler_error_t *ireeCompilerOutputOpenFD(
-    int fd, iree_compiler_output_t **out_output) {
+iree_compiler_error_t *
+ireeCompilerOutputOpenFD(int fd, iree_compiler_output_t **out_output) {
   return __ireeCompilerOutputOpenFD(fd, out_output);
 }
 
-iree_compiler_error_t *ireeCompilerOutputOpenMembuffer(
-    iree_compiler_output_t **out_output) {
+iree_compiler_error_t *
+ireeCompilerOutputOpenMembuffer(iree_compiler_output_t **out_output) {
   return __ireeCompilerOutputOpenMembuffer(out_output);
 }
 
@@ -335,8 +346,9 @@
   __ireeCompilerOutputKeep(output);
 }
 
-iree_compiler_error_t *ireeCompilerOutputMapMemory(
-    iree_compiler_output_t *output, void **contents, uint64_t *size) {
+iree_compiler_error_t *
+ireeCompilerOutputMapMemory(iree_compiler_output_t *output, void **contents,
+                            uint64_t *size) {
   return __ireeCompilerOutputMapMemory(output, contents, size);
 }
 
diff --git a/compiler/bindings/c/iree/compiler/loader/loader_test.c b/compiler/bindings/c/iree/compiler/loader/loader_test.c
index 5145e29..7a95a9d 100644
--- a/compiler/bindings/c/iree/compiler/loader/loader_test.c
+++ b/compiler/bindings/c/iree/compiler/loader/loader_test.c
@@ -68,9 +68,9 @@
   return true;
 }
 
-static bool invokeWithConsoleDiagnostics(
-    iree_compiler_session_t *session,
-    iree_compiler_source_t *sourceWithErrors) {
+static bool
+invokeWithConsoleDiagnostics(iree_compiler_session_t *session,
+                             iree_compiler_source_t *sourceWithErrors) {
   bool rc;
   printf(
       "--- INVOKING WITH CONSOLE DIAGNOSTICS (console error expected) ---\n");
@@ -96,9 +96,9 @@
   (*messageAccum)[currentSize + 1 + messageSize] = 0;
 }
 
-static bool invokeWithCallbackDiagnostics(
-    iree_compiler_session_t *session,
-    iree_compiler_source_t *sourceWithErrors) {
+static bool
+invokeWithCallbackDiagnostics(iree_compiler_session_t *session,
+                              iree_compiler_source_t *sourceWithErrors) {
   bool rc;
   printf(
       "--- INVOKING WITH CALLBACK DIAGNOSTICS (console error expected) ---\n");
diff --git a/compiler/bindings/c/iree/compiler/tool_entry_points_api.h b/compiler/bindings/c/iree/compiler/tool_entry_points_api.h
index 2e8842e..9957f2f 100644
--- a/compiler/bindings/c/iree/compiler/tool_entry_points_api.h
+++ b/compiler/bindings/c/iree/compiler/tool_entry_points_api.h
@@ -38,4 +38,4 @@
 }
 #endif
 
-#endif  // IREE_COMPILER_TOOL_ENTRY_POINTS_API_H
+#endif // IREE_COMPILER_TOOL_ENTRY_POINTS_API_H
diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp
index 3fc10c0..1f46d35 100644
--- a/compiler/plugins/target/CUDA/CUDATarget.cpp
+++ b/compiler/plugins/target/CUDA/CUDATarget.cpp
@@ -106,7 +106,7 @@
         llvm::cl::desc("Passes the given additional parameters to ptxas."));
   }
 };
-}  // namespace
+} // namespace
 
 static constexpr char kPtxasCompilerName[] = "ptxas";
 
@@ -114,11 +114,14 @@
 static FailureOr<std::string> findPtxasCompiler(const CUDAOptions &options,
                                                 std::string *message) {
   std::string ptxasCompiler;
-  if (!options.clUsePtxasFrom.empty()) ptxasCompiler = options.clUsePtxasFrom;
-  if (llvm::sys::fs::exists(ptxasCompiler)) return ptxasCompiler;
+  if (!options.clUsePtxasFrom.empty())
+    ptxasCompiler = options.clUsePtxasFrom;
+  if (llvm::sys::fs::exists(ptxasCompiler))
+    return ptxasCompiler;
 
   ptxasCompiler = findTool(kPtxasCompilerName);
-  if (llvm::sys::fs::exists(ptxasCompiler)) return ptxasCompiler;
+  if (llvm::sys::fs::exists(ptxasCompiler))
+    return ptxasCompiler;
 
   *message = std::string(
       "Could not find ptxas compiler. Try passing it explicitly with "
@@ -165,12 +168,13 @@
   auto Tokenize = llvm::cl::TokenizeWindowsCommandLine;
 #else
   auto Tokenize = llvm::cl::TokenizeGNUCommandLine;
-#endif  // _WIN32
+#endif // _WIN32
   llvm::BumpPtrAllocator scratchAllocator;
   llvm::StringSaver stringSaver(scratchAllocator);
   SmallVector<const char *> rawArgs;
   Tokenize(ptxasParams, stringSaver, rawArgs, /*MarkEOLs=*/false);
-  for (auto rawArg : rawArgs) ArgVector.push_back(StringRef(rawArg));
+  for (auto rawArg : rawArgs)
+    ArgVector.push_back(StringRef(rawArg));
 
   std::optional<StringRef> redirects[] = {
       stdinFile.str(),
@@ -220,7 +224,8 @@
 // executable, let the runtime compile.
 static std::string produceGpuImage(const CUDAOptions &options,
                                    std::string &ptxImage) {
-  if (!options.clUsePtxas) return ptxImage;
+  if (!options.clUsePtxas)
+    return ptxImage;
 
   std::string message;
   FailureOr<std::string> ptxasCompiler = findPtxasCompiler(options, &message);
@@ -229,7 +234,8 @@
     FailureOr<std::string> maybeCubinImage =
         compileWithPtxas(ptxasCompiler.value(), options.clTargetChip,
                          options.clUsePtxasParams, ptxImage, &message);
-    if (succeeded(maybeCubinImage)) return maybeCubinImage.value();
+    if (succeeded(maybeCubinImage))
+      return maybeCubinImage.value();
   }
 
   llvm::WithColor::warning()
@@ -356,7 +362,7 @@
 }
 
 class CUDATargetBackend final : public TargetBackend {
- public:
+public:
   CUDATargetBackend(const CUDAOptions &options) : options(options) {}
 
   std::string name() const override { return "cuda"; }
@@ -372,8 +378,8 @@
     mlir::registerNVVMDialectTranslation(registry);
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -394,7 +400,8 @@
     // For now we disable translation if the variant has external object files.
     // We could instead perform linking with those objects (if they're bitcode
     // ala libdevice.bc, etc).
-    if (variantOp.isExternal()) return;
+    if (variantOp.isExternal())
+      return;
 
     buildLLVMGPUTransformPassPipeline(passManager, false);
   }
@@ -494,7 +501,8 @@
            llvm::zip_equal(variantOp.getOps<IREE::HAL::ExecutableExportOp>(),
                            workgroupSizes)) {
         auto *llvmFunc = llvmModule->getFunction(exportOp.getName());
-        if (llvmFunc->isDeclaration()) continue;
+        if (llvmFunc->isDeclaration())
+          continue;
 
         // setName will make sure the function name is unique.
         llvmFunc->setName(sanitizeSymbolName(exportOp.getName()));
@@ -633,7 +641,7 @@
     return success();
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // If we had multiple target environments we would generate one target attr
@@ -642,8 +650,8 @@
     return ArrayAttr::get(context, targetAttrs);
   }
 
-  IREE::HAL::ExecutableTargetAttr getExecutableTarget(
-      MLIRContext *context) const {
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context) const {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
     // Add some configurations to the `hal.executable.target` attribute.
@@ -678,12 +686,12 @@
     });
   }
 };
-}  // namespace
+} // namespace
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 extern "C" bool iree_register_compiler_plugin_hal_target_cuda(
     mlir::iree_compiler::PluginRegistrar *registrar) {
diff --git a/compiler/plugins/target/CUDA/LLVMPasses.h b/compiler/plugins/target/CUDA/LLVMPasses.h
index aa14173..acf691a 100644
--- a/compiler/plugins/target/CUDA/LLVMPasses.h
+++ b/compiler/plugins/target/CUDA/LLVMPasses.h
@@ -19,10 +19,10 @@
       : maxWorkgroupSize(maxWorkgroupSize) {}
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 
- private:
+private:
   std::array<int32_t, 3> maxWorkgroupSize;
 };
 
-}  // namespace llvm
+} // namespace llvm
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_CUDA_PASS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_CUDA_PASS_H_
diff --git a/compiler/plugins/target/CUDA/SetBlockIdsRangePass.cpp b/compiler/plugins/target/CUDA/SetBlockIdsRangePass.cpp
index 5b4ab17..614065d 100644
--- a/compiler/plugins/target/CUDA/SetBlockIdsRangePass.cpp
+++ b/compiler/plugins/target/CUDA/SetBlockIdsRangePass.cpp
@@ -25,7 +25,8 @@
 // passed-in call instruction.
 static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
   // This call already has range metadata, nothing to do.
-  if (C->getMetadata(LLVMContext::MD_range)) return false;
+  if (C->getMetadata(LLVMContext::MD_range))
+    return false;
 
   LLVMContext &Context = C->getParent()->getContext();
   IntegerType *Int32Ty = Type::getInt32Ty(Context);
@@ -47,53 +48,55 @@
   unsigned MaxGridSizeZ = 0xffff;
   for (Instruction &I : instructions(F)) {
     CallInst *Call = dyn_cast<CallInst>(&I);
-    if (!Call) continue;
+    if (!Call)
+      continue;
     Function *Callee = Call->getCalledFunction();
-    if (!Callee) continue;
+    if (!Callee)
+      continue;
     switch (Callee->getIntrinsicID()) {
-      // Index within block
-      case Intrinsic::nvvm_read_ptx_sreg_tid_x:
-        Changed |= addRangeMetadata(0, maxWorkgroupSize[0], Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_tid_y:
-        Changed |= addRangeMetadata(0, maxWorkgroupSize[1], Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_tid_z:
-        Changed |= addRangeMetadata(0, maxWorkgroupSize[2], Call);
-        break;
+    // Index within block
+    case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+      Changed |= addRangeMetadata(0, maxWorkgroupSize[0], Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+      Changed |= addRangeMetadata(0, maxWorkgroupSize[1], Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+      Changed |= addRangeMetadata(0, maxWorkgroupSize[2], Call);
+      break;
 
-      // Block size
-      case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
-        Changed |= addRangeMetadata(1, maxWorkgroupSize[0] + 1, Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
-        Changed |= addRangeMetadata(1, maxWorkgroupSize[1] + 1, Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
-        Changed |= addRangeMetadata(1, maxWorkgroupSize[2] + 1, Call);
-        break;
+    // Block size
+    case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
+      Changed |= addRangeMetadata(1, maxWorkgroupSize[0] + 1, Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
+      Changed |= addRangeMetadata(1, maxWorkgroupSize[1] + 1, Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
+      Changed |= addRangeMetadata(1, maxWorkgroupSize[2] + 1, Call);
+      break;
 
-      // Index within grid
-      case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
-        Changed |= addRangeMetadata(0, MaxGridSizeX, Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
-        Changed |= addRangeMetadata(0, MaxGridSizeY, Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
-        Changed |= addRangeMetadata(0, MaxGridSizeZ, Call);
-        break;
+    // Index within grid
+    case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
+      Changed |= addRangeMetadata(0, MaxGridSizeX, Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
+      Changed |= addRangeMetadata(0, MaxGridSizeY, Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
+      Changed |= addRangeMetadata(0, MaxGridSizeZ, Call);
+      break;
 
-      // Grid size
-      case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
-        Changed |= addRangeMetadata(1, MaxGridSizeX + 1, Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
-        Changed |= addRangeMetadata(1, MaxGridSizeY + 1, Call);
-        break;
-      case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
-        Changed |= addRangeMetadata(1, MaxGridSizeZ + 1, Call);
-        break;
+    // Grid size
+    case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
+      Changed |= addRangeMetadata(1, MaxGridSizeX + 1, Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
+      Changed |= addRangeMetadata(1, MaxGridSizeY + 1, Call);
+      break;
+    case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
+      Changed |= addRangeMetadata(1, MaxGridSizeZ + 1, Call);
+      break;
     }
   }
   return Changed;
diff --git a/compiler/src/iree/compiler/API/Internal/Diagnostics.cpp b/compiler/src/iree/compiler/API/Internal/Diagnostics.cpp
index b33f5e0..cd279d9 100644
--- a/compiler/src/iree/compiler/API/Internal/Diagnostics.cpp
+++ b/compiler/src/iree/compiler/API/Internal/Diagnostics.cpp
@@ -22,7 +22,8 @@
 namespace {
 /// Return a processable CallSiteLoc from the given location.
 std::optional<CallSiteLoc> getCallSiteLoc(Location loc) {
-  if (auto callLoc = dyn_cast<CallSiteLoc>(loc)) return callLoc;
+  if (auto callLoc = dyn_cast<CallSiteLoc>(loc))
+    return callLoc;
   if (auto nameLoc = dyn_cast<NameLoc>(loc))
     return getCallSiteLoc(cast<NameLoc>(loc).getChildLoc());
   if (auto fusedLoc = dyn_cast<FusedLoc>(loc)) {
@@ -66,7 +67,7 @@
       });
 }
 
-}  // namespace
+} // namespace
 
 FormattingDiagnosticHandler::FormattingDiagnosticHandler(MLIRContext *ctx,
                                                          Callback callback)
@@ -148,4 +149,4 @@
   return success();
 }
 
-}  // namespace mlir::iree_compiler::embed
+} // namespace mlir::iree_compiler::embed
diff --git a/compiler/src/iree/compiler/API/Internal/Diagnostics.h b/compiler/src/iree/compiler/API/Internal/Diagnostics.h
index 0939611..7959cde 100644
--- a/compiler/src/iree/compiler/API/Internal/Diagnostics.h
+++ b/compiler/src/iree/compiler/API/Internal/Diagnostics.h
@@ -25,7 +25,7 @@
 /// that class relies on various low level properties of the stream to enable
 /// color, extract source lines, etc.
 class FormattingDiagnosticHandler {
- public:
+public:
   using Callback = std::function<void(DiagnosticSeverity severity,
                                       std::string_view message)>;
 
@@ -34,12 +34,12 @@
 
   LogicalResult emit(Diagnostic &diag);
 
- private:
+private:
   DiagnosticEngine::HandlerID handlerID;
   MLIRContext *ctx;
   Callback callback;
 };
 
-}  // namespace mlir::iree_compiler::embed
+} // namespace mlir::iree_compiler::embed
 
-#endif  // IREE_COMPILER_API_INTERNAL_DIAGNOSTICS_H
+#endif // IREE_COMPILER_API_INTERNAL_DIAGNOSTICS_H
diff --git a/compiler/src/iree/compiler/API/Internal/Embed.cpp b/compiler/src/iree/compiler/API/Internal/Embed.cpp
index 3a1b61f..de0da84 100644
--- a/compiler/src/iree/compiler/API/Internal/Embed.cpp
+++ b/compiler/src/iree/compiler/API/Internal/Embed.cpp
@@ -77,7 +77,7 @@
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
 #include "iree/compiler/Dialect/VM/Target/C/CModuleTarget.h"
 #include "iree/compiler/Dialect/VM/Target/C/TranslationFlags.h"
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
 
 #ifdef _WIN32
 #include "llvm/Support/Windows/WindowsSupport.h"
@@ -256,8 +256,7 @@
 };
 
 Session::Session(GlobalInit &globalInit)
-    : globalInit(globalInit),
-      binder(OptionsBinder::local()),
+    : globalInit(globalInit), binder(OptionsBinder::local()),
       pluginSession(globalInit.pluginManager, binder, pluginManagerOptions) {
   context.allowUnregisteredDialects();
   context.appendDialectRegistry(globalInit.registry);
@@ -359,7 +358,8 @@
   SmallVector<StringRef, 8> rawSubBuffers;
   // Split dropping the last checkLen chars to enable flagging near misses.
   origMemBuffer->getBuffer().split(rawSubBuffers, splitMarker);
-  if (rawSubBuffers.empty()) return nullptr;
+  if (rawSubBuffers.empty())
+    return nullptr;
 
   for (StringRef subBuffer : rawSubBuffers) {
     auto splitLoc = SMLoc::getFromPointer(subBuffer.data());
@@ -454,7 +454,7 @@
   void *mapped_data = nullptr;
   uint64_t mapped_size = 0;
 
- private:
+private:
   // Fields for Type::File.
   // If the output was configured to a file, this is it.
   std::unique_ptr<llvm::ToolOutputFile> outputFile;
@@ -516,7 +516,8 @@
 }
 
 void Output::keep() {
-  if (outputFile) outputFile->keep();
+  if (outputFile)
+    outputFile->keep();
 }
 
 // Invocation corresponds to iree_compiler_invocation_t
@@ -586,21 +587,21 @@
         [this](DiagnosticSeverity severity, std::string_view message) {
           iree_compiler_diagnostic_severity_t cSeverity;
           switch (severity) {
-            case DiagnosticSeverity::Note:
-              cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_NOTE;
-              break;
-            case DiagnosticSeverity::Warning:
-              cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_WARNING;
-              break;
-            case DiagnosticSeverity::Error:
-              cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_ERROR;
-              break;
-            case DiagnosticSeverity::Remark:
-              cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_REMARK;
-              break;
-            default:
-              cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_ERROR;
-              break;
+          case DiagnosticSeverity::Note:
+            cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_NOTE;
+            break;
+          case DiagnosticSeverity::Warning:
+            cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_WARNING;
+            break;
+          case DiagnosticSeverity::Error:
+            cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_ERROR;
+            break;
+          case DiagnosticSeverity::Remark:
+            cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_REMARK;
+            break;
+          default:
+            cSeverity = IREE_COMPILER_DIAGNOSTIC_SEVERITY_ERROR;
+            break;
           }
           diagnosticCallback(cSeverity, message.data(), message.size(),
                              diagnosticCallbackUserData);
@@ -650,50 +651,50 @@
 
 bool Invocation::runPipeline(enum iree_compiler_pipeline_t pipeline) {
   switch (pipeline) {
-    case IREE_COMPILER_PIPELINE_STD: {
-      // Parse the compile to phase name.
-      std::optional<IREEVMPipelinePhase> compileToPhase;
-      enumerateIREEVMPipelinePhases(
-          [&](IREEVMPipelinePhase phase, StringRef mnemonic, StringRef desc) {
-            if (mnemonic == compileToPhaseName) {
-              compileToPhase = phase;
-            }
-          });
-      if (!compileToPhase) {
-        (*parsedModule)->emitError()
-            << "unrecognized compile-to phase name: " << compileToPhaseName;
-        return false;
-      }
-
-      buildIREEVMTransformPassPipeline(
-          session.targetRegistry, session.bindingOptions, session.inputOptions,
-          session.preprocessingOptions, session.highLevelOptimizationOptions,
-          session.schedulingOptions, session.halTargetOptions,
-          session.vmTargetOptions, pipelineHooks, passManager, *compileToPhase);
-      break;
-    }
-    case IREE_COMPILER_PIPELINE_HAL_EXECUTABLE: {
-      auto &bodyBlock = (*parsedModule)->getRegion(0).front();
-      auto executableOps =
-          llvm::to_vector(bodyBlock.getOps<IREE::HAL::ExecutableOp>());
-      auto sourceOps =
-          llvm::to_vector(bodyBlock.getOps<IREE::HAL::ExecutableSourceOp>());
-      size_t usableOpCount = executableOps.size() + sourceOps.size();
-      if (usableOpCount != 1) {
-        (*parsedModule)->emitError()
-            << "HAL executable translation requires "
-               "exactly 1 top level hal.executable/hal.executable.source "
-               "op";
-        return false;
-      }
-      IREE::HAL::buildHALTransformPassPipeline(
-          passManager, session.targetRegistry, session.halTargetOptions);
-      break;
-    }
-    default:
+  case IREE_COMPILER_PIPELINE_STD: {
+    // Parse the compile to phase name.
+    std::optional<IREEVMPipelinePhase> compileToPhase;
+    enumerateIREEVMPipelinePhases(
+        [&](IREEVMPipelinePhase phase, StringRef mnemonic, StringRef desc) {
+          if (mnemonic == compileToPhaseName) {
+            compileToPhase = phase;
+          }
+        });
+    if (!compileToPhase) {
       (*parsedModule)->emitError()
-          << "unsupported pipeline type " << (int)pipeline;
+          << "unrecognized compile-to phase name: " << compileToPhaseName;
       return false;
+    }
+
+    buildIREEVMTransformPassPipeline(
+        session.targetRegistry, session.bindingOptions, session.inputOptions,
+        session.preprocessingOptions, session.highLevelOptimizationOptions,
+        session.schedulingOptions, session.halTargetOptions,
+        session.vmTargetOptions, pipelineHooks, passManager, *compileToPhase);
+    break;
+  }
+  case IREE_COMPILER_PIPELINE_HAL_EXECUTABLE: {
+    auto &bodyBlock = (*parsedModule)->getRegion(0).front();
+    auto executableOps =
+        llvm::to_vector(bodyBlock.getOps<IREE::HAL::ExecutableOp>());
+    auto sourceOps =
+        llvm::to_vector(bodyBlock.getOps<IREE::HAL::ExecutableSourceOp>());
+    size_t usableOpCount = executableOps.size() + sourceOps.size();
+    if (usableOpCount != 1) {
+      (*parsedModule)->emitError()
+          << "HAL executable translation requires "
+             "exactly 1 top level hal.executable/hal.executable.source "
+             "op";
+      return false;
+    }
+    IREE::HAL::buildHALTransformPassPipeline(
+        passManager, session.targetRegistry, session.halTargetOptions);
+    break;
+  }
+  default:
+    (*parsedModule)->emitError()
+        << "unsupported pipeline type " << (int)pipeline;
+    return false;
   }
 
   passManager.enableVerifier(enableVerifier);
@@ -774,8 +775,8 @@
   return output.getWriteError();
 }
 
-}  // namespace
-}  // namespace mlir::iree_compiler::embed
+} // namespace
+} // namespace mlir::iree_compiler::embed
 
 using namespace mlir::iree_compiler::embed;
 
@@ -801,7 +802,8 @@
 #endif
 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
   std::string CPU = std::string(sys::getHostCPUName());
-  if (CPU == "generic") CPU = "(unknown)";
+  if (CPU == "generic")
+    CPU = "(unknown)";
   os << ".\n"
      << "  Default target: " << sys::getDefaultTargetTriple() << '\n'
      << "  Host CPU: " << CPU;
@@ -845,7 +847,7 @@
   return (iree_compiler_output_t *)output;
 }
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // C API implementation
@@ -962,9 +964,8 @@
 
 void ireeCompilerGlobalShutdown() {
   if (!globalInit || isShutdown) {
-    fprintf(stderr,
-            "FATAL ERROR: ireeCompilerGlobalShutdown called when not "
-            "initialized\n");
+    fprintf(stderr, "FATAL ERROR: ireeCompilerGlobalShutdown called when not "
+                    "initialized\n");
     abort();
   }
   if (globalInit) {
@@ -989,8 +990,9 @@
   delete unwrap(session);
 }
 
-iree_compiler_error_t *ireeCompilerSessionSetFlags(
-    iree_compiler_session_t *session, int argc, const char *const *argv) {
+iree_compiler_error_t *
+ireeCompilerSessionSetFlags(iree_compiler_session_t *session, int argc,
+                            const char *const *argv) {
   return wrap(unwrap(session)->setFlags(argc, argv));
 }
 
@@ -1000,8 +1002,8 @@
   unwrap(session)->getFlags(nonDefaultOnly, onFlag, userData);
 }
 
-iree_compiler_invocation_t *ireeCompilerInvocationCreate(
-    iree_compiler_session_t *session) {
+iree_compiler_invocation_t *
+ireeCompilerInvocationCreate(iree_compiler_session_t *session) {
   return wrap(new Invocation(*unwrap(session)));
 }
 
@@ -1088,18 +1090,20 @@
   delete unwrap(source);
 }
 
-iree_compiler_error_t *ireeCompilerSourceOpenFile(
-    iree_compiler_session_t *session, const char *filePath,
-    iree_compiler_source_t **out_source) {
+iree_compiler_error_t *
+ireeCompilerSourceOpenFile(iree_compiler_session_t *session,
+                           const char *filePath,
+                           iree_compiler_source_t **out_source) {
   auto source = new Source(*unwrap(session));
   *out_source = wrap(source);
   return wrap(source->openFile(filePath));
 }
 
-iree_compiler_error_t *ireeCompilerSourceWrapBuffer(
-    iree_compiler_session_t *session, const char *bufferName,
-    const char *buffer, size_t length, bool isNullTerminated,
-    iree_compiler_source_t **out_source) {
+iree_compiler_error_t *
+ireeCompilerSourceWrapBuffer(iree_compiler_session_t *session,
+                             const char *bufferName, const char *buffer,
+                             size_t length, bool isNullTerminated,
+                             iree_compiler_source_t **out_source) {
   auto source = new Source(*unwrap(session));
   *out_source = wrap(source);
   return wrap(source->wrapBuffer(bufferName, buffer, length, isNullTerminated));
@@ -1116,29 +1120,31 @@
   delete unwrap(output);
 }
 
-iree_compiler_error_t *ireeCompilerOutputOpenFile(
-    const char *filePath, iree_compiler_output_t **out_output) {
+iree_compiler_error_t *
+ireeCompilerOutputOpenFile(const char *filePath,
+                           iree_compiler_output_t **out_output) {
   auto output = new Output();
   *out_output = wrap(output);
   return wrap(output->openFile(filePath));
 }
 
-iree_compiler_error_t *ireeCompilerOutputOpenFD(
-    int fd, iree_compiler_output_t **out_output) {
+iree_compiler_error_t *
+ireeCompilerOutputOpenFD(int fd, iree_compiler_output_t **out_output) {
   auto output = new Output();
   *out_output = wrap(output);
   return wrap(output->openFD(fd));
 }
 
-iree_compiler_error_t *ireeCompilerOutputOpenMembuffer(
-    iree_compiler_output_t **out_output) {
+iree_compiler_error_t *
+ireeCompilerOutputOpenMembuffer(iree_compiler_output_t **out_output) {
   auto output = new Output();
   *out_output = wrap(output);
   return wrap(output->openMembuffer());
 }
 
-iree_compiler_error_t *ireeCompilerOutputMapMemory(
-    iree_compiler_output_t *output, void **contents, uint64_t *size) {
+iree_compiler_error_t *
+ireeCompilerOutputMapMemory(iree_compiler_output_t *output, void **contents,
+                            uint64_t *size) {
   return wrap(unwrap(output)->mapMemory(contents, size));
 }
 
@@ -1157,23 +1163,27 @@
   return wrap(unwrap(output)->getWriteError());
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputIR(
-    iree_compiler_invocation_t *inv, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputIR(iree_compiler_invocation_t *inv,
+                               iree_compiler_output_t *output) {
   return wrap(unwrap(inv)->outputIR(*unwrap(output)));
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputVMBytecode(
-    iree_compiler_invocation_t *inv, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputVMBytecode(iree_compiler_invocation_t *inv,
+                                       iree_compiler_output_t *output) {
   return wrap(unwrap(inv)->outputVMBytecode(*unwrap(output)));
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputVMCSource(
-    iree_compiler_invocation_t *inv, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputVMCSource(iree_compiler_invocation_t *inv,
+                                      iree_compiler_output_t *output) {
   return wrap(unwrap(inv)->outputVMCSource(*unwrap(output)));
 }
 
-iree_compiler_error_t *ireeCompilerInvocationOutputHALExecutable(
-    iree_compiler_invocation_t *inv, iree_compiler_output_t *output) {
+iree_compiler_error_t *
+ireeCompilerInvocationOutputHALExecutable(iree_compiler_invocation_t *inv,
+                                          iree_compiler_output_t *output) {
   return wrap(unwrap(inv)->outputHALExecutable(*unwrap(output)));
 }
 
diff --git a/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp b/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp
index 46e7d34..9163335 100644
--- a/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp
+++ b/compiler/src/iree/compiler/API/Internal/IREEOptToolEntryPoint.cpp
@@ -35,7 +35,7 @@
 
 #if defined(_MSC_VER)
 #define fileno _fileno
-#endif  // _MSC_VER
+#endif // _MSC_VER
 
 static LogicalResult ireeOptMainFromCL(int argc, char **argv,
                                        llvm::StringRef toolName,
@@ -91,7 +91,8 @@
   auto localBinder = mlir::iree_compiler::OptionsBinder::local();
   mlir::iree_compiler::PluginManagerSession pluginSession(
       pluginManager, localBinder, pluginManagerOptions);
-  if (failed(pluginSession.initializePlugins())) return failure();
+  if (failed(pluginSession.initializePlugins()))
+    return failure();
   pluginSession.registerDialects(registry);
 
   // In the normal compiler flow, activated plugins maintain a scoped registry
diff --git a/compiler/src/iree/compiler/API/Internal/LLDToolEntryPoint.cpp b/compiler/src/iree/compiler/API/Internal/LLDToolEntryPoint.cpp
index fed1423..d7756c0 100644
--- a/compiler/src/iree/compiler/API/Internal/LLDToolEntryPoint.cpp
+++ b/compiler/src/iree/compiler/API/Internal/LLDToolEntryPoint.cpp
@@ -66,9 +66,11 @@
 static Flavor parseFlavor(std::vector<const char *> &v) {
   // Parse -flavor option.
   if (v.size() > 1 && v[1] == StringRef("-flavor")) {
-    if (v.size() <= 2) die("missing arg value for '-flavor'");
+    if (v.size() <= 2)
+      die("missing arg value for '-flavor'");
     Flavor f = getFlavor(v[2]);
-    if (f == Invalid) die("Unknown flavor: " + StringRef(v[2]));
+    if (f == Invalid)
+      die("Unknown flavor: " + StringRef(v[2]));
     v.erase(v.begin() + 1, v.begin() + 3);
     return f;
   }
@@ -88,35 +90,34 @@
 
   std::vector<const char *> args(argv, argv + argc);
   switch (parseFlavor(args)) {
-    case Gnu:
+  case Gnu:
 #ifndef IREE_COMPILER_LLD_ELF_DISABLED
-      return !elf::link(args, stdoutOS, stderrOS, exitEarly, disableOutput);
+    return !elf::link(args, stdoutOS, stderrOS, exitEarly, disableOutput);
 #else
-      die("lld is not compiled with ELF support");
+    die("lld is not compiled with ELF support");
 #endif
-    case WinLink:
+  case WinLink:
 #ifndef IREE_COMPILER_LLD_COFF_DISABLED
-      return !coff::link(args, stdoutOS, stderrOS, exitEarly, disableOutput);
+    return !coff::link(args, stdoutOS, stderrOS, exitEarly, disableOutput);
 #else
-      die("lld is not compiled with COFF support");
+    die("lld is not compiled with COFF support");
 #endif
-    case Darwin:
+  case Darwin:
 #ifndef IREE_COMPILER_LLD_MACHO_DISABLED
-      return !macho::link(args, stdoutOS, stderrOS, exitEarly, disableOutput);
+    return !macho::link(args, stdoutOS, stderrOS, exitEarly, disableOutput);
 #else
-      die("lld is not compiled with MachO support");
+    die("lld is not compiled with MachO support");
 #endif
-    case Wasm:
+  case Wasm:
 #ifndef IREE_COMPILER_LLD_WASM_DISABLED
-      return !lld::wasm::link(args, stdoutOS, stderrOS, exitEarly,
-                              disableOutput);
+    return !lld::wasm::link(args, stdoutOS, stderrOS, exitEarly, disableOutput);
 #else
-      die("lld is not compiled with WASM support");
+    die("lld is not compiled with WASM support");
 #endif
-    default:
-      die("lld is a generic driver.\n"
-          "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld"
-          " (WebAssembly) instead");
+  default:
+    die("lld is a generic driver.\n"
+        "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld"
+        " (WebAssembly) instead");
   }
 }
-#endif  // IREE_COMPILER_LLD_DISABLED
+#endif // IREE_COMPILER_LLD_DISABLED
diff --git a/compiler/src/iree/compiler/API/MLIRInterop.h b/compiler/src/iree/compiler/API/MLIRInterop.h
index 121ec5b..79bef11 100644
--- a/compiler/src/iree/compiler/API/MLIRInterop.h
+++ b/compiler/src/iree/compiler/API/MLIRInterop.h
@@ -23,8 +23,8 @@
 #endif
 
 // Registers all dialects and extensions known to the IREE compiler.
-MLIR_CAPI_EXPORTED void ireeCompilerRegisterDialects(
-    MlirDialectRegistry registry);
+MLIR_CAPI_EXPORTED void
+ireeCompilerRegisterDialects(MlirDialectRegistry registry);
 
 // Gets the MlirContext that the session manages. The context is owned by the
 // session and valid until it is destroyed.
@@ -36,11 +36,12 @@
 // Ownership of the moduleOp is transferred to the invocation, regardless of
 // whether the call succeeds or fails.
 // On failure, returns false and issues diagnostics.
-MLIR_CAPI_EXPORTED bool ireeCompilerInvocationImportModule(
-    iree_compiler_invocation_t *inv, MlirOperation moduleOp);
+MLIR_CAPI_EXPORTED bool
+ireeCompilerInvocationImportModule(iree_compiler_invocation_t *inv,
+                                   MlirOperation moduleOp);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  // IREE_COMPILER_API_MLIR_INTEROP_H
+#endif // IREE_COMPILER_API_MLIR_INTEROP_H
diff --git a/compiler/src/iree/compiler/API/test/api-test-main.c b/compiler/src/iree/compiler/API/test/api-test-main.c
index 405a133..50558cb 100644
--- a/compiler/src/iree/compiler/API/test/api-test-main.c
+++ b/compiler/src/iree/compiler/API/test/api-test-main.c
@@ -19,26 +19,26 @@
 #include "iree/compiler/embedding_api.h"
 
 struct compiler_state_t {
-  iree_compiler_session_t* session;
+  iree_compiler_session_t *session;
   MlirContext context;
 };
 
 struct invocation_state_t {
-  iree_compiler_invocation_t* inv;
+  iree_compiler_invocation_t *inv;
 };
 
-static void initializeCompiler(struct compiler_state_t* state) {
+static void initializeCompiler(struct compiler_state_t *state) {
   ireeCompilerGlobalInitialize();
   state->session = ireeCompilerSessionCreate();
   state->context = ireeCompilerSessionGetContext(state->session);
 }
 
-static void shutdownCompiler(struct compiler_state_t* state) {
+static void shutdownCompiler(struct compiler_state_t *state) {
   ireeCompilerSessionDestroy(state->session);
   ireeCompilerGlobalShutdown();
 }
 
-int main(int argc, char** argv) {
+int main(int argc, char **argv) {
   struct compiler_state_t state;
   initializeCompiler(&state);
 
@@ -62,8 +62,8 @@
   }
 
   // Set flags.
-  iree_compiler_error_t* err;
-  const char* flags[] = {
+  iree_compiler_error_t *err;
+  const char *flags[] = {
       "--iree-hal-target-backends=vmvx",
   };
   err = ireeCompilerSessionSetFlags(state.session, 1, flags);
@@ -75,7 +75,7 @@
   }
 
   // Import module.
-  iree_compiler_invocation_t* inv = ireeCompilerInvocationCreate(state.session);
+  iree_compiler_invocation_t *inv = ireeCompilerInvocationCreate(state.session);
   if (!ireeCompilerInvocationImportModule(inv, module)) {
     // ireeCompilerInvocationCreate takes ownership of the module regardless
     // of success or error, so we let it destroy it.
@@ -92,7 +92,7 @@
   }
 
   // Output.
-  iree_compiler_output_t* output;
+  iree_compiler_output_t *output;
   err = ireeCompilerOutputOpenMembuffer(&output);
   if (err) {
     fprintf(stderr, "ERROR: %s\n", ireeCompilerErrorGetMessage(err));
@@ -110,7 +110,7 @@
   }
 
   // Map memory and print size.
-  void* bytecode;
+  void *bytecode;
   uint64_t bytecodeSize;
   err = ireeCompilerOutputMapMemory(output, &bytecode, &bytecodeSize);
   if (err) {
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp b/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp
index aa47c5d..e9cec34 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp
@@ -82,8 +82,9 @@
 
 // Converts a func.func with the iree.abi.streamable attribute into a flow.func
 // and fixes all func.call ops to be flow.call across the module.
-static std::optional<StreamableFunc> convertStreamableFunc(
-    mlir::ModuleOp moduleOp, func::FuncOp funcOp, SymbolTable &symbolTable) {
+static std::optional<StreamableFunc>
+convertStreamableFunc(mlir::ModuleOp moduleOp, func::FuncOp funcOp,
+                      SymbolTable &symbolTable) {
   OpBuilder moduleBuilder(funcOp);
   auto functionType = funcOp.getFunctionType();
 
@@ -264,7 +265,8 @@
     for (auto [i, resultType] : llvm::enumerate(callOp.getResultTypes())) {
       if (auto shapedType = llvm::dyn_cast<ShapedType>(resultType)) {
         const auto &resultDimArgs = streamableFunc.resultDimArgs[i];
-        if (resultDimArgs.empty()) continue;
+        if (resultDimArgs.empty())
+          continue;
         if (resultDimArgs.front() == kTiedDim) {
           // Source from a tied operand. Types must match exactly.
           assert(streamableFunc.tiedOperands[i] !=
@@ -299,9 +301,9 @@
   return success();
 }
 
-static LogicalResult convertStreamableCalls(
-    mlir::ModuleOp moduleOp,
-    DenseMap<StringRef, StreamableFunc> &streamableFuncs) {
+static LogicalResult
+convertStreamableCalls(mlir::ModuleOp moduleOp,
+                       DenseMap<StringRef, StreamableFunc> &streamableFuncs) {
   auto walkResult = moduleOp.walk([&](func::CallOp callOp) {
     auto it = streamableFuncs.find(callOp.getCallee());
     if (it != streamableFuncs.end()) {
@@ -316,7 +318,7 @@
 
 class ConvertStreamableOpsPass
     : public PassWrapper<ConvertStreamableOpsPass, OperationPass<ModuleOp>> {
- public:
+public:
   ConvertStreamableOpsPass() = default;
   ConvertStreamableOpsPass(const ConvertStreamableOpsPass &pass) {}
 
@@ -358,7 +360,8 @@
     for (auto originalFuncOp : originalFuncOps) {
       auto streamableFuncOr =
           convertStreamableFunc(moduleOp, originalFuncOp, symbolTable);
-      if (!streamableFuncOr.has_value()) return signalPassFailure();
+      if (!streamableFuncOr.has_value())
+        return signalPassFailure();
       auto streamableFunc = std::move(streamableFuncOr).value();
       streamableFuncs[streamableFunc.funcOp.getName()] =
           std::move(streamableFunc);
@@ -377,7 +380,7 @@
 
 static PassRegistration<ConvertStreamableOpsPass> pass;
 
-}  // namespace ABI
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ABI
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp b/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp
index 70a73f2..c133d79 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp
@@ -45,7 +45,7 @@
       });
 }
 
-}  // namespace ABI
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ABI
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.h b/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.h
index 743abf6..b533529 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.h
@@ -72,9 +72,9 @@
   createWrapEntryPointsPass();
 }
 
-}  // namespace ABI
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ABI
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_BINDINGS_NATIVE_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_BINDINGS_NATIVE_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp b/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp
index 14e2046..20645f8 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp
@@ -25,10 +25,11 @@
 namespace ABI {
 
 // Returns the invocation model specified on |op| or the |defaultModel|.
-static IREE::ABI::InvocationModel getInvocationModel(
-    Operation *op, IREE::ABI::InvocationModel defaultModel) {
+static IREE::ABI::InvocationModel
+getInvocationModel(Operation *op, IREE::ABI::InvocationModel defaultModel) {
   auto modelAttr = op->getAttrOfType<StringAttr>("iree.abi.model");
-  if (!modelAttr) return defaultModel;
+  if (!modelAttr)
+    return defaultModel;
   if (modelAttr == "coarse-fences") {
     return IREE::ABI::InvocationModel::CoarseFences;
   } else {
@@ -61,10 +62,10 @@
 }
 
 // Creates the corresponding wrapper function for the given import function.
-static func::FuncOp createImportWrapperFunc(
-    IREE::ABI::InvocationModel invocationModel, func::FuncOp importOp,
-    FunctionType oldImportType, FunctionType newImportType,
-    StringRef privateName) {
+static func::FuncOp
+createImportWrapperFunc(IREE::ABI::InvocationModel invocationModel,
+                        func::FuncOp importOp, FunctionType oldImportType,
+                        FunctionType newImportType, StringRef privateName) {
   // Create the internal wrapper function with the original import signature.
   auto wrapperOp =
       func::FuncOp::create(importOp.getLoc(), privateName, oldImportType);
@@ -81,13 +82,13 @@
   stripABIAttrs(resultAttrDict);
   wrapperOp.setAllResultAttrs(resultAttrDict);
   switch (invocationModel) {
-    default:
-    case IREE::ABI::InvocationModel::Sync:
-      break;
-    case IREE::ABI::InvocationModel::CoarseFences:
-      argAttrDict.push_back(nullptr);  // wait
-      argAttrDict.push_back(nullptr);  // signal
-      break;
+  default:
+  case IREE::ABI::InvocationModel::Sync:
+    break;
+  case IREE::ABI::InvocationModel::CoarseFences:
+    argAttrDict.push_back(nullptr); // wait
+    argAttrDict.push_back(nullptr); // signal
+    break;
   }
 
   // Update the import type and propagate back the attributes we may have
@@ -124,53 +125,53 @@
   Value waitFence;
   Value signalFence;
   switch (invocationModel) {
-    default:
-    case IREE::ABI::InvocationModel::Sync:
-      // No fences.
-      break;
-    case IREE::ABI::InvocationModel::CoarseFences: {
-      // HACK: this is relying on the fact that there's only one HAL device.
-      // We should instead have a way of creating fences on the device that
-      // is used to produce the tensors we're wrapping.
-      auto device =
-          entryBuilder.create<IREE::HAL::ExSharedDeviceOp>(importOp.getLoc());
+  default:
+  case IREE::ABI::InvocationModel::Sync:
+    // No fences.
+    break;
+  case IREE::ABI::InvocationModel::CoarseFences: {
+    // HACK: this is relying on the fact that there's only one HAL device.
+    // We should instead have a way of creating fences on the device that
+    // is used to produce the tensors we're wrapping.
+    auto device =
+        entryBuilder.create<IREE::HAL::ExSharedDeviceOp>(importOp.getLoc());
 
-      // When exporting a fence we need to put a barrier between the rest of the
-      // program and the tensors consumed by the import.
-      if (tensorArgs.empty()) {
-        // No tensors passed to the import - pass in an immediate signal.
-        waitFence = entryBuilder.create<IREE::Util::NullOp>(
-            importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>());
-      } else {
-        waitFence = entryBuilder.create<IREE::HAL::FenceCreateOp>(
-            importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>(),
-            device, IREE::HAL::FenceFlagBitfield::None);
-        auto barrierOp = entryBuilder.create<IREE::HAL::TensorBarrierOp>(
-            importOp.getLoc(), tensorArgs, waitFence);
-        for (auto [argIndex, readyArg] :
-             llvm::zip_equal(tensorArgIndices, barrierOp.getResults())) {
-          entryArgs[argIndex] = readyArg;
-        }
+    // When exporting a fence we need to put a barrier between the rest of the
+    // program and the tensors consumed by the import.
+    if (tensorArgs.empty()) {
+      // No tensors passed to the import - pass in an immediate signal.
+      waitFence = entryBuilder.create<IREE::Util::NullOp>(
+          importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>());
+    } else {
+      waitFence = entryBuilder.create<IREE::HAL::FenceCreateOp>(
+          importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>(),
+          device, IREE::HAL::FenceFlagBitfield::None);
+      auto barrierOp = entryBuilder.create<IREE::HAL::TensorBarrierOp>(
+          importOp.getLoc(), tensorArgs, waitFence);
+      for (auto [argIndex, readyArg] :
+           llvm::zip_equal(tensorArgIndices, barrierOp.getResults())) {
+        entryArgs[argIndex] = readyArg;
       }
-
-      // When the import produces resources we need to pass in a fence it can
-      // signal when execution is ready.
-      // TODO(benvanik): always pass in a signal fence? could be useful if we
-      // want to allow for async work using fences that's not device-related.
-      const bool haveTensorResults =
-          llvm::any_of(oldImportType.getResults(),
-                       [](Type type) { return llvm::isa<TensorType>(type); });
-      if (!haveTensorResults && !hasSideEffects) {
-        // No tensors returned from import - pass in an immediate signal.
-        signalFence = entryBuilder.create<IREE::Util::NullOp>(
-            importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>());
-      } else {
-        signalFence = entryBuilder.create<IREE::HAL::FenceCreateOp>(
-            importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>(),
-            device, IREE::HAL::FenceFlagBitfield::None);
-      }
-      break;
     }
+
+    // When the import produces resources we need to pass in a fence it can
+    // signal when execution is ready.
+    // TODO(benvanik): always pass in a signal fence? could be useful if we
+    // want to allow for async work using fences that's not device-related.
+    const bool haveTensorResults =
+        llvm::any_of(oldImportType.getResults(),
+                     [](Type type) { return llvm::isa<TensorType>(type); });
+    if (!haveTensorResults && !hasSideEffects) {
+      // No tensors returned from import - pass in an immediate signal.
+      signalFence = entryBuilder.create<IREE::Util::NullOp>(
+          importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>());
+    } else {
+      signalFence = entryBuilder.create<IREE::HAL::FenceCreateOp>(
+          importOp.getLoc(), entryBuilder.getType<IREE::HAL::FenceType>(),
+          device, IREE::HAL::FenceFlagBitfield::None);
+    }
+    break;
+  }
   }
 
   // Marshal arguments.
@@ -194,8 +195,10 @@
       arguments.push_back(arg);
     }
   }
-  if (waitFence) arguments.push_back(waitFence);
-  if (signalFence) arguments.push_back(signalFence);
+  if (waitFence)
+    arguments.push_back(waitFence);
+  if (signalFence)
+    arguments.push_back(signalFence);
 
   // Make the call with the updated types.
   auto callOp =
@@ -261,13 +264,13 @@
   }
   auto fenceType = IREE::HAL::FenceType::get(importOp.getContext());
   switch (invocationModel) {
-    default:
-    case IREE::ABI::InvocationModel::Sync:
-      break;
-    case IREE::ABI::InvocationModel::CoarseFences:
-      inputTypes.push_back(fenceType);  // wait
-      inputTypes.push_back(fenceType);  // signal
-      break;
+  default:
+  case IREE::ABI::InvocationModel::Sync:
+    break;
+  case IREE::ABI::InvocationModel::CoarseFences:
+    inputTypes.push_back(fenceType); // wait
+    inputTypes.push_back(fenceType); // signal
+    break;
   }
   SmallVector<Type> resultTypes;
   for (auto oldType : oldImportType.getResults()) {
@@ -280,7 +283,8 @@
   // calls out to the updated import using ABI types.
   auto wrapperOp = createImportWrapperFunc(
       invocationModel, importOp, oldImportType, newImportType, privateName);
-  if (!wrapperOp) return failure();
+  if (!wrapperOp)
+    return failure();
   moduleOp.insert(++Block::iterator(importOp), wrapperOp);
 
   // Update the import to the new type and mark it as being converted so we
@@ -304,13 +308,13 @@
   }
 
   switch (invocationModel) {
-    default:
-    case IREE::ABI::InvocationModel::Sync:
-      break;
-    case IREE::ABI::InvocationModel::CoarseFences:
-      attrs.emplace_back(StringAttr::get(context, "iree.abi.model"),
-                         StringAttr::get(context, "coarse-fences"));
-      break;
+  default:
+  case IREE::ABI::InvocationModel::Sync:
+    break;
+  case IREE::ABI::InvocationModel::CoarseFences:
+    attrs.emplace_back(StringAttr::get(context, "iree.abi.model"),
+                       StringAttr::get(context, "coarse-fences"));
+    break;
   }
 
   if (!attrs.empty()) {
@@ -342,9 +346,9 @@
 }
 
 // Creates the corresponding wrapper function for the given export function.
-static func::FuncOp createExportWrapperFunc(
-    IREE::ABI::InvocationModel invocationModel, func::FuncOp exportOp,
-    StringRef publicName) {
+static func::FuncOp
+createExportWrapperFunc(IREE::ABI::InvocationModel invocationModel,
+                        func::FuncOp exportOp, StringRef publicName) {
   // Copy arg/result attrs from the export op to the wrapper function.
   // We may want to remove them from the export but would need to filter.
   SmallVector<DictionaryAttr> argAttrDict;
@@ -366,15 +370,15 @@
   }
   auto fenceType = IREE::HAL::FenceType::get(exportOp.getContext());
   switch (invocationModel) {
-    default:
-    case IREE::ABI::InvocationModel::Sync:
-      break;
-    case IREE::ABI::InvocationModel::CoarseFences:
-      inputTypes.push_back(fenceType);  // wait
-      inputTypes.push_back(fenceType);  // signal
-      argAttrDict.push_back(nullptr);   // wait
-      argAttrDict.push_back(nullptr);   // signal
-      break;
+  default:
+  case IREE::ABI::InvocationModel::Sync:
+    break;
+  case IREE::ABI::InvocationModel::CoarseFences:
+    inputTypes.push_back(fenceType); // wait
+    inputTypes.push_back(fenceType); // signal
+    argAttrDict.push_back(nullptr);  // wait
+    argAttrDict.push_back(nullptr);  // signal
+    break;
   }
   SmallVector<Type> resultTypes;
   for (auto oldType : oldExportType.getResults()) {
@@ -404,7 +408,8 @@
   for (unsigned i = 0; i < exportOp.getNumArguments(); ++i) {
     auto outputAttr =
         exportOp.getArgAttrOfType<IntegerAttr>(i, "iree.abi.output");
-    if (!outputAttr) continue;
+    if (!outputAttr)
+      continue;
     // Today all outputs need to be a !hal.buffer - we could change this
     // in the future to be something more generalized.
     auto storageArg = entryBlock->getArgument(i);
@@ -425,13 +430,13 @@
   Value waitFence;
   Value signalFence;
   switch (invocationModel) {
-    default:
-    case IREE::ABI::InvocationModel::Sync:
-      break;
-    case IREE::ABI::InvocationModel::CoarseFences:
-      waitFence = entryBlock->getArgument(entryBlock->getNumArguments() - 2);
-      signalFence = entryBlock->getArgument(entryBlock->getNumArguments() - 1);
-      break;
+  default:
+  case IREE::ABI::InvocationModel::Sync:
+    break;
+  case IREE::ABI::InvocationModel::CoarseFences:
+    waitFence = entryBlock->getArgument(entryBlock->getNumArguments() - 2);
+    signalFence = entryBlock->getArgument(entryBlock->getNumArguments() - 1);
+    break;
   }
 
   // Marshal arguments.
@@ -529,7 +534,8 @@
   // marshals arguments/results to the original function.
   auto wrapperOp =
       createExportWrapperFunc(invocationModel, exportOp, publicName);
-  if (!wrapperOp) return failure();
+  if (!wrapperOp)
+    return failure();
   moduleOp.insert(Block::iterator(exportOp), wrapperOp);
 
   return success();
@@ -540,7 +546,7 @@
 // Imports are also handled as they are entry points in another module.
 class WrapEntryPointsPass
     : public PassWrapper<WrapEntryPointsPass, OperationPass<ModuleOp>> {
- public:
+public:
   WrapEntryPointsPass() = default;
   WrapEntryPointsPass(const WrapEntryPointsPass &pass) {}
   WrapEntryPointsPass(IREE::ABI::InvocationModel invocationModel) {
@@ -570,7 +576,8 @@
     SmallVector<func::FuncOp> exportOps;
     for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
       // Ignore functions already marked as having their ABI goo handled.
-      if (funcOp->hasAttr("iree.abi.stub")) continue;
+      if (funcOp->hasAttr("iree.abi.stub"))
+        continue;
       if (funcOp.isExternal()) {
         // Imported function.
         importOps.push_back(funcOp);
@@ -603,7 +610,7 @@
     }
   }
 
- private:
+private:
   Option<InvocationModel> invocationModel{
       *this,
       "invocation-model",
@@ -618,14 +625,14 @@
   };
 };
 
-std::unique_ptr<OperationPass<ModuleOp>> createWrapEntryPointsPass(
-    IREE::ABI::InvocationModel invocationModel) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createWrapEntryPointsPass(IREE::ABI::InvocationModel invocationModel) {
   return std::make_unique<WrapEntryPointsPass>(invocationModel);
 }
 
 static PassRegistration<WrapEntryPointsPass> pass;
 
-}  // namespace ABI
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ABI
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp
index 088c4fe..c4844e8 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp
@@ -38,7 +38,7 @@
       });
 }
 
-}  // namespace TFLite
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace TFLite
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.h b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.h
index 05a1af1..141c6ed 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.h
@@ -41,9 +41,9 @@
 
 inline void registerPasses() { createWrapEntryPointsPass(); }
 
-}  // namespace TFLite
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace TFLite
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_BINDINGS_TFLITE_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_BINDINGS_TFLITE_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp
index 29fd001..c71b3f1 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp
@@ -47,7 +47,7 @@
 //   util.global private mutable @_tflite_xx_arg0_dim2 : index
 class WrapEntryPointsPass
     : public PassWrapper<WrapEntryPointsPass, OperationPass<ModuleOp>> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect, mlir::arith::ArithDialect,
                     mlir::tensor::TensorDialect, IREE::HAL::HALDialect,
@@ -101,7 +101,7 @@
     wrapEntryPoint(entryFuncOps.front());
   }
 
- private:
+private:
   // Globals representing each dynamic dimension of an IO tensor.
   struct DynamicDims {
     TensorType tensorType;
@@ -348,7 +348,8 @@
     auto shapeType = dynamicDims.tensorType;
     unsigned dynamicDimIdx = 0;
     for (unsigned i = 0; i < shapeType.getRank(); ++i) {
-      if (!shapeType.isDynamicDim(i)) continue;
+      if (!shapeType.isDynamicDim(i))
+        continue;
       auto dimValue =
           builder
               .create<IREE::Util::ListGetOp>(
@@ -651,7 +652,7 @@
 
 static PassRegistration<WrapEntryPointsPass> pass;
 
-}  // namespace TFLite
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace TFLite
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/BubbleUpOrdinalOps.cpp b/compiler/src/iree/compiler/Codegen/Common/BubbleUpOrdinalOps.cpp
index a668b3b..1f03305 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BubbleUpOrdinalOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/BubbleUpOrdinalOps.cpp
@@ -67,7 +67,7 @@
     : public BubbleUpOrdinalOpsBase<BubbleUpOrdinalOpsPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void BubbleUpOrdinalOpsPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -83,5 +83,5 @@
   return std::make_unique<BubbleUpOrdinalOpsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp
index 993837e..82cbcb3 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp
@@ -97,8 +97,9 @@
 
 /// Adds the result of the `flow.dispatch.tensor.load` op to the same
 /// equivalence class as the source.
-static LogicalResult analyseInterfaceLoadTensorOp(
-    IREE::Flow::DispatchTensorLoadOp loadOp, BufferizationPlan &plan) {
+static LogicalResult
+analyseInterfaceLoadTensorOp(IREE::Flow::DispatchTensorLoadOp loadOp,
+                             BufferizationPlan &plan) {
   plan.unionSets(loadOp.getResult(), loadOp.getSource());
   return success();
 }
@@ -112,10 +113,12 @@
   SmallVector<Value> mappedTensors = plan.getTensorsMappedToSameSet(value);
   for (auto v : mappedTensors) {
     auto definingOp = v.getDefiningOp<OpType>();
-    if (!definingOp) continue;
+    if (!definingOp)
+      continue;
     assert((!equivalentOp || equivalentOp == definingOp) &&
            "found two interface binding ops marked as equivalent");
-    if (!equivalentOp) equivalentOp = definingOp;
+    if (!equivalentOp)
+      equivalentOp = definingOp;
   }
   return equivalentOp;
 }
@@ -155,8 +158,9 @@
 ///   assumed that the equivalence classes contain only 1 such instruction.
 /// This method asserts that the `target` equivalence class already contains a
 /// `hal.interface.binding.subspan` op.'
-static bool canSetStoreValueAndTargetAsEquivalent(
-    IREE::Flow::DispatchTensorStoreOp storeOp, BufferizationPlan &plan) {
+static bool
+canSetStoreValueAndTargetAsEquivalent(IREE::Flow::DispatchTensorStoreOp storeOp,
+                                      BufferizationPlan &plan) {
   if (!canSetsBeMerged(storeOp.getValue(), storeOp.getTarget(), plan)) {
     return false;
   }
@@ -178,8 +182,9 @@
 }
 
 /// Tries to add the `value` and `target` to the same equivalence class.
-static LogicalResult analyseInterfaceStoreTensorOp(
-    IREE::Flow::DispatchTensorStoreOp storeOp, BufferizationPlan &plan) {
+static LogicalResult
+analyseInterfaceStoreTensorOp(IREE::Flow::DispatchTensorStoreOp storeOp,
+                              BufferizationPlan &plan) {
   // The value and target can be union-ed if the set the value is part of does
   // not contain any hal.interface.binding.subspan from a different binding.
   Value value = storeOp.getValue();
@@ -199,8 +204,9 @@
   return success();
 }
 
-static LogicalResult analyseInterfaceBindingSubspanOp(
-    IREE::HAL::InterfaceBindingSubspanOp subspanOp, BufferizationPlan &plan) {
+static LogicalResult
+analyseInterfaceBindingSubspanOp(IREE::HAL::InterfaceBindingSubspanOp subspanOp,
+                                 BufferizationPlan &plan) {
   plan.insert(subspanOp.getResult());
   return success();
 }
@@ -214,8 +220,9 @@
 
 /// For every result of the LinalgOp, gets the operands (`ins` or `outs`) whose
 /// buffer can be reused for the result.
-static SmallVector<Value> getTiedOperandsForDPSOps(
-    DestinationStyleOpInterface dpsOp, const BufferizationPlan &plan) {
+static SmallVector<Value>
+getTiedOperandsForDPSOps(DestinationStyleOpInterface dpsOp,
+                         const BufferizationPlan &plan) {
   SmallVector<Value> tiedOperands(dpsOp.getOperation()->getNumResults());
   auto outputOperands = dpsOp.getDpsInitOperands();
   for (auto [index, outTensor] : llvm::enumerate(outputOperands)) {
@@ -233,10 +240,12 @@
 /// same equivalence class.
 static LogicalResult analyseDPSOps(DestinationStyleOpInterface dpsOp,
                                    BufferizationPlan &plan) {
-  if (!dpsOp.hasTensorSemantics()) return success();
+  if (!dpsOp.hasTensorSemantics())
+    return success();
   auto results = dpsOp->getResults();
   auto tiedOperands = getTiedOperandsForDPSOps(dpsOp, plan);
-  if (tiedOperands.empty()) return failure();
+  if (tiedOperands.empty())
+    return failure();
   for (auto [index, resultTensor, tiedOperand] : llvm::zip_equal(
            llvm::seq<int64_t>(0, results.size()), results, tiedOperands)) {
     if (tiedOperand) {
@@ -307,11 +316,13 @@
 }
 
 static LogicalResult analyseScfIfOp(scf::IfOp ifOp, BufferizationPlan &plan) {
-  if (!ifOp.getNumResults()) return success();
+  if (!ifOp.getNumResults())
+    return success();
   for (auto [result, thenOperand, elseOperand] :
        llvm::zip_equal(ifOp.getResults(), ifOp.thenYield().getOperands(),
                        ifOp.elseYield().getOperands())) {
-    if (!llvm::isa<RankedTensorType>(result.getType())) continue;
+    if (!llvm::isa<RankedTensorType>(result.getType()))
+      continue;
     // All results and yields of the if-then-else are tied together.
     plan.unionSets(result, thenOperand);
     plan.unionSets(result, elseOperand);
@@ -321,7 +332,8 @@
 
 static LogicalResult analyseScfForOp(scf::ForOp forOp,
                                      BufferizationPlan &plan) {
-  if (forOp.getResults().empty()) return success();
+  if (forOp.getResults().empty())
+    return success();
   if (!llvm::all_of(forOp->getResultTypes(), [](Type resultType) {
         return llvm::isa<RankedTensorType>(resultType);
       })) {
@@ -385,7 +397,8 @@
   for (OpOperand &use : source.getUses()) {
     auto user = use.getOwner();
     // Process only update ops uses here.
-    if (!isUpdateOp(user)) continue;
+    if (!isUpdateOp(user))
+      continue;
     // If this is not the first use in a tensor::InsertSliceOp abort.
     if (updateOp) {
       return;
@@ -410,7 +423,8 @@
   Block *updateOpBlock = updateOp->getBlock();
   for (OpOperand &use : source.getUses()) {
     Operation *user = use.getOwner();
-    if (user == updateOp) continue;
+    if (user == updateOp)
+      continue;
     if (isReadOp(user)) {
       Value source = getSource(user);
       assert(source && "unable to find source from read op");
@@ -465,7 +479,8 @@
     for (OpOperand *input : linalgOp.getDpsInputOperands()) {
       auto tensorType =
           llvm::dyn_cast<RankedTensorType>(input->get().getType());
-      if (!tensorType) continue;
+      if (!tensorType)
+        continue;
       Type inputElementType = tensorType.getElementType();
       Type resultElementType =
           llvm::cast<RankedTensorType>(result->get().getType())
@@ -507,7 +522,8 @@
   unsigned numSets = 0;
   for (auto it = mappedTensors.begin(), ie = mappedTensors.end(); it != ie;
        ++it) {
-    if (!it->isLeader()) continue;
+    if (!it->isLeader())
+      continue;
     llvm::dbgs() << "\tSet " << numSets;
     if (storeLeaders.count(
             getLeaderValue(getValue(*mappedTensors.member_begin(it))))) {
@@ -658,5 +674,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.h b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.h
index efeafe0..74e4e20 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.h
+++ b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.h
@@ -27,7 +27,7 @@
 /// light-weight wrapper around `llvm::EquivalenceClasses` to account for
 /// `Value` not directly supported as a value type by this class.
 class BufferizationPlan {
- public:
+public:
   llvm::EquivalenceClasses<void *>::iterator findValue(Value v) const {
     return mappedTensors.findValue(getPointer(v));
   }
@@ -67,13 +67,14 @@
   /// the dispatch region.
   bool isInStoreSet(Value v) {
     Value leader = getLeaderValue(v);
-    if (!leader) return false;
+    if (!leader)
+      return false;
     return storeLeaders.count(leader);
   }
 
   void dump();
 
- private:
+private:
   Value getLeaderValue(Value v1) const {
     void *ptr = getPointer(v1);
     auto it = mappedTensors.findLeader(ptr);
@@ -101,6 +102,6 @@
 LogicalResult createTensorEquivalenceClasses(func::FuncOp funcOp,
                                              BufferizationPlan &plan);
 
-}  // namespace iree_compiler
-}  // namespace mlir
-#endif  // IREE_COMPILER_CODEGEN_COMMON_BUFFERIZATIONANALYSIS_H
+} // namespace iree_compiler
+} // namespace mlir
+#endif // IREE_COMPILER_CODEGEN_COMMON_BUFFERIZATIONANALYSIS_H
diff --git a/compiler/src/iree/compiler/Codegen/Common/BufferizeCopyOnlyDispatchesPass.cpp b/compiler/src/iree/compiler/Codegen/Common/BufferizeCopyOnlyDispatchesPass.cpp
index 8c8fee5..007eaee 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BufferizeCopyOnlyDispatchesPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/BufferizeCopyOnlyDispatchesPass.cpp
@@ -47,7 +47,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void BufferizeCopyOnlyDispatchesPass::runOnOperation() {
   ModuleOp module = getOperation();
@@ -62,13 +62,15 @@
         [&](IREE::Flow::DispatchTensorStoreOp storeOp) -> WalkResult {
           return success(isReadOnly(storeOp.getValue()));
         });
-    if (walkResult.wasInterrupted()) continue;
+    if (walkResult.wasInterrupted())
+      continue;
     // The function is just a copy.
     copyOnlyFunctions.push_back(funcOp);
   }
 
   // There are no copy-only functions. So nothing to do.
-  if (copyOnlyFunctions.empty()) return;
+  if (copyOnlyFunctions.empty())
+    return;
 
   // Bufferize the dispatch to create a `linalg.generic` as a copy operation.
   // This can then be used by the backends to tile and distribute.
@@ -125,5 +127,5 @@
   return std::make_unique<BufferizeCopyOnlyDispatchesPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/CleanupBufferAllocViewPass.cpp b/compiler/src/iree/compiler/Codegen/Common/CleanupBufferAllocViewPass.cpp
index c97ecf5..ddb90d8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CleanupBufferAllocViewPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/CleanupBufferAllocViewPass.cpp
@@ -43,12 +43,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createCleanupBufferAllocViewPass() {
   return std::make_unique<CleanupBufferAllocViewPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/CommonPasses.h b/compiler/src/iree/compiler/Codegen/Common/CommonPasses.h
index f2189b2..76076b8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CommonPasses.h
+++ b/compiler/src/iree/compiler/Codegen/Common/CommonPasses.h
@@ -84,8 +84,8 @@
 
 /// Creates a pass to decompose tensor.pack and tensor.unpack ops. The pass does
 /// tiling and generalization. See implementation for more details.
-std::unique_ptr<OperationPass<func::FuncOp>> createDecomposePackUnPackOpsPass(
-    bool tileOuterToOne = false);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createDecomposePackUnPackOpsPass(bool tileOuterToOne = false);
 
 /// A pass to eliminate tensor.empty ops that could turn into allocations
 /// during bufferization.
@@ -152,8 +152,9 @@
 std::unique_ptr<OperationPass<func::FuncOp>> createMemrefCopyToLinalgPass();
 
 /// Pass to optimize vector transfer_read and transfer_write.
-std::unique_ptr<OperationPass<func::FuncOp>> createOptimizeVectorTransferPass(
-    bool flatten = false, bool dropUnitDims = true);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createOptimizeVectorTransferPass(bool flatten = false,
+                                 bool dropUnitDims = true);
 
 /// Pad dynamic alloc op to convert them into static one.
 std::unique_ptr<OperationPass<func::FuncOp>> createPadDynamicAlloc();
@@ -173,8 +174,8 @@
 /// `option` details.
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSplitFullPartialTransferPass();
-std::unique_ptr<OperationPass<func::FuncOp>> createSplitFullPartialTransferPass(
-    StringRef option);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createSplitFullPartialTransferPass(StringRef option);
 
 /// Tests iree-hal-preprocess-executables-with behavior.
 std::unique_ptr<OperationPass<void>> createTestExecutablePreprocessingPass();
@@ -240,7 +241,7 @@
 void populateVectorizePadPatterns(RewritePatternSet &patterns,
                                   PatternBenefit baseBenefit = 1);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_COMMON_PASSES_H_
+#endif // IREE_COMPILER_CODEGEN_COMMON_PASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp b/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp
index c485a62..a1a1d57 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp
@@ -30,7 +30,8 @@
                              Location loc) {
   IntegerAttr attr;
   if (Value val = attrOrValue.dyn_cast<Value>()) {
-    if (val.getType().isIndex()) return val;
+    if (val.getType().isIndex())
+      return val;
     matchPattern(val, m_Constant(&attr));
   } else {
     attr = llvm::cast<IntegerAttr>(attrOrValue.get<Attribute>());
@@ -49,7 +50,8 @@
   LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                 PatternRewriter &rewriter) const override {
     // If the result shape is already static, then nothing to do.
-    if (padOp.getResultType().hasStaticShape()) return failure();
+    if (padOp.getResultType().hasStaticShape())
+      return failure();
 
     int rank = padOp.getResultType().getRank();
     SmallVector<int64_t> staticShape;
@@ -57,7 +59,8 @@
 
     auto sourceIfxOp = dyn_cast_or_null<OffsetSizeAndStrideOpInterface>(
         padOp.getSource().getDefiningOp());
-    if (!sourceIfxOp) return failure();
+    if (!sourceIfxOp)
+      return failure();
 
     SmallVector<OpFoldResult> lowPad = padOp.getMixedLowPad();
     SmallVector<OpFoldResult> source = sourceIfxOp.getMixedSizes();
@@ -107,7 +110,8 @@
         affine::canonicalizeMapAndOperands(&map, &valueSizes);
         cstExpr = map.getResult(0).dyn_cast<AffineConstantExpr>();
       }
-      if (!cstExpr) return failure();
+      if (!cstExpr)
+        return failure();
 
       staticShape.push_back(cstExpr.getValue());
     }
@@ -124,7 +128,7 @@
 
 class ConcretizePadResultShapePass final
     : public ConcretizePadResultShapeBase<ConcretizePadResultShapePass> {
- public:
+public:
   ConcretizePadResultShapePass() = default;
   ConcretizePadResultShapePass(const ConcretizePadResultShapePass &pass) =
       default;
@@ -150,7 +154,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createConcretizePadResultShapePass() {
@@ -178,5 +182,5 @@
   patterns.add<ConcretizePadResultShape>(context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ArithToF32.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ArithToF32.cpp
index 50d5a4d..7212b22 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ArithToF32.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ArithToF32.cpp
@@ -46,7 +46,8 @@
                          Location loc) {
   Type eTy = getElementTypeOrSelf(type);
   Type inputETy = getElementTypeOrSelf(inputs[0].getType());
-  if (!llvm::isa<FloatType>(getElementTypeOrSelf(type))) return nullptr;
+  if (!llvm::isa<FloatType>(getElementTypeOrSelf(type)))
+    return nullptr;
 
   if (inputETy.getIntOrFloatBitWidth() > eTy.getIntOrFloatBitWidth()) {
     return builder.create<arith::TruncFOp>(loc, type, inputs[0]);
@@ -65,7 +66,8 @@
   explicit PrimitiveTypeConverter() {
     addConversion([](Type type) { return type; });
     addConversion([&](SourceType type) -> Type {
-      if (!isSourceType(type)) return type;
+      if (!isSourceType(type))
+        return type;
       return getTargetType(type);
     });
     addConversion([&](ComplexType type) {
@@ -112,9 +114,9 @@
   GenericTypeConversionPattern(MLIRContext *context,
                                TypeConverter &typeConverter)
       : ConversionPattern(typeConverter, MatchAnyOpTypeTag(), 0, context) {}
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     // Convert attributes only if this is a constant-like op.
     // This is because some ops use typed attributes for structural information
     // - like linalg ops using i64 for dimension indices - and if we converted
@@ -161,9 +163,9 @@
           typename OperandToResultWidthLegalityRelation>
 struct ConvertTypeSensitiveArithCastOp : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType =
         this->getTypeConverter()->convertType(op.getResult().getType());
     auto operandType = adaptor.getIn().getType();
@@ -200,7 +202,7 @@
 class PropagateCastF : public OpRewritePattern<SrcOp> {
   using OpRewritePattern<SrcOp>::OpRewritePattern;
 
- public:
+public:
   LogicalResult matchAndRewrite(SrcOp op,
                                 PatternRewriter &rewriter) const override {
     auto operand = op.getOperand();
@@ -260,13 +262,16 @@
 
     auto checkOp = [&](Operation *op) {
       for (Type type : op->getResultTypes()) {
-        if (!typeConverter.isLegal(type)) return false;
+        if (!typeConverter.isLegal(type))
+          return false;
       }
       for (Type type : op->getOperandTypes()) {
-        if (!typeConverter.isLegal(type)) return false;
+        if (!typeConverter.isLegal(type))
+          return false;
       }
       for (auto &region : op->getRegions()) {
-        if (!typeConverter.isLegal(&region)) return false;
+        if (!typeConverter.isLegal(&region))
+          return false;
       }
       return true;
     };
@@ -305,12 +310,12 @@
   PromoteBF16ToF32Converter typeConverter;
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createConvertBf16ArithToF32Pass() {
   return std::make_unique<ConvertBf16ArithToF32Pass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp
index c20bb5f..0219f17 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp
@@ -39,14 +39,15 @@
 namespace {
 
 class Bf16EmulationConverter : public TypeConverter {
- public:
+public:
   explicit Bf16EmulationConverter() {
     // Allow unknown types.
     addConversion([](Type ty) -> std::optional<Type> { return ty; });
 
     // Scalar case.
     addConversion([](FloatType ty) -> std::optional<Type> {
-      if (ty.isBF16()) return IntegerType::get(ty.getContext(), 16);
+      if (ty.isBF16())
+        return IntegerType::get(ty.getContext(), 16);
       return ty;
     });
 
@@ -63,9 +64,9 @@
     : OpConversionPattern<IREE::HAL::InterfaceBindingSubspanOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type newResultTy = getTypeConverter()->convertType(op.getType());
     if (!newResultTy)
       return rewriter.notifyMatchFailure(
@@ -87,9 +88,9 @@
 struct ConvertMemRefAlloc final : OpConversionPattern<memref::AllocOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::AllocOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::AllocOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type newTy = getTypeConverter()->convertType(op.getType());
     if (!newTy)
       return rewriter.notifyMatchFailure(
@@ -109,9 +110,9 @@
   GenericTypeConversionPattern(TypeConverter &typeConverter,
                                MLIRContext *context)
       : ConversionPattern(typeConverter, MatchAnyOpTypeTag(), 0, context) {}
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     // Convert attributes only if this is a constant-like op.
     // This is because some ops use typed attributes for structural information
     // - like linalg ops using i64 for dimension indices - and if we converted
@@ -128,9 +129,10 @@
           APInt apint = floatAttr.getValue().bitcastToAPInt();
           newAttr = rewriter.getI16IntegerAttr(apint.getZExtValue());
         } else if (auto denseAttr = dyn_cast<DenseFPElementsAttr>(oldAttr)) {
-          newAttr = denseAttr.mapValues(
-              rewriter.getI16Type(),
-              [&](APFloat src) { return src.bitcastToAPInt(); });
+          newAttr =
+              denseAttr.mapValues(rewriter.getI16Type(), [&](APFloat src) {
+                return src.bitcastToAPInt();
+              });
         }
 
         newAttrs.push_back(NamedAttribute(attr.getName(), newAttr));
@@ -168,9 +170,9 @@
 struct ConvertMemRefLoad final : OpConversionPattern<memref::LoadOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::LoadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::LoadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type newResTy = getTypeConverter()->convertType(op.getType());
     if (!newResTy)
       return rewriter.notifyMatchFailure(
@@ -187,9 +189,9 @@
 struct ConvertMemRefStore final : OpConversionPattern<memref::StoreOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::StoreOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::StoreOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type newTy = getTypeConverter()->convertType(op.getMemRefType());
     if (!newTy)
       return rewriter.notifyMatchFailure(
@@ -267,7 +269,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // Public interface
@@ -278,5 +280,5 @@
   return std::make_unique<ConvertBf16ToUInt16BuffersPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp
index 8cf0893..938fdc8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp
@@ -53,7 +53,7 @@
 class ConvertToDestinationPassingStylePass
     : public ConvertToDestinationPassingStyleBase<
           ConvertToDestinationPassingStylePass> {
- public:
+public:
   ConvertToDestinationPassingStylePass() = default;
   ConvertToDestinationPassingStylePass(bool useWARForCooperativeMatrixCodegen) {
     this->useWARForCooperativeMatrixCodegen = useWARForCooperativeMatrixCodegen;
@@ -69,13 +69,14 @@
   }
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 /// Returns the subview into the buffer that is supposed to be populated with
 /// the `value` of the `flow.dispatch.tensor.store` operation. This can be used
 /// to compute the results in place.
-static Value getTensorLoadOpForTensorStoreOp(
-    OpBuilder &b, IREE::Flow::DispatchTensorStoreOp storeOp) {
+static Value
+getTensorLoadOpForTensorStoreOp(OpBuilder &b,
+                                IREE::Flow::DispatchTensorStoreOp storeOp) {
   // Clone the offset, size and stride values. They will be CSE-ed later.
   SliceAndDynamicDims clonedVals = cloneOffsetsSizesAndStrides(b, storeOp);
   Value tensorLoadOp = b.create<IREE::Flow::DispatchTensorLoadOp>(
@@ -134,10 +135,10 @@
 /// `flow.dispatch.tensor.store` operation. For each use, gets the tied result
 /// and follow its uses. The traversed uses and thir tied results are returned
 /// in `traversedUses`.
-static IREE::Flow::DispatchTensorStoreOp walkUseToGetDispatchTensorStoreOp(
-    Value value, const BufferizationPlan &plan,
-    SmallVectorImpl<OpOperand *> &traversedUses,
-    llvm::DenseSet<Value> &processed) {
+static IREE::Flow::DispatchTensorStoreOp
+walkUseToGetDispatchTensorStoreOp(Value value, const BufferizationPlan &plan,
+                                  SmallVectorImpl<OpOperand *> &traversedUses,
+                                  llvm::DenseSet<Value> &processed) {
   Operation *user = nullptr;
   while (value.hasOneUse()) {
     processed.insert(value);
@@ -147,7 +148,8 @@
       return storeOp;
     }
     value = getTiedResultForOperand(use, plan);
-    if (!value) return nullptr;
+    if (!value)
+      return nullptr;
     traversedUses.push_back(&use);
   }
   // If the value has a use which is a store, then use that directly.
@@ -183,9 +185,10 @@
 
 /// For an operation whose `resultValue` is the result of the dispatch region,
 /// gets the buffer to use to compute the value in-place.
-static LogicalResult modifyResultToUseStoreBuffer(
-    OpBuilder &b, OpResult resultValue, const BufferizationPlan &plan,
-    llvm::DenseSet<Value> &processed) {
+static LogicalResult
+modifyResultToUseStoreBuffer(OpBuilder &b, OpResult resultValue,
+                             const BufferizationPlan &plan,
+                             llvm::DenseSet<Value> &processed) {
   // Traverse the use-def chains to get the `flow.dispatch.tensor.store`
   // operation keeping track of all the traversed operations. Note that the
   // equivalence set construction should ensure that all operations traversed
@@ -256,7 +259,8 @@
   auto walkResult = funcOp.walk<WalkOrder::PreOrder>(
       [&](tensor::EmptyOp emptyOp) -> WalkResult {
         for (auto result : emptyOp->getResults()) {
-          if (!llvm::isa<RankedTensorType>(result.getType())) continue;
+          if (!llvm::isa<RankedTensorType>(result.getType()))
+            continue;
           if (plan.isInStoreSet(result) && !processed.count(result)) {
             return modifyResultToUseStoreBuffer(b, result, plan, processed);
           }
@@ -287,24 +291,27 @@
 
 // Checks if the `inOperand` can be used in place of the `initOperand`
 // to mimic in-place update behavior for parallel elementwise ops.
-static bool canUseInOperandAsInitOperand(
-    OpOperand *inOperand, OpOperand *initOperand,
-    bool useWARForCooperativeMatrixCodegen = false) {
+static bool
+canUseInOperandAsInitOperand(OpOperand *inOperand, OpOperand *initOperand,
+                             bool useWARForCooperativeMatrixCodegen = false) {
   if (isReadOnly(inOperand->get())) {
     return false;
   }
 
-  if (inOperand->getOwner() != initOperand->getOwner()) return false;
+  if (inOperand->getOwner() != initOperand->getOwner())
+    return false;
 
   auto linalgOp = dyn_cast<linalg::LinalgOp>(inOperand->getOwner());
-  if (!linalgOp) return false;
+  if (!linalgOp)
+    return false;
 
   if (linalgOp.getMatchingIndexingMap(inOperand) !=
       linalgOp.getMatchingIndexingMap(initOperand)) {
     return false;
   }
 
-  if (inOperand->get().getType() != initOperand->get().getType()) return false;
+  if (inOperand->get().getType() != initOperand->get().getType())
+    return false;
 
   if (useWARForCooperativeMatrixCodegen) {
     return true;
@@ -321,16 +328,17 @@
 
 /// Checks if the use of a result of a compute op can be modified
 /// so that it can be moved into a store set.
-static std::optional<OpOperand *> canModifyUseToGetValueIntoStoreSet(
-    BufferizationPlan &plan, OpOperand *use,
-    bool useWARForCooperativeMatrixCodegen) {
+static std::optional<OpOperand *>
+canModifyUseToGetValueIntoStoreSet(BufferizationPlan &plan, OpOperand *use,
+                                   bool useWARForCooperativeMatrixCodegen) {
   assert(!plan.isInStoreSet(use->get()) &&
          "attempting to move a value into a store set, when it is already part "
          "of one");
 
   // Currently only look at use in linalg.generic ops.
   auto genericOpConsumer = dyn_cast<linalg::GenericOp>(use->getOwner());
-  if (!genericOpConsumer) return std::nullopt;
+  if (!genericOpConsumer)
+    return std::nullopt;
 
   // All loops need to be parallel.
   if (genericOpConsumer.getNumLoops() !=
@@ -338,14 +346,17 @@
     return std::nullopt;
   }
 
-  if (genericOpConsumer.isDpsInit(use)) return std::nullopt;
+  if (genericOpConsumer.isDpsInit(use))
+    return std::nullopt;
 
   for (auto [index, initOperand] :
        llvm::enumerate(genericOpConsumer.getDpsInitOperands())) {
     // Output tensor is unused in the body computation.
-    if (genericOpConsumer.payloadUsesValueFromOperand(initOperand)) continue;
+    if (genericOpConsumer.payloadUsesValueFromOperand(initOperand))
+      continue;
     // The result of this operation needs to be in a store set.
-    if (!plan.isInStoreSet(genericOpConsumer->getResult(index))) continue;
+    if (!plan.isInStoreSet(genericOpConsumer->getResult(index)))
+      continue;
     if (!canUseInOperandAsInitOperand(use, initOperand,
                                       useWARForCooperativeMatrixCodegen)) {
       continue;
@@ -437,7 +448,8 @@
         [&](TilingInterface computeOp) -> WalkResult {
       for (auto result : computeOp->getResults()) {
         // If result is already in a store set. Nothing to do.
-        if (plan.isInStoreSet(result)) continue;
+        if (plan.isInStoreSet(result))
+          continue;
 
         // Check if there are any uses that can be modified to reuse the output
         // buffer.
@@ -445,7 +457,8 @@
           std::optional<OpOperand *> reusableOperand =
               canModifyUseToGetValueIntoStoreSet(
                   plan, &use, useWARForCooperativeMatrixCodegen);
-          if (!reusableOperand) continue;
+          if (!reusableOperand)
+            continue;
           if (failed(modifyUseToGetValueIntoStoreSet(rewriter, &use,
                                                      reusableOperand.value())))
             continue;
@@ -479,7 +492,8 @@
       return;
     }
     auto emptyOp = unpackOp.getDest().getDefiningOp<tensor::EmptyOp>();
-    if (!emptyOp) return;
+    if (!emptyOp)
+      return;
 
     OpBuilder::InsertionGuard g(b);
     b.setInsertionPointAfter(emptyOp);
@@ -503,10 +517,13 @@
     Location loc = op.getLoc();
     for (OpOperand *opOperand : op.getDpsInitOperands()) {
       DenseElementsAttr attr;
-      if (!matchPattern(opOperand->get(), m_Constant(&attr))) continue;
-      if (!attr.isSplat()) continue;
+      if (!matchPattern(opOperand->get(), m_Constant(&attr)))
+        continue;
+      if (!attr.isSplat())
+        continue;
       auto type = llvm::dyn_cast<RankedTensorType>(attr.getType());
-      if (!type) continue;
+      if (!type)
+        continue;
       TypedAttr scalarAttr = attr.getValues<TypedAttr>()[0];
 
       modifiedOutput = true;
@@ -584,7 +601,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ConvertToDestinationPassingStylePass::runOnOperation() {
   func::FuncOp funcOp = getOperation();
@@ -645,5 +662,5 @@
       useWARForCooperativeMatrixCodegen);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposeAffineOpsPass.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposeAffineOpsPass.cpp
index c45da7f..42514d3 100644
--- a/compiler/src/iree/compiler/Codegen/Common/DecomposeAffineOpsPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/DecomposeAffineOpsPass.cpp
@@ -18,7 +18,7 @@
   void runOnOperation() override;
 };
 
-}  // namespace
+} // namespace
 
 void DecomposeAffineOpsPass::runOnOperation() {
   IRRewriter rewriter(&getContext());
@@ -32,5 +32,5 @@
 std::unique_ptr<Pass> createDecomposeAffineOpsPass() {
   return std::make_unique<DecomposeAffineOpsPass>();
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp
index a5d8dc0..911969a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/DecomposeConvolutionToLowerDimOps.cpp
@@ -34,11 +34,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createDecomposeConvolutionToLowerDimOpsPass() {
   return std::make_unique<DecomposeConvolutionToLowerDimOpsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposeLinalgGeneric.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposeLinalgGeneric.cpp
index c009cc3..a22964a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/DecomposeLinalgGeneric.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/DecomposeLinalgGeneric.cpp
@@ -34,11 +34,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createDecomposeLinalgGenericPass() {
   return std::make_unique<DecomposeLinalgGenericPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
index 56cad06..df2f9d3 100644
--- a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
@@ -78,10 +78,12 @@
     int numDropDims = 0;
     ArrayRef<int64_t> perm = op.getPermutation();
     for (int idx = inputTy.getRank() - 1; idx >= 0; idx--) {
-      if (idx != perm[idx] || inputTy.getDimSize(idx) != 1) break;
+      if (idx != perm[idx] || inputTy.getDimSize(idx) != 1)
+        break;
       numDropDims++;
     }
-    if (numDropDims == 0) return failure();
+    if (numDropDims == 0)
+      return failure();
 
     Location loc = op.getLoc();
     SmallVector<OpFoldResult> srcMixedSizes =
@@ -127,7 +129,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void DecomposePackUnPackOpsPass::runOnOperation() {
   MLIRContext *ctx = &getContext();
@@ -188,7 +190,8 @@
       FailureOr<scf::SCFTileAndFuseResult> tileAndFuseResult =
           scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp(
               rewriter, cast<TilingInterface>(op.getOperation()), packOptions);
-      if (failed(tileAndFuseResult)) return signalPassFailure();
+      if (failed(tileAndFuseResult))
+        return signalPassFailure();
       rewriter.replaceOp(op, tileAndFuseResult->replacements[op.getResult()]);
     });
 
@@ -215,7 +218,8 @@
       FailureOr<scf::SCFTilingResult> tilingResult = scf::tileUsingSCFForOp(
           rewriter, cast<TilingInterface>(op.getOperation()),
           unpackTilingOptions);
-      if (failed(tilingResult)) return signalPassFailure();
+      if (failed(tilingResult))
+        return signalPassFailure();
       rewriter.replaceOp(op, tilingResult->replacements);
     });
 
@@ -264,10 +268,10 @@
   }
 }
 
-std::unique_ptr<OperationPass<func::FuncOp>> createDecomposePackUnPackOpsPass(
-    bool tileOuterToOne) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createDecomposePackUnPackOpsPass(bool tileOuterToOne) {
   return std::make_unique<DecomposePackUnPackOpsPass>(tileOuterToOne);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingInfo.h b/compiler/src/iree/compiler/Codegen/Common/EncodingInfo.h
index f5bf182..313313e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/EncodingInfo.h
+++ b/compiler/src/iree/compiler/Codegen/Common/EncodingInfo.h
@@ -24,12 +24,12 @@
     IREE::LinalgExt::MaterializeEncodingInfo &encodingInfo,
     ArrayRef<int64_t> shape);
 
-IREE::LinalgExt::MaterializeEncodingInfo chooseEncodingInfoForMatmul(
-    MatmulType type, MatmulOperandRole operandRole,
-    MatmulTileParams tileParams);
+IREE::LinalgExt::MaterializeEncodingInfo
+chooseEncodingInfoForMatmul(MatmulType type, MatmulOperandRole operandRole,
+                            MatmulTileParams tileParams);
 
-IREE::LinalgExt::MaterializeEncodingValueFn getMaterializeEncodingValueFn(
-    IREE::HAL::ExecutableTargetAttr targetAttr);
+IREE::LinalgExt::MaterializeEncodingValueFn
+getMaterializeEncodingValueFn(IREE::HAL::ExecutableTargetAttr targetAttr);
 
 void populateMaterializeEncodingIntoPackUnPackPatterns(
     RewritePatternSet &patterns,
@@ -45,6 +45,6 @@
 chooseDynamicEncodingInfoVMVXMicrokernels(RankedTensorType tensorType,
                                           OpBuilder &builder, Location loc);
 
-}  // namespace iree_compiler
-}  // namespace mlir
-#endif  // IREE_COMPILER_SRC_IREE_COMPILER_CODEGEN_COMMON_ENCODINGINFO_H_
+} // namespace iree_compiler
+} // namespace mlir
+#endif // IREE_COMPILER_SRC_IREE_COMPILER_CODEGEN_COMMON_ENCODINGINFO_H_
diff --git a/compiler/src/iree/compiler/Codegen/Common/EraseHALDescriptorTypeFromMemRef.cpp b/compiler/src/iree/compiler/Codegen/Common/EraseHALDescriptorTypeFromMemRef.cpp
index c22b82e..1ac61e5 100644
--- a/compiler/src/iree/compiler/Codegen/Common/EraseHALDescriptorTypeFromMemRef.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/EraseHALDescriptorTypeFromMemRef.cpp
@@ -38,9 +38,11 @@
     addConversion([](BaseMemRefType memRefType) -> std::optional<Type> {
       // Expect #hal.descriptor_type memory spaces.
       Attribute spaceAttr = memRefType.getMemorySpace();
-      if (!spaceAttr) return std::nullopt;
+      if (!spaceAttr)
+        return std::nullopt;
       auto dtAttr = llvm::dyn_cast<IREE::HAL::DescriptorTypeAttr>(spaceAttr);
-      if (!dtAttr) return std::nullopt;
+      if (!dtAttr)
+        return std::nullopt;
 
       // Erase the #hal.descriptor_type memory space.
       if (auto rankedType = llvm::dyn_cast<MemRefType>(memRefType)) {
@@ -81,9 +83,9 @@
   EraseMemorySpacePattern(MLIRContext *context, TypeConverter &converter)
       : ConversionPattern(converter, MatchAnyOpTypeTag(), 1, context) {}
 
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override;
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override;
 };
 
 LogicalResult EraseMemorySpacePattern::matchAndRewrite(
@@ -123,7 +125,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 LogicalResult eraseHALDescriptorTypeFromMemRef(func::FuncOp funcOp) {
   MLIRContext *context = funcOp.getContext();
@@ -142,5 +144,5 @@
   return std::make_unique<EraseHALDescriptorTypeFromMemRefPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.cpp b/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.cpp
index d299349..aa7ced7 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.cpp
@@ -71,8 +71,8 @@
                                           storeOp.getNontemporal());
 }
 
-SmallVector<OpFoldResult> getStoreOpViewSizeForEachDim(
-    RewriterBase &rewriter, memref::StoreOp storeOp) {
+SmallVector<OpFoldResult>
+getStoreOpViewSizeForEachDim(RewriterBase &rewriter, memref::StoreOp storeOp) {
   MemRefType ldTy = storeOp.getMemRefType();
   unsigned loadRank = ldTy.getRank();
   return SmallVector<OpFoldResult>(loadRank, rewriter.getIndexAttr(1));
@@ -101,7 +101,7 @@
     : public ExtractAddressComputationBase<ExtractAddressComputationPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void ExtractAddressComputationPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
@@ -115,5 +115,5 @@
 std::unique_ptr<Pass> createExtractAddressComputationPass() {
   return std::make_unique<ExtractAddressComputationPass>();
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.h b/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.h
index b98da14..69228ae 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.h
+++ b/compiler/src/iree/compiler/Codegen/Common/ExtractAddressComputation.h
@@ -42,7 +42,8 @@
     auto ldTy = srcMemRef.getType().cast<MemRefType>();
     unsigned storeLoadRank = ldTy.getRank();
     // Don't waste compile time if there is nothing to rewrite.
-    if (storeLoadRank == 0) return failure();
+    if (storeLoadRank == 0)
+      return failure();
 
     // If our load already has only zeros as indices there is nothing
     // to do.
@@ -82,6 +83,6 @@
 /// before hand. In other words, the address computation is not part of
 /// the memory access anymore.
 void populateExtractAddressComputationPatterns(RewritePatternSet &patterns);
-}  // namespace iree_compiler
-}  // namespace mlir
-#endif  // IREE_COMPILER_CODEGEN_COMMON_EXTRACTADDRESSCOMPUTATION_H_
+} // namespace iree_compiler
+} // namespace mlir
+#endif // IREE_COMPILER_CODEGEN_COMMON_EXTRACTADDRESSCOMPUTATION_H_
diff --git a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
index 98795a6..069d2ef 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
@@ -146,11 +146,12 @@
 struct FlattenAlloc final : public OpConversionPattern<AllocOpTy> {
   using OpConversionPattern<AllocOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      AllocOpTy allocOp, typename AllocOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(AllocOpTy allocOp, typename AllocOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto oldType = llvm::dyn_cast<MemRefType>(allocOp.getType());
-    if (!oldType || !oldType.getLayout().isIdentity()) return failure();
+    if (!oldType || !oldType.getLayout().isIdentity())
+      return failure();
 
     Value dynamicDim = createTotalElementCountValue(
         oldType, allocOp.getDynamicSizes(), allocOp.getLoc(), rewriter);
@@ -168,7 +169,8 @@
   using OpConversionPattern::OpConversionPattern;
 
   static Attribute flattenAttribute(Attribute value, ShapedType newType) {
-    if (!value) return value;
+    if (!value)
+      return value;
     if (auto splatAttr = llvm::dyn_cast<SplatElementsAttr>(value)) {
       return splatAttr.reshape(newType);
     } else if (auto denseAttr = llvm::dyn_cast<DenseElementsAttr>(value)) {
@@ -177,11 +179,12 @@
     return {};
   }
 
-  LogicalResult matchAndRewrite(
-      memref::GlobalOp globalOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::GlobalOp globalOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto oldType = llvm::dyn_cast<MemRefType>(globalOp.getType());
-    if (!oldType || !oldType.getLayout().isIdentity()) return failure();
+    if (!oldType || !oldType.getLayout().isIdentity())
+      return failure();
 
     auto tensorType = RankedTensorType::get({oldType.getNumElements()},
                                             oldType.getElementType());
@@ -203,15 +206,17 @@
     : public OpConversionPattern<memref::GetGlobalOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::GetGlobalOp getOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::GetGlobalOp getOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto oldType = llvm::dyn_cast<MemRefType>(getOp.getType());
-    if (!oldType || !oldType.getLayout().isIdentity()) return failure();
+    if (!oldType || !oldType.getLayout().isIdentity())
+      return failure();
 
     auto globalOp = dyn_cast_or_null<memref::GlobalOp>(
         SymbolTable::lookupNearestSymbolFrom(getOp, getOp.getNameAttr()));
-    if (!globalOp) return failure();
+    if (!globalOp)
+      return failure();
 
     auto loadedValue = rewriter.createOrFold<memref::GetGlobalOp>(
         getOp.getLoc(), globalOp.getType(), getOp.getNameAttr());
@@ -228,13 +233,15 @@
     : public OpConversionPattern<IREE::HAL::InterfaceBindingSubspanOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp subspanOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp subspanOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto oldType = llvm::dyn_cast<MemRefType>(subspanOp.getType());
     // IREE subspan ops only use memref types with the default identity
     // layout maps.
-    if (!oldType) return failure();
+    if (!oldType)
+      return failure();
 
     OpFoldResult linearShape;
     if (oldType.hasStaticShape()) {
@@ -301,9 +308,9 @@
     : public OpConversionPattern<memref::ReinterpretCastOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::ReinterpretCastOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::ReinterpretCastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (op.getResultRank() != 0) {
       return rewriter.notifyMatchFailure(
           op, "unhandled op with non-zero rank memref return type");
@@ -409,9 +416,9 @@
 struct FlattenSubView final : public OpConversionPattern<memref::SubViewOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::SubViewOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::SubViewOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(adaptor.getSource().getType())) {
       return rewriter.notifyMatchFailure(
           op, "expected converted memref of rank <= 1");
@@ -440,9 +447,9 @@
 struct LinearizeLoadIndices final : public OpConversionPattern<memref::LoadOp> {
   using OpConversionPattern<memref::LoadOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::LoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::LoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(adaptor.getMemref().getType())) {
       return rewriter.notifyMatchFailure(
           loadOp, "expected converted memref of rank <= 1");
@@ -465,9 +472,9 @@
     : public OpConversionPattern<gpu::SubgroupMmaLoadMatrixOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      gpu::SubgroupMmaLoadMatrixOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(gpu::SubgroupMmaLoadMatrixOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(adaptor.getSrcMemref().getType())) {
       return rewriter.notifyMatchFailure(
           loadOp, "expected converted memref of rank <= 1");
@@ -491,9 +498,9 @@
     : public OpConversionPattern<memref::StoreOp> {
   using OpConversionPattern<memref::StoreOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::StoreOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::StoreOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(adaptor.getMemref().getType())) {
       return rewriter.notifyMatchFailure(
           storeOp, "expected converted memref of rank <= 1");
@@ -516,9 +523,9 @@
     : public OpConversionPattern<gpu::SubgroupMmaStoreMatrixOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      gpu::SubgroupMmaStoreMatrixOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(gpu::SubgroupMmaStoreMatrixOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(adaptor.getDstMemref().getType())) {
       return rewriter.notifyMatchFailure(
           storeOp, "expected converted memref of rank <= 1");
@@ -543,14 +550,13 @@
     : public OpConversionPattern<vector::TransferReadOp> {
   using OpConversionPattern<vector::TransferReadOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      vector::TransferReadOp transferReadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(vector::TransferReadOp transferReadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!transferReadOp.getPermutationMap().isMinorIdentity()) {
       return rewriter.notifyMatchFailure(
-          transferReadOp,
-          "cannot convert op with non-minor identity "
-          "map");
+          transferReadOp, "cannot convert op with non-minor identity "
+                          "map");
     }
     if (!isRankZeroOrOneMemRef(adaptor.getSource().getType())) {
       return rewriter.notifyMatchFailure(
@@ -577,14 +583,13 @@
     : public OpConversionPattern<vector::TransferWriteOp> {
   using OpConversionPattern<vector::TransferWriteOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      vector::TransferWriteOp transferWriteOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(vector::TransferWriteOp transferWriteOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!transferWriteOp.getPermutationMap().isMinorIdentity()) {
       return rewriter.notifyMatchFailure(
-          transferWriteOp,
-          "cannot convert op with non-minor identity "
-          "map");
+          transferWriteOp, "cannot convert op with non-minor identity "
+                           "map");
     }
     if (!isRankZeroOrOneMemRef(adaptor.getSource().getType())) {
       return rewriter.notifyMatchFailure(
@@ -609,9 +614,9 @@
 struct FlattenDealloc final : public OpConversionPattern<memref::DeallocOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::DeallocOp deallocOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::DeallocOp deallocOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(adaptor.getMemref().getType())) {
       return rewriter.notifyMatchFailure(
           deallocOp, "expected converted memref of rank <= 1");
@@ -627,14 +632,16 @@
     : public OpConversionPattern<UnrealizedConversionCastOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      UnrealizedConversionCastOp castOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (castOp->getNumOperands() != 1) return failure();
+  LogicalResult
+  matchAndRewrite(UnrealizedConversionCastOp castOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (castOp->getNumOperands() != 1)
+      return failure();
 
     Value input = adaptor.getOperands().front();
     // We only want to handle cases where the cast op handles memref types.
-    if (!llvm::isa<BaseMemRefType>(input.getType())) return failure();
+    if (!llvm::isa<BaseMemRefType>(input.getType()))
+      return failure();
 
     if (!isRankZeroOrOneMemRef(input.getType())) {
       return rewriter.notifyMatchFailure(
@@ -656,9 +663,9 @@
 struct FoldMemRefReshape final : public OpConversionPattern<ReshapeOpTy> {
   using OpConversionPattern<ReshapeOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      ReshapeOpTy op, typename ReshapeOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ReshapeOpTy op, typename ReshapeOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto typeConverter = OpConversionPattern<ReshapeOpTy>::typeConverter;
     if (!isRankZeroOrOneMemRef(adaptor.getSrc().getType())) {
       return rewriter.notifyMatchFailure(
@@ -672,7 +679,8 @@
     Type newSourceType = adaptor.getSrc().getType();
     Type neededResultType =
         typeConverter->convertType(op.getResult().getType());
-    if (!neededResultType) return failure();
+    if (!neededResultType)
+      return failure();
     if (newSourceType == neededResultType) {
       rewriter.replaceOp(op, adaptor.getSrc());
       return success();
@@ -694,10 +702,12 @@
 /// Note that this should be kept consistent with how the byte offset was
 /// calculated in the subspan ops!
 std::optional<int64_t> getNumBytes(Type type) {
-  if (type.isIntOrFloat()) return IREE::Util::getRoundedElementByteWidth(type);
+  if (type.isIntOrFloat())
+    return IREE::Util::getRoundedElementByteWidth(type);
   if (auto vectorType = llvm::dyn_cast<VectorType>(type)) {
     auto elementBytes = getNumBytes(vectorType.getElementType());
-    if (!elementBytes) return std::nullopt;
+    if (!elementBytes)
+      return std::nullopt;
     return elementBytes.value() * vectorType.getNumElements();
   }
   return std::nullopt;
@@ -706,7 +716,7 @@
 /// Erase alignment hints.
 struct RemoveAssumeAlignOp
     : public OpRewritePattern<memref::AssumeAlignmentOp> {
- public:
+public:
   using OpRewritePattern<memref::AssumeAlignmentOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(memref::AssumeAlignmentOp op,
@@ -770,7 +780,8 @@
         [](MemRefType type) -> std::optional<Type> {
           // 0-D MemRef types can be used to represent raw pointers for
           // micro-kernel ABI purposes. Specially allow it.
-          if (isRankZeroMemRef(type)) return type;
+          if (isRankZeroMemRef(type))
+            return type;
 
           // Fall back to the default conversion flow.
           return std::nullopt;
@@ -785,7 +796,8 @@
     internalTypeConverter.addConversion(
         [](MemRefType type) -> std::optional<Type> {
           // 0-D or 1-D MemRef types are okay.
-          if (isRankZeroOrOneMemRef(type)) return type;
+          if (isRankZeroOrOneMemRef(type))
+            return type;
 
           // Fall back to the default conversion flow.
           return std::nullopt;
@@ -853,7 +865,8 @@
         });
     target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
         [](UnrealizedConversionCastOp castOp) {
-          if (castOp->getNumOperands() != 1) return false;
+          if (castOp->getNumOperands() != 1)
+            return false;
 
           Type inputType = castOp->getOperandTypes().front();
           return !llvm::isa<BaseMemRefType>(inputType) ||
@@ -898,11 +911,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createFlattenMemRefSubspanPass() {
   return std::make_unique<FlattenMemRefSubspanPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/FoldAffineMinInDistributedLoops.cpp b/compiler/src/iree/compiler/Codegen/Common/FoldAffineMinInDistributedLoops.cpp
index 771ea01..c26a599 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FoldAffineMinInDistributedLoops.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FoldAffineMinInDistributedLoops.cpp
@@ -40,7 +40,8 @@
 /// attributes.
 static Value getAsValue(OpFoldResult attrOrValue, OpBuilder &builder,
                         Location loc) {
-  if (Value val = attrOrValue.dyn_cast<Value>()) return val;
+  if (Value val = attrOrValue.dyn_cast<Value>())
+    return val;
   auto attr = llvm::cast<IntegerAttr>(attrOrValue.get<Attribute>());
   return builder.create<arith::ConstantIndexOp>(loc, attr.getInt());
 }
@@ -58,14 +59,15 @@
 }
 #endif
 
-static FailureOr<affine::AffineApplyOp> canonicalizeMinMaxOp(
-    RewriterBase &rewriter, Operation *op,
-    affine::FlatAffineValueConstraints constraints) {
+static FailureOr<affine::AffineApplyOp>
+canonicalizeMinMaxOp(RewriterBase &rewriter, Operation *op,
+                     affine::FlatAffineValueConstraints constraints) {
   RewriterBase::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(op);
   FailureOr<affine::AffineValueMap> simplified =
       mlir::affine::simplifyConstrainedMinMaxOp(op, std::move(constraints));
-  if (failed(simplified)) return failure();
+  if (failed(simplified))
+    return failure();
   return rewriter.replaceOpWithNewOp<affine::AffineApplyOp>(
       op, simplified->getAffineMap(), simplified->getOperands());
 }
@@ -96,18 +98,22 @@
     auto loopMatcher = [&](Value iv, OpFoldResult &lb, OpFoldResult &ub,
                            OpFoldResult &step) {
       scf::ForOp forOp = scf::getForInductionVarOwner(iv);
-      if (!forOp) return failure();
+      if (!forOp)
+        return failure();
 
       auto loopInfo = isTiledAndDistributedLoop(forOp);
-      if (!loopInfo) return failure();
+      if (!loopInfo)
+        return failure();
       LLVM_DEBUG(llvm::dbgs() << *loopInfo);
 
       std::optional<int64_t> untiledStep =
           getConstantIntValue(loopInfo->untiledStep);
       // For IREE right now the original untiled loop should have step 1..
-      if (!untiledStep || *untiledStep != 1) return failure();
+      if (!untiledStep || *untiledStep != 1)
+        return failure();
       // ..and we tile according to some static tile sizes for processors.
-      if (!loopInfo->tileSize) return failure();
+      if (!loopInfo->tileSize)
+        return failure();
 
       lb = loopInfo->untiledLowerBound;
       ub = loopInfo->untiledUpperBound;
@@ -135,12 +141,15 @@
     // Find all iteration variables among `minOp`'s operands add constrain them.
     for (Value operand : minOp->getOperands()) {
       // Skip duplicate ids.
-      if (!allIds.insert(operand).second) continue;
+      if (!allIds.insert(operand).second)
+        continue;
       auto idOp = operand.getDefiningOp<IREE::HAL::InterfaceWorkgroupIDOp>();
-      if (!idOp) continue;
+      if (!idOp)
+        continue;
       // Can't infer the range when workroupCount is unknown.
       unsigned index = idOp.getDimension().getZExtValue();
-      if (index >= numWorkgroup.size()) return failure();
+      if (index >= numWorkgroup.size())
+        return failure();
       constraints.appendDimVar({idOp});
       constraints.addBound(presburger::BoundType::LB, idOp, 0);
       constraints.addBound(presburger::BoundType::UB, idOp,
@@ -149,7 +158,7 @@
     return canonicalizeMinMaxOp(rewriter, minOp, constraints);
   }
 
- private:
+private:
   ArrayRef<int64_t> numWorkgroup;
 };
 
@@ -170,7 +179,7 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 void populateFoldAffineMinInDistributedLoopsPatterns(
     RewritePatternSet &patterns, ArrayRef<int64_t> numWorkgroups) {
@@ -187,5 +196,5 @@
   return std::make_unique<FoldAffineMinInDistributedLoopsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOpPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOpPass.cpp
index e5d530d..23e51a9 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOpPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOpPass.cpp
@@ -18,7 +18,7 @@
 
 namespace {
 #include "iree/compiler/Codegen/Common/FoldTensorExtractOp.cpp.inc"
-}  // namespace
+} // namespace
 
 namespace {
 /// Upstream canonicalization passes fold
@@ -53,7 +53,7 @@
     : public FoldTensorExtractOpBase<FoldTensorExtractOpPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void FoldTensorExtractOpPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
@@ -66,5 +66,5 @@
   return std::make_unique<FoldTensorExtractOpPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp b/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp
index 475fe56..37d4172 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ForOpCanonicalizationPass.cpp
@@ -54,8 +54,8 @@
     : public OpRewritePattern<scf::ForOp> {
   using OpRewritePattern<scf::ForOp>::OpRewritePattern;
 
-  Value FoldCarryDep(scf::ForOp forOp, Operation* ivUser,
-                     Operation* ivDef) const {
+  Value FoldCarryDep(scf::ForOp forOp, Operation *ivUser,
+                     Operation *ivDef) const {
     if (auto shapeCast = dyn_cast<vector::ShapeCastOp>(ivUser)) {
       if (auto souceOp = dyn_cast<vector::ShapeCastOp>(ivDef)) {
         if (shapeCast.getType() == souceOp.getSource().getType()) {
@@ -80,8 +80,8 @@
     return Value();
   }
 
-  void transferBody(Block* source, Block* dest, ArrayRef<Value> results,
-                    PatternRewriter& rewriter) const {
+  void transferBody(Block *source, Block *dest, ArrayRef<Value> results,
+                    PatternRewriter &rewriter) const {
     // Move all operations to the destination block.
     rewriter.mergeBlocks(source, dest, dest->getArguments());
     // Replace the yield op by one that returns only the used values.
@@ -91,22 +91,24 @@
   }
 
   LogicalResult matchAndRewrite(scf::ForOp forOp,
-                                PatternRewriter& rewriter) const override {
+                                PatternRewriter &rewriter) const override {
     SmallVector<unsigned, 8> iteratorFolded;
-    SmallVector<Operation*, 8> resultOps;
+    SmallVector<Operation *, 8> resultOps;
     auto terminator = cast<scf::YieldOp>(forOp.getBody()->getTerminator());
     auto returnValues = llvm::to_vector<8>(terminator.getOperands());
     auto initArgs = llvm::to_vector<8>(forOp.getIterOperands());
     for (auto [index, iterArg] : llvm::enumerate(forOp.getRegionIterArgs())) {
-      if (!iterArg.hasOneUse()) continue;
-      Operation* op = iterArg.use_begin()->getOwner();
+      if (!iterArg.hasOneUse())
+        continue;
+      Operation *op = iterArg.use_begin()->getOwner();
       if (!isa<vector::ShapeCastOp, vector::ExtractOp,
                UnrealizedConversionCastOp>(op)) {
         continue;
       }
-      Operation* returnValDef = returnValues[index].getDefiningOp();
+      Operation *returnValDef = returnValues[index].getDefiningOp();
       Value newReturn = FoldCarryDep(forOp, op, returnValDef);
-      if (!newReturn) continue;
+      if (!newReturn)
+        continue;
       iteratorFolded.push_back(index);
       resultOps.push_back(returnValDef);
       returnValues[index] = newReturn;
@@ -115,7 +117,8 @@
       mapping.map(iterArg, initArgs[index]);
       initArgs[index] = rewriter.clone(*op, mapping)->getResult(0);
     }
-    if (iteratorFolded.empty()) return failure();
+    if (iteratorFolded.empty())
+      return failure();
     auto newLoop = rewriter.create<scf::ForOp>(
         forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(),
         forOp.getStep(), initArgs);
@@ -129,7 +132,7 @@
       mapping.map(returnValues[iter], newLoop.getResult(iter));
       repResults[index] =
           rewriter.clone(*resultOps[index], mapping)->getResult(0);
-      Operation* oldOp =
+      Operation *oldOp =
           newLoop.getRegionIterArgs()[index].use_begin()->getOwner();
       assert(oldOp->getNumResults() == 1 && "expected single result");
       rewriter.replaceAllUsesWith(oldOp->getResult(0),
@@ -151,15 +154,17 @@
   using OpRewritePattern<scf::ForOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(scf::ForOp forOp,
-                                PatternRewriter& rewriter) const override {
+                                PatternRewriter &rewriter) const override {
     VectorType v8f16Type = VectorType::get({8}, rewriter.getF16Type());
     VectorType v4f32Type = VectorType::get({4}, rewriter.getF32Type());
 
     SmallVector<unsigned, 8> ivIndices;
     for (auto [index, iterArg] : llvm::enumerate(forOp.getRegionIterArgs())) {
-      if (iterArg.getType() == v8f16Type) ivIndices.push_back(index);
+      if (iterArg.getType() == v8f16Type)
+        ivIndices.push_back(index);
     }
-    if (ivIndices.empty()) return failure();
+    if (ivIndices.empty())
+      return failure();
 
     // Bit cast all init values from v8f16 to v4f32.
     auto ivInitValues = llvm::to_vector<8>(forOp.getIterOperands());
@@ -189,7 +194,7 @@
       // Replace all uses of the new induction variable with a bitcast. We need
       // to exclude the bitcast op itself given it also uses the induction
       // variable.
-      SmallPtrSet<Operation*, 1> exceptions{bitcastOp};
+      SmallPtrSet<Operation *, 1> exceptions{bitcastOp};
       newIv.replaceAllUsesExcept(bitcastOp, exceptions);
     }
 
@@ -206,7 +211,8 @@
     yieldOp->setOperands(ivRetValues);
 
     SmallVector<Value, 8> forRetValues;
-    for (Value result : newLoop.getResults()) forRetValues.push_back(result);
+    for (Value result : newLoop.getResults())
+      forRetValues.push_back(result);
 
     // Bit cast return values to the old type to fix for op uses.
     rewriter.setInsertionPointAfter(newLoop);
@@ -223,7 +229,7 @@
 
 struct ForOpCanonicalizationPass
     : public ForOpCanonicalizationBase<ForOpCanonicalizationPass> {
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<scf::SCFDialect, vector::VectorDialect>();
   }
 
@@ -238,11 +244,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createForOpCanonicalizationPass() {
   return std::make_unique<ForOpCanonicalizationPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/FuseTensorPadWithConsumer.cpp b/compiler/src/iree/compiler/Codegen/Common/FuseTensorPadWithConsumer.cpp
index e1c2df4..9f9b44f 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FuseTensorPadWithConsumer.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FuseTensorPadWithConsumer.cpp
@@ -30,12 +30,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createFuseTensorPadWithConsumerPass() {
   return std::make_unique<FuseTensorPadWithConsumerPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/CommonGPUPasses.h b/compiler/src/iree/compiler/Codegen/Common/GPU/CommonGPUPasses.h
index efa6f03..7c6f4d0 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/CommonGPUPasses.h
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/CommonGPUPasses.h
@@ -41,9 +41,10 @@
   TransposeOpPattern = 1,
 };
 
-FailureOr<scf::ForOp> pipelineSharedMemoryCopy(
-    RewriterBase &rewriter, scf::ForOp forOp,
-    PipeliningSchedulingStrategy startegy, bool peelEpilogue, int64_t depth);
+FailureOr<scf::ForOp>
+pipelineSharedMemoryCopy(RewriterBase &rewriter, scf::ForOp forOp,
+                         PipeliningSchedulingStrategy startegy,
+                         bool peelEpilogue, int64_t depth);
 
 /// Tiles Linalg ops in the given `funcOp` along reduction dimensions to serial
 /// loops without distribution. If `fuseInputProducer` is true, input producers
@@ -76,14 +77,14 @@
 createGPUDistributeSharedMemoryCopy();
 
 /// Apply multi-buffering transformation.
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUMultiBuffering(
-    unsigned numBuffers = 5);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUMultiBuffering(unsigned numBuffers = 5);
 
 /// Apply software pipelining.
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUPipeliningPass(
-    bool epiloguePeeling = true, unsigned depth = 1,
-    PipeliningSchedulingStrategy schedule =
-        PipeliningSchedulingStrategy::loadGlobalStage0);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUPipeliningPass(bool epiloguePeeling = true, unsigned depth = 1,
+                        PipeliningSchedulingStrategy schedule =
+                            PipeliningSchedulingStrategy::loadGlobalStage0);
 
 /// Apply transformation to reduce the number of bank conflicts when accessing
 /// shared memory by padding fastest moving dimension with the specified size.
@@ -92,20 +93,21 @@
 
 // Creates a pass to tile reduction dimensions and create allocations for some
 // tensor values to use GPU shared memory.
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUTensorAlloc(
-    GPUPromoteSharedMemPattern promoteSharedMemPattern =
-        GPUPromoteSharedMemPattern::ContractionOpPattern);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUTensorAlloc(GPUPromoteSharedMemPattern promoteSharedMemPattern =
+                         GPUPromoteSharedMemPattern::ContractionOpPattern);
 
 // Creates a pass to tile tensor (linalg) ops within a GPU workgroup.
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUTensorTile(
-    bool distributeToWarp = false);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUTensorTile(bool distributeToWarp = false);
 
 /// Tile reductions and generate serial loops around reductions.
 std::unique_ptr<OperationPass<func::FuncOp>> createGPUTileReductionPass();
 
 /// Convert Linalg ops to Vector.
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUVectorizationPass(
-    bool generateContract = true, int64_t maxVectorSize = 4096);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUVectorizationPass(bool generateContract = true,
+                           int64_t maxVectorSize = 4096);
 
 // Distributes vector ops to all threads/warps in a GPU workgroup.
 // `getWarpSize` is for deciding the warp size to use; it takes the
@@ -116,10 +118,10 @@
     std::function<int(func::FuncOp)> getWarpSize = nullptr);
 
 /// Converts vector ops to gpu dialect.
-std::unique_ptr<OperationPass<func::FuncOp>> createWorkGroupSwizzle(
-    unsigned swizzleLogTile = 0);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createWorkGroupSwizzle(unsigned swizzleLogTile = 0);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_COMMON_GPU_COMMONGPUASSES_H_
+#endif // IREE_COMPILER_CODEGEN_COMMON_GPU_COMMONGPUASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCheckResourceUsage.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCheckResourceUsage.cpp
index ee9cc88..af419b5 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCheckResourceUsage.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUCheckResourceUsage.cpp
@@ -18,7 +18,7 @@
 namespace {
 class GPUCheckResourceUsagePass final
     : public GPUCheckResourceUsageBase<GPUCheckResourceUsagePass> {
- public:
+public:
   explicit GPUCheckResourceUsagePass(
       std::function<unsigned(func::FuncOp)> getSharedMemoryLimit,
       std::function<unsigned(func::FuncOp)> getIndexBitwidth)
@@ -27,11 +27,11 @@
 
   void runOnOperation() override;
 
- private:
+private:
   std::function<unsigned(func::FuncOp)> getSharedMemoryLimit;
   std::function<unsigned(func::FuncOp)> getIndexBitwidth;
 };
-}  // namespace
+} // namespace
 
 static unsigned getDatalayoutIndexBitwidth(func::FuncOp func) {
   auto mod = func->getParentOfType<ModuleOp>();
@@ -39,12 +39,13 @@
   return options.getIndexBitwidth();
 }
 
-static int shapedTypeStaticSize(
-    memref::AllocOp allocOp, ShapedType shapedType,
-    std::function<unsigned(func::FuncOp)> getIndexBitwidth) {
+static int
+shapedTypeStaticSize(memref::AllocOp allocOp, ShapedType shapedType,
+                     std::function<unsigned(func::FuncOp)> getIndexBitwidth) {
   int allocSize = 1;
   for (auto dimSize : shapedType.getShape()) {
-    if (ShapedType::isDynamic(dimSize)) continue;
+    if (ShapedType::isDynamic(dimSize))
+      continue;
     allocSize *= dimSize;
   }
   if (auto elementType =
@@ -65,19 +66,22 @@
 
 /// Returns success if the total shared memory allocation size is less than the
 /// limit set by limit.
-static LogicalResult checkGPUAllocationSize(
-    func::FuncOp funcOp, unsigned limit,
-    std::function<unsigned(func::FuncOp)> getIndexBitwidth) {
-  if (funcOp.getBody().empty()) return success();
+static LogicalResult
+checkGPUAllocationSize(func::FuncOp funcOp, unsigned limit,
+                       std::function<unsigned(func::FuncOp)> getIndexBitwidth) {
+  if (funcOp.getBody().empty())
+    return success();
 
   SmallVector<memref::AllocOp> allocOps;
   funcOp.walk([&](memref::AllocOp allocOp) { allocOps.push_back(allocOp); });
-  if (allocOps.empty()) return success();
+  if (allocOps.empty())
+    return success();
 
   int cumSize = 0;
   for (auto allocOp : allocOps) {
     auto allocType = llvm::cast<MemRefType>(allocOp.getType());
-    if (!hasSharedMemoryAddressSpace(allocType)) continue;
+    if (!hasSharedMemoryAddressSpace(allocType))
+      continue;
 
     if (!allocOp.getDynamicSizes().empty()) {
       return allocOp.emitOpError(
@@ -122,5 +126,5 @@
                                                      getIndexBitwidth);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistribute.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistribute.cpp
index 7be6fc9..26947a3 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistribute.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistribute.cpp
@@ -20,12 +20,13 @@
 
 namespace {
 struct GPUDistributePass : public GPUDistributeBase<GPUDistributePass> {
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<affine::AffineDialect, gpu::GPUDialect>();
   }
   void runOnOperation() override {
     auto funcOp = getOperation();
-    if (!isEntryPoint(funcOp)) return;
+    if (!isEntryPoint(funcOp))
+      return;
 
     auto workgroupSize = llvm::map_to_vector(
         getEntryPoint(funcOp)->getWorkgroupSize().value(),
@@ -37,14 +38,15 @@
         mlir::transform::gpu::mapNestedForallToThreadsImpl(
             rewriter, std::nullopt, funcOp, workgroupSize, /*warpDims=*/{},
             false);
-    if (!result.succeeded()) return signalPassFailure();
+    if (!result.succeeded())
+      return signalPassFailure();
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createGPUDistribute() {
   return std::make_unique<GPUDistributePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeSharedMemoryCopy.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeSharedMemoryCopy.cpp
index 76b9347..f6ddd91 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeSharedMemoryCopy.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUDistributeSharedMemoryCopy.cpp
@@ -59,8 +59,9 @@
 /// Patterns for copy to shared memory mapping. Copy to shared memory are not
 /// part of the launch config but needs to be distributed on the workgroup
 /// picked by the root op.
-static void populateTilingCopyToWorkgroupMemPatterns(
-    RewritePatternSet &patterns, ArrayRef<int64_t> workgroupSize) {
+static void
+populateTilingCopyToWorkgroupMemPatterns(RewritePatternSet &patterns,
+                                         ArrayRef<int64_t> workgroupSize) {
   // Tile and distribute copy to workgroup memory.
   linalg::TileSizeComputationFunction wgCopyTileSizeFn =
       [](OpBuilder &builder, Operation *operation) {
@@ -75,7 +76,8 @@
 
         unsigned rank = dstMemRefType.getRank();
         // Return empty tile size for zero dim tensor.
-        if (rank == 0) return tileSizesVal;
+        if (rank == 0)
+          return tileSizesVal;
         int copyTileSize =
             copyVectorNumBits / dstMemRefType.getElementTypeBitWidth();
         for (unsigned i = 0; i < rank - 1; i++) {
@@ -87,12 +89,12 @@
             operation->getLoc(), copyTileSize));
         return tileSizesVal;
       };
-  auto getCopyThreadProcInfoFn = [workgroupSize](
-                                     OpBuilder &builder, Location loc,
-                                     ArrayRef<Range> parallelLoopRanges) {
-    return getGPUThreadIdsAndCounts(builder, loc, parallelLoopRanges.size(),
-                                    workgroupSize);
-  };
+  auto getCopyThreadProcInfoFn =
+      [workgroupSize](OpBuilder &builder, Location loc,
+                      ArrayRef<Range> parallelLoopRanges) {
+        return getGPUThreadIdsAndCounts(builder, loc, parallelLoopRanges.size(),
+                                        workgroupSize);
+      };
   linalg::LinalgLoopDistributionOptions copyInvocationDistributionOptions;
   copyInvocationDistributionOptions.procInfo = getCopyThreadProcInfoFn;
 
@@ -112,8 +114,9 @@
 
 /// Compute a tile size so that the numer of iteraton is equal to the flat
 /// workgroup size.
-static std::optional<SmallVector<int64_t>> getTileToDistributableSize(
-    linalg::GenericOp copyOp, int64_t flatWorkgroupSize) {
+static std::optional<SmallVector<int64_t>>
+getTileToDistributableSize(linalg::GenericOp copyOp,
+                           int64_t flatWorkgroupSize) {
   SmallVector<int64_t> shape = copyOp.getStaticLoopRanges();
   unsigned bitWidth =
       llvm::cast<MemRefType>(copyOp.getDpsInitOperand(0)->get().getType())
@@ -129,7 +132,8 @@
     unroll.push_back(numThreads * numElementPerThread);
     assert(threadsAvailable % numThreads == 0);
     threadsAvailable = threadsAvailable / numThreads;
-    if (threadsAvailable == 1) break;
+    if (threadsAvailable == 1)
+      break;
   }
   assert(threadsAvailable == 1);
   unroll.resize(shape.size(), 1);
@@ -145,7 +149,8 @@
       [flatWorkgroupSize](OpBuilder &builder, Operation *operation) {
         SmallVector<Value> tileSizesVal;
         auto copyOp = dyn_cast<linalg::GenericOp>(operation);
-        if (!copyOp) return tileSizesVal;
+        if (!copyOp)
+          return tileSizesVal;
         std::optional<SmallVector<int64_t>> staticSize =
             getTileToDistributableSize(copyOp, flatWorkgroupSize);
         for (int64_t dim : *staticSize) {
@@ -222,7 +227,8 @@
       [](OpBuilder &builder, Operation *operation) {
         SmallVector<Value> tileSizesVal;
         auto copyOp = dyn_cast<linalg::GenericOp>(operation);
-        if (!copyOp) return tileSizesVal;
+        if (!copyOp)
+          return tileSizesVal;
         SmallVector<int64_t> staticSize = getNativeDstShape(copyOp);
         for (int64_t dim : staticSize) {
           tileSizesVal.push_back(
@@ -230,11 +236,11 @@
         }
         return tileSizesVal;
       };
-  auto getCopyThreadProcInfoFn = [flatThreadId](
-                                     OpBuilder &builder, Location loc,
-                                     ArrayRef<Range> parallelLoopRanges) {
-    return getIds(builder, loc, parallelLoopRanges, flatThreadId);
-  };
+  auto getCopyThreadProcInfoFn =
+      [flatThreadId](OpBuilder &builder, Location loc,
+                     ArrayRef<Range> parallelLoopRanges) {
+        return getIds(builder, loc, parallelLoopRanges, flatThreadId);
+      };
   linalg::LinalgLoopDistributionOptions copyInvocationDistributionOptions;
   copyInvocationDistributionOptions.procInfo = getCopyThreadProcInfoFn;
 
@@ -286,7 +292,8 @@
 static void hoistAlloc(func::FuncOp funcOp) {
   SmallVector<memref::AllocOp> allocs;
   funcOp.walk([&](memref::AllocOp alloc) {
-    if (alloc.getOperands().empty()) allocs.push_back(alloc);
+    if (alloc.getOperands().empty())
+      allocs.push_back(alloc);
   });
   for (memref::AllocOp alloc : allocs) {
     alloc->moveBefore(&(*funcOp.getBlocks().begin()),
@@ -309,7 +316,8 @@
         prevOp = prevOp->getPrevNode();
       }
       if (prevOp && hasMarker(prevOp, getCopyToWorkgroupMemoryMarker())) {
-        for (Operation *op : redundantBarriers) op->erase();
+        for (Operation *op : redundantBarriers)
+          op->erase();
       }
     }
   });
@@ -329,11 +337,13 @@
 }
 
 /// Fully unroll all the static loops unless they are part of the ignore map.
-static void UnrollSharedMemoryLoops(
-    func::FuncOp funcOp, const llvm::SmallDenseSet<scf::ForOp> &loopsToIgnore) {
+static void
+UnrollSharedMemoryLoops(func::FuncOp funcOp,
+                        const llvm::SmallDenseSet<scf::ForOp> &loopsToIgnore) {
   SmallVector<scf::ForOp> forOpsToUnroll;
   funcOp.walk([&](scf::ForOp forOp) {
-    if (!loopsToIgnore.count(forOp)) forOpsToUnroll.push_back(forOp);
+    if (!loopsToIgnore.count(forOp))
+      forOpsToUnroll.push_back(forOp);
   });
   for (scf::ForOp forOp : llvm::reverse(forOpsToUnroll)) {
     (void)loopUnrollByFactor(forOp, numIteration(forOp));
@@ -351,7 +361,8 @@
   void runOnOperation() override {
     func::FuncOp funcOp = getOperation();
     FailureOr<IREE::HAL::ExecutableExportOp> exportOp = getEntryPoint(funcOp);
-    if (failed(exportOp)) return;
+    if (failed(exportOp))
+      return;
     auto workgroupSize = getWorkgroupSize(exportOp.value());
     workgroupSize.resize(3, 1);
     MLIRContext *context = &getContext();
@@ -360,7 +371,8 @@
       if (hasMarker(copyOp, getCopyToWorkgroupMemoryMarker()))
         copiesToWorkgroupMem.push_back(copyOp);
     });
-    if (copiesToWorkgroupMem.empty()) return;
+    if (copiesToWorkgroupMem.empty())
+      return;
 
     // Step 0. First clean up the IR.
     hoistAlloc(funcOp);
@@ -445,12 +457,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createGPUDistributeSharedMemoryCopy() {
   return std::make_unique<GPUDistributeSharedMemoryCopyPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMultiBuffering.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMultiBuffering.cpp
index c514a52..032cbf4 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMultiBuffering.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMultiBuffering.cpp
@@ -20,7 +20,7 @@
     : public GPUMultiBufferingBase<GPUMultiBufferingPass> {
   GPUMultiBufferingPass(unsigned numBuffers) : numBuffers(numBuffers) {}
 
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<affine::AffineDialect>();
   }
 
@@ -50,9 +50,10 @@
     // Collect all the alloc operations.
     funcOp.walk([&](memref::AllocOp allocOp) {
       // Skip allocations not used in a loop.
-      for (Operation* user : allocOp->getUsers()) {
+      for (Operation *user : allocOp->getUsers()) {
         auto loop = user->getParentOfType<scf::ForOp>();
-        if (!loop) return WalkResult::advance();
+        if (!loop)
+          return WalkResult::advance();
       }
       allocs.push_back(allocOp);
       return WalkResult::advance();
@@ -68,15 +69,15 @@
     }
   }
 
- private:
+private:
   unsigned numBuffers;
 };
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUMultiBuffering(
-    unsigned numBuffers) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUMultiBuffering(unsigned numBuffers) {
   return std::make_unique<GPUMultiBufferingPass>(numBuffers);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp
index 13b13b0..a3934d1 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.cpp
@@ -55,9 +55,11 @@
     Value source = transferReadOp.getSource();
     MemRefType sourceType = llvm::dyn_cast<MemRefType>(source.getType());
     // Contiguity check is valid on tensors only.
-    if (!sourceType) return failure();
+    if (!sourceType)
+      return failure();
     // Already 2D or lower nothing to do.
-    if (vectorType.getRank() < 3) return failure();
+    if (vectorType.getRank() < 3)
+      return failure();
     // The innermost dim is always considered non-unit as it wont be dropped
     // Therefore, we initialize `numberOfNonUnitDims` to 1 and not 0
     int numberOfNonUnitDims = 1;
@@ -85,9 +87,12 @@
     }
     int rankOfCollapsedVector = 2;
     // TODO: generalize this pattern, relax the requirements here.
-    if (transferReadOp.hasOutOfBoundsDim()) return failure();
-    if (!transferReadOp.getPermutationMap().isMinorIdentity()) return failure();
-    if (transferReadOp.getMask()) return failure();
+    if (transferReadOp.hasOutOfBoundsDim())
+      return failure();
+    if (!transferReadOp.getPermutationMap().isMinorIdentity())
+      return failure();
+    if (transferReadOp.getMask())
+      return failure();
     ArrayAttr newInBoundsAttr = rewriter.getBoolArrayAttr(
         SmallVector<bool>(rankOfCollapsedVector, true));
     auto newidentityMap =
@@ -130,12 +135,13 @@
         loc, vectorTypeBroadcast, readCollapse);
     SmallVector<int64_t> tranposePermutation;
     for (int i = 0; i < vectorType.getRank(); i++) {
-      if (i == vectorType.getRank() - 2) continue;
+      if (i == vectorType.getRank() - 2)
+        continue;
       tranposePermutation.push_back(i);
     }
-    tranposePermutation.insert(
-        tranposePermutation.begin() + indexOfOuterNonUnitDim,
-        vectorType.getRank() - 2);
+    tranposePermutation.insert(tranposePermutation.begin() +
+                                   indexOfOuterNonUnitDim,
+                               vectorType.getRank() - 2);
     rewriter.replaceOpWithNewOp<vector::TransposeOp>(
         transferReadOp, readBroadcast, tranposePermutation);
     return success();
@@ -183,7 +189,8 @@
 /// Returns true if op is appropriate contract for promotion.
 static LogicalResult contractOpFilter(Operation *op) {
   auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
-  if (!linalgOp) return failure();
+  if (!linalgOp)
+    return failure();
   // Limit promotion to matmul and batch matmul, there may be generic
   // ops with more batch dimensions we didn't distribute and therefore
   // cannot find a higher bound.
@@ -193,7 +200,7 @@
       linalgOp.getNumParallelLoops() <= 3);
 }
 
-}  // namespace
+} // namespace
 
 void populateVectorTransferToGPUMMAPreparationPatterns(
     RewritePatternSet &patterns) {
@@ -225,5 +232,5 @@
           .addFilter(contractOpFilter));
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.h b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.h
index b2730de..1875c5e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.h
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPatterns.h
@@ -27,7 +27,7 @@
 void populateContractPromotionPatterns(RewritePatternSet &patterns,
                                        ArrayRef<int64_t> operandsToPromote);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_COMMON_GPUPATTERNS_H_
+#endif // IREE_COMPILER_CODEGEN_COMMON_GPUPATTERNS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPipelining.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPipelining.cpp
index 60c7ced..2cecafc 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPipelining.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPipelining.cpp
@@ -38,11 +38,13 @@
 /// 0 or a HAL descriptor type address space.
 static bool hasDefaultOrHALAddressSpace(MemRefType memrefType) {
   Attribute addrSpace = memrefType.getMemorySpace();
-  if (!addrSpace) return true;
+  if (!addrSpace)
+    return true;
   auto intAttr = llvm::dyn_cast<IntegerAttr>(addrSpace);
   // Accept both default numeric address space and HAL descriptor type address
   // space--the former is used by LLVMGPU while the latter is used by SPIR-V.
-  if (intAttr && intAttr.getInt() == 0) return true;
+  if (intAttr && intAttr.getInt() == 0)
+    return true;
   return llvm::isa<IREE::HAL::DescriptorTypeAttr>(addrSpace);
 }
 
@@ -52,14 +54,15 @@
 /// helper function predicates operations (where predication is avialable),
 /// checks if unpredicated operations are side-effect free and acceptable to
 /// execute speculatively.
-static Operation* replaceOpWithPredicatedOp(RewriterBase& rewriter,
-                                            Operation* op, Value pred) {
+static Operation *replaceOpWithPredicatedOp(RewriterBase &rewriter,
+                                            Operation *op, Value pred) {
   // Predication is only supported for AsyncCopyOp. Thus, for operations which
   // are *not* AsyncCopyOp additional checks are requrired in order to be issued
   // speculatively.
   if (!isa<nvgpu::DeviceAsyncCopyOp>(op)) {
     // Return/execute the op if it is a side effect free.
-    if (mlir::isMemoryEffectFree(op)) return op;
+    if (mlir::isMemoryEffectFree(op))
+      return op;
     // Return/execute the op if it is barrier, commit group, or ldmatrix op.
     if (isa<gpu::BarrierOp, nvgpu::DeviceAsyncCreateGroupOp, nvgpu::LdMatrixOp,
             nvgpu::DeviceAsyncWaitOp>(op))
@@ -67,11 +70,13 @@
     // Return/execute the op if it is a shared memory load.
     if (auto loadOp = dyn_cast<vector::LoadOp>(op)) {
       auto loadBaseType = llvm::cast<MemRefType>(loadOp.getBase().getType());
-      if (hasSharedMemoryAddressSpace(loadBaseType)) return op;
+      if (hasSharedMemoryAddressSpace(loadBaseType))
+        return op;
     }
     if (auto loadOp = dyn_cast<memref::LoadOp>(op)) {
       auto loadBaseType = loadOp.getMemRefType();
-      if (hasSharedMemoryAddressSpace(loadBaseType)) return op;
+      if (hasSharedMemoryAddressSpace(loadBaseType))
+        return op;
     }
     // If we are here that means the operation does not have predication support
     // and cannot be speculatively executed. Thus, unpeeled epilogue is not
@@ -109,26 +114,30 @@
 
 /// Helper to recursively add operation dependencies within `block` to `dep`
 /// set.
-static void addDepOps(llvm::SmallDenseSet<Operation*>& dep, Operation* op,
-                      Block* block) {
-  if (!dep.insert(op).second) return;
+static void addDepOps(llvm::SmallDenseSet<Operation *> &dep, Operation *op,
+                      Block *block) {
+  if (!dep.insert(op).second)
+    return;
   for (Value operand : op->getOperands()) {
-    Operation* defOp = operand.getDefiningOp();
-    if (defOp && defOp->getBlock() == block) addDepOps(dep, defOp, block);
+    Operation *defOp = operand.getDefiningOp();
+    if (defOp && defOp->getBlock() == block)
+      addDepOps(dep, defOp, block);
   }
 }
 
 /// Assign stages to the loop ops. Simple logic by default, put load from global
 /// memory in stage 0 and the rest in stage 1. If store_stage = 0 then put store
 /// to shared memory in stage 0 as well.
-static void getPipelineStages(scf::ForOp forOp,
-                              std::vector<std::pair<Operation*, unsigned>>& ops,
-                              unsigned depth) {
-  if (!forOp->hasAttr(kPipeliningLoopMarker)) return;
+static void
+getPipelineStages(scf::ForOp forOp,
+                  std::vector<std::pair<Operation *, unsigned>> &ops,
+                  unsigned depth) {
+  if (!forOp->hasAttr(kPipeliningLoopMarker))
+    return;
 
   // Track dependencies of stage 0 ops.
-  llvm::SmallDenseSet<Operation*> loadDep;
-  for (Operation& op : forOp.getBody()->getOperations()) {
+  llvm::SmallDenseSet<Operation *> loadDep;
+  for (Operation &op : forOp.getBody()->getOperations()) {
     if (op.hasAttr(kPipeliningFirstStage)) {
       addDepOps(loadDep, &op, forOp.getBody());
     }
@@ -137,16 +146,17 @@
   // it depends on in stage 0. Store to shared memory and computation are in
   // stage `maxDepth`. In order to have a correct scheduling even with back
   // edges we order stages in decreasing order.
-  for (Operation& op : forOp.getBody()->getOperations()) {
+  for (Operation &op : forOp.getBody()->getOperations()) {
     if (!loadDep.count(&op) && !isa<scf::YieldOp>(op))
       ops.push_back(std::make_pair(&op, depth));
   }
-  for (Operation& op : forOp.getBody()->getOperations()) {
-    if (loadDep.count(&op)) ops.push_back(std::make_pair(&op, 0));
+  for (Operation &op : forOp.getBody()->getOperations()) {
+    if (loadDep.count(&op))
+      ops.push_back(std::make_pair(&op, 0));
   }
 }
 
-static void setAsyncAnnotations(Operation* op,
+static void setAsyncAnnotations(Operation *op,
                                 scf::PipeliningOption::PipelinerPart part,
                                 unsigned iteration, unsigned depth,
                                 PipeliningSchedulingStrategy schedule) {
@@ -155,7 +165,8 @@
     // copies in flight.
     bool copyBeforeLoad =
         schedule == PipeliningSchedulingStrategy::nvidiaTensorCore;
-    if (waitOp.getNumGroups()) return;
+    if (waitOp.getNumGroups())
+      return;
     int numGroupInFlight = 0;
     if (part == scf::PipeliningOption::PipelinerPart::Kernel ||
         part == scf::PipeliningOption::PipelinerPart::Prologue) {
@@ -189,10 +200,11 @@
 static bool setPipeliningMarkers(scf::ForOp forOp, bool pipelineStoreStage) {
   bool copyToWorkgroupMemory = false;
   OpBuilder builder(forOp.getContext());
-  SmallVector<Operation*> barriers;
-  for (Operation& op : forOp.getBody()->getOperations()) {
+  SmallVector<Operation *> barriers;
+  for (Operation &op : forOp.getBody()->getOperations()) {
     // Pipeline the most inner for op that should be a flat region.
-    if (op.getNumRegions() > 0) return false;
+    if (op.getNumRegions() > 0)
+      return false;
     if (isa<gpu::BarrierOp>(op)) {
       barriers.push_back(&op);
       if (pipelineStoreStage == 0)
@@ -202,20 +214,24 @@
       copyToWorkgroupMemory = true;
       op.setAttr(kPipeliningFirstStage, builder.getUnitAttr());
       // async copy ops need to be moved along with previous barrier.
-      for (Operation* barrier : barriers) {
+      for (Operation *barrier : barriers) {
         barrier->setAttr(kPipeliningFirstStage, builder.getUnitAttr());
       }
       barriers.clear();
       continue;
     }
     auto ld = dyn_cast<vector::TransferReadOp>(op);
-    if (!ld) continue;
+    if (!ld)
+      continue;
     auto ldSrcType = llvm::cast<MemRefType>(ld.getSource().getType());
-    if (!hasDefaultOrHALAddressSpace(ldSrcType) || !ld->hasOneUse()) continue;
+    if (!hasDefaultOrHALAddressSpace(ldSrcType) || !ld->hasOneUse())
+      continue;
     auto st = dyn_cast<vector::TransferWriteOp>(ld->use_begin()->getOwner());
-    if (!st) continue;
+    if (!st)
+      continue;
     auto stSrcType = llvm::cast<MemRefType>(st.getSource().getType());
-    if (!hasSharedMemoryAddressSpace(stSrcType)) continue;
+    if (!hasSharedMemoryAddressSpace(stSrcType))
+      continue;
     copyToWorkgroupMemory = true;
     ld->setAttr(kPipeliningFirstStage, builder.getUnitAttr());
     if (pipelineStoreStage == 0)
@@ -235,11 +251,11 @@
 /// and their dependencies for a kgroup.
 struct WarpMmaOp {
   // Defining op and its dependencies for mma.sync's lhs/matrixA/OperandA.
-  llvm::SetVector<Operation*> lhsOperations;
+  llvm::SetVector<Operation *> lhsOperations;
   // Defining op and its dependencies for mma.sync's rhs/matrixB/OperandB.
-  llvm::SetVector<Operation*> rhsOperations;
+  llvm::SetVector<Operation *> rhsOperations;
   // Warp-level Tensor Core operations on operands in registers.
-  llvm::SetVector<Operation*> mmaOperations;
+  llvm::SetVector<Operation *> mmaOperations;
 };
 
 /// Structure to hold the matmul's mainloop information:
@@ -250,13 +266,13 @@
 struct MainLoopInfo {
   // Mainloop asyncronous copy operations:
   // `cp.async` GlobalMemory -> SharedMemory
-  llvm::SetVector<Operation*> copyGlobalToSharedOps;
-  llvm::SetVector<Operation*> asyncCreateGroupOp;
-  llvm::SetVector<Operation*> barrierOps;
-  llvm::SetVector<Operation*> asyncWaitOps;
+  llvm::SetVector<Operation *> copyGlobalToSharedOps;
+  llvm::SetVector<Operation *> asyncCreateGroupOp;
+  llvm::SetVector<Operation *> barrierOps;
+  llvm::SetVector<Operation *> asyncWaitOps;
 
   // Mainloop asyncronous copy operations dependencies
-  llvm::SetVector<Operation*> copyGlobalToSharedOpDeps;
+  llvm::SetVector<Operation *> copyGlobalToSharedOpDeps;
 
   // Warp-level syncronous operations:
   // `ldmatrix, ld.shared` SharedMemory -> Registers
@@ -264,11 +280,11 @@
   llvm::SmallVector<WarpMmaOp> warpOperations;
 
   // Set to track the dependencies already seen to a backward slice.
-  llvm::SetVector<Operation*> seenDepOps;
+  llvm::SetVector<Operation *> seenDepOps;
 
   // Set to track the mma operations in forward slice to count kgroups and
   // populate the warp-level warpOperations
-  llvm::SetVector<Operation*> seenMmaOps;
+  llvm::SetVector<Operation *> seenMmaOps;
 
   // Boolen to store if the mainloop can be pipelined (coarse-grained
   // scheduling) and the instructions can be interleaved (fine-grained
@@ -278,13 +294,14 @@
   // Populates the dependent operations in ``dependentOps`` for the given a op
   // recursively that are in the same block and not added to the backward slice
   // of some other op.
-  void backwardSliceOfDependentOps(llvm::SetVector<Operation*>& dependentOps,
-                                   Operation* op, Block* block) {
-    if (!seenDepOps.insert(op)) return;
+  void backwardSliceOfDependentOps(llvm::SetVector<Operation *> &dependentOps,
+                                   Operation *op, Block *block) {
+    if (!seenDepOps.insert(op))
+      return;
     // Add the unseen op to the dependentOps and recurse on its operands.
     dependentOps.insert(op);
     for (Value operand : op->getOperands()) {
-      Operation* defOp = operand.getDefiningOp();
+      Operation *defOp = operand.getDefiningOp();
       if (defOp && defOp->getBlock() == block)
         backwardSliceOfDependentOps(dependentOps, defOp, block);
     }
@@ -293,10 +310,11 @@
   // Obtains nvgpu.ldmatrix, memref.load, vector.extract_strided_slice, or
   // vector.insert operations that is the defining operations of the mma.sync
   // operand. The operations are added to a set of specific kgroup operations.
-  void mmaOperandDefOperation(Operation* op,
-                              llvm::SetVector<Operation*>& defOperation,
-                              Block* block) {
-    if (!op) return;
+  void mmaOperandDefOperation(Operation *op,
+                              llvm::SetVector<Operation *> &defOperation,
+                              Block *block) {
+    if (!op)
+      return;
 
     // If the operations defining the mma.sync's operand is one of the
     // qualifying operations, add the operations to the current kgroup defining
@@ -314,10 +332,11 @@
   // (start) to numKgroups (ends scf.yield).
   // Assumption: The mma operations are in a chain of monotonicaly increasing
   // kgroup order.
-  void vistMmaSyncOp(Operation* op, int kgroup) {
+  void vistMmaSyncOp(Operation *op, int kgroup) {
     // if the operation in an `scf.yield`, we reached the end of MmaSyncOp chain
     // return.
-    if (seenMmaOps.count(op) || isa<scf::YieldOp>(op)) return;
+    if (seenMmaOps.count(op) || isa<scf::YieldOp>(op))
+      return;
 
     seenMmaOps.insert(op);
 
@@ -348,7 +367,7 @@
   // generating an optimal *finer-grained* instruction interleaving of global
   // memory loads, shared memory loads, and math operations.
   void analyze(scf::ForOp forOp) {
-    for (Operation& op : forOp.getBody()->getOperations()) {
+    for (Operation &op : forOp.getBody()->getOperations()) {
       if (op.getNumRegions() > 0) {
         // Pipeline and schedule the most inner for op ,i.e., the mainloop that
         // should be a flat region.
@@ -416,12 +435,13 @@
       LDBG("-- missing warpOperations -> not schedulable");
       isSchedulable = false;
     }
-    if (!isSchedulable) return;
+    if (!isSchedulable)
+      return;
 
     // Collect the dependent operations for `cp.async` in the mainloop order for
     // coarse-grained software pipeling. The deps are collected in stage order,
     // i.e., `cp.async`'s deps in stage 0 are collected first.
-    for (Operation& op : forOp.getBody()->getOperations()) {
+    for (Operation &op : forOp.getBody()->getOperations()) {
       if (isa<nvgpu::DeviceAsyncCopyOp>(&op)) {
         backwardSliceOfDependentOps(copyGlobalToSharedOpDeps, &op,
                                     forOp.getBody());
@@ -432,7 +452,7 @@
     // operations. The operation and their dependencies are seperated by kgroups
     // for fine-grained instruction scheduling.
     for (int kgroup = 0; kgroup < getNumberOfKgroups(); ++kgroup) {
-      for (Operation& op : forOp.getBody()->getOperations()) {
+      for (Operation &op : forOp.getBody()->getOperations()) {
         if (isa<nvgpu::LdMatrixOp, memref::LoadOp,
                 vector::ExtractStridedSliceOp, vector::InsertOp>(&op)) {
           if (warpOperations[kgroup].lhsOperations.count(&op)) {
@@ -445,7 +465,7 @@
           }
         }
       }
-      for (Operation& op : forOp.getBody()->getOperations()) {
+      for (Operation &op : forOp.getBody()->getOperations()) {
         if (isa<nvgpu::MmaSyncOp>(&op)) {
           if (warpOperations[kgroup].mmaOperations.count(&op)) {
             backwardSliceOfDependentOps(warpOperations[kgroup].mmaOperations,
@@ -461,9 +481,9 @@
 };
 
 /// Prints the given `funcOp` after a leading `step` comment header.
-static void debugMainloopSchedule(
-    MainLoopInfo& mainloop, int numStages,
-    std::vector<std::pair<Operation*, unsigned>>& ops) {
+static void
+debugMainloopSchedule(MainLoopInfo &mainloop, int numStages,
+                      std::vector<std::pair<Operation *, unsigned>> &ops) {
   LLVM_DEBUG({
     llvm::dbgs() << "//--- Mainloop schedule generated for Nvidia Ampere "
                     "mma.sync TensorCore Pipeline. ---//\n";
@@ -472,7 +492,7 @@
                  << "\n";
     llvm::dbgs() << " Number of mainloop instructions " << ops.size() << "\n";
     llvm::dbgs() << " Mainloop instructions schedule and stage assignment: \n";
-    for (auto& stage_op_pair : ops) {
+    for (auto &stage_op_pair : ops) {
       llvm::dbgs() << " Stage (" << stage_op_pair.second << ") , Operation: ";
       stage_op_pair.first->dump();
     }
@@ -489,7 +509,7 @@
 /// @param ops a vector of pairs: [(operations, pipeline_stage)].
 /// @param numStages the total number of pipeline stages used for multi-buffer.
 static void getNvidiaAmpereTensorCorePipeline(
-    scf::ForOp forOp, std::vector<std::pair<Operation*, unsigned>>& ops,
+    scf::ForOp forOp, std::vector<std::pair<Operation *, unsigned>> &ops,
     unsigned numStages) {
   // Analyze the main loop and obtain information for coarse-grained pipelining
   // and fine-grained instruction scheduling.
@@ -531,7 +551,7 @@
     // into and mma.sync operations to hide load latencies.
 
     // Load the next kgroup into registers.
-    for (Operation& op : forOp.getBody()->getOperations()) {
+    for (Operation &op : forOp.getBody()->getOperations()) {
       if (mainloop.warpOperations[kgroup + 1].lhsOperations.count(&op) ||
           mainloop.warpOperations[kgroup + 1].rhsOperations.count(&op)) {
         ops.push_back(std::make_pair(&op, numStages - 1));
@@ -539,7 +559,7 @@
     }
 
     // Issue mma.sync on previous loaded kgroup.
-    for (Operation& op : forOp.getBody()->getOperations()) {
+    for (Operation &op : forOp.getBody()->getOperations()) {
       if (mainloop.warpOperations[kgroup].mmaOperations.count(&op))
         ops.push_back(std::make_pair(&op, numStages - 1));
     }
@@ -552,7 +572,7 @@
   // TODO: Distribute cp.async throughout the main loop and do not concentrate
   // it at one place.
   // Schedule all cp.async and one cp.async.commit_group.
-  for (Operation& op : forOp.getBody()->getOperations()) {
+  for (Operation &op : forOp.getBody()->getOperations()) {
     if (mainloop.copyGlobalToSharedOpDeps.count(&op))
       ops.push_back(std::make_pair(&op, 0 /*pipelineStage*/));
   }
@@ -571,14 +591,14 @@
 
   // Schedule the Shared Memory loads for the first kgroup and pipeline them
   // into one stage ahead.
-  for (Operation& op : forOp.getBody()->getOperations()) {
+  for (Operation &op : forOp.getBody()->getOperations()) {
     if (mainloop.warpOperations[0].lhsOperations.count(&op) ||
         mainloop.warpOperations[0].rhsOperations.count(&op))
       ops.push_back(std::make_pair(&op, numStages - 2));
   }
 
   // Issue mma.sync on for the last kgroup at the end of the mainloop.
-  for (Operation& op : forOp.getBody()->getOperations()) {
+  for (Operation &op : forOp.getBody()->getOperations()) {
     if (mainloop.warpOperations[numKgroups - 1].mmaOperations.count(&op))
       ops.push_back(std::make_pair(&op, numStages - 1));
   }
@@ -591,9 +611,9 @@
 // Apply pipeline rewrite pattern assuming the operations were already
 // annotated with stage information.
 // TODO: move away from using attribute annotations.
-static FailureOr<scf::ForOp> applyPipelining(
-    scf::ForOp forOp, int64_t depth, bool epiloguePeeling,
-    PipeliningSchedulingStrategy schedule) {
+static FailureOr<scf::ForOp>
+applyPipelining(scf::ForOp forOp, int64_t depth, bool epiloguePeeling,
+                PipeliningSchedulingStrategy schedule) {
   // TODO: Refactor schedules to not rely on markers.
   if (schedule == PipeliningSchedulingStrategy::loadGlobalStage0 ||
       schedule == PipeliningSchedulingStrategy::loadStoreStage0) {
@@ -606,18 +626,18 @@
 
   scf::PipeliningOption options;
   unsigned maxDepth = depth;
-  auto getSchedule = [maxDepth, schedule](
-                         scf::ForOp forOp,
-                         std::vector<std::pair<Operation*, unsigned>>& ops) {
-    if (schedule == PipeliningSchedulingStrategy::nvidiaTensorCore) {
-      return getNvidiaAmpereTensorCorePipeline(forOp, ops, maxDepth);
-    }
-    return getPipelineStages(forOp, ops, maxDepth);
-  };
-  auto setAnnotation = [maxDepth, schedule](
-                           Operation* op,
-                           scf::PipeliningOption::PipelinerPart part,
-                           unsigned iteration) {
+  auto getSchedule =
+      [maxDepth, schedule](scf::ForOp forOp,
+                           std::vector<std::pair<Operation *, unsigned>> &ops) {
+        if (schedule == PipeliningSchedulingStrategy::nvidiaTensorCore) {
+          return getNvidiaAmpereTensorCorePipeline(forOp, ops, maxDepth);
+        }
+        return getPipelineStages(forOp, ops, maxDepth);
+      };
+  auto setAnnotation = [maxDepth,
+                        schedule](Operation *op,
+                                  scf::PipeliningOption::PipelinerPart part,
+                                  unsigned iteration) {
     return setAsyncAnnotations(op, part, iteration, maxDepth, schedule);
   };
   options.getScheduleFn = getSchedule;
@@ -627,7 +647,7 @@
   // is avialable a.k.a. AsyncCopyOp.
   if (!epiloguePeeling) {
     options.peelEpilogue = false;
-    options.predicateFn = [](RewriterBase& rewriter, Operation* op,
+    options.predicateFn = [](RewriterBase &rewriter, Operation *op,
                              Value pred) {
       return replaceOpWithPredicatedOp(rewriter, op, pred);
     };
@@ -664,20 +684,22 @@
     // Remove extra barriers from the prologue assuming appropriate
     // multi-buffering.
     funcOp.walk([](gpu::BarrierOp barrierOp) {
-      if (barrierOp->hasAttr(kPipeliningExtraBarrier)) barrierOp->erase();
+      if (barrierOp->hasAttr(kPipeliningExtraBarrier))
+        barrierOp->erase();
     });
   }
 
- private:
+private:
   int64_t depth;
   PipeliningSchedulingStrategy schedule;
   bool epiloguePeeling;
 };
-}  // namespace
+} // namespace
 
-FailureOr<scf::ForOp> pipelineSharedMemoryCopy(
-    RewriterBase& rewriter, scf::ForOp forOp,
-    PipeliningSchedulingStrategy strategy, bool peelEpilogue, int64_t depth) {
+FailureOr<scf::ForOp>
+pipelineSharedMemoryCopy(RewriterBase &rewriter, scf::ForOp forOp,
+                         PipeliningSchedulingStrategy strategy,
+                         bool peelEpilogue, int64_t depth) {
   return applyPipelining(forOp, depth, peelEpilogue, strategy);
 }
 
@@ -686,11 +708,11 @@
 /// true  : Peel epilogue (no additional checks required)
 /// false : Try and use unpeeled epilogue (check if predication is supported
 /// is avialable)
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUPipeliningPass(
-    bool epiloguePeeling, unsigned depth,
-    PipeliningSchedulingStrategy schedule) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUPipeliningPass(bool epiloguePeeling, unsigned depth,
+                        PipeliningSchedulingStrategy schedule) {
   return std::make_unique<GPUPipeliningPass>(epiloguePeeling, depth, schedule);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUReduceBankConflicts.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUReduceBankConflicts.cpp
index f5776f3..4c3d091 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUReduceBankConflicts.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUReduceBankConflicts.cpp
@@ -18,7 +18,8 @@
 static void padAlloc(MLIRContext *context, memref::AllocOp allocOp,
                      int64_t paddingSizeBits) {
   int64_t innerDim = allocOp.getType().getShape().back();
-  if (ShapedType::isDynamic(innerDim)) return;
+  if (ShapedType::isDynamic(innerDim))
+    return;
   Type elType = allocOp.getType().getElementType();
   unsigned bitwidth =
       mlir::DataLayout::closest(allocOp).getTypeSizeInBits(elType);
@@ -51,10 +52,10 @@
 /// be removed once the better solution is implemented.
 struct GPUReduceBankConflictsPass
     : public GPUReduceBankConflictsBase<GPUReduceBankConflictsPass> {
- private:
+private:
   int64_t paddingSizeBits;
 
- public:
+public:
   GPUReduceBankConflictsPass(int64_t paddingSizeBits)
       : paddingSizeBits(paddingSizeBits) {}
 
@@ -73,12 +74,12 @@
       padAlloc(context, alloc, paddingSizeBits);
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createGPUReduceSharedMemoryBankConflicts(int64_t paddingSizeBits) {
   return std::make_unique<GPUReduceBankConflictsPass>(paddingSizeBits);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorAlloc.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorAlloc.cpp
index a16ecf1..16072f5 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorAlloc.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorAlloc.cpp
@@ -27,7 +27,8 @@
 /// Filter to decide which contract ops need allocations.
 static bool contractOpFilter(Operation *op) {
   auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
-  if (!linalgOp) return false;
+  if (!linalgOp)
+    return false;
 
   if (!linalg::isaContractionOpInterface(linalgOp)) {
     return false;
@@ -36,7 +37,8 @@
   // The workgroup specialization already makes static shapes available for the
   // main tile part and makes the partial tile computation small, so promoting
   // to shared memory for the partial tile actually hurts the performance.
-  if (linalgOp.hasDynamicShape()) return false;
+  if (linalgOp.hasDynamicShape())
+    return false;
 
   // Check if the shape is tile-distributable. The leading dimension must be a
   // multiple of the target vector size, which is 128b / the element bit width.
@@ -72,7 +74,8 @@
 /// Filter to decide which transpose ops need allocations.
 static bool transposeOpFilter(Operation *op) {
   auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
-  if (!linalgOp) return false;
+  if (!linalgOp)
+    return false;
   LinalgOpInfo opInfo(linalgOp, sharedMemTransposeFilter);
   return opInfo.isTranspose();
 }
@@ -111,15 +114,18 @@
 
   LogicalResult matchAndRewrite(bufferization::AllocTensorOp allocOp,
                                 PatternRewriter &rewriter) const override {
-    if (!allocOp.getCopy()) return failure();
+    if (!allocOp.getCopy())
+      return failure();
     auto linalgOp = allocOp.getCopy().getDefiningOp<linalg::LinalgOp>();
-    if (!linalgOp) return failure();
+    if (!linalgOp)
+      return failure();
 
     // Make sure we don't use the initial values for the linalg output we are
     // copying during the tensor allocation.
     unsigned resultNumber = cast<OpResult>(allocOp.getCopy()).getResultNumber();
     OpOperand *initOperand = linalgOp.getDpsInitOperand(resultNumber);
-    if (linalgOp.payloadUsesValueFromOperand(initOperand)) return failure();
+    if (linalgOp.payloadUsesValueFromOperand(initOperand))
+      return failure();
 
     rewriter.setInsertionPoint(linalgOp);
     std::optional<Attribute> memorySpace = allocOp.getMemorySpace();
@@ -140,11 +146,11 @@
 };
 
 struct GPUTensorAllocPass : public GPUTensorAllocBase<GPUTensorAllocPass> {
- private:
+private:
   GPUPromoteSharedMemPattern promoteSharedMemPattern =
       GPUPromoteSharedMemPattern::ContractionOpPattern;
 
- public:
+public:
   GPUTensorAllocPass(GPUPromoteSharedMemPattern promoteSharedMemPattern)
       : promoteSharedMemPattern(promoteSharedMemPattern) {}
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -168,12 +174,14 @@
     SmallVector<Operation *> opsToPromote;
     funcOp.walk([&](Operation *op) {
       switch (promoteSharedMemPattern) {
-        case GPUPromoteSharedMemPattern::ContractionOpPattern:
-          if (contractOpFilter(op)) opsToPromote.push_back(op);
-          break;
-        case GPUPromoteSharedMemPattern::TransposeOpPattern:
-          if (transposeOpFilter(op)) opsToPromote.push_back(op);
-          break;
+      case GPUPromoteSharedMemPattern::ContractionOpPattern:
+        if (contractOpFilter(op))
+          opsToPromote.push_back(op);
+        break;
+      case GPUPromoteSharedMemPattern::TransposeOpPattern:
+        if (transposeOpFilter(op))
+          opsToPromote.push_back(op);
+        break;
       }
     });
     for (Operation *op : opsToPromote) {
@@ -181,32 +189,32 @@
       auto linalgOp = cast<linalg::LinalgOp>(op);
       bufferization::BufferizationOptions options;
       switch (promoteSharedMemPattern) {
-        case GPUPromoteSharedMemPattern::ContractionOpPattern:
-          // Promote all the input operands
-          for (auto operand : linalgOp.getDpsInputOperands()) {
-            FailureOr<Value> ret = bufferization::allocateTensorForShapedValue(
-                builder, op->getLoc(), operand->get(), false, options, true);
-            if (failed(ret)) {
-              return signalPassFailure();
-            }
-            Value v = ret.value();
-            operand->set(v);
+      case GPUPromoteSharedMemPattern::ContractionOpPattern:
+        // Promote all the input operands
+        for (auto operand : linalgOp.getDpsInputOperands()) {
+          FailureOr<Value> ret = bufferization::allocateTensorForShapedValue(
+              builder, op->getLoc(), operand->get(), false, options, true);
+          if (failed(ret)) {
+            return signalPassFailure();
           }
-          break;
+          Value v = ret.value();
+          operand->set(v);
+        }
+        break;
 
-        case GPUPromoteSharedMemPattern::TransposeOpPattern:
-          LinalgOpInfo opInfo(linalgOp, sharedMemTransposeFilter);
+      case GPUPromoteSharedMemPattern::TransposeOpPattern:
+        LinalgOpInfo opInfo(linalgOp, sharedMemTransposeFilter);
 
-          for (auto operand : opInfo.getTransposeOperands()) {
-            FailureOr<Value> ret = bufferization::allocateTensorForShapedValue(
-                builder, op->getLoc(), operand->get(), false, options, true);
-            if (failed(ret)) {
-              return signalPassFailure();
-            }
-            Value v = ret.value();
-            operand->set(v);
+        for (auto operand : opInfo.getTransposeOperands()) {
+          FailureOr<Value> ret = bufferization::allocateTensorForShapedValue(
+              builder, op->getLoc(), operand->get(), false, options, true);
+          if (failed(ret)) {
+            return signalPassFailure();
           }
-          break;
+          Value v = ret.value();
+          operand->set(v);
+        }
+        break;
       }
     }
 
@@ -229,12 +237,12 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUTensorAlloc(
-    GPUPromoteSharedMemPattern promoteSharedMemPattern) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUTensorAlloc(GPUPromoteSharedMemPattern promoteSharedMemPattern) {
   return std::make_unique<GPUTensorAllocPass>(promoteSharedMemPattern);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
index 30483b3..4e0b99c 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
@@ -35,28 +35,31 @@
 
 class TileConsumerAndFuseInputProducer final
     : public OpInterfaceRewritePattern<TilingInterface> {
- public:
+public:
   TileConsumerAndFuseInputProducer(
       MLIRContext *context, IREE::LinalgExt::LinalgTransformationFilter filter,
       bool fuseInputProducer, PatternBenefit benefit = 1)
       : OpInterfaceRewritePattern<TilingInterface>(context, benefit),
-        filter(std::move(filter)),
-        fuseInputProducer(fuseInputProducer) {}
+        filter(std::move(filter)), fuseInputProducer(fuseInputProducer) {}
 
   LogicalResult matchAndRewrite(TilingInterface op,
                                 PatternRewriter &rewriter) const override {
-    if (failed(filter.checkAndNotify(rewriter, op))) return failure();
+    if (failed(filter.checkAndNotify(rewriter, op)))
+      return failure();
 
     // Make sure we have a PartitionableLoopInterface op here and query the tile
     // sizes from the partitionable loops.
     auto plOp = dyn_cast<PartitionableLoopsInterface>(*op);
-    if (!plOp) return failure();
+    if (!plOp)
+      return failure();
     auto partitionedLoops = plOp.getPartitionableLoops(kNumMaxParallelDims);
     SmallVector<int64_t> tileSizes = getTileSizes(op, 0);
-    if (tileSizes.empty()) return failure();
+    if (tileSizes.empty())
+      return failure();
     // Mask out non reduction dimensions.
     for (unsigned depth : partitionedLoops) {
-      if (depth < tileSizes.size()) tileSizes[depth] = 0;
+      if (depth < tileSizes.size())
+        tileSizes[depth] = 0;
     }
 
     // Make sure we have a tile size for each dimension.
@@ -86,10 +89,11 @@
     return success();
   }
 
- private:
-  FailureOr<scf::SCFTilingResult> tileConsumerAndFuseInputProducer(
-      RewriterBase &rewriter, TilingInterface consumer,
-      ArrayRef<int64_t> tileSizes) const {
+private:
+  FailureOr<scf::SCFTilingResult>
+  tileConsumerAndFuseInputProducer(RewriterBase &rewriter,
+                                   TilingInterface consumer,
+                                   ArrayRef<int64_t> tileSizes) const {
     // First tile the current op as the consumer op.
     auto tilingOptions = scf::SCFTilingOptions().setTileSizes(tileSizes);
     FailureOr<scf::SCFTilingResult> tilingResult =
@@ -98,9 +102,11 @@
       return rewriter.notifyMatchFailure(consumer, "failed to tile consumer");
     }
 
-    if (!fuseInputProducer) return tilingResult;
+    if (!fuseInputProducer)
+      return tilingResult;
     // If there are no generated loops generated, fusion is immaterial.
-    if (tilingResult->loops.empty()) return tilingResult;
+    if (tilingResult->loops.empty())
+      return tilingResult;
 
     // Collect immediate input operands that are fusable into the tiled loop.
     // We have tensor extract slice ops taking slices of the untiled op.
@@ -111,12 +117,15 @@
     assert(tilingResult->tiledOps.size() == 1);
     Operation *tiledOp = tilingResult->tiledOps.front();
     auto dsOp = dyn_cast<DestinationStyleOpInterface>(tiledOp);
-    if (!dsOp) return tilingResult;
+    if (!dsOp)
+      return tilingResult;
     for (OpOperand *operand : dsOp.getDpsInputOperands()) {
       auto sliceOp = operand->get().getDefiningOp<tensor::ExtractSliceOp>();
-      if (!sliceOp) continue;
+      if (!sliceOp)
+        continue;
       auto linalgOp = sliceOp.getSource().getDefiningOp<linalg::LinalgOp>();
-      if (!linalgOp) continue;
+      if (!linalgOp)
+        continue;
       // Restrict to fully parallel linalg ops for now for simplicity.
       auto isParallel = [](utils::IteratorType it) {
         return linalg::isParallelIterator(it);
@@ -200,7 +209,8 @@
   for (TilingInterface tilingOp : computeOps) {
     size_t numLoops = 0;
     for (auto type : tilingOp.getLoopIteratorTypes()) {
-      if (type == utils::IteratorType::parallel) numLoops++;
+      if (type == utils::IteratorType::parallel)
+        numLoops++;
     }
     IRRewriter rewriter(tilingOp->getContext());
     rewriter.setInsertionPoint(tilingOp);
@@ -209,7 +219,8 @@
     auto partitionedLoops =
         interfaceOp.getPartitionableLoops(kNumMaxParallelDims);
     // If there are no dimensions to tile skip the transformation.
-    if (partitionedLoops.empty()) continue;
+    if (partitionedLoops.empty())
+      continue;
     SmallVector<OpFoldResult> numThreads(numLoops, rewriter.getIndexAttr(0));
     int64_t id = 0, threadDim = 0;
     SmallVector<Attribute> idDims;
@@ -245,7 +256,8 @@
     auto consumerOp = cast<linalg::LinalgOp>(*convOp);
     IRRewriter rewriter(funcOp.getContext());
     SmallVector<int64_t> tileSizes = getTileSizes(consumerOp, 1);
-    if (tileSizes.empty()) return success();
+    if (tileSizes.empty())
+      return success();
 
     FailureOr<scf::SCFTileAndFuseResult> tileAndFuseResult =
         scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp(
@@ -289,11 +301,11 @@
 
 namespace {
 struct GPUTensorTilePass : public GPUTensorTileBase<GPUTensorTilePass> {
- private:
+private:
   // Distribute the workloads to warp if true otherwise distribute to threads.
   bool distributeToWarp = false;
 
- public:
+public:
   GPUTensorTilePass(bool distributeToWarp)
       : distributeToWarp(distributeToWarp) {}
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -301,7 +313,8 @@
   }
   void runOnOperation() override {
     auto funcOp = getOperation();
-    if (!isEntryPoint(funcOp)) return;
+    if (!isEntryPoint(funcOp))
+      return;
 
     funcOp->walk([&](linalg::LinalgOp op) {
       op->removeAttr(IREE::LinalgExt::LinalgTransforms::kLinalgTransformMarker);
@@ -321,7 +334,8 @@
 
     // Tile to serial loops to the wg tile size to handle reductions and other
     // dimension that have not been distributed.
-    if (failed(tileReductionToSerialLoops(funcOp))) return signalPassFailure();
+    if (failed(tileReductionToSerialLoops(funcOp)))
+      return signalPassFailure();
 
     LLVM_DEBUG({
       llvm::dbgs() << "--- After tile reductions:";
@@ -338,12 +352,12 @@
     });
   }
 };
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUTensorTile(
-    bool distributeToWarp) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUTensorTile(bool distributeToWarp) {
   return std::make_unique<GPUTensorTilePass>(distributeToWarp);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileReduction.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileReduction.cpp
index d590928..6ca051d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileReduction.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTileReduction.cpp
@@ -40,7 +40,8 @@
   rewriter.setInsertionPoint(op);
   FailureOr<scf::SCFReductionTilingResult> results = scf::tileReductionUsingScf(
       rewriter, cast<PartialReductionOpInterface>(op.getOperation()), sizes);
-  if (failed(results)) return failure();
+  if (failed(results))
+    return failure();
   return success();
 }
 
@@ -48,12 +49,14 @@
   IRRewriter rewriter(op.getContext());
   rewriter.setInsertionPoint(op);
   SmallVector<int64_t> tileSizes = getTileSizes(op, 1);
-  if (tileSizes.empty()) return success();
+  if (tileSizes.empty())
+    return success();
   linalg::LinalgTilingOptions tileOption;
   tileOption.setTileSizes(tileSizes);
   FailureOr<linalg::TiledLinalgOp> tiledOps =
       linalg::tileLinalgOp(rewriter, op, tileOption);
-  if (failed(tiledOps)) return failure();
+  if (failed(tiledOps))
+    return failure();
   rewriter.replaceOp(op, tiledOps->tensorResults);
   return success();
 }
@@ -81,11 +84,11 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createGPUTileReductionPass() {
   return std::make_unique<GPUTileReductionPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorization.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorization.cpp
index 191d6bd..8f1a4fd 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorization.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorization.cpp
@@ -45,12 +45,15 @@
   // vectors larger than what would fit in register skip vectorization.
   f.addFilter([maxVectorSize](Operation *op) {
     auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
-    if (!linalgOp) return success();
+    if (!linalgOp)
+      return success();
     int64_t maxFlatVecSize = 1;
     for (OpOperand &operand : linalgOp->getOpOperands()) {
       auto type = llvm::dyn_cast<ShapedType>(operand.get().getType());
-      if (!type) continue;
-      if (!type.hasStaticShape()) return failure();
+      if (!type)
+        continue;
+      if (!type.hasStaticShape())
+        return failure();
       maxFlatVecSize = std::max(maxFlatVecSize, type.getNumElements());
     }
     return success(maxFlatVecSize <= maxVectorSize);
@@ -106,13 +109,13 @@
     linalg::hoistRedundantVectorTransfersOnTensor(funcOp);
   }
 };
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createGPUVectorizationPass(
-    bool generateContract, int64_t maxVectorSize) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createGPUVectorizationPass(bool generateContract, int64_t maxVectorSize) {
   return std::make_unique<GPUVectorizationPass>(generateContract,
                                                 maxVectorSize);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp
index 148568a..96581d6 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/VectorReductionToGPU.cpp
@@ -49,15 +49,19 @@
   using namespace IREE::HAL;
 
   auto loadOp = dyn_cast<memref::LoadOp>(op);
-  if (!loadOp) return false;
+  if (!loadOp)
+    return false;
   auto space = loadOp.getMemRefType().getMemorySpace();
   auto attr = llvm::dyn_cast_if_present<DescriptorTypeAttr>(space);
-  if (!attr) return false;
+  if (!attr)
+    return false;
 
-  if (attr.getValue() == DescriptorType::UniformBuffer) return true;
+  if (attr.getValue() == DescriptorType::UniformBuffer)
+    return true;
 
   auto subspan = loadOp.getMemRef().getDefiningOp<InterfaceBindingSubspanOp>();
-  if (!subspan) return false;
+  if (!subspan)
+    return false;
   if (auto flags = subspan.getDescriptorFlags()) {
     if (bitEnumContainsAll(*flags, IREE::HAL::DescriptorFlags::ReadOnly))
       return true;
@@ -67,19 +71,23 @@
 
 /// Hoist uniform operations as well as special hal operations that have side
 /// effect but are safe to move out of the warp single lane region.
-static void moveScalarAndBindingUniformCode(
-    vector::WarpExecuteOnLane0Op warpOp) {
+static void
+moveScalarAndBindingUniformCode(vector::WarpExecuteOnLane0Op warpOp) {
   /// Hoist ops without side effect as well as special binding ops.
   auto canBeHoisted = [](Operation *op,
                          function_ref<bool(Value)> definedOutside) {
-    if (op->getNumRegions() != 0) return false;
-    if (!llvm::all_of(op->getOperands(), definedOutside)) return false;
-    if (isMemoryEffectFree(op)) return true;
+    if (op->getNumRegions() != 0)
+      return false;
+    if (!llvm::all_of(op->getOperands(), definedOutside))
+      return false;
+    if (isMemoryEffectFree(op))
+      return true;
 
     if (isa<IREE::HAL::InterfaceBindingSubspanOp,
             IREE::HAL::InterfaceConstantLoadOp, memref::AssumeAlignmentOp>(op))
       return true;
-    if (isUniformLoad(op)) return true;
+    if (isUniformLoad(op))
+      return true;
 
     return false;
   };
@@ -108,7 +116,8 @@
   }
 
   // Move all the ops marked as uniform outside of the region.
-  for (Operation *op : opsToMove) op->moveBefore(warpOp);
+  for (Operation *op : opsToMove)
+    op->moveBefore(warpOp);
 }
 
 namespace {
@@ -117,12 +126,13 @@
 /// MultiDimReduction distribution is supported.
 class InsertElementToBroadcast final
     : public OpRewritePattern<vector::InsertElementOp> {
- public:
+public:
   using OpRewritePattern<vector::InsertElementOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(vector::InsertElementOp insertOp,
                                 PatternRewriter &rewriter) const override {
-    if (insertOp.getDestVectorType().getNumElements() != 1) return failure();
+    if (insertOp.getDestVectorType().getNumElements() != 1)
+      return failure();
     rewriter.replaceOpWithNewOp<vector::BroadcastOp>(
         insertOp, insertOp.getDestVectorType(), insertOp.getSource());
     return success();
@@ -147,7 +157,7 @@
 
 class VectorReduceToGPUPass
     : public VectorReduceToGPUBase<VectorReduceToGPUPass> {
- public:
+public:
   explicit VectorReduceToGPUPass(std::function<int(func::FuncOp)> getWarpSize)
       : getWarpSize(getWarpSize) {}
 
@@ -230,7 +240,8 @@
       auto distributionFn = [](Value val) {
         AffineMap map = AffineMap::get(val.getContext());
         auto vecType = llvm::dyn_cast<VectorType>(val.getType());
-        if (!vecType) return map;
+        if (!vecType)
+          return map;
         // Create a map (d0, d1) -> (d1) to distribute along the inner
         // dimension. Once we support n-d distribution we can add more
         // complex cases.
@@ -274,11 +285,11 @@
     });
   }
 
- private:
+private:
   std::function<int(func::FuncOp)> getWarpSize;
 };
 
-}  // anonymous namespace
+} // anonymous namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createConvertVectorReductionToGPUPass(
@@ -286,5 +297,5 @@
   return std::make_unique<VectorReduceToGPUPass>(getWarpSize);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/WorkGroupSwizzle.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/WorkGroupSwizzle.cpp
index 5a3b301..1cf724d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/WorkGroupSwizzle.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/WorkGroupSwizzle.cpp
@@ -27,7 +27,7 @@
 // function.
 static void makeSwizzledId(Location loc, OpBuilder b, Value workgroupIdX,
                            Value workgroupIdY, Value gridSizeX, Value gridSizeY,
-                           Value& SwizzledIdX, Value& SwizzledIdY,
+                           Value &SwizzledIdX, Value &SwizzledIdY,
                            unsigned swizzleTile) {
   Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
   Value tile = b.create<arith::ConstantIndexOp>(loc, swizzleTile);
@@ -60,7 +60,7 @@
   WorkGroupSwizzlePass(unsigned swizzleLogTile)
       : swizzleLogTile(swizzleLogTile) {}
 
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<affine::AffineDialect>();
   }
   LogicalResult initializeOptions(StringRef options) override {
@@ -71,7 +71,8 @@
     return success();
   }
   void runOnOperation() override {
-    if (swizzleLogTile == 0) return;
+    if (swizzleLogTile == 0)
+      return;
     unsigned swizzleTile = pow(2, swizzleLogTile);
     func::FuncOp funcOp = getOperation();
     std::array<IREE::HAL::InterfaceWorkgroupIDOp, 2> oldWorkgroupIds;
@@ -86,7 +87,8 @@
         yFound = true;
       }
     });
-    if (xFound == false || yFound == false) return;
+    if (xFound == false || yFound == false)
+      return;
     OpBuilder builder(funcOp);
     builder.setInsertionPoint(&funcOp.front(), funcOp.front().begin());
     Value workgroupIdX =
@@ -104,15 +106,15 @@
     oldWorkgroupIds[1].replaceAllUsesWith(SwizzledIdY);
   }
 
- private:
+private:
   unsigned swizzleLogTile;
 };
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createWorkGroupSwizzle(
-    unsigned swizzleLogTile) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createWorkGroupSwizzle(unsigned swizzleLogTile) {
   return std::make_unique<WorkGroupSwizzlePass>(swizzleLogTile);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/HoistStaticallyBoundAllocations.cpp b/compiler/src/iree/compiler/Codegen/Common/HoistStaticallyBoundAllocations.cpp
index 385e7ce..33f084a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/HoistStaticallyBoundAllocations.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/HoistStaticallyBoundAllocations.cpp
@@ -24,7 +24,7 @@
   void runOnOperation() override;
 };
 
-}  // namespace
+} // namespace
 
 void HoistStaticallyBoundAllocationsPass::runOnOperation() {
   func::FuncOp funcOp = getOperation();
@@ -37,5 +37,5 @@
   return std::make_unique<HoistStaticallyBoundAllocationsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp b/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
index adbbfd8..185624d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
@@ -48,7 +48,7 @@
 namespace {
 class EliminateEmptyTensorsPass
     : public EliminateEmptyTensorsBase<EliminateEmptyTensorsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Flow::FlowDialect, tensor::TensorDialect>();
   }
@@ -59,15 +59,14 @@
 /// Pass to convert from tensor based ops to memref based ops.
 class IREEComprehensiveBufferizePass
     : public IREEComprehensiveBufferizeBase<IREEComprehensiveBufferizePass> {
- public:
+public:
   explicit IREEComprehensiveBufferizePass(
       std::optional<BufferizationOptions::AllocationFn> allocationFn =
           std::nullopt,
       std::optional<BufferizationOptions::DeallocationFn> deallocationFn =
           std::nullopt,
       std::optional<BufferizationOptions::MemCpyFn> memCpyFn = std::nullopt)
-      : allocationFn(allocationFn),
-        deallocationFn(deallocationFn),
+      : allocationFn(allocationFn), deallocationFn(deallocationFn),
         memCpyFn(memCpyFn) {}
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -90,12 +89,12 @@
 
   void runOnOperation() override;
 
- private:
+private:
   const std::optional<BufferizationOptions::AllocationFn> allocationFn;
   const std::optional<BufferizationOptions::DeallocationFn> deallocationFn;
   const std::optional<BufferizationOptions::MemCpyFn> memCpyFn;
 };
-}  // namespace
+} // namespace
 
 static bool isaTensor(Type t) { return llvm::isa<TensorType>(t); };
 
@@ -156,12 +155,13 @@
   return options;
 }
 
-LogicalResult eliminateEmptyTensors(
-    RewriterBase &rewriter, Operation *op,
-    const OneShotBufferizationOptions &options) {
+LogicalResult
+eliminateEmptyTensors(RewriterBase &rewriter, Operation *op,
+                      const OneShotBufferizationOptions &options) {
   // Analyze IR.
   OneShotAnalysisState state(op, options);
-  if (failed(analyzeOp(op, state))) return failure();
+  if (failed(analyzeOp(op, state)))
+    return failure();
 
   // Rewrite tensor.empty ops that are anchored on specific ops.
   if (failed(bufferization::insertSliceAnchoredEmptyTensorEliminationStep(
@@ -198,11 +198,14 @@
 
 // The following is copied from bufferization::runOneShotBufferize with
 // modifications.
-LogicalResult runIREEOneShotBufferize(
-    Operation *op, const IREEOneShotBufferizationOptions &options) {
+LogicalResult
+runIREEOneShotBufferize(Operation *op,
+                        const IREEOneShotBufferizationOptions &options) {
   OneShotAnalysisState state(op, options);
-  if (failed(analyzeOp(op, state))) return failure();
-  if (options.testAnalysisOnly) return success();
+  if (failed(analyzeOp(op, state)))
+    return failure();
+  if (options.testAnalysisOnly)
+    return success();
   return bufferization::runOneShotBufferize(op, options);
 }
 
@@ -238,9 +241,12 @@
     std::optional<BufferizationOptions::AllocationFn> allocationFn,
     std::optional<BufferizationOptions::DeallocationFn> deallocationFn,
     std::optional<BufferizationOptions::MemCpyFn> memCpyFn) {
-  if (!allocationFn) allocationFn = defaultAllocationFn;
-  if (!deallocationFn) deallocationFn = defaultDeallocationFn;
-  if (!memCpyFn) memCpyFn = defaultMemCpyFn;
+  if (!allocationFn)
+    allocationFn = defaultAllocationFn;
+  if (!deallocationFn)
+    deallocationFn = defaultDeallocationFn;
+  if (!memCpyFn)
+    memCpyFn = defaultMemCpyFn;
   return std::make_unique<IREEComprehensiveBufferizePass>(
       allocationFn, deallocationFn, memCpyFn);
 }
@@ -268,5 +274,5 @@
   addIREEPostBufferizationPasses(passManager);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp b/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp
index ce0f3f8..39d46ca 100644
--- a/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp
@@ -26,7 +26,7 @@
   SmallVector<OpFoldResult> sizes;
   SmallVector<OpFoldResult> strides;
 };
-}  // namespace
+} // namespace
 
 /// Returns an AffineMap for an add or a mul.
 static AffineMap getAddMap(MLIRContext *context) {
@@ -42,8 +42,9 @@
 
 /// Returns the strides based on the sizes assuming that the `memref`
 /// has default layout, i.e. it is not a result of a subview.
-static SmallVector<OpFoldResult> getStridesFromSizes(
-    RewriterBase &rewriter, Location loc, ArrayRef<OpFoldResult> sizes) {
+static SmallVector<OpFoldResult>
+getStridesFromSizes(RewriterBase &rewriter, Location loc,
+                    ArrayRef<OpFoldResult> sizes) {
   if (sizes.size() == 0) {
     return {};
   }
@@ -91,8 +92,9 @@
 /// Replaces the offsets, sizes and strides based on values provided
 /// by `DescriptorInfo` object.
 template <typename OpTy>
-static void replaceOffsetSizesAndStridesWith(
-    RewriterBase &rewriter, OpTy op, const DescriptorInfo &resultDescriptor) {
+static void
+replaceOffsetSizesAndStridesWith(RewriterBase &rewriter, OpTy op,
+                                 const DescriptorInfo &resultDescriptor) {
   int rank = resultDescriptor.sizes.size();
   assert(rank == resultDescriptor.strides.size() &&
          "expected number of sizes and strides to match");
@@ -129,9 +131,11 @@
     auto binding =
         op.getSource()
             .template getDefiningOp<IREE::HAL::InterfaceBindingSubspanOp>();
-    if (!binding) return failure();
+    if (!binding)
+      return failure();
     auto memRefType = llvm::cast<MemRefType>(binding.getResult().getType());
-    if (memRefType.getRank() < 1) return failure();
+    if (memRefType.getRank() < 1)
+      return failure();
 
     auto loc = op.getLoc();
     OpBuilder::InsertionGuard g(rewriter);
@@ -225,7 +229,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void IREEExpandStridedMetadataPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -261,5 +265,5 @@
   return std::make_unique<IREEExpandStridedMetadataPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/InstrumentMemoryAccesses.cpp b/compiler/src/iree/compiler/Codegen/Common/InstrumentMemoryAccesses.cpp
index bc4282e..136df74 100644
--- a/compiler/src/iree/compiler/Codegen/Common/InstrumentMemoryAccesses.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/InstrumentMemoryAccesses.cpp
@@ -83,12 +83,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createInstrumentMemoryAccessesPass() {
   return std::make_unique<InstrumentMemoryAccessesPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/LowerUKernelsToCalls.cpp b/compiler/src/iree/compiler/Codegen/Common/LowerUKernelsToCalls.cpp
index 023b889..255ea31 100644
--- a/compiler/src/iree/compiler/Codegen/Common/LowerUKernelsToCalls.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/LowerUKernelsToCalls.cpp
@@ -23,7 +23,7 @@
   }
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void LowerUKernelOpsToCallsPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -58,5 +58,5 @@
   return std::make_unique<LowerUKernelOpsToCallsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
index 0958fb4..1af52c0 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
@@ -56,7 +56,8 @@
       getMixedValues(dispatchTensorType.getShape(), dynamicDims, builder);
   auto innerTileSizes = getInnerTileSizesOfr(
       builder, loc, boundTensorType, *encodingInfo, materializeEncodingValueFn);
-  if (failed(innerTileSizes)) return failure();
+  if (failed(innerTileSizes))
+    return failure();
   SmallVector<OpFoldResult> convertedTargetShape =
       tensor::PackOp::getResultShape(builder, loc, targetShape, *innerTileSizes,
                                      encodingInfo->innerDimsPos,
@@ -97,9 +98,10 @@
   using OpMaterializeEncodingPattern<
       IREE::HAL::InterfaceBindingSubspanOp>::OpMaterializeEncodingPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp subspanOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp subspanOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType = llvm::dyn_cast<IREE::Flow::DispatchTensorType>(
         subspanOp.getResult().getType());
     if (!resultType) {
@@ -149,9 +151,9 @@
   using OpMaterializeEncodingPattern<
       IREE::Flow::DispatchTensorLoadOp>::OpMaterializeEncodingPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::Flow::DispatchTensorLoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::DispatchTensorLoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle operations where the load covers the entire
     // `!flow.dispatch.tensor` type.
     // TODO(ravishankarm): Relax this for partial loads.
@@ -199,9 +201,9 @@
   using OpMaterializeEncodingPattern<
       IREE::Flow::DispatchTensorStoreOp>::OpMaterializeEncodingPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::Flow::DispatchTensorStoreOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::DispatchTensorStoreOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle operations where the store covers the entire
     // `!flow.dispatch.tensor` type.
     // TODO(ravishankarm): Relax this for partial stores.
@@ -241,32 +243,32 @@
   }
 };
 
-}  // namespace
+} // namespace
 
-IREE::LinalgExt::MaterializeEncodingInfo chooseEncodingInfoForMatmul(
-    MatmulType type, MatmulOperandRole operandRole,
-    MatmulTileParams tileParams) {
+IREE::LinalgExt::MaterializeEncodingInfo
+chooseEncodingInfoForMatmul(MatmulType type, MatmulOperandRole operandRole,
+                            MatmulTileParams tileParams) {
   MaterializeEncodingInfo encodingInfo;
   encodingInfo.innerDimsPos = {0, 1};
   switch (operandRole) {
-    case (MatmulOperandRole::LHS): {
-      encodingInfo.innerTileSizes = {tileParams.M, tileParams.K};
-      break;
-    }
-    case (MatmulOperandRole::RHS): {
-      encodingInfo.innerTileSizes = {tileParams.N, tileParams.K};
-      encodingInfo.innerDimsPos = {1, 0};
-      encodingInfo.outerDimsPerm = {1, 0};
-      break;
-    }
-    case (MatmulOperandRole::RESULT): {
-      encodingInfo.innerTileSizes = {tileParams.M, tileParams.N};
-      break;
-    }
-    default: {
-      assert(false);
-      return {};
-    }
+  case (MatmulOperandRole::LHS): {
+    encodingInfo.innerTileSizes = {tileParams.M, tileParams.K};
+    break;
+  }
+  case (MatmulOperandRole::RHS): {
+    encodingInfo.innerTileSizes = {tileParams.N, tileParams.K};
+    encodingInfo.innerDimsPos = {1, 0};
+    encodingInfo.outerDimsPerm = {1, 0};
+    break;
+  }
+  case (MatmulOperandRole::RESULT): {
+    encodingInfo.innerTileSizes = {tileParams.M, tileParams.N};
+    break;
+  }
+  default: {
+    assert(false);
+    return {};
+  }
   }
   return encodingInfo;
 }
@@ -277,16 +279,19 @@
     int64_t size = shape[encodingInfo.innerDimsPos[i]];
     // Dynamic sizes are assumed to be large enough, not to be candidates for
     // narrow kernels.
-    if (ShapedType::isDynamic(size)) continue;
+    if (ShapedType::isDynamic(size))
+      continue;
     int64_t &tileSize = encodingInfo.innerTileSizes[i];
     // Let's not try to handle any dynamic tile sizes here. We could handle the
     // case where size==1 (as whatever is the runtime value of tileSize, it
     // can't be less than that, so it should be OK to replace it with 1) but
     // in general, adjusting dynamic tile sizes has to be done by the
     // materializeEncodingValueFn which we obtain those tileSizes from.
-    if (ShapedType::isDynamic(tileSize)) continue;
+    if (ShapedType::isDynamic(tileSize))
+      continue;
     auto generateNarrowTileSize = [&](int64_t n) {
-      if (size <= n && tileSize >= n) tileSize = n;
+      if (size <= n && tileSize >= n)
+        tileSize = n;
     };
     generateNarrowTileSize(1);
     generateNarrowTileSize(2);
@@ -305,8 +310,8 @@
   return result;
 }
 
-MaterializeEncodingValueFn getMaterializeEncodingValueFn(
-    IREE::HAL::ExecutableTargetAttr targetAttr) {
+MaterializeEncodingValueFn
+getMaterializeEncodingValueFn(IREE::HAL::ExecutableTargetAttr targetAttr) {
   if (isVMVXBackend(targetAttr) && hasMicrokernels(targetAttr)) {
     return chooseDynamicEncodingInfoVMVXMicrokernels;
   }
@@ -335,7 +340,8 @@
         auto resultType = llvm::dyn_cast<IREE::Flow::DispatchTensorType>(
             subspanOp.getResult().getType());
         // For types that are not `Flow::DispatchTensorType` mark as legal.
-        if (!resultType) return true;
+        if (!resultType)
+          return true;
         return resultType == typeConverter.convertType(resultType);
       });
 
@@ -347,5 +353,5 @@
       context, typeConverter, materializeEncodingValueFn);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/MemrefCopyToLinalg.cpp b/compiler/src/iree/compiler/Codegen/Common/MemrefCopyToLinalg.cpp
index 07f0f09..cd78171 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MemrefCopyToLinalg.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MemrefCopyToLinalg.cpp
@@ -24,7 +24,8 @@
     Operation *linalgCopy =
         createLinalgCopyOp(rewriter, copyOp.getLoc(), copyOp.getSource(),
                            copyOp.getTarget(), copyOp->getAttrs());
-    if (!linalgCopy) return failure();
+    if (!linalgCopy)
+      return failure();
     rewriter.replaceOp(copyOp, linalgCopy->getResults());
     return success();
   }
@@ -47,11 +48,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createMemrefCopyToLinalgPass() {
   return std::make_unique<MemrefCopyToLinalgPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp b/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
index 165620b..806455c 100644
--- a/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/OptimizeVectorTransferPass.cpp
@@ -24,10 +24,10 @@
 // There can be SubviewOp users as long as all its users are also
 // StoreOp/transfer_write. If return true it also fills out the uses, if it
 // returns false uses is unchanged.
-static bool allUsesAreStores(Operation* op, std::vector<Operation*>& uses) {
-  std::vector<Operation*> opUses;
-  for (OpOperand& use : op->getUses()) {
-    Operation* useOp = use.getOwner();
+static bool allUsesAreStores(Operation *op, std::vector<Operation *> &uses) {
+  std::vector<Operation *> opUses;
+  for (OpOperand &use : op->getUses()) {
+    Operation *useOp = use.getOwner();
     if (isa<vector::TransferWriteOp, memref::StoreOp>(useOp) ||
         (isa<memref::SubViewOp>(useOp) && allUsesAreStores(useOp, opUses))) {
       opUses.push_back(useOp);
@@ -42,13 +42,13 @@
 // Track temporary allocations that are never read from. If this is the case
 // it means both the allocations and associated stores can be removed.
 static void eraseDeadAllocAndStores(func::FuncOp funcOp) {
-  std::vector<Operation*> opToErase;
+  std::vector<Operation *> opToErase;
   funcOp.walk([&](memref::AllocOp op) {
     if (allUsesAreStores(op, opToErase)) {
       opToErase.push_back(op.getOperation());
     }
   });
-  for (Operation* op : opToErase) {
+  for (Operation *op : opToErase) {
     op->erase();
   }
 }
@@ -63,15 +63,16 @@
 // on the semantic of transpose in this case.
 class TransposeUnitDimToShapeCast
     : public OpRewritePattern<vector::TransposeOp> {
- public:
+public:
   using OpRewritePattern<vector::TransposeOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(vector::TransposeOp op,
-                                PatternRewriter& rewriter) const override {
+                                PatternRewriter &rewriter) const override {
     unsigned numNonUnitSrcDim =
         llvm::count_if(op.getSourceVectorType().getShape(),
                        [](int64_t dim) { return dim != 1; });
-    if (numNonUnitSrcDim > 1) return failure();
+    if (numNonUnitSrcDim > 1)
+      return failure();
     rewriter.replaceOpWithNewOp<vector::ShapeCastOp>(
         op, op.getResultVectorType(), op.getVector());
     return success();
@@ -160,17 +161,17 @@
     return success();
   }
 
- private:
+private:
   bool flatten;
   bool dropUnitDims;
 };
 
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createOptimizeVectorTransferPass(
-    bool flatten, bool dropUnitDims) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createOptimizeVectorTransferPass(bool flatten, bool dropUnitDims) {
   return std::make_unique<OptimizeVectorTransferPass>(flatten, dropUnitDims);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/PadDynamicAlloc.cpp b/compiler/src/iree/compiler/Codegen/Common/PadDynamicAlloc.cpp
index 9ccc6a8..e390ba6 100644
--- a/compiler/src/iree/compiler/Codegen/Common/PadDynamicAlloc.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/PadDynamicAlloc.cpp
@@ -21,12 +21,15 @@
 /// compute alloc sizes.
 static Value skipAffineMaxZero(Value dim) {
   auto affineMax = dim.getDefiningOp<affine::AffineMaxOp>();
-  if (!affineMax) return dim;
+  if (!affineMax)
+    return dim;
   for (AffineExpr expr : affineMax.getMap().getResults()) {
     if (auto cst = expr.dyn_cast<AffineConstantExpr>()) {
-      if (cst.getValue() == 0) continue;
+      if (cst.getValue() == 0)
+        continue;
     } else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>()) {
-      if (symExpr.getPosition() == 0) continue;
+      if (symExpr.getPosition() == 0)
+        continue;
     }
     return dim;
   }
@@ -56,7 +59,8 @@
     dimSize = *ub;
     sizes.push_back(dim);
   }
-  if (dynamicDimIdx == 0) return success();
+  if (dynamicDimIdx == 0)
+    return success();
   Type elType = allocOp.getType().getElementType();
   MemRefType allocType = MemRefType::get(shape, elType, AffineMap(),
                                          allocOp.getType().getMemorySpace());
@@ -82,15 +86,16 @@
     funcOp.walk(
         [&](memref::AllocOp allocOp) { sharedMemAllocs.push_back(allocOp); });
     for (memref::AllocOp alloc : sharedMemAllocs) {
-      if (failed(padAlloc(context, alloc))) return signalPassFailure();
+      if (failed(padAlloc(context, alloc)))
+        return signalPassFailure();
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createPadDynamicAlloc() {
   return std::make_unique<PadDynamicAllocPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/PolynomialApproximationPass.cpp b/compiler/src/iree/compiler/Codegen/Common/PolynomialApproximationPass.cpp
index f67f94a..960395d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/PolynomialApproximationPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/PolynomialApproximationPass.cpp
@@ -44,11 +44,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<>> createPolynomialApproximationPass() {
   return std::make_unique<PolynomialApproximationPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp b/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp
index 741947b..9ea756e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp
@@ -23,10 +23,11 @@
   using OpRewritePattern<linalg::GenericOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(linalg::GenericOp genericOp,
-                                PatternRewriter& rewriter) const override {
+                                PatternRewriter &rewriter) const override {
     // Find the first operand that is defined by another generic op on tensors.
-    for (OpOperand& opOperand : genericOp->getOpOperands()) {
-      if (!linalg::areElementwiseOpsFusable(&opOperand)) continue;
+    for (OpOperand &opOperand : genericOp->getOpOperands()) {
+      if (!linalg::areElementwiseOpsFusable(&opOperand))
+        continue;
 
       FailureOr<linalg::ElementwiseOpFusionResult> fusionResult =
           linalg::fuseElementwiseOps(rewriter, &opOperand);
@@ -59,12 +60,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createRematerializeParallelOpsPass() {
   return std::make_unique<RematerializeParallelOpsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/RemoveTrivialLoops.cpp b/compiler/src/iree/compiler/Codegen/Common/RemoveTrivialLoops.cpp
index a9bc16f..8f4038f 100644
--- a/compiler/src/iree/compiler/Codegen/Common/RemoveTrivialLoops.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/RemoveTrivialLoops.cpp
@@ -26,25 +26,26 @@
 /// Converts a symbolic GPU processor dimension to its numeric one.
 static unsigned dimToIndex(gpu::Dimension dim) {
   switch (dim) {
-    case gpu::Dimension::x:
-      return 0;
-    case gpu::Dimension::y:
-      return 1;
-    case gpu::Dimension::z:
-      return 2;
-    default:
-      assert(false && "invalid dimension");
-      return 0;
+  case gpu::Dimension::x:
+    return 0;
+  case gpu::Dimension::y:
+    return 1;
+  case gpu::Dimension::z:
+    return 2;
+  default:
+    assert(false && "invalid dimension");
+    return 0;
   }
 }
 
 /// If the value is a threadID return the range [0, workgroupSize-1].
 /// If the number of workgroup is known also return the range of workgroupId ad
 /// workgroupCount.
-static std::optional<std::pair<AffineExpr, AffineExpr>> getWorkgroupRange(
-    Value processorValue, SmallVectorImpl<Value> & /*dims*/,
-    SmallVectorImpl<Value> & /*symbols*/, ArrayRef<int64_t> workgroupCount,
-    ArrayRef<int64_t> workgroupSize) {
+static std::optional<std::pair<AffineExpr, AffineExpr>>
+getWorkgroupRange(Value processorValue, SmallVectorImpl<Value> & /*dims*/,
+                  SmallVectorImpl<Value> & /*symbols*/,
+                  ArrayRef<int64_t> workgroupCount,
+                  ArrayRef<int64_t> workgroupSize) {
   if (auto idOp = processorValue.getDefiningOp<gpu::ThreadIdOp>()) {
     unsigned index = dimToIndex(idOp.getDimension());
     OpBuilder b(processorValue.getContext());
@@ -59,7 +60,8 @@
     return std::make_pair(bound, bound);
   }
 
-  if (workgroupCount.empty()) return std::nullopt;
+  if (workgroupCount.empty())
+    return std::nullopt;
 
   if (auto idOp =
           processorValue.getDefiningOp<IREE::HAL::InterfaceWorkgroupIDOp>()) {
@@ -67,7 +69,8 @@
 
     // Can't infer the range when workroupCount is unknown.
     unsigned index = idOp.getDimension().getZExtValue();
-    if (!workgroupCount[index]) return std::nullopt;
+    if (!workgroupCount[index])
+      return std::nullopt;
 
     AffineExpr zero = builder.getAffineConstantExpr(0);
     AffineExpr ubExpr = builder.getAffineConstantExpr(workgroupCount[index]);
@@ -79,7 +82,8 @@
 
     // Can't infer the range when workroupCount is unknown.
     unsigned index = dimOp.getDimension().getZExtValue();
-    if (!workgroupCount[index]) return std::nullopt;
+    if (!workgroupCount[index])
+      return std::nullopt;
 
     AffineExpr bound = builder.getAffineConstantExpr(workgroupCount[index]);
     return std::make_pair(bound, bound);
@@ -91,7 +95,8 @@
 static bool isWorkgroupLoop(const LoopTilingAndDistributionInfo &info) {
   auto forOp = cast<scf::ForOp>(info.loop);
   Operation *lbOp = forOp.getLowerBound().getDefiningOp();
-  if (isa<IREE::HAL::InterfaceWorkgroupIDOp>(lbOp)) return true;
+  if (isa<IREE::HAL::InterfaceWorkgroupIDOp>(lbOp))
+    return true;
   auto applyOp = dyn_cast<affine::AffineApplyOp>(lbOp);
   return applyOp && llvm::any_of(applyOp.getMapOperands(), [](Value operand) {
            return operand.getDefiningOp<IREE::HAL::InterfaceWorkgroupIDOp>();
@@ -101,10 +106,10 @@
 static LogicalResult removeOneTripTiledLoops(func::FuncOp funcOp,
                                              ArrayRef<int64_t> workgroupSize,
                                              ArrayRef<int64_t> numWorkgroups) {
-  auto getWorkgroupRangeFn = [numWorkgroups, workgroupSize](
-                                 Value processorValue,
-                                 SmallVectorImpl<Value> &dims,
-                                 SmallVectorImpl<Value> &symbols) {
+  auto getWorkgroupRangeFn = [numWorkgroups,
+                              workgroupSize](Value processorValue,
+                                             SmallVectorImpl<Value> &dims,
+                                             SmallVectorImpl<Value> &symbols) {
     return getWorkgroupRange(processorValue, dims, symbols, numWorkgroups,
                              workgroupSize);
   };
@@ -120,7 +125,8 @@
   void runOnOperation() override {
     func::FuncOp funcOp = getOperation();
     FailureOr<IREE::HAL::ExecutableExportOp> exportOp = getEntryPoint(funcOp);
-    if (failed(exportOp)) return;
+    if (failed(exportOp))
+      return;
 
     SmallVector<int64_t> workgroupSize = getWorkgroupSize(*exportOp);
     SmallVector<int64_t> numWorkgroups = getStaticNumWorkgroups(funcOp);
@@ -130,12 +136,12 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createRemoveSingleIterationLoopPass() {
   return std::make_unique<RemoveSingleIterationLoopPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/SplitFullPartialTransferPass.cpp b/compiler/src/iree/compiler/Codegen/Common/SplitFullPartialTransferPass.cpp
index 98714a6..34fa27d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/SplitFullPartialTransferPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/SplitFullPartialTransferPass.cpp
@@ -45,16 +45,16 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSplitFullPartialTransferPass() {
   return std::make_unique<SplitFullPartialTransferPass>();
 }
-std::unique_ptr<OperationPass<func::FuncOp>> createSplitFullPartialTransferPass(
-    StringRef option) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createSplitFullPartialTransferPass(StringRef option) {
   return std::make_unique<SplitFullPartialTransferPass>(option);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/TestExecutablePreprocessing.cpp b/compiler/src/iree/compiler/Codegen/Common/TestExecutablePreprocessing.cpp
index 487eec7..9f4f8e8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TestExecutablePreprocessing.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TestExecutablePreprocessing.cpp
@@ -27,7 +27,8 @@
     // whatever it needed to the executable instead.
     getOperation()->walk([&](IREE::HAL::ExecutableVariantOp variantOp) {
       auto configAttr = variantOp.getTarget().getConfiguration();
-      if (!configAttr) return;
+      if (!configAttr)
+        return;
       auto replacementAttr = configAttr.getAs<IntegerAttr>("replace_i64");
       if (!replacementAttr) {
         // Skip variants that don't request modification.
@@ -44,11 +45,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<void>> createTestExecutablePreprocessingPass() {
   return std::make_unique<TestExecutablePreprocessingPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/TestPartitionableLoopsInterface.cpp b/compiler/src/iree/compiler/Codegen/Common/TestPartitionableLoopsInterface.cpp
index 2c4bb01..41b78d4 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TestPartitionableLoopsInterface.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TestPartitionableLoopsInterface.cpp
@@ -64,12 +64,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<void>>
 createTestPartitionableLoopsInterfacePass() {
   return std::make_unique<TestPartitionableLoopsInterfacePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp b/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
index 08412bb..905a876 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
@@ -61,12 +61,13 @@
 // and dyamic cases are handled the same way. When the tile+distribute moves
 // away from using `scf.for` to using a construct that better captures
 // distribution (like `scf.forall`) this information can be dropped.
-static LogicalResult getTileAndDistributeConfig(
-    ArrayRef<Operation *> computeOps, Operation *&dispatchRootOp,
-    SmallVectorImpl<int64_t> &tileSizes,
-    SmallVectorImpl<int64_t> &staticLoopRanges,
-    SmallVectorImpl<int64_t> &interchange,
-    SmallVectorImpl<unsigned> &partitionableLoops) {
+static LogicalResult
+getTileAndDistributeConfig(ArrayRef<Operation *> computeOps,
+                           Operation *&dispatchRootOp,
+                           SmallVectorImpl<int64_t> &tileSizes,
+                           SmallVectorImpl<int64_t> &staticLoopRanges,
+                           SmallVectorImpl<int64_t> &interchange,
+                           SmallVectorImpl<unsigned> &partitionableLoops) {
   // Find the lowering configuration of the root operation.
   Operation *rootOp = nullptr;
   for (Operation *op : llvm::reverse(computeOps)) {
@@ -106,7 +107,8 @@
   partitionableLoopsSet.insert(partitionableLoops.begin(),
                                partitionableLoops.end());
   for (auto loopId : llvm::seq<unsigned>(0, tileSizes.size())) {
-    if (partitionableLoopsSet.count(loopId)) continue;
+    if (partitionableLoopsSet.count(loopId))
+      continue;
     tileSizes[loopId] = 0;
   }
 
@@ -156,9 +158,10 @@
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(workgroupCountOp);
   auto workloadValues = workgroupCountOp.getOperands();
-  SmallVector<OpFoldResult> tileSizes = llvm::map_to_vector(
-      givenTileSizes,
-      [&](int64_t v) -> OpFoldResult { return rewriter.getIndexAttr(v); });
+  SmallVector<OpFoldResult> tileSizes =
+      llvm::map_to_vector(givenTileSizes, [&](int64_t v) -> OpFoldResult {
+        return rewriter.getIndexAttr(v);
+      });
 
   Attribute zero = rewriter.getIndexAttr(0);
   tileSizes.resize(workloadValues.size(), zero);
@@ -199,8 +202,10 @@
   // slowest varying.
   SmallVector<Value> numWorkgroups;
   for (auto partitionedLoop : llvm::reverse(partitionedLoops)) {
-    if (partitionedLoop >= tileSizes.size()) continue;
-    if (isConstantIntValue(tileSizes[partitionedLoop], 0)) continue;
+    if (partitionedLoop >= tileSizes.size())
+      continue;
+    if (isConstantIntValue(tileSizes[partitionedLoop], 0))
+      continue;
     Value numTileAlongDim = getValueOrCreateConstantIndexOp(
         rewriter, loc, numTiles[partitionedLoop]);
     if (numWorkgroups.size() == maxWorkgroupParallelDims) {
@@ -294,7 +299,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void TileAndDistributeToWorkgroupsPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -310,7 +315,8 @@
 
   for (func::FuncOp funcOp : innerModule.getOps<func::FuncOp>()) {
     auto exportOp = entryPoints.lookup(funcOp.getName());
-    if (!exportOp) continue;
+    if (!exportOp)
+      continue;
 
     SmallVector<Operation *> computeOps = getComputeOps(funcOp);
     SmallVector<int64_t> tileSizes, staticLoopRanges, interchange;
@@ -455,5 +461,5 @@
       maxWorkgroupParallelDims, distributionMethod);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileDispatchUsingInterface.cpp b/compiler/src/iree/compiler/Codegen/Common/TileDispatchUsingInterface.cpp
index df4259f..04c72c0 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TileDispatchUsingInterface.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TileDispatchUsingInterface.cpp
@@ -39,10 +39,12 @@
 
 /// Helper method to adjust the interchange vector to match the iteration
 /// domain.
-static SmallVector<int64_t> fillInterchangeVector(
-    ArrayRef<unsigned> interchangeVector, size_t iterationDomainSize) {
+static SmallVector<int64_t>
+fillInterchangeVector(ArrayRef<unsigned> interchangeVector,
+                      size_t iterationDomainSize) {
   SmallVector<int64_t> filledVector;
-  for (auto v : interchangeVector) filledVector.push_back(v);
+  for (auto v : interchangeVector)
+    filledVector.push_back(v);
   if (filledVector.size() < iterationDomainSize) {
     auto range = llvm::seq<unsigned>(filledVector.size(), iterationDomainSize);
     filledVector.append(range.begin(), range.end());
@@ -90,7 +92,7 @@
   Value size;
   Value stride;
 };
-}  // namespace
+} // namespace
 
 /// Generate an empty loop nest that represents the tiled loop nest shell.
 /// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
@@ -202,7 +204,8 @@
       storeOps.push_back(storeOp);
     }
   }
-  if (storeOps.empty()) return success();
+  if (storeOps.empty())
+    return success();
   if (storeOps.size() != 1) {
     return rewriter.notifyMatchFailure(untiledValue.getOwner(),
                                        "expected a single store for the op");
@@ -270,11 +273,11 @@
   SmallVector<OpFoldResult> tileOffsets;
   SmallVector<OpFoldResult> tileSizes;
 };
-}  // namespace
+} // namespace
 
-static FailureOr<IREETilingResult> tileDispatchUsingSCFFopOp(
-    RewriterBase &rewriter, TilingInterface op,
-    linalg::LinalgTilingOptions options) {
+static FailureOr<IREETilingResult>
+tileDispatchUsingSCFFopOp(RewriterBase &rewriter, TilingInterface op,
+                          linalg::LinalgTilingOptions options) {
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPointAfter(op);
 
@@ -333,9 +336,8 @@
     if (!interchangeVector.empty()) {
       if (!isPermutationVector(interchangeVector)) {
         return rewriter.notifyMatchFailure(
-            op,
-            "invalid intechange vector, not a permutation of the entire "
-            "iteration space");
+            op, "invalid intechange vector, not a permutation of the entire "
+                "iteration space");
       }
 
       applyPermutationToVector(iterationDomain, interchangeVector);
@@ -466,8 +468,9 @@
 
 /// Return all slices that are used to access a tile of the producer. Assume
 /// that `tiledOps` are in "reverse" order of their appearance in the IR.
-static SmallVector<tensor::ExtractSliceOp> getAllFusableProducerUses(
-    Operation *untiledOp, ArrayRef<Operation *> tiledOps) {
+static SmallVector<tensor::ExtractSliceOp>
+getAllFusableProducerUses(Operation *untiledOp,
+                          ArrayRef<Operation *> tiledOps) {
   SmallVector<tensor::ExtractSliceOp> sliceOps;
   for (auto tiledOp : llvm::reverse(tiledOps)) {
     for (OpOperand &operand : llvm::reverse(tiledOp->getOpOperands())) {
@@ -480,9 +483,9 @@
   return sliceOps;
 }
 
-FailureOr<IREETileAndFuseResult> tileAndFuseDispatchUsingSCFForOp(
-    RewriterBase &rewriter, TilingInterface op,
-    linalg::LinalgTilingOptions tilingOptions) {
+FailureOr<IREETileAndFuseResult>
+tileAndFuseDispatchUsingSCFForOp(RewriterBase &rewriter, TilingInterface op,
+                                 linalg::LinalgTilingOptions tilingOptions) {
   IREETileAndFuseResult tileAndFuseResult;
   auto fusableProducers = getAllFusableProducers(op);
   // Apply the tiling pattern.
@@ -562,7 +565,8 @@
                                 PatternRewriter &rewriter) const override {
     auto loadOp =
         sliceOp.getSource().getDefiningOp<IREE::Flow::DispatchTensorLoadOp>();
-    if (!loadOp) return failure();
+    if (!loadOp)
+      return failure();
 
     SmallVector<OpFoldResult> combinedOffsets, combinedSizes, combinedStrides;
     if (failed(affine::mergeOffsetsSizesAndStrides(
@@ -591,7 +595,8 @@
   LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
                                 PatternRewriter &rewriter) const override {
     auto emptyTensorOp = sliceOp.getSource().getDefiningOp<tensor::EmptyOp>();
-    if (!emptyTensorOp) return failure();
+    if (!emptyTensorOp)
+      return failure();
 
     SmallVector<OpFoldResult> mixedSizes = sliceOp.getMixedSizes();
     if (mixedSizes.size() != sliceOp.getType().getRank()) {
@@ -599,7 +604,8 @@
       rankReducedMixedSizes.reserve(sliceOp.getType().getRank());
       auto droppedDims = sliceOp.getDroppedDims();
       for (auto [index, size] : llvm::enumerate(mixedSizes)) {
-        if (droppedDims.test(index)) continue;
+        if (droppedDims.test(index))
+          continue;
         rankReducedMixedSizes.push_back(size);
       }
       std::swap(mixedSizes, rankReducedMixedSizes);
@@ -610,7 +616,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateTileAndDistributeToWorkgroupsCleanupPatterns(
     RewritePatternSet &patterns, linalg::LinalgTilingOptions options) {
@@ -619,5 +625,5 @@
                   SwapExtractSliceWithTensorEmpty>(context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformDialectInterpreterPass.cpp b/compiler/src/iree/compiler/Codegen/Common/TransformDialectInterpreterPass.cpp
index 39b11d2..18a102a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformDialectInterpreterPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformDialectInterpreterPass.cpp
@@ -57,7 +57,7 @@
     : public mlir::transform::TransformInterpreterPassBase<
           TransformDialectInterpreterPass,
           iree_compiler::TransformDialectInterpreterBase> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     // TODO: this is only necessary to make registry subset happy when running
     // the lowering to LLVM. The lowering should be changed to stop using the
@@ -122,16 +122,17 @@
   TransformDialectInterpreterPass(const TransformDialectInterpreterPass &pass) =
       default;
 };
-}  // namespace
+} // namespace
 
 namespace mlir {
 namespace iree_compiler {
 /// Create a Transform dialect interpreter pass.
-std::unique_ptr<Pass> createTransformDialectInterpreterPass(
-    llvm::StringRef transformFileName, llvm::StringRef debugPayloadRootTag,
-    llvm::StringRef debugTransformRootTag) {
+std::unique_ptr<Pass>
+createTransformDialectInterpreterPass(llvm::StringRef transformFileName,
+                                      llvm::StringRef debugPayloadRootTag,
+                                      llvm::StringRef debugTransformRootTag) {
   return std::make_unique<TransformDialectInterpreterPass>(
       transformFileName, debugPayloadRootTag, debugTransformRootTag);
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
index 54ae591..7917ecf 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
@@ -98,7 +98,8 @@
       opToErase.push_back(op.getOperation());
     }
   });
-  for (Operation *op : opToErase) rewriter.eraseOp(op);
+  for (Operation *op : opToErase)
+    rewriter.eraseOp(op);
 }
 
 //===---------------------------------------------------------------------===//
@@ -132,7 +133,8 @@
 //===---------------------------------------------------------------------===//
 
 static void addOperands(Operation *op, SetVector<Value> &operandSet) {
-  if (!op) return;
+  if (!op)
+    return;
   TypeSwitch<Operation *, void>(op)
       .Case<linalg::LinalgOp>([&](linalg::LinalgOp linalgOp) {
         SmallVector<Value> inputOperands{linalgOp.getDpsInputOperands()};
@@ -146,10 +148,12 @@
 template <int limit = 3>
 static bool setFusedOpOperandLimit(OpOperand *fusedOperand) {
   Operation *producer = fusedOperand->get().getDefiningOp();
-  if (!producer) return false;
+  if (!producer)
+    return false;
   Operation *consumer = fusedOperand->getOwner();
   SetVector<Value> fusedOpOperands;
-  if (producer->getNumResults() != 1) return false;
+  if (producer->getNumResults() != 1)
+    return false;
   addOperands(consumer, fusedOpOperands);
   fusedOpOperands.remove(producer->getResult(0));
   addOperands(producer, fusedOpOperands);
@@ -210,7 +214,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 void transform_dialect::ApplyFoldFillIntoPadPatternsOp::populatePatterns(
     RewritePatternSet &patterns) {
@@ -230,7 +234,8 @@
     populatePatterns(RewritePatternSet &patterns) {
   auto unrollOrder = [](Operation *op) -> std::optional<SmallVector<int64_t>> {
     auto contract = dyn_cast<vector::ContractionOp>(op);
-    if (!contract) return std::nullopt;
+    if (!contract)
+      return std::nullopt;
     return mlir::iree_compiler::gpuMmaUnrollOrder(contract);
   };
   vector::populateVectorUnrollPatterns(
@@ -243,8 +248,8 @@
 // ApplyUnrollVectorsGpuWmmaSyncPatternsOp
 //===---------------------------------------------------------------------===//
 
-static std::optional<SmallVector<int64_t>> getGPUTensorCoreNativeWmmaVectorSize(
-    Operation *op) {
+static std::optional<SmallVector<int64_t>>
+getGPUTensorCoreNativeWmmaVectorSize(Operation *op) {
   return getWmmaNativeVectorSize(op);
 }
 
@@ -252,7 +257,8 @@
     populatePatterns(RewritePatternSet &patterns) {
   auto unrollOrder = [](Operation *op) -> std::optional<SmallVector<int64_t>> {
     auto contract = dyn_cast<vector::ContractionOp>(op);
-    if (!contract) return std::nullopt;
+    if (!contract)
+      return std::nullopt;
     return mlir::iree_compiler::gpuMmaUnrollOrder(contract);
   };
   vector::populateVectorUnrollPatterns(
@@ -311,15 +317,18 @@
       if (failed(eliminateCommonSubexpressions(op, /*domInfo=*/nullptr,
                                                &listener)))
         return WalkResult::interrupt();
-      if (listener.failed()) return WalkResult::interrupt();
+      if (listener.failed())
+        return WalkResult::interrupt();
       return WalkResult::skip();
     }
     return WalkResult::advance();
   });
 
-  if (!status.wasInterrupted()) return DiagnosedSilenceableFailure::success();
+  if (!status.wasInterrupted())
+    return DiagnosedSilenceableFailure::success();
 
-  if (listener.failed()) return listener.checkAndResetError();
+  if (listener.failed())
+    return listener.checkAndResetError();
 
   return mlir::emitDefiniteFailure(lastOpVisited, "CSE failed");
 }
@@ -417,7 +426,8 @@
     tensor::ExtractSliceOp extractSliceOp;
     for (Operation *user : toShare.getUsers()) {
       extractSliceOp = dyn_cast<tensor::ExtractSliceOp>(user);
-      if (extractSliceOp) break;
+      if (extractSliceOp)
+        break;
     }
     if (!extractSliceOp) {
       /*return mlir::emitSilenceableFailure(
@@ -432,8 +442,10 @@
     // (i.e., same source/target, offsets, sizes and strides).
     auto isMatchingParallelInsertSlice = [&](Operation &op) {
       auto insertSlice = dyn_cast<tensor::ParallelInsertSliceOp>(&op);
-      if (!insertSlice) return false;
-      if (insertSlice.getDest() != bbArg) return false;
+      if (!insertSlice)
+        return false;
+      if (insertSlice.getDest() != bbArg)
+        return false;
       return llvm::equal(insertSlice.getMixedOffsets(),
                          extractSliceOp.getMixedOffsets()) &&
              llvm::equal(insertSlice.getMixedSizes(),
@@ -574,7 +586,8 @@
 
   IREE::HAL::ExecutableExportOp exportOp;
   state.getTopLevel()->walk([&](IREE::HAL::ExecutableExportOp op) {
-    if (op.getSymName() == target.getName()) exportOp = op;
+    if (op.getSymName() == target.getName())
+      exportOp = op;
   });
   if (!exportOp) {
     return mlir::emitSilenceableFailure(
@@ -585,7 +598,8 @@
   auto walkResult = target->walk([&](scf::ForallOp forallOp) {
     if (forallOp->getParentOfType<scf::ForallOp>())
       return WalkResult::advance();
-    if (topLevelForallOp) return WalkResult::interrupt();
+    if (topLevelForallOp)
+      return WalkResult::interrupt();
     topLevelForallOp = forallOp;
     return WalkResult::advance();
   });
@@ -618,11 +632,11 @@
   transform::modifiesPayload(effects);
 }
 
-DiagnosedSilenceableFailure transform_dialect::
-    IREEPopulateWorkgroupCountRegionUsingNumThreadsSliceOp::applyToOne(
-        transform::TransformRewriter &rewriter, Operation *target,
-        transform::ApplyToEachResultList &results,
-        transform::TransformState &state) {
+DiagnosedSilenceableFailure
+transform_dialect::IREEPopulateWorkgroupCountRegionUsingNumThreadsSliceOp::
+    applyToOne(transform::TransformRewriter &rewriter, Operation *target,
+               transform::ApplyToEachResultList &results,
+               transform::TransformState &state) {
   auto forAllOp = dyn_cast<scf::ForallOp>(target);
   if (!forAllOp) {
     return mlir::emitDefiniteFailure(state.getTopLevel(),
@@ -776,14 +790,16 @@
       hasSharedMemoryAddressSpace(llvm::cast<MemRefType>(to.getType()))) {
     needsBarrier = true;
   }
-  if (needsBarrier) builder.create<gpu::BarrierOp>(loc);
+  if (needsBarrier)
+    builder.create<gpu::BarrierOp>(loc);
   // TODO: ideally we should use linalg.copy which was recently reintroduced
   // as an OpDSL named op. However, IREE-specific patterns to cleanup spurious
   // post-bufferization copies do not trigger properly.
   // So we keep using `createLinalgCopyOp` which builds a GenericOp.
   // builder.create<linalg::CopyOp>(loc, from, to);
   mlir::iree_compiler::createLinalgCopyOp(builder, loc, from, to);
-  if (needsBarrier) builder.create<gpu::BarrierOp>(loc);
+  if (needsBarrier)
+    builder.create<gpu::BarrierOp>(loc);
   return success();
 }
 
@@ -830,7 +846,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 DiagnosedSilenceableFailure transform_dialect::IREEBufferizeOp::apply(
     transform::TransformRewriter &rewriter,
@@ -840,9 +856,8 @@
       !isa<ModuleOp, HAL::ExecutableOp, HAL::ExecutableVariantOp>(
           *payload.begin())) {
     return mlir::emitDefiniteFailure(
-        state.getTopLevel(),
-        "requires exactly a single HAL::ExecutableOp or "
-        "HAL::ExecutableVariantOp target op.");
+        state.getTopLevel(), "requires exactly a single HAL::ExecutableOp or "
+                             "HAL::ExecutableVariantOp target op.");
   }
 
   //===-------------------------------------------------------------------===//
@@ -875,7 +890,8 @@
     // overloads only accepts ops that are isolated from above.
     SmallVector<Operation *> ops;
     state.getTopLevel()->walk([&](Operation *nestedOp) {
-      if (state.getTopLevel() != nestedOp) ops.push_back(nestedOp);
+      if (state.getTopLevel() != nestedOp)
+        ops.push_back(nestedOp);
     });
     LogicalResult result =
         applyOpPatternsAndFold(ops, std::move(patterns), config);
@@ -883,7 +899,8 @@
       return mlir::emitDefiniteFailure(state.getTopLevel(),
                                        "greedy pattern application failed");
     }
-    if (listener.failed()) return listener.checkAndResetError();
+    if (listener.failed())
+      return listener.checkAndResetError();
   }
 
   //   2. Run one-shot-bufferize, without the pass baggage.
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.h b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.h
index 718da72..b80772d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.h
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.h
@@ -16,19 +16,19 @@
 
 namespace func {
 class FuncOp;
-}  // namespace func
+} // namespace func
 
 namespace scf {
 class ForallOp;
-}  // namespace scf
+} // namespace scf
 
 namespace transform {
 // Types needed for builders.
 struct TileSizesSpec;
 struct NumThreadsSpec;
 class TransformTypeInterface;
-}  // namespace transform
-}  // namespace mlir
+} // namespace transform
+} // namespace mlir
 
 #define GET_OP_CLASSES
 #include "iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.h.inc"
@@ -45,12 +45,12 @@
 /// Hook to register common transformations to the transform dialect.
 class CommonExtensions
     : public transform::TransformDialectExtension<CommonExtensions> {
- public:
+public:
   CommonExtensions();
 };
-}  // namespace transform_dialect
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace transform_dialect
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_COMMON_TRANSFORMEXTENSIONS_COMMONEXTENSIONS_H_
+#endif // IREE_COMPILER_CODEGEN_COMMON_TRANSFORMEXTENSIONS_COMMONEXTENSIONS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Common/Transforms.h b/compiler/src/iree/compiler/Codegen/Common/Transforms.h
index 453f49a..20d4f9d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/Transforms.h
+++ b/compiler/src/iree/compiler/Codegen/Common/Transforms.h
@@ -16,7 +16,7 @@
 namespace mlir {
 namespace bufferization {
 struct OneShotBufferizationOptions;
-}  // namespace bufferization
+} // namespace bufferization
 
 namespace iree_compiler {
 
@@ -26,8 +26,9 @@
     const bufferization::OneShotBufferizationOptions &options);
 
 /// Bufferizes the given op with One-Shot Bufferize.
-LogicalResult runIREEOneShotBufferize(
-    Operation *op, const IREEOneShotBufferizationOptions &options);
+LogicalResult
+runIREEOneShotBufferize(Operation *op,
+                        const IREEOneShotBufferizationOptions &options);
 
 /// For a given operation within a dispatch, tile and distribute the operation
 /// to workgroups as well as tile + fuse its producers. Returns the
@@ -38,16 +39,16 @@
   SmallVector<Value> workgroupCount;
 };
 
-FailureOr<IREETileAndFuseResult> tileAndFuseDispatchUsingSCFForOp(
-    RewriterBase &rewriter, TilingInterface op,
-    linalg::LinalgTilingOptions tilingOptions);
+FailureOr<IREETileAndFuseResult>
+tileAndFuseDispatchUsingSCFForOp(RewriterBase &rewriter, TilingInterface op,
+                                 linalg::LinalgTilingOptions tilingOptions);
 
 /// Populate patterns related to clean up the IR after tile and distribute to
 /// workgroups.
 void populateTileAndDistributeToWorkgroupsCleanupPatterns(
     RewritePatternSet &patterns, linalg::LinalgTilingOptions options);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_COMMON_TRANSFORMS_H_
+#endif // IREE_COMPILER_CODEGEN_COMMON_TRANSFORMS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp b/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp
index 2b10f5a..16d2c9a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp
@@ -44,7 +44,8 @@
 static Value convertElementType(OpBuilder &b, Location loc, Type targetType,
                                 Value source) {
   Type sourceType = source.getType();
-  if (sourceType == targetType) return source;
+  if (sourceType == targetType)
+    return source;
   if (llvm::isa<IntegerType>(sourceType) &&
       llvm::isa<IntegerType>(targetType)) {
     unsigned sourceBitWidth = sourceType.getIntOrFloatBitWidth();
@@ -65,7 +66,8 @@
     Type elementType = shapedType.getElementType();
     std::optional<Type> legalizedElementType =
         legalizeStorageElementType(elementType);
-    if (!legalizedElementType) return std::nullopt;
+    if (!legalizedElementType)
+      return std::nullopt;
     return RankedTensorType::get(shapedType.getShape(),
                                  legalizedElementType.value(),
                                  shapedType.getEncoding());
@@ -80,7 +82,8 @@
   TypePropagationTypeConverter() {
     addConversion([](Type t) {
       auto convertedType = getLegalizedType(t);
-      if (!convertedType) return t;
+      if (!convertedType)
+        return t;
       return convertedType.value();
     });
   }
@@ -99,9 +102,9 @@
     : public TypePropagationPattern<arith::ConstantOp> {
   using TypePropagationPattern<arith::ConstantOp>::TypePropagationPattern;
 
-  LogicalResult matchAndRewrite(
-      arith::ConstantOp constantOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(arith::ConstantOp constantOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     auto attr = llvm::cast<DenseElementsAttr>(constantOp.getValue());
     auto attrType = llvm::dyn_cast<ShapedType>(attr.getType());
     if (!attrType) {
@@ -141,9 +144,9 @@
 struct NamedOpTypePropagation : public TypePropagationPattern<OpTy> {
   using TypePropagationPattern<OpTy>::TypePropagationPattern;
 
-  LogicalResult matchAndRewrite(
-      OpTy namedOp, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(OpTy namedOp, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     SmallVector<Type> resultTypes;
     resultTypes.reserve(namedOp->getNumResults());
     for (auto resultType : namedOp->getResultTypes()) {
@@ -166,9 +169,9 @@
     : public TypePropagationPattern<linalg::GenericOp> {
   using TypePropagationPattern<linalg::GenericOp>::TypePropagationPattern;
 
-  LogicalResult matchAndRewrite(
-      linalg::GenericOp genericOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(linalg::GenericOp genericOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     llvm::SmallSetVector<unsigned, 8> modifiedOperandIndex;
     SmallVector<Type> resultTypes;
 
@@ -287,9 +290,9 @@
     : public TypePropagationPattern<linalg::FillOp> {
   using TypePropagationPattern<linalg::FillOp>::TypePropagationPattern;
 
-  LogicalResult matchAndRewrite(
-      linalg::FillOp fillOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(linalg::FillOp fillOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     Value value = adaptor.getInputs().front();
     std::optional<Type> legalizedElementType =
         legalizeStorageElementType(value.getType());
@@ -309,9 +312,9 @@
     : public TypePropagationPattern<tensor::ExtractOp> {
   using TypePropagationPattern<tensor::ExtractOp>::TypePropagationPattern;
 
-  LogicalResult matchAndRewrite(
-      tensor::ExtractOp extractOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(tensor::ExtractOp extractOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     Location loc = extractOp.getLoc();
     Value newExtract = rewriter.create<tensor::ExtractOp>(
         loc, adaptor.getTensor(), adaptor.getIndices());
@@ -327,9 +330,9 @@
     : TypePropagationPattern<IREE::LinalgExt::ScatterOp> {
   using TypePropagationPattern<
       IREE::LinalgExt::ScatterOp>::TypePropagationPattern;
-  LogicalResult matchAndRewrite(
-      IREE::LinalgExt::ScatterOp scatterOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(IREE::LinalgExt::ScatterOp scatterOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     auto opOperands = scatterOp->getOpOperands();
     Type inputType = opOperands[0].get().getType();
     Type legalizedInputType = this->getTypeConverter()->convertType(inputType);
@@ -412,9 +415,9 @@
 struct ForwardSourceType : public TypePropagationPattern<OpTy> {
   using TypePropagationPattern<OpTy>::TypePropagationPattern;
 
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     if (op->getNumResults() != 1 || adaptor.getOperands().size() != 1) {
       return rewriter.notifyMatchFailure(
           op, "unhandled op with multiple operands/results");
@@ -433,9 +436,9 @@
       : ConversionPattern(typeConverter, MatchAnyOpTypeTag(), /*benefit=*/1,
                           context) {}
 
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> convertedOperands,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> convertedOperands,
+                  ConversionPatternRewriter &rewriter) const final {
     if (op->getNumSuccessors()) {
       return rewriter.notifyMatchFailure(op, "unhandled ops with successors");
     }
@@ -519,11 +522,11 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createTypePropagationPass() {
   return std::make_unique<TypePropagationPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/UserConfig.cpp b/compiler/src/iree/compiler/Codegen/Common/UserConfig.cpp
index 64c8753..ec6e7bc 100644
--- a/compiler/src/iree/compiler/Codegen/Common/UserConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/UserConfig.cpp
@@ -10,9 +10,9 @@
 namespace iree_compiler {
 
 /// Propagate the configuration annotated in the incoming IR.
-LogicalResult setUserConfig(
-    func::FuncOp entryPointFn, Operation *computeOp,
-    IREE::Codegen::CompilationInfoAttr compilationInfo) {
+LogicalResult
+setUserConfig(func::FuncOp entryPointFn, Operation *computeOp,
+              IREE::Codegen::CompilationInfoAttr compilationInfo) {
   if (auto translationInfo = getTranslationInfo(entryPointFn)) {
     return computeOp->emitOpError(
         "multiple ops within dispatch trying to set the translation "
@@ -20,7 +20,8 @@
   }
 
   auto info = compilationInfo.getTranslationInfo();
-  if (failed(setTranslationInfo(entryPointFn, info))) return failure();
+  if (failed(setTranslationInfo(entryPointFn, info)))
+    return failure();
 
   SmallVector<int64_t> workgroupSize = compilationInfo.getWorkgroupSizeVals();
   std::optional<int64_t> subgroupSize = compilationInfo.getSubgroupSize();
@@ -33,5 +34,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/UserConfig.h b/compiler/src/iree/compiler/Codegen/Common/UserConfig.h
index ef22f00..8737ddb 100644
--- a/compiler/src/iree/compiler/Codegen/Common/UserConfig.h
+++ b/compiler/src/iree/compiler/Codegen/Common/UserConfig.h
@@ -13,5 +13,5 @@
 LogicalResult setUserConfig(func::FuncOp entryPointFn, Operation *computeOp,
                             IREE::Codegen::CompilationInfoAttr compilationInfo);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/VectorizePad.cpp b/compiler/src/iree/compiler/Codegen/Common/VectorizePad.cpp
index acd4914..355bca1 100644
--- a/compiler/src/iree/compiler/Codegen/Common/VectorizePad.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/VectorizePad.cpp
@@ -33,7 +33,8 @@
                              Location loc) {
   IntegerAttr attr;
   if (Value val = attrOrValue.dyn_cast<Value>()) {
-    if (val.getType().isIndex()) return val;
+    if (val.getType().isIndex())
+      return val;
     matchPattern(val, m_Constant(&attr));
   } else {
     attr = llvm::cast<IntegerAttr>(attrOrValue.get<Attribute>());
@@ -82,11 +83,13 @@
                                 PatternRewriter &rewriter) const override {
     // Static result shape is needed to reading padded dimensions in an
     // unrolled manner.
-    if (!padOp.getType().hasStaticShape()) return failure();
+    if (!padOp.getType().hasStaticShape())
+      return failure();
 
     // Only support constant padding value cases.
     Value paddingValue = padOp.getConstantPaddingValue();
-    if (!paddingValue) return failure();
+    if (!paddingValue)
+      return failure();
     Attribute paddingAttr;
     if (!matchPattern(paddingValue, m_Constant(&paddingAttr))) {
       return failure();
@@ -123,7 +126,8 @@
     SmallVector<Value> paddedDimLBs(tensorRank);
     SmallVector<Value> paddedDimUBs(tensorRank);
     for (int i = 0; i < tensorRank; ++i) {
-      if (isConstantZero(lowPads[i]) && isConstantZero(highPads[i])) continue;
+      if (isConstantZero(lowPads[i]) && isConstantZero(highPads[i]))
+        continue;
 
       paddedDimIndices.push_back(i);
       auto srcDimSize =
@@ -142,7 +146,8 @@
         loc, SplatElementsAttr::get(fullVectorType, {paddingAttr}));
 
     auto sliceVectorShape = llvm::to_vector(paddedTensorShape);
-    for (int dim : paddedDimIndices) sliceVectorShape[dim] = 1;
+    for (int dim : paddedDimIndices)
+      sliceVectorShape[dim] = 1;
     auto sliceVectorType =
         VectorType::get(dropLeadingOne(sliceVectorShape), elementType);
     Value cstSliceVector = rewriter.createOrFold<arith::ConstantOp>(
@@ -151,7 +156,8 @@
     // Calculate the total count of all padded dimensions. We need to generate
     // vector read ops with scf.if guards for each of them.
     int totalCount = 1;
-    for (int dim : paddedDimIndices) totalCount *= paddedTensorShape[dim];
+    for (int dim : paddedDimIndices)
+      totalCount *= paddedTensorShape[dim];
 
     auto zeroIndex = rewriter.createOrFold<arith::ConstantIndexOp>(loc, 0);
     auto trueAttr = rewriter.getBoolAttr(true);
@@ -243,7 +249,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateVectorizePadPatterns(RewritePatternSet &patterns,
                                   PatternBenefit baseBenefit) {
@@ -254,5 +260,5 @@
   return std::make_unique<TensorToVectorVectorizePadPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Common/WorkgroupSpecializationPass.cpp b/compiler/src/iree/compiler/Codegen/Common/WorkgroupSpecializationPass.cpp
index edf6338..86b03e9 100644
--- a/compiler/src/iree/compiler/Codegen/Common/WorkgroupSpecializationPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/WorkgroupSpecializationPass.cpp
@@ -49,8 +49,8 @@
     "iree-codegen-enable-workgroup-specialization",
     llvm::cl::desc("Enable workgroup specialization."), llvm::cl::init(true));
 
-static std::optional<int64_t> getConstantLowerBound(
-    affine::AffineMinOp affineMinOp) {
+static std::optional<int64_t>
+getConstantLowerBound(affine::AffineMinOp affineMinOp) {
   for (AffineExpr expr : affineMinOp.getMap().getResults()) {
     if (auto cst = expr.dyn_cast<AffineConstantExpr>()) {
       return cst.getValue();
@@ -112,7 +112,7 @@
   builder.setInsertionPointAfter(minSizeOps.back());
   // create a condition for scf.if
   Value cond;
-  SmallVector<Value> constantOps;  // ConstantIndexOps for tile sizes
+  SmallVector<Value> constantOps; // ConstantIndexOps for tile sizes
   for (unsigned i = 0, e = minSizeOps.size(); i != e; ++i) {
     affine::AffineMinOp minOp = minSizeOps[i];
     int64_t lowerBound = *getConstantLowerBound(minOp);
@@ -130,7 +130,7 @@
 
   // Transfer the original body to the scf.else body.
   auto origBodyBegin = ++Block::iterator(ifOp);
-  auto origBodyEnd = --block->end();  // yield
+  auto origBodyEnd = --block->end(); // yield
 
   Block *elseBlock = ifOp.elseBlock();
   elseBlock->getOperations().splice(elseBlock->begin(), block->getOperations(),
@@ -159,18 +159,19 @@
   }
 
   void runOnOperation() override {
-    if (!clEnableWorkgroupSpecialization) return;
+    if (!clEnableWorkgroupSpecialization)
+      return;
 
     func::FuncOp funcOp = getOperation();
     specializeFunction(funcOp);
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createWorkgroupSpecializationPass() {
   return std::make_unique<WorkgroupSpecializationPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp
index 6f6b58a..6d8988b 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp
@@ -48,7 +48,7 @@
       >();
 }
 
-}  // namespace Codegen
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Codegen
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.h b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.h
index 456b839..c0f7db1 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.h
+++ b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenDialect.h
@@ -11,7 +11,7 @@
 #include "mlir/IR/OpDefinition.h"
 
 // clang-format off: must be included after all LLVM/MLIR eaders
-#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -19,7 +19,7 @@
 
 void registerUKernelBufferizationInterface(DialectRegistry &registry);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_
+#endif // IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.cpp b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.cpp
index f917e9a..191bd43 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.cpp
@@ -26,7 +26,7 @@
 namespace mlir {
 namespace iree_compiler {
 namespace IREE {
-namespace Codegen {}  // namespace Codegen
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+namespace Codegen {} // namespace Codegen
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.h b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.h
index 926e576..958bd6a 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.h
+++ b/compiler/src/iree/compiler/Codegen/Dialect/IREECodegenOps.h
@@ -18,4 +18,4 @@
 #include "iree/compiler/Codegen/Dialect/IREECodegenOps.h.inc" // IWYU pragma: export
 // clang-format on
 
-#endif  // #ifndef IREE_COMPILER_CODEGEN_DIALECT_IREECODEGENOPS_H_
+#endif // #ifndef IREE_COMPILER_CODEGEN_DIALECT_IREECODEGENOPS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.cpp b/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.cpp
index 6ae0285..6b56930 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.cpp
@@ -58,7 +58,8 @@
 /// Assumes that `arrayAttr` is a list of `IntegerAttr`s and returns the values
 /// in these attributes as a vector.
 static SmallVector<int64_t> getIntegerVals(ArrayAttr arrayAttr) {
-  if (!arrayAttr) return {};
+  if (!arrayAttr)
+    return {};
   SmallVector<int64_t> values(arrayAttr.size());
   for (auto [index, attr] : llvm::enumerate(arrayAttr)) {
     values[index] = llvm::cast<IntegerAttr>(attr).getInt();
@@ -125,7 +126,8 @@
 
 TileSizesListType LoweringConfigAttr::getTileSizeVals() {
   auto tileSizesAttr = getTileSizes();
-  if (!tileSizesAttr) return {};
+  if (!tileSizesAttr)
+    return {};
   TileSizesListType tileSizes;
   for (auto attr : tileSizesAttr) {
     auto vals = getIntegerVals(llvm::cast<ArrayAttr>(attr));
@@ -136,26 +138,30 @@
 
 SmallVector<int64_t> LoweringConfigAttr::getTileSizeVals(unsigned level) {
   ArrayAttr tileSizesAttr = getTileSizes();
-  if (!tileSizesAttr || tileSizesAttr.size() <= level) return {};
+  if (!tileSizesAttr || tileSizesAttr.size() <= level)
+    return {};
   return getIntegerVals(llvm::cast<ArrayAttr>(tileSizesAttr[level]));
 }
 
-SmallVector<int64_t> LoweringConfigAttr::getTileInterchangeVals(
-    unsigned level) {
+SmallVector<int64_t>
+LoweringConfigAttr::getTileInterchangeVals(unsigned level) {
   ArrayAttr tileInterchangeAttr = getTileInterchange();
-  if (!tileInterchangeAttr || tileInterchangeAttr.size() <= level) return {};
+  if (!tileInterchangeAttr || tileInterchangeAttr.size() <= level)
+    return {};
   return getIntegerVals(llvm::cast<ArrayAttr>(tileInterchangeAttr[level]));
 }
 
 SmallVector<int64_t> LoweringConfigAttr::getNativeVectorSizeVals() {
   ArrayAttr nativeVectorSizeAttr = getNativeVectorSize();
-  if (!nativeVectorSizeAttr) return {};
+  if (!nativeVectorSizeAttr)
+    return {};
   return getIntegerVals(nativeVectorSizeAttr);
 }
 
-LogicalResult LoweringConfigAttr::verify(
-    function_ref<InFlightDiagnostic()> emitError, ArrayAttr tileSizes,
-    ArrayAttr tileInterchange, ArrayAttr nativeVectorSize) {
+LogicalResult
+LoweringConfigAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                           ArrayAttr tileSizes, ArrayAttr tileInterchange,
+                           ArrayAttr nativeVectorSize) {
   if (!tileSizes) {
     return emitError() << "expected tile_sizes to be specified (even is "
                           "specified as empty)";
@@ -187,10 +193,11 @@
 // iree.compilation_info
 //===----------------------------------------------------------------------===//
 
-CompilationInfoAttr CompilationInfoAttr::get(
-    MLIRContext *context, LoweringConfigAttr configAttr,
-    TranslationInfoAttr translationInfo, ArrayRef<int64_t> workgroupSize,
-    std::optional<int64_t> subgroupSize) {
+CompilationInfoAttr
+CompilationInfoAttr::get(MLIRContext *context, LoweringConfigAttr configAttr,
+                         TranslationInfoAttr translationInfo,
+                         ArrayRef<int64_t> workgroupSize,
+                         std::optional<int64_t> subgroupSize) {
   ArrayAttr workgroupSizeAttr = getI64IntegerArrayAttr(context, workgroupSize);
   return get(context, configAttr, translationInfo, workgroupSizeAttr,
              subgroupSize);
@@ -228,7 +235,8 @@
 
 SmallVector<int64_t> CompilationInfoAttr::getWorkgroupSizeVals() {
   ArrayAttr workgroupSizeAttr = getWorkgroupSize();
-  if (!workgroupSizeAttr) return {};
+  if (!workgroupSizeAttr)
+    return {};
   return getIntegerVals(workgroupSizeAttr);
 }
 
@@ -239,20 +247,20 @@
 void IREECodegenDialect::initializeCodegenAttrs() {
   addAttributes<
 #define GET_ATTRDEF_LIST
-#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc"  // IWYU pragma: keeep
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc" // IWYU pragma: keep
       >();
 }
 
-}  // namespace Codegen
-}  // namespace IREE
+} // namespace Codegen
+} // namespace IREE
 
 //===----------------------------------------------------------------------===//
 // Helpers for getting/setting iree_codegen.translation_info attribute on the
 // `hal.executable.export`
 // ===----------------------------------------------------------------------===//
 
-IREE::Codegen::TranslationInfoAttr getTranslationInfo(
-    IREE::HAL::ExecutableExportOp exportOp) {
+IREE::Codegen::TranslationInfoAttr
+getTranslationInfo(IREE::HAL::ExecutableExportOp exportOp) {
   return exportOp->getAttrOfType<IREE::Codegen::TranslationInfoAttr>(
       kTranslationInfoAttrName);
 }
@@ -276,7 +284,8 @@
                                 ArrayRef<int64_t> workgroupSize,
                                 std::optional<int64_t> subgroupSize) {
   FailureOr<IREE::HAL::ExecutableExportOp> exportOp = getEntryPoint(entryPoint);
-  if (failed(exportOp)) return failure();
+  if (failed(exportOp))
+    return failure();
   MLIRContext *context = exportOp->getContext();
   if (!workgroupSize.empty()) {
     auto attr = getIndexIntegerArrayAttr(context, workgroupSize);
@@ -288,11 +297,12 @@
   return success();
 }
 
-LogicalResult setTranslationInfo(
-    func::FuncOp entryPoint,
-    IREE::Codegen::TranslationInfoAttr translationInfo) {
+LogicalResult
+setTranslationInfo(func::FuncOp entryPoint,
+                   IREE::Codegen::TranslationInfoAttr translationInfo) {
   FailureOr<IREE::HAL::ExecutableExportOp> exportOp = getEntryPoint(entryPoint);
-  if (failed(exportOp)) return failure();
+  if (failed(exportOp))
+    return failure();
   exportOp.value()->setAttr(kTranslationInfoAttrName, translationInfo);
   return success();
 }
@@ -302,10 +312,11 @@
 // operations.
 // ===----------------------------------------------------------------------===//
 
-FailureOr<Operation *> getLoweringConfigCarryingOp(
-    ArrayRef<Operation *> computeOps) {
+FailureOr<Operation *>
+getLoweringConfigCarryingOp(ArrayRef<Operation *> computeOps) {
   for (Operation *op : computeOps) {
-    if (getLoweringConfig(op)) return op;
+    if (getLoweringConfig(op))
+      return op;
   }
   return failure();
 }
@@ -314,16 +325,18 @@
   return op->getAttrOfType<IREE::Codegen::LoweringConfigAttr>(kConfigAttrName);
 }
 
-FailureOr<IREE::Codegen::LoweringConfigAttr> getLoweringConfig(
-    ArrayRef<Operation *> computeOps) {
+FailureOr<IREE::Codegen::LoweringConfigAttr>
+getLoweringConfig(ArrayRef<Operation *> computeOps) {
   FailureOr<Operation *> op = getLoweringConfigCarryingOp(computeOps);
-  if (failed(op)) return failure();
+  if (failed(op))
+    return failure();
   return getLoweringConfig(*op);
 }
 
 SmallVector<int64_t> getTileSizes(Operation *op, unsigned level) {
   IREE::Codegen::LoweringConfigAttr configAttr = getLoweringConfig(op);
-  if (!configAttr) return {};
+  if (!configAttr)
+    return {};
   return configAttr.getTileSizeVals(level);
 }
 SmallVector<Value> getTileSizes(OpBuilder &b, Operation *op, unsigned level) {
@@ -334,7 +347,8 @@
 
 unsigned getNumTileLevels(Operation *op) {
   IREE::Codegen::LoweringConfigAttr configAttr = getLoweringConfig(op);
-  if (!configAttr) return 0;
+  if (!configAttr)
+    return 0;
   return configAttr.getTileSizes().size();
 }
 
@@ -362,5 +376,5 @@
   op->removeAttr(kCompilationInfoAttrName);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.h b/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.h
index 1da8bd4..869618a 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.h
+++ b/compiler/src/iree/compiler/Codegen/Dialect/LoweringConfig.h
@@ -27,8 +27,8 @@
 /// Typedef for tile sizes to use at different levels of tiling.
 using TileSizesListType = SmallVector<SmallVector<int64_t>>;
 using TileSizesListTypeRef = ArrayRef<SmallVector<int64_t>>;
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 // clang-format off
 #include "iree/compiler/Codegen/Dialect/LoweringConfigEnums.h.inc"
@@ -46,14 +46,15 @@
 /// Gets the translate executable info attribute value associated with
 /// `exportOp`. It expects that the attribute is stored using the identifier
 /// `translation_info`.
-IREE::Codegen::TranslationInfoAttr getTranslationInfo(
-    IREE::HAL::ExecutableExportOp exportOp);
+IREE::Codegen::TranslationInfoAttr
+getTranslationInfo(IREE::HAL::ExecutableExportOp exportOp);
 /// Returns the translation info for the `funcOp` (by looking at the entry
 /// point). Returns `nullptr` on failure.
-inline IREE::Codegen::TranslationInfoAttr getTranslationInfo(
-    func::FuncOp funcOp) {
+inline IREE::Codegen::TranslationInfoAttr
+getTranslationInfo(func::FuncOp funcOp) {
   FailureOr<IREE::HAL::ExecutableExportOp> exportOp = getEntryPoint(funcOp);
-  if (failed(exportOp)) return nullptr;
+  if (failed(exportOp))
+    return nullptr;
   return getTranslationInfo(*exportOp);
 }
 
@@ -77,9 +78,9 @@
 /// Sets and overwites the translate executable info for the given entry point.
 /// Returns failure if the given entry point is not exported via
 /// hal.executable.export.
-LogicalResult setTranslationInfo(
-    func::FuncOp entryPoint,
-    IREE::Codegen::TranslationInfoAttr translationInfo);
+LogicalResult
+setTranslationInfo(func::FuncOp entryPoint,
+                   IREE::Codegen::TranslationInfoAttr translationInfo);
 
 //===----------------------------------------------------------------------===//
 // Helpers for getting/setting `iree_codegen.lowering_config` attribute on root
@@ -91,8 +92,8 @@
 ///
 /// This scans ops in top-down order and the first one carrying the attribute
 /// will be returned.
-FailureOr<Operation *> getLoweringConfigCarryingOp(
-    ArrayRef<Operation *> computeOps);
+FailureOr<Operation *>
+getLoweringConfigCarryingOp(ArrayRef<Operation *> computeOps);
 
 /// Returns the lowering configuration set for an operation. Returns `nullptr`
 /// if no value is set.  It expects that the attribute is stored using the
@@ -104,8 +105,8 @@
 ///
 /// This scans ops in top-down order and the first one carrying the attribute
 /// will be returned.
-FailureOr<IREE::Codegen::LoweringConfigAttr> getLoweringConfig(
-    ArrayRef<Operation *> computeOps);
+FailureOr<IREE::Codegen::LoweringConfigAttr>
+getLoweringConfig(ArrayRef<Operation *> computeOps);
 
 /// Returns the tile sizes for a particular operation if the
 /// `iree_codegen.lowering_config` attribute is set on it.
@@ -160,7 +161,7 @@
 /// operation.
 void eraseCompilationInfo(Operation *op);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
+#endif // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.cpp b/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.cpp
index 4addd65..f2bf272 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.cpp
@@ -35,10 +35,11 @@
 
 /// Helper method to generate a function declaration at a module scope,
 /// and a call to that function
-static FailureOr<func::CallOp> createFunctionCall(
-    RewriterBase &rewriter, Operation *op, StringRef fnName,
-    TypeRange callArgumentTypes, TypeRange callReturnTypes,
-    ValueRange callOperands, ArrayRef<NamedAttribute> fnDefAttrs) {
+static FailureOr<func::CallOp>
+createFunctionCall(RewriterBase &rewriter, Operation *op, StringRef fnName,
+                   TypeRange callArgumentTypes, TypeRange callReturnTypes,
+                   ValueRange callOperands,
+                   ArrayRef<NamedAttribute> fnDefAttrs) {
   FunctionType functionType =
       rewriter.getFunctionType(callArgumentTypes, callReturnTypes);
 
@@ -181,14 +182,14 @@
   return {static_cast<int64_t>(pos), static_cast<int64_t>(pos + size)};
 }
 
-FailureOr<func::CallOp> UKernelGenericOp::lowerToFunctionCall(
-    RewriterBase &rewriter) {
+FailureOr<func::CallOp>
+UKernelGenericOp::lowerToFunctionCall(RewriterBase &rewriter) {
   return lowerUKernelGenericToFunctionCall(rewriter, *this, getUKernelFnName(),
                                            getStridedOuterDimsAttr());
 }
 
-}  // namespace Codegen
-}  // namespace IREE
+} // namespace Codegen
+} // namespace IREE
 
 //===---------------------------------------------------------------------===//
 // Register bufferization interface.
@@ -199,9 +200,9 @@
 struct UKernelOpsBufferizationInterface
     : public bufferization::DstBufferizableOpInterfaceExternalModel<
           UKernelOpsBufferizationInterface<OpTy>, OpTy> {
-  LogicalResult bufferize(
-      Operation *op, RewriterBase &rewriter,
-      const bufferization::BufferizationOptions &options) const {
+  LogicalResult
+  bufferize(Operation *op, RewriterBase &rewriter,
+            const bufferization::BufferizationOptions &options) const {
     // TODO: Handle operations with regions if needed.
     if (op->getNumRegions() != 0) {
       op->emitOpError(
@@ -229,7 +230,8 @@
     // Ignore all result types that are tensor types.
     SmallVector<Type> resultTypes;
     for (auto resultType : op->getResultTypes()) {
-      if (llvm::isa<RankedTensorType>(resultType)) continue;
+      if (llvm::isa<RankedTensorType>(resultType))
+        continue;
       resultTypes.push_back(resultType);
     }
 
@@ -251,7 +253,7 @@
      ...);
   }
 };
-}  // namespace
+} // namespace
 
 void registerUKernelBufferizationInterface(DialectRegistry &registry) {
   registry.addExtension(
@@ -263,5 +265,5 @@
       });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.h b/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.h
index ea662f5..c6b610e 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.h
+++ b/compiler/src/iree/compiler/Codegen/Dialect/UKernelOps.h
@@ -18,4 +18,4 @@
 #include "iree/compiler/Codegen/Dialect/UKernelOps.h.inc" // IWYU pragma: export
 // clang-format on
 
-#endif  // #ifndef IREE_COMPILER_CODEGEN_DIALECT_UKERNELOPS_H_
+#endif // #ifndef IREE_COMPILER_CODEGEN_DIALECT_UKERNELOPS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
index 9a02b15..18ac7b8 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
@@ -62,9 +62,10 @@
   return strides;
 }
 
-static MemRefType getMemrefTypeForTensor(
-    IREE::Flow::DispatchTensorType tensorType,
-    MemRefLayoutAttrInterface layout = {}, Attribute memorySpace = {}) {
+static MemRefType
+getMemrefTypeForTensor(IREE::Flow::DispatchTensorType tensorType,
+                       MemRefLayoutAttrInterface layout = {},
+                       Attribute memorySpace = {}) {
   return MemRefType::get(tensorType.getShape(),
                          tensorType.getBoundElementType(), layout, memorySpace);
 }
@@ -74,8 +75,9 @@
 // TODO(#12933): Because of regressions in CUDA backend, there is an
 // option to keep a legacy mode of not representing the offset in the
 // type. Remove once the bug is fixed.
-static Value findOrCreateSubspanBuffer(
-    RewriterBase &rewriter, IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
+static Value
+findOrCreateSubspanBuffer(RewriterBase &rewriter,
+                          IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
   // Ensure that this a tensor subspan op.
   auto shapedType = llvm::dyn_cast<IREE::Flow::DispatchTensorType>(
       subspanOp.getResult().getType());
@@ -103,13 +105,16 @@
   // Look for an existing op.
   Block *block = subspanOp->getBlock();
   for (Operation &op : *block) {
-    if (&op == subspanOp.getOperation()) break;
+    if (&op == subspanOp.getOperation())
+      break;
     auto bufferSubspanOp = dyn_cast<IREE::HAL::InterfaceBindingSubspanOp>(&op);
-    if (!bufferSubspanOp) continue;
+    if (!bufferSubspanOp)
+      continue;
 
     auto bufferMemrefType =
         llvm::dyn_cast<MemRefType>(bufferSubspanOp.getResult().getType());
-    if (!bufferMemrefType) continue;
+    if (!bufferMemrefType)
+      continue;
 
     if (bufferSubspanOp.getSet() != subspanOp.getSet() ||
         bufferSubspanOp.getBinding() != subspanOp.getBinding() ||
@@ -211,8 +216,9 @@
     return false;
   }
 
-  bufferization::AliasingOpResultList getAliasingOpResults(
-      Operation *op, OpOperand &opOperand, const AnalysisState &state) const {
+  bufferization::AliasingOpResultList
+  getAliasingOpResults(Operation *op, OpOperand &opOperand,
+                       const AnalysisState &state) const {
     return {};
   }
 
@@ -242,11 +248,12 @@
           storeOp->getLoc(), subviewMemRefType, target,
           storeOp.getMixedOffsets(), storeOp.getMixedSizes(),
           storeOp.getMixedStrides());
-    }  // else: Writing the entire tensor, no subview required.
+    } // else: Writing the entire tensor, no subview required.
 
     auto maybeBuffer =
         getBuffer(rewriter, storeOp->getOpOperand(0).get(), options);
-    if (failed(maybeBuffer)) return failure();
+    if (failed(maybeBuffer))
+      return failure();
     Value srcMemref = *maybeBuffer;
 
     // If everything bufferized inplace, no copy is needed. We wrote to the
@@ -259,7 +266,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 /// Generic conversion for any LinalgExtOp on tensors.
 static LogicalResult bufferizeLinalgExtOp(RewriterBase &rewriter,
@@ -276,7 +283,8 @@
   rewriter.setInsertionPoint(op);
 
   // Nothing to do. This op is already bufferized.
-  if (dspOp.hasBufferSemantics()) return success();
+  if (dspOp.hasBufferSemantics())
+    return success();
 
   // Ensure op has only tensors. Allow mixed tensor-buffer mode on a per-need
   // basis.
@@ -292,7 +300,8 @@
       continue;
     }
     auto maybeBuffer = getBuffer(rewriter, opOperand->get(), options);
-    if (failed(maybeBuffer)) return failure();
+    if (failed(maybeBuffer))
+      return failure();
     // Input operands are never written to.
     newInputBuffers.push_back(*maybeBuffer);
   }
@@ -307,7 +316,8 @@
     FailureOr<Value> resultBuffer = getBuffer(
         rewriter, aliasingOpOperands.getAliases().front().opOperand->get(),
         options);
-    if (failed(resultBuffer)) return failure();
+    if (failed(resultBuffer))
+      return failure();
     newOutputBuffers.push_back(*resultBuffer);
   }
 
@@ -351,7 +361,8 @@
     // are a limited number of LinalgExt ops, so we hardcode them here. We don't
     // expect to add more LinalgExt ops.
     auto linalgExtOp = cast<IREE::LinalgExt::LinalgExtOp>(op);
-    if (linalgExtOp.isInputTensor(&opOperand)) return true;
+    if (linalgExtOp.isInputTensor(&opOperand))
+      return true;
     return !isa<IREE::LinalgExt::ScatterOp, IREE::LinalgExt::ReverseOp>(op);
   }
 
@@ -364,8 +375,9 @@
                 .empty();
   }
 
-  bufferization::AliasingOpOperandList getAliasingOpOperands(
-      Operation *op, OpResult opResult, const AnalysisState &state) const {
+  bufferization::AliasingOpOperandList
+  getAliasingOpOperands(Operation *op, OpResult opResult,
+                        const AnalysisState &state) const {
     auto linalgExtOp = cast<IREE::LinalgExt::LinalgExtOp>(op);
 
     // The i-th OpResult may alias with the i-th "out" tensor.
@@ -375,8 +387,9 @@
         /*isDefinite=*/false)};
   }
 
-  bufferization::AliasingOpResultList getAliasingOpResults(
-      Operation *op, OpOperand &opOperand, const AnalysisState &state) const {
+  bufferization::AliasingOpResultList
+  getAliasingOpResults(Operation *op, OpOperand &opOperand,
+                       const AnalysisState &state) const {
     auto dspOp = cast<DestinationStyleOpInterface>(op);
 
     // The i-th "out" tensor may alias with the i-th OpResult.
@@ -388,8 +401,9 @@
     return {};
   }
 
-  bufferization::BufferRelation bufferRelation(
-      Operation *op, OpResult opResult, const AnalysisState &state) const {
+  bufferization::BufferRelation
+  bufferRelation(Operation *op, OpResult opResult,
+                 const AnalysisState &state) const {
     return bufferization::BufferRelation::Equivalent;
   }
 
@@ -403,12 +417,14 @@
 /// Returns the buffers of the source and destination for pack and unpack ops.
 /// Returns a failure if the buffers can not be found.
 template <typename OpTy>
-static FailureOr<std::pair<Value, Value>> getSourceAndDestFromPackUnPackOp(
-    RewriterBase &rewriter, OpTy op, const BufferizationOptions &options) {
+static FailureOr<std::pair<Value, Value>>
+getSourceAndDestFromPackUnPackOp(RewriterBase &rewriter, OpTy op,
+                                 const BufferizationOptions &options) {
   static_assert(llvm::is_one_of<OpTy, tensor::PackOp, tensor::UnPackOp>::value);
   Value source;
   auto maybeBuffer = getBuffer(rewriter, op.getSource(), options);
-  if (failed(maybeBuffer)) return failure();
+  if (failed(maybeBuffer))
+    return failure();
   source = *maybeBuffer;
 
   Value dest;
@@ -419,7 +435,8 @@
   FailureOr<Value> resultBuffer = getBuffer(
       rewriter, aliasingOpOperands.getAliases().front().opOperand->get(),
       options);
-  if (failed(resultBuffer)) return failure();
+  if (failed(resultBuffer))
+    return failure();
   dest = *resultBuffer;
   return std::make_pair(source, dest);
 }
@@ -432,7 +449,8 @@
 
   auto maybeSrcAndDest =
       getSourceAndDestFromPackUnPackOp(rewriter, op, options);
-  if (failed(maybeSrcAndDest)) return failure();
+  if (failed(maybeSrcAndDest))
+    return failure();
   auto [source, dest] = *maybeSrcAndDest;
 
   // Set insertion point now that potential alloc/dealloc are introduced.
@@ -456,7 +474,8 @@
 
   auto maybeSrcAndDest =
       getSourceAndDestFromPackUnPackOp(rewriter, op, options);
-  if (failed(maybeSrcAndDest)) return failure();
+  if (failed(maybeSrcAndDest))
+    return failure();
   auto [source, dest] = *maybeSrcAndDest;
 
   // Set insertion point now that potential alloc/dealloc are introduced.
@@ -487,8 +506,9 @@
     return dpsOp.isDpsInit(&opOperand);
   }
 
-  SmallVector<OpOperand *> getAliasingOpOperand(
-      Operation *op, OpResult opResult, const AnalysisState &state) const {
+  SmallVector<OpOperand *>
+  getAliasingOpOperand(Operation *op, OpResult opResult,
+                       const AnalysisState &state) const {
     auto dpsOp = cast<DestinationStyleOpInterface>(op);
     return {dpsOp.getDpsInitOperand(opResult.getResultNumber())};
   }
@@ -498,12 +518,14 @@
     auto dspOp = cast<DestinationStyleOpInterface>(op);
 
     // The i-th "out" tensor may alias with the i-th OpResult.
-    if (dspOp.isDpsInit(&opOperand)) return {dspOp.getTiedOpResult(&opOperand)};
+    if (dspOp.isDpsInit(&opOperand))
+      return {dspOp.getTiedOpResult(&opOperand)};
     return {};
   }
 
-  bufferization::AliasingOpResultList getAliasingOpResults(
-      Operation *op, OpOperand &opOperand, const AnalysisState &state) const {
+  bufferization::AliasingOpResultList
+  getAliasingOpResults(Operation *op, OpOperand &opOperand,
+                       const AnalysisState &state) const {
     auto dspOp = cast<DestinationStyleOpInterface>(op);
 
     // The i-th "out" tensor may alias with the i-th OpResult.
@@ -514,8 +536,9 @@
     return {};
   }
 
-  bufferization::BufferRelation bufferRelation(
-      Operation *op, OpResult opResult, const AnalysisState &state) const {
+  bufferization::BufferRelation
+  bufferRelation(Operation *op, OpResult opResult,
+                 const AnalysisState &state) const {
     return bufferization::BufferRelation::Equivalent;
   }
 
@@ -571,7 +594,8 @@
       [&](OpOperand &operand, SmallVector<Value> &neededValues) {
         auto storeOp =
             dyn_cast<IREE::Flow::DispatchTensorStoreOp>(operand.getOwner());
-        if (!storeOp) return false;
+        if (!storeOp)
+          return false;
         neededValues.push_back(storeOp.getTarget());
         neededValues.append(storeOp.getTargetDims().begin(),
                             storeOp.getTargetDims().end());
@@ -648,5 +672,5 @@
   });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.h b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.h
index 2f53861..c90c478 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.h
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.h
@@ -28,7 +28,7 @@
     RewriterBase &rewriter, Operation *op,
     bufferization::OneShotAnalysisState &state);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_INTERFACES_BUFFERIZATIONINTERFACES_H_
+#endif // IREE_COMPILER_CODEGEN_INTERFACES_BUFFERIZATIONINTERFACES_H_
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
index d65f8ae..8d22768 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
@@ -67,5 +67,5 @@
   vector::registerTransformDialectExtension(registry);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.h b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.h
index 9c777b6..4abd34f 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.h
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.h
@@ -15,7 +15,7 @@
 /// Register all codegen related interfaces.
 void registerCodegenInterfaces(DialectRegistry &registry);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_INTERFACES_INTERFACES_H_
+#endif // IREE_COMPILER_CODEGEN_INTERFACES_INTERFACES_H_
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp
index 07ae51a..8f875bb 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp
@@ -22,17 +22,19 @@
 
 /// Filters out dimensions in `parallelLoops` that have unit range in
 /// `loopRanges`.
-static llvm::SmallVector<unsigned> pruneUnitTripParallelLoops(
-    llvm::ArrayRef<unsigned> parallelLoops,
-    llvm::ArrayRef<int64_t> loopRanges) {
-  return llvm::to_vector(llvm::make_filter_range(
-      parallelLoops,
-      [&loopRanges](unsigned loopDim) { return loopRanges[loopDim] != 1; }));
+static llvm::SmallVector<unsigned>
+pruneUnitTripParallelLoops(llvm::ArrayRef<unsigned> parallelLoops,
+                           llvm::ArrayRef<int64_t> loopRanges) {
+  return llvm::to_vector(
+      llvm::make_filter_range(parallelLoops, [&loopRanges](unsigned loopDim) {
+        return loopRanges[loopDim] != 1;
+      }));
 }
 
 /// Returns the partitionable loops for all Linalg ops.
-llvm::SmallVector<unsigned> getPartitionableLoopsImpl(
-    linalg::LinalgOp linalgOp, std::optional<unsigned> maxNumPartitionedLoops) {
+llvm::SmallVector<unsigned>
+getPartitionableLoopsImpl(linalg::LinalgOp linalgOp,
+                          std::optional<unsigned> maxNumPartitionedLoops) {
   llvm::SmallVector<unsigned> parallelLoops;
   linalgOp.getParallelDims(parallelLoops);
   // Get the static loop ranges.
@@ -49,8 +51,8 @@
   return parallelLoops;
 }
 
-static llvm::SmallVector<utils::IteratorType> getIteratorTypesFromAttr(
-    ArrayAttr iteratorTypesAttr) {
+static llvm::SmallVector<utils::IteratorType>
+getIteratorTypesFromAttr(ArrayAttr iteratorTypesAttr) {
   return llvm::map_to_vector(iteratorTypesAttr, [](Attribute attr) {
     return utils::symbolizeIteratorType(llvm::cast<StringAttr>(attr).getValue())
         .value();
@@ -62,8 +64,9 @@
 struct LinalgOpPartitionableLoops
     : public PartitionableLoopsInterface::ExternalModel<
           LinalgOpPartitionableLoops<OpTy>, OpTy> {
-  llvm::SmallVector<unsigned> getPartitionableLoops(
-      Operation *op, std::optional<unsigned> maxNumPartitionedLoops) const {
+  llvm::SmallVector<unsigned>
+  getPartitionableLoops(Operation *op,
+                        std::optional<unsigned> maxNumPartitionedLoops) const {
     auto linalgOp = cast<linalg::LinalgOp>(op);
     return getPartitionableLoopsImpl(linalgOp, maxNumPartitionedLoops);
   }
@@ -73,8 +76,9 @@
 struct Mmt4DOpPartitionableLoops
     : public PartitionableLoopsInterface::ExternalModel<
           Mmt4DOpPartitionableLoops, linalg::Mmt4DOp> {
-  llvm::SmallVector<unsigned> getPartitionableLoops(
-      Operation *op, std::optional<unsigned> maxNumPartitionedLoops) const {
+  llvm::SmallVector<unsigned>
+  getPartitionableLoops(Operation *op,
+                        std::optional<unsigned> maxNumPartitionedLoops) const {
     return {0, 1};
   }
 };
@@ -85,8 +89,9 @@
 struct OuterParallelAsPartitionableLoops
     : public PartitionableLoopsInterface::ExternalModel<
           OuterParallelAsPartitionableLoops<OpTy>, OpTy> {
-  llvm::SmallVector<unsigned> getPartitionableLoops(
-      Operation *op, std::optional<unsigned> maxNumPartitionedLoops) const {
+  llvm::SmallVector<unsigned>
+  getPartitionableLoops(Operation *op,
+                        std::optional<unsigned> maxNumPartitionedLoops) const {
     // For now just return the loops that are returned by the
     // `TiledOpInterface`. This needs to be further pruned to remove unit-dim
     // loops, but that needs the interface to return the static sizes of the
@@ -117,8 +122,9 @@
 template <typename OpTy>
 struct NoPartitionableLoops : public PartitionableLoopsInterface::ExternalModel<
                                   NoPartitionableLoops<OpTy>, OpTy> {
-  llvm::SmallVector<unsigned> getPartitionableLoops(
-      Operation *op, std::optional<unsigned> maxNumPartitionedLoops) const {
+  llvm::SmallVector<unsigned>
+  getPartitionableLoops(Operation *op,
+                        std::optional<unsigned> maxNumPartitionedLoops) const {
     return {};
   }
 };
@@ -127,8 +133,9 @@
 struct FftOpPartitionableLoops
     : public PartitionableLoopsInterface::ExternalModel<
           FftOpPartitionableLoops, IREE::LinalgExt::FftOp> {
-  llvm::SmallVector<unsigned> getPartitionableLoops(
-      Operation *op, std::optional<unsigned> maxNumPartitionedLoops) const {
+  llvm::SmallVector<unsigned>
+  getPartitionableLoops(Operation *op,
+                        std::optional<unsigned> maxNumPartitionedLoops) const {
     auto fftOp = cast<IREE::LinalgExt::FftOp>(op);
     auto range = llvm::seq<unsigned>(0, fftOp.getOperandRank());
     SmallVector<unsigned> partitionableLoops(range.begin(), range.end());
@@ -154,8 +161,9 @@
 struct AllParallelAsPartitionableLoops
     : public PartitionableLoopsInterface::ExternalModel<
           AllParallelAsPartitionableLoops<OpTy>, OpTy> {
-  llvm::SmallVector<unsigned> getPartitionableLoops(
-      Operation *op, std::optional<unsigned> maxNumPartitionedLoops) const {
+  llvm::SmallVector<unsigned>
+  getPartitionableLoops(Operation *op,
+                        std::optional<unsigned> maxNumPartitionedLoops) const {
     SmallVector<unsigned> partitionableLoops;
     auto interfaceOp = cast<OpTy>(op);
     for (auto iteratorType :
@@ -245,5 +253,5 @@
   });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h
index 7a6323c..23cb21f 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h
@@ -22,7 +22,7 @@
 /// Register external models for PartitionableLoopsInterface.
 void registerPartitionableLoopsInterfaceModels(DialectRegistry &registry);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_INTERFACES_PARTITIONABLE_LOOPS_INTERFACE_H_
+#endif // IREE_COMPILER_CODEGEN_INTERFACES_PARTITIONABLE_LOOPS_INTERFACE_H_
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.cpp
index fa1b772..062662a 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.cpp
@@ -18,12 +18,12 @@
 
 static unsigned dimToIndex(gpu::Dimension dim) {
   switch (dim) {
-    case gpu::Dimension::x:
-      return 0;
-    case gpu::Dimension::y:
-      return 1;
-    case gpu::Dimension::z:
-      return 2;
+  case gpu::Dimension::x:
+    return 0;
+  case gpu::Dimension::y:
+    return 1;
+  case gpu::Dimension::z:
+    return 2;
   }
   assert(false && "invalid dimension");
   return 0;
@@ -91,5 +91,5 @@
   });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.h b/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.h
index ef16895..db94a2f 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.h
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.h
@@ -11,7 +11,7 @@
 #include "mlir/IR/OpDefinition.h"
 
 /// Include the generated interface declarations.
-#include "iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Codegen/Interfaces/ProcessorOpInterfaces.h.inc" // IWYU pragma: export
 
 namespace mlir {
 namespace iree_compiler {
@@ -19,7 +19,7 @@
 /// Registers external models implemented for the `TiledOpInterface`.
 void registerProcessorOpInterfaceExternalModels(DialectRegistry &registry);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_INTERFACES_PROCESSOROPINTERFACES_H_
+#endif // IREE_COMPILER_CODEGEN_INTERFACES_PROCESSOROPINTERFACES_H_
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.h b/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.h
index f48d549..a945818 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.h
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/UKernelOpInterface.h
@@ -15,4 +15,4 @@
 #include "iree/compiler/Codegen/Interfaces/UKernelOpInterface.h.inc" // IWYU pragma: export
 // clang-format on
 
-#endif  // IREE_COMPILER_CODEGEN_INTERFACES_UKERNEL_OP_INTERFACE_H_
+#endif // IREE_COMPILER_CODEGEN_INTERFACES_UKERNEL_OP_INTERFACE_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
index 50a07d6..e87de0f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -104,10 +104,11 @@
                              LLVMTypeConverter &typeConverter)
       : ConvertOpToLLVMWithABIPattern(abi, typeConverter,
                                       /*benefit=*/100) {}
-  LogicalResult matchAndRewrite(
-      func::FuncOp stdFuncOp, func::FuncOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!stdFuncOp.isPublic()) return failure();
+  LogicalResult
+  matchAndRewrite(func::FuncOp stdFuncOp, func::FuncOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!stdFuncOp.isPublic())
+      return failure();
     FunctionType fnType = stdFuncOp.getFunctionType();
     if (fnType.getNumInputs() != 0 || fnType.getNumResults() != 0) {
       stdFuncOp->emitWarning()
@@ -193,15 +194,15 @@
     : public ConvertOpToLLVMWithABIPattern<
           IREE::HAL::ExecutableConstantLoadOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::ExecutableConstantLoadOp loadOp,
-      IREE::HAL::ExecutableConstantLoadOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::ExecutableConstantLoadOp loadOp,
+                  IREE::HAL::ExecutableConstantLoadOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType =
         typeConverter->convertType(loadOp->getResult(0).getType());
-    rewriter.replaceOp(
-        loadOp, abi.loadExecutableConstant(loadOp, loadOp.getKey(), resultType,
-                                           rewriter));
+    rewriter.replaceOp(loadOp,
+                       abi.loadExecutableConstant(loadOp, loadOp.getKey(),
+                                                  resultType, rewriter));
     return success();
   }
 };
@@ -212,10 +213,10 @@
 struct ConvertHALInterfaceWorkgroupIDOp
     : public ConvertOpToLLVMWithABIPattern<IREE::HAL::InterfaceWorkgroupIDOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceWorkgroupIDOp idOp,
-      IREE::HAL::InterfaceWorkgroupIDOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceWorkgroupIDOp idOp,
+                  IREE::HAL::InterfaceWorkgroupIDOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     int32_t dim = (int32_t)idOp.getDimension().getZExtValue();
     auto resultType = typeConverter->convertType(idOp->getResult(0).getType());
     rewriter.replaceOp(idOp,
@@ -231,10 +232,10 @@
     : public ConvertOpToLLVMWithABIPattern<
           IREE::HAL::InterfaceWorkgroupSizeOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceWorkgroupSizeOp sizeOp,
-      IREE::HAL::InterfaceWorkgroupSizeOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceWorkgroupSizeOp sizeOp,
+                  IREE::HAL::InterfaceWorkgroupSizeOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     int32_t dim = (int32_t)sizeOp.getDimension().getZExtValue();
     auto resultType =
         typeConverter->convertType(sizeOp->getResult(0).getType());
@@ -251,10 +252,10 @@
     : public ConvertOpToLLVMWithABIPattern<
           IREE::HAL::InterfaceWorkgroupCountOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceWorkgroupCountOp countOp,
-      IREE::HAL::InterfaceWorkgroupCountOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceWorkgroupCountOp countOp,
+                  IREE::HAL::InterfaceWorkgroupCountOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     int32_t dim = (int32_t)countOp.getDimension().getZExtValue();
     auto resultType =
         typeConverter->convertType(countOp->getResult(0).getType());
@@ -270,10 +271,10 @@
 struct ConvertHALInterfaceConstantLoadOp
     : public ConvertOpToLLVMWithABIPattern<IREE::HAL::InterfaceConstantLoadOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceConstantLoadOp loadOp,
-      IREE::HAL::InterfaceConstantLoadOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceConstantLoadOp loadOp,
+                  IREE::HAL::InterfaceConstantLoadOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     int64_t index = loadOp.getIndex().getZExtValue();
     auto resultType =
         typeConverter->convertType(loadOp->getResult(0).getType());
@@ -290,10 +291,10 @@
     : public ConvertOpToLLVMWithABIPattern<
           IREE::HAL::InterfaceBindingSubspanOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp subspanOp,
-      IREE::HAL::InterfaceBindingSubspanOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp subspanOp,
+                  IREE::HAL::InterfaceBindingSubspanOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     MemRefType memRefType =
         llvm::dyn_cast<MemRefType>(subspanOp->getResult(0).getType());
     if (!memRefType) {
@@ -319,11 +320,9 @@
 };
 
 // entrySize must be 16-byte aligned
-static InstrumentationEntry acquireInstrumentationEntry(Location loc,
-                                                        Value buffer,
-                                                        Value bufferPtr,
-                                                        Value entrySize,
-                                                        OpBuilder &builder) {
+static InstrumentationEntry
+acquireInstrumentationEntry(Location loc, Value buffer, Value bufferPtr,
+                            Value entrySize, OpBuilder &builder) {
   auto i64Type = builder.getI64Type();
   auto bufferType = llvm::cast<MemRefType>(buffer.getType());
   int64_t totalBufferSize =
@@ -393,10 +392,10 @@
 struct ConvertHALInstrumentWorkgroupOp
     : public ConvertOpToLLVMWithABIPattern<IREE::HAL::InstrumentWorkgroupOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InstrumentWorkgroupOp instrumentOp,
-      IREE::HAL::InstrumentWorkgroupOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InstrumentWorkgroupOp instrumentOp,
+                  IREE::HAL::InstrumentWorkgroupOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = instrumentOp.getLoc();
     auto dataLayout =
         getTypeConverter()->getDataLayoutAnalysis()->getAbove(instrumentOp);
@@ -405,14 +404,14 @@
 
     auto entryType = LLVM::LLVMStructType::getLiteral(
         getContext(), {
-                          i32Type,  // header
-                          i32Type,  // workgroup_id_x
-                          i32Type,  // workgroup_id_y
-                          i32Type,  // workgroup_id_z
-                          i32Type,  // workgroup_count_x
-                          i32Type,  // workgroup_count_y
-                          i32Type,  // workgroup_count_z
-                          i32Type,  // processor_id
+                          i32Type, // header
+                          i32Type, // workgroup_id_x
+                          i32Type, // workgroup_id_y
+                          i32Type, // workgroup_id_z
+                          i32Type, // workgroup_count_x
+                          i32Type, // workgroup_count_y
+                          i32Type, // workgroup_count_z
+                          i32Type, // processor_id
                       });
 
     // 8 bit tag = 00 | 24 bit dispatch id
@@ -421,7 +420,7 @@
     Value rawDispatchId = instrumentOp.getDispatchId();
     Value header = rewriter.create<LLVM::ShlOp>(
         loc, i32Type, rawDispatchId,
-        rewriter.create<LLVM::ConstantOp>(loc, i32Type, 8));  // | 8bit tag
+        rewriter.create<LLVM::ConstantOp>(loc, i32Type, 8)); // | 8bit tag
 
     auto entry = appendInstrumentationEntry(
         loc, instrumentOp.getBuffer(), operands.getBuffer(), entryType,
@@ -457,29 +456,29 @@
       .Case<IntegerType>([&](Type type) -> std::optional<uint64_t> {
         if (type.isUnsignedInteger()) {
           switch (type.getIntOrFloatBitWidth()) {
-            case 8:
-              return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_8;
-            case 16:
-              return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_16;
-            case 32:
-              return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_32;
-            case 64:
-              return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_64;
-            default:
-              return std::nullopt;
+          case 8:
+            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_8;
+          case 16:
+            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_16;
+          case 32:
+            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_32;
+          case 64:
+            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_UINT_64;
+          default:
+            return std::nullopt;
           }
         }
         switch (type.getIntOrFloatBitWidth()) {
-          case 8:
-            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_8;
-          case 16:
-            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_16;
-          case 32:
-            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_32;
-          case 64:
-            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_64;
-          default:
-            return std::nullopt;
+        case 8:
+          return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_8;
+        case 16:
+          return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_16;
+        case 32:
+          return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_32;
+        case 64:
+          return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_SINT_64;
+        default:
+          return std::nullopt;
         }
       })
       .Case<FloatType>([&](Type type) -> std::optional<uint64_t> {
@@ -487,14 +486,14 @@
           return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_BFLOAT_16;
         }
         switch (type.getIntOrFloatBitWidth()) {
-          case 16:
-            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_FLOAT_16;
-          case 32:
-            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_FLOAT_32;
-          case 64:
-            return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_FLOAT_64;
-          default:
-            return std::nullopt;
+        case 16:
+          return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_FLOAT_16;
+        case 32:
+          return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_FLOAT_32;
+        case 64:
+          return IREE_INSTRUMENT_DISPATCH_VALUE_TYPE_FLOAT_64;
+        default:
+          return std::nullopt;
         }
       })
       .Case<IndexType>([&](Type type) -> std::optional<uint64_t> {
@@ -506,10 +505,10 @@
 struct ConvertHALInstrumentValueOp
     : public ConvertOpToLLVMWithABIPattern<IREE::HAL::InstrumentValueOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InstrumentValueOp instrumentOp,
-      IREE::HAL::InstrumentValueOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InstrumentValueOp instrumentOp,
+                  IREE::HAL::InstrumentValueOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = instrumentOp.getLoc();
 
     // Only convert ops we can handle, otherwise warn and discard.
@@ -533,8 +532,8 @@
 
     auto entryType =
         LLVM::LLVMStructType::getLiteral(getContext(), {
-                                                           i64Type,  // header
-                                                           i64Type,  // value
+                                                           i64Type, // header
+                                                           i64Type, // value
                                                        });
 
     // 8 bit tag
@@ -574,10 +573,10 @@
 struct ConvertHALInstrumentMemoryLoadOp
     : public ConvertOpToLLVMWithABIPattern<IREE::HAL::InstrumentMemoryLoadOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InstrumentMemoryLoadOp instrumentOp,
-      IREE::HAL::InstrumentMemoryLoadOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InstrumentMemoryLoadOp instrumentOp,
+                  IREE::HAL::InstrumentMemoryLoadOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = instrumentOp.getLoc();
     auto dataLayout =
         getTypeConverter()->getDataLayoutAnalysis()->getAbove(instrumentOp);
@@ -585,8 +584,8 @@
 
     auto entryType =
         LLVM::LLVMStructType::getLiteral(getContext(), {
-                                                           i64Type,  // header
-                                                           i64Type,  // address
+                                                           i64Type, // header
+                                                           i64Type, // address
                                                        });
 
     // 8 bit tag = 100 (read), 101 (write)
@@ -621,10 +620,10 @@
 struct ConvertHALInstrumentMemoryStoreOp
     : public ConvertOpToLLVMWithABIPattern<IREE::HAL::InstrumentMemoryStoreOp> {
   using ConvertOpToLLVMWithABIPattern::ConvertOpToLLVMWithABIPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InstrumentMemoryStoreOp instrumentOp,
-      IREE::HAL::InstrumentMemoryStoreOpAdaptor operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InstrumentMemoryStoreOp instrumentOp,
+                  IREE::HAL::InstrumentMemoryStoreOpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = instrumentOp.getLoc();
     auto dataLayout =
         getTypeConverter()->getDataLayoutAnalysis()->getAbove(instrumentOp);
@@ -632,8 +631,8 @@
 
     auto entryType =
         LLVM::LLVMStructType::getLiteral(getContext(), {
-                                                           i64Type,  // header
-                                                           i64Type,  // address
+                                                           i64Type, // header
+                                                           i64Type, // address
                                                        });
 
     // 8 bit tag = 10 (read), 11 (write)
@@ -672,9 +671,10 @@
   SmallVector<StringRef> extraFields;
   if (auto extraFieldsAttr =
           forOp->getAttrOfType<ArrayAttr>("hal.import.fields")) {
-    extraFields = llvm::map_to_vector(
-        extraFieldsAttr.getValue(),
-        [](Attribute attr) { return llvm::cast<StringAttr>(attr).getValue(); });
+    extraFields =
+        llvm::map_to_vector(extraFieldsAttr.getValue(), [](Attribute attr) {
+          return llvm::cast<StringAttr>(attr).getValue();
+        });
   }
   return extraFields;
 }
@@ -697,8 +697,7 @@
 /// pattern.
 struct RewriteFuncOpABI : public OpRewritePattern<LLVM::LLVMFuncOp> {
   RewriteFuncOpABI(HALDispatchABI &abi, LLVMTypeConverter &typeConverter)
-      : OpRewritePattern(&typeConverter.getContext()),
-        abi(abi),
+      : OpRewritePattern(&typeConverter.getContext()), abi(abi),
         typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(LLVM::LLVMFuncOp funcOp,
@@ -743,7 +742,7 @@
     return success();
   }
 
- private:
+private:
   HALDispatchABI &abi;
   LLVMTypeConverter &typeConverter;
 };
@@ -757,15 +756,15 @@
 /// pattern.
 struct RewriteCallOpABI : public OpRewritePattern<LLVM::CallOp> {
   RewriteCallOpABI(HALDispatchABI &abi, LLVMTypeConverter &typeConverter)
-      : OpRewritePattern(&typeConverter.getContext()),
-        abi(abi),
+      : OpRewritePattern(&typeConverter.getContext()), abi(abi),
         typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(LLVM::CallOp callOp,
                                 PatternRewriter &rewriter) const override {
     auto symbol = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
     auto flatSymbol = llvm::dyn_cast_if_present<FlatSymbolRefAttr>(symbol);
-    if (!flatSymbol) return failure();
+    if (!flatSymbol)
+      return failure();
 
     // Ensure the target function is extern.
     // To support conversion inserting calls in local patterns that can't add
@@ -791,7 +790,7 @@
     return success();
   }
 
- private:
+private:
   HALDispatchABI &abi;
   LLVMTypeConverter &typeConverter;
 };
@@ -806,15 +805,15 @@
     : public OpRewritePattern<LLVM::CallOp> {
   RewriteExternCallOpToDynamicImportCallOp(HALDispatchABI &abi,
                                            LLVMTypeConverter &typeConverter)
-      : OpRewritePattern(&typeConverter.getContext()),
-        abi(abi),
+      : OpRewritePattern(&typeConverter.getContext()), abi(abi),
         typeConverter(typeConverter) {}
   LogicalResult matchAndRewrite(LLVM::CallOp callOp,
                                 PatternRewriter &rewriter) const override {
     // Ignore indirect calls (they're probably already converted imports).
     auto symbol = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
     auto flatSymbol = llvm::dyn_cast_if_present<FlatSymbolRefAttr>(symbol);
-    if (!flatSymbol) return failure();
+    if (!flatSymbol)
+      return failure();
 
     // Ensure the target function is extern.
     // To support conversion inserting calls in local patterns that can't add
@@ -878,7 +877,7 @@
 /// any 64-bit constants that would otherwise prevent the code from being
 /// vectorized.
 class ExpandMulSIExtended : public OpRewritePattern<arith::MulSIExtendedOp> {
- public:
+public:
   using OpRewritePattern<arith::MulSIExtendedOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(arith::MulSIExtendedOp op,
@@ -917,7 +916,7 @@
 };
 
 class ConvertToLLVMPass : public ConvertToLLVMBase<ConvertToLLVMPass> {
- public:
+public:
   ConvertToLLVMPass(bool reassociateFpReductions) {
     targetReassociateFpReductions.setValue(reassociateFpReductions);
   }
@@ -928,7 +927,7 @@
 
   void runOnOperation() override;
 
- private:
+private:
   Option<std::string> targetTriple{
       *this, "target-triple", llvm::cl::desc("Code generation target triple."),
       llvm::cl::init("")};
@@ -942,7 +941,7 @@
       llvm::cl::init("false")};
 };
 
-}  // namespace
+} // namespace
 
 static std::string getStringAttrFromTargetAttr(ModuleOp module,
                                                StringRef attrName) {
@@ -1108,10 +1107,10 @@
   }
 }
 
-std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
-    bool reassociateFpReductions) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertToLLVMPass(bool reassociateFpReductions) {
   return std::make_unique<ConvertToLLVMPass>(reassociateFpReductions);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index a74a60a..d978d8e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -111,27 +111,27 @@
       "int64_t", LLVM::DIBasicTypeAttr::get(
                      context, llvm::dwarf::DW_TAG_base_type, "long long int",
                      /*sizeInBits=*/64, llvm::dwarf::DW_ATE_signed));
-  uint64T = getTypedefOf(
-      "uint64_t",
-      LLVM::DIBasicTypeAttr::get(
-          context, llvm::dwarf::DW_TAG_base_type, "long long unsigned int",
-          /*sizeInBits=*/64, llvm::dwarf::DW_ATE_unsigned));
+  uint64T = getTypedefOf("uint64_t",
+                         LLVM::DIBasicTypeAttr::get(
+                             context, llvm::dwarf::DW_TAG_base_type,
+                             "long long unsigned int",
+                             /*sizeInBits=*/64, llvm::dwarf::DW_ATE_unsigned));
   intptrT =
       getTypedefOf("intptr_t", ptrBitwidth == 32 ? getInt32T() : getInt64T());
   sizeT =
       getTypedefOf("size_t", ptrBitwidth == 32 ? getUint32T() : getUint64T());
 }
 
-LLVM::DIDerivedTypeAttr ExecutableLibraryDI::getConstOf(
-    LLVM::DITypeAttr typeAttr) {
+LLVM::DIDerivedTypeAttr
+ExecutableLibraryDI::getConstOf(LLVM::DITypeAttr typeAttr) {
   return LLVM::DIDerivedTypeAttr::get(
       builder.getContext(), llvm::dwarf::DW_TAG_const_type,
       /*name=*/nullptr, typeAttr, /*sizeInBits=*/0, /*alignInBits=*/0,
       /*offsetInBits=*/0);
 }
 
-LLVM::DIDerivedTypeAttr ExecutableLibraryDI::getPtrOf(
-    LLVM::DITypeAttr typeAttr) {
+LLVM::DIDerivedTypeAttr
+ExecutableLibraryDI::getPtrOf(LLVM::DITypeAttr typeAttr) {
   return LLVM::DIDerivedTypeAttr::get(
       builder.getContext(), llvm::dwarf::DW_TAG_pointer_type,
       /*name=*/nullptr, typeAttr, /*sizeInBits=*/ptrBitwidth,
@@ -139,8 +139,8 @@
       /*offsetInBits=*/0);
 }
 
-LLVM::DICompositeTypeAttr ExecutableLibraryDI::getArrayOf(
-    LLVM::DITypeAttr typeAttr, int64_t count) {
+LLVM::DICompositeTypeAttr
+ExecutableLibraryDI::getArrayOf(LLVM::DITypeAttr typeAttr, int64_t count) {
   return LLVM::DICompositeTypeAttr::get(
       builder.getContext(), llvm::dwarf::DW_TAG_array_type,
       /*name=*/builder.getStringAttr(""), fileAttr,
@@ -156,16 +156,17 @@
       });
 }
 
-LLVM::DIDerivedTypeAttr ExecutableLibraryDI::getTypedefOf(
-    StringRef name, LLVM::DITypeAttr typeAttr) {
+LLVM::DIDerivedTypeAttr
+ExecutableLibraryDI::getTypedefOf(StringRef name, LLVM::DITypeAttr typeAttr) {
   return LLVM::DIDerivedTypeAttr::get(
       builder.getContext(), llvm::dwarf::DW_TAG_typedef,
       builder.getStringAttr(name), typeAttr, /*sizeInBits=*/0,
       /*alignInBits=*/0, /*offsetInBits=*/0);
 }
 
-LLVM::DIDerivedTypeAttr ExecutableLibraryDI::getMemberOf(
-    StringRef name, LLVM::DITypeAttr typeAttr, unsigned *offsetInBits) {
+LLVM::DIDerivedTypeAttr
+ExecutableLibraryDI::getMemberOf(StringRef name, LLVM::DITypeAttr typeAttr,
+                                 unsigned *offsetInBits) {
   unsigned memberOffsetInBits = *offsetInBits;
   unsigned memberSizeInBits = getDITypeSizeInBits(typeAttr);
   *offsetInBits += memberSizeInBits;
@@ -182,21 +183,21 @@
       .Case([&](IntegerType integerType) -> LLVM::DITypeAttr {
         unsigned bitWidth = integerType.getIntOrFloatBitWidth();
         switch (bitWidth) {
-          case 8:
-            return integerType.isUnsigned() ? getUint8T() : getInt8T();
-          case 16:
-            return integerType.isUnsigned() ? getUint16T() : getInt16T();
-          case 32:
-            return integerType.isUnsigned() ? getUint32T() : getInt32T();
-          case 64:
-            return integerType.isUnsigned() ? getUint64T() : getInt64T();
-          default:
-            return LLVM::DIBasicTypeAttr::get(
-                builder.getContext(), llvm::dwarf::DW_TAG_base_type,
-                StringRef("int") + std::to_string(bitWidth),
-                /*sizeInBits=*/bitWidth,
-                integerType.isUnsigned() ? llvm::dwarf::DW_ATE_unsigned
-                                         : llvm::dwarf::DW_ATE_signed);
+        case 8:
+          return integerType.isUnsigned() ? getUint8T() : getInt8T();
+        case 16:
+          return integerType.isUnsigned() ? getUint16T() : getInt16T();
+        case 32:
+          return integerType.isUnsigned() ? getUint32T() : getInt32T();
+        case 64:
+          return integerType.isUnsigned() ? getUint64T() : getInt64T();
+        default:
+          return LLVM::DIBasicTypeAttr::get(
+              builder.getContext(), llvm::dwarf::DW_TAG_base_type,
+              StringRef("int") + std::to_string(bitWidth),
+              /*sizeInBits=*/bitWidth,
+              integerType.isUnsigned() ? llvm::dwarf::DW_ATE_unsigned
+                                       : llvm::dwarf::DW_ATE_signed);
         }
       })
       .Case([&](FloatType floatType) -> LLVM::DITypeAttr {
@@ -312,12 +313,14 @@
 llvm::sys::Mutex HALDispatchABI::sMutex;
 
 // static
-LLVM::LLVMStructType HALDispatchABI::getProcessorType(
-    MLIRContext *context, LLVMTypeConverter *typeConverter) {
+LLVM::LLVMStructType
+HALDispatchABI::getProcessorType(MLIRContext *context,
+                                 LLVMTypeConverter *typeConverter) {
   llvm::sys::ScopedLock lock(sMutex);
   auto structType =
       LLVM::LLVMStructType::getIdentified(context, "iree_hal_processor_v0_t");
-  if (structType.isInitialized()) return structType;
+  if (structType.isInitialized())
+    return structType;
 
   auto uint64Type = IntegerType::get(context, 64);
   SmallVector<Type> fieldTypes;
@@ -335,13 +338,15 @@
 }
 
 // static
-LLVM::LLVMStructType HALDispatchABI::getEnvironmentType(
-    MLIRContext *context, LLVMTypeConverter *typeConverter,
-    LLVM::LLVMStructType processorType) {
+LLVM::LLVMStructType
+HALDispatchABI::getEnvironmentType(MLIRContext *context,
+                                   LLVMTypeConverter *typeConverter,
+                                   LLVM::LLVMStructType processorType) {
   llvm::sys::ScopedLock lock(sMutex);
   auto structType = LLVM::LLVMStructType::getIdentified(
       context, "iree_hal_executable_environment_v0_t");
-  if (structType.isInitialized()) return structType;
+  if (structType.isInitialized())
+    return structType;
 
   auto uint32Type = IntegerType::get(context, 32);
   auto opaquePtrType = LLVM::LLVMPointerType::get(context);
@@ -371,12 +376,14 @@
 }
 
 // static
-LLVM::LLVMStructType HALDispatchABI::getDispatchStateType(
-    MLIRContext *context, LLVMTypeConverter *typeConverter) {
+LLVM::LLVMStructType
+HALDispatchABI::getDispatchStateType(MLIRContext *context,
+                                     LLVMTypeConverter *typeConverter) {
   llvm::sys::ScopedLock lock(sMutex);
   auto structType = LLVM::LLVMStructType::getIdentified(
       context, "iree_hal_executable_dispatch_state_v0_t");
-  if (structType.isInitialized()) return structType;
+  if (structType.isInitialized())
+    return structType;
 
   auto uint8Type = IntegerType::get(context, 8);
   auto uint16Type = IntegerType::get(context, 16);
@@ -423,12 +430,14 @@
 }
 
 // static
-LLVM::LLVMStructType HALDispatchABI::getWorkgroupStateType(
-    MLIRContext *context, LLVMTypeConverter *typeConverter) {
+LLVM::LLVMStructType
+HALDispatchABI::getWorkgroupStateType(MLIRContext *context,
+                                      LLVMTypeConverter *typeConverter) {
   llvm::sys::ScopedLock lock(sMutex);
   auto structType = LLVM::LLVMStructType::getIdentified(
       context, "iree_hal_executable_workgroup_state_v0_t");
-  if (structType.isInitialized()) return structType;
+  if (structType.isInitialized())
+    return structType;
 
   auto uint16Type = IntegerType::get(context, 16);
   auto uint32Type = IntegerType::get(context, 32);
@@ -462,8 +471,9 @@
 }
 
 // static
-SmallVector<Type, 5> HALDispatchABI::getInputTypes(
-    MLIRContext *context, LLVMTypeConverter *typeConverter) {
+SmallVector<Type, 5>
+HALDispatchABI::getInputTypes(MLIRContext *context,
+                              LLVMTypeConverter *typeConverter) {
   return SmallVector<Type, 5>{
       // const iree_hal_executable_environment_v0_t* IREE_RESTRICT
       //   environment
@@ -478,9 +488,9 @@
 }
 
 // static
-LLVM::DISubprogramAttr HALDispatchABI::buildScopeAttr(
-    mlir::ModuleOp moduleOp, StringRef funcName,
-    LLVMTypeConverter *typeConverter) {
+LLVM::DISubprogramAttr
+HALDispatchABI::buildScopeAttr(mlir::ModuleOp moduleOp, StringRef funcName,
+                               LLVMTypeConverter *typeConverter) {
   auto *context = &typeConverter->getContext();
   Builder builder(context);
 
@@ -550,7 +560,8 @@
 // the ops if MLIR or LLVM is likely to reject them.
 static bool isLocationValidForDI(Location loc) {
   // Unknown locations are passed as null and DI doesn't like that.
-  if (llvm::isa<UnknownLoc>(loc)) return false;
+  if (llvm::isa<UnknownLoc>(loc))
+    return false;
   // MLIR currently can't handle name-only locations. We do this check to ensure
   // there's at least one real location MLIR can pass along.
   if (auto callLoc = llvm::dyn_cast<CallSiteLoc>(loc)) {
@@ -570,9 +581,11 @@
 
 static Value buildArgDI(Operation *forOp, int argNum, Value value, Twine name,
                         LLVM::DITypeAttr type, OpBuilder &builder) {
-  if (!clVerboseDebugInfo) return value;
+  if (!clVerboseDebugInfo)
+    return value;
   auto loc = forOp->getLoc();
-  if (!isLocationValidForDI(loc)) return value;
+  if (!isLocationValidForDI(loc))
+    return value;
   auto scopeAttr = getLocalScopeAttr(forOp);
   builder.create<LLVM::DbgValueOp>(
       loc, value,
@@ -585,9 +598,11 @@
 
 static Value buildValueDI(Operation *forOp, Value value, Twine name,
                           LLVM::DITypeAttr type, OpBuilder &builder) {
-  if (!clVerboseDebugInfo) return value;
+  if (!clVerboseDebugInfo)
+    return value;
   auto loc = forOp->getLoc();
-  if (!isLocationValidForDI(loc)) return value;
+  if (!isLocationValidForDI(loc))
+    return value;
   auto scopeAttr = getLocalScopeAttr(forOp);
   builder.create<LLVM::DbgValueOp>(
       loc, value,
@@ -683,10 +698,10 @@
   Value constantValue =
       builder.create<LLVM::LoadOp>(loc, pushConstantType, constantPtrValue);
   auto resultValue = castValueToType(loc, constantValue, resultType, builder);
-  return buildValueDI(
-      forOp, resultValue,
-      StringRef("push_constant[") + std::to_string(offset) + "]",
-      di.getBasicType(resultType), builder);
+  return buildValueDI(forOp, resultValue,
+                      StringRef("push_constant[") + std::to_string(offset) +
+                          "]",
+                      di.getBasicType(resultType), builder);
 }
 
 Value HALDispatchABI::loadBindingCount(Operation *forOp, OpBuilder &builder) {
@@ -711,10 +726,10 @@
   auto elementValue = builder.create<LLVM::LoadOp>(
       loc, mlir::LLVM::LLVMPointerType::get(builder.getContext()),
       elementPtrValue);
-  return buildValueDI(
-      forOp, elementValue,
-      StringRef("binding_ptrs[") + std::to_string(ordinal) + "]",
-      di.getPtrOf(di.getUint8T()), builder);
+  return buildValueDI(forOp, elementValue,
+                      StringRef("binding_ptrs[") + std::to_string(ordinal) +
+                          "]",
+                      di.getPtrOf(di.getUint8T()), builder);
 }
 
 Value HALDispatchABI::loadBindingLength(Operation *forOp, int64_t ordinal,
@@ -728,10 +743,10 @@
       LLVM::GEPArg(int32_t(ordinal)));
   auto elementValue =
       builder.create<LLVM::LoadOp>(loc, indexType, elementPtrValue);
-  return buildValueDI(
-      forOp, elementValue,
-      StringRef("binding_lengths[") + std::to_string(ordinal) + "]",
-      di.getSizeT(), builder);
+  return buildValueDI(forOp, elementValue,
+                      StringRef("binding_lengths[") + std::to_string(ordinal) +
+                          "]",
+                      di.getSizeT(), builder);
 }
 
 MemRefDescriptor HALDispatchABI::loadBinding(Operation *forOp, int64_t ordinal,
@@ -869,10 +884,10 @@
     }
     std::string targetArchUppercase =
         StringRef(getIreeArchNameForTargetTriple(targetTriple.value())).upper();
-#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
-  if (targetArchUppercase == #arch) {                                         \
-    assert(field_index == 0);                                                 \
-    featureToBitPattern[llvm_name] = 1ull << bit_pos;                         \
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name)  \
+  if (targetArchUppercase == #arch) {                                          \
+    assert(field_index == 0);                                                  \
+    featureToBitPattern[llvm_name] = 1ull << bit_pos;                          \
   }
 #include "iree/schemas/cpu_feature_bits.inl"
 #undef IREE_CPU_FEATURE_BIT
@@ -965,10 +980,10 @@
   SmallVector<int64_t, 1> position = {index};
   Value dataValue = builder.create<LLVM::ExtractValueOp>(
       forOp->getLoc(), dataArrayValue, position);
-  return buildValueDI(
-      forOp, dataValue,
-      StringRef("processor_data[") + std::to_string(index) + "]",
-      di.getBasicType(dataValue.getType()), builder);
+  return buildValueDI(forOp, dataValue,
+                      StringRef("processor_data[") + std::to_string(index) +
+                          "]",
+                      di.getBasicType(dataValue.getType()), builder);
 }
 
 Value HALDispatchABI::loadExecutableConstant(Operation *forOp, StringRef key,
@@ -1098,8 +1113,9 @@
 }
 
 // static
-std::optional<Type> HALDispatchABI::getParameterStructType(
-    TypeRange resultTypes, ValueRange args, TypeRange extraFieldsTypes) {
+std::optional<Type>
+HALDispatchABI::getParameterStructType(TypeRange resultTypes, ValueRange args,
+                                       TypeRange extraFieldsTypes) {
   // Struct types are ordered [results..., args...].
   SmallVector<Type> types(resultTypes);
   types.reserve(resultTypes.size() + args.size());
@@ -1115,9 +1131,10 @@
 }
 
 // static
-std::tuple<Type, Value> HALDispatchABI::packIntoParameterStruct(
-    Operation *forOp, TypeRange resultTypes, ValueRange args,
-    ValueRange extraFields, OpBuilder &builder) {
+std::tuple<Type, Value>
+HALDispatchABI::packIntoParameterStruct(Operation *forOp, TypeRange resultTypes,
+                                        ValueRange args, ValueRange extraFields,
+                                        OpBuilder &builder) {
   Location loc = forOp->getLoc();
   MLIRContext *context = builder.getContext();
 
@@ -1179,24 +1196,24 @@
   }
 
   switch (cConv) {
-    case IREE::HAL::CallingConvention::Default: {
-      if (resultTypes.size() > 1) {
-        return forOp->emitOpError(
-            "Cannot have multiple return values for function");
-      }
-      Type resultType = resultTypes.size() == 1
-                            ? resultTypes[0]
-                            : LLVM::LLVMVoidType::get(context);
-      SmallVector<Type> allArgTypes = argTypes;
-      allArgTypes.append(extraFieldsTypes.begin(), extraFieldsTypes.end());
-      return LLVM::LLVMFunctionType::get(resultType, allArgTypes);
+  case IREE::HAL::CallingConvention::Default: {
+    if (resultTypes.size() > 1) {
+      return forOp->emitOpError(
+          "Cannot have multiple return values for function");
     }
-    case IREE::HAL::CallingConvention::ParameterStruct:
-      return LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(context),
-                                         LLVM::LLVMPointerType::get(context));
-    default:
-      llvm_unreachable("unhandled calling convention");
-      return failure();
+    Type resultType = resultTypes.size() == 1
+                          ? resultTypes[0]
+                          : LLVM::LLVMVoidType::get(context);
+    SmallVector<Type> allArgTypes = argTypes;
+    allArgTypes.append(extraFieldsTypes.begin(), extraFieldsTypes.end());
+    return LLVM::LLVMFunctionType::get(resultType, allArgTypes);
+  }
+  case IREE::HAL::CallingConvention::ParameterStruct:
+    return LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(context),
+                                       LLVM::LLVMPointerType::get(context));
+  default:
+    llvm_unreachable("unhandled calling convention");
+    return failure();
   }
 }
 
@@ -1347,7 +1364,8 @@
 Value HALDispatchABI::castValueToType(Location loc, Value value,
                                       Type resultType, OpBuilder &builder) {
   // NOTE: we should handle more cases here (and proper sign extension).
-  if (value.getType() == resultType) return value;
+  if (value.getType() == resultType)
+    return value;
   return builder.createOrFold<LLVM::ZExtOp>(loc, resultType, value);
 }
 
@@ -1424,5 +1442,5 @@
   }
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
index e21cb59..6fb2e19 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
@@ -36,7 +36,7 @@
 // Debug information adapter for the executable_library.h types.
 // Manually synchronized with the runtime header as needed.
 class ExecutableLibraryDI {
- public:
+public:
   // Initializes a cached DI provider using |typeConverter| to determine
   // variable-width types such as index/size_t.
   explicit ExecutableLibraryDI(LLVMTypeConverter *typeConverter);
@@ -95,7 +95,7 @@
   // Returns `iree_hal_executable_workgroup_state_v0_t`.
   LLVM::DIDerivedTypeAttr getWorkgroupStateV0T();
 
- private:
+private:
   LLVMTypeConverter *typeConverter;
   Builder builder;
   LLVM::DIFileAttr fileAttr;
@@ -124,7 +124,7 @@
 // arguments so that we can adjust the ABI over time and support multiple
 // versions in the same compiled output.
 class HALDispatchABI {
- public:
+public:
   // Matches the field order in iree_hal_processor_v0_t.
   enum class ProcessorField {
     data = 0,
@@ -134,8 +134,8 @@
   static constexpr int ProcessorDataCapacity = 8;
 
   // Returns a Type representing iree_hal_processor_v0_t.
-  static LLVM::LLVMStructType getProcessorType(
-      MLIRContext *context, LLVMTypeConverter *typeConverter);
+  static LLVM::LLVMStructType
+  getProcessorType(MLIRContext *context, LLVMTypeConverter *typeConverter);
 
   // Matches the field order in iree_hal_executable_environment_v0_t.
   enum class EnvironmentField {
@@ -147,9 +147,9 @@
   };
 
   // Returns a Type representing iree_hal_executable_environment_v0_t.
-  static LLVM::LLVMStructType getEnvironmentType(
-      MLIRContext *context, LLVMTypeConverter *typeConverter,
-      LLVM::LLVMStructType processorType);
+  static LLVM::LLVMStructType
+  getEnvironmentType(MLIRContext *context, LLVMTypeConverter *typeConverter,
+                     LLVM::LLVMStructType processorType);
 
   // Matches the field order in iree_hal_executable_dispatch_state_v0_t.
   enum class DispatchStateField {
@@ -171,8 +171,8 @@
   }
 
   // Returns a Type representing iree_hal_executable_dispatch_state_v0_t.
-  static LLVM::LLVMStructType getDispatchStateType(
-      MLIRContext *context, LLVMTypeConverter *typeConverter);
+  static LLVM::LLVMStructType
+  getDispatchStateType(MLIRContext *context, LLVMTypeConverter *typeConverter);
 
   enum class WorkgroupStateField {
     /*uint32_t*/ workgroup_id_x = 0,
@@ -188,8 +188,8 @@
   }
 
   // Returns a Type representing iree_hal_executable_workgroup_state_v0_t.
-  static LLVM::LLVMStructType getWorkgroupStateType(
-      MLIRContext *context, LLVMTypeConverter *typeConverter);
+  static LLVM::LLVMStructType
+  getWorkgroupStateType(MLIRContext *context, LLVMTypeConverter *typeConverter);
 
   // Returns the types of the LLVM function inputs for the ABI.
   // This matches the signature of `iree_hal_executable_dispatch_v0_t` in
@@ -202,13 +202,12 @@
   // tables) from MLIR into LLVM IR. It does not need to match the exact
   // definition but the closer we can make it to the real thing the more useful
   // downstream tools will be.
-  static LLVM::DISubprogramAttr buildScopeAttr(
-      mlir::ModuleOp moduleOp, StringRef funcName,
-      LLVMTypeConverter *typeConverter);
+  static LLVM::DISubprogramAttr
+  buildScopeAttr(mlir::ModuleOp moduleOp, StringRef funcName,
+                 LLVMTypeConverter *typeConverter);
 
   explicit HALDispatchABI(LLVMTypeConverter *typeConverter)
-      : context(&typeConverter->getContext()),
-        typeConverter(typeConverter),
+      : context(&typeConverter->getContext()), typeConverter(typeConverter),
         processorType(getProcessorType(context, typeConverter)),
         environmentType(
             getEnvironmentType(context, typeConverter, processorType)),
@@ -342,22 +341,23 @@
   /// to append to argument list specified in `extraFields`; return the function
   /// type of use for the function that implements the specified calling
   /// convention.
-  FailureOr<LLVM::LLVMFunctionType> getABIFunctionType(
-      Operation *forOp, IREE::HAL::CallingConvention cConv,
-      TypeRange resultTypes, TypeRange argTypes,
-      ArrayRef<StringRef> extraFields);
+  FailureOr<LLVM::LLVMFunctionType>
+  getABIFunctionType(Operation *forOp, IREE::HAL::CallingConvention cConv,
+                     TypeRange resultTypes, TypeRange argTypes,
+                     ArrayRef<StringRef> extraFields);
 
   /// Given a calling convention `cConv`, and callee with return of
   /// `resultTypes` and operands with type `argTypes`, along with extra fields
   /// to append to argument list specified in `extraFields`; modify the `callOp`
   /// to implement the specified ABI. The calleee signature is expected to have
   /// been/to be modified separately, i.e. it isnt done within this method.
-  FailureOr<SmallVector<Value>> materializeABI(
-      Operation *callOp, StringRef symbolName,
-      IREE::HAL::CallingConvention cConv, TypeRange resultTypes,
-      ValueRange args, ArrayRef<StringRef> extraFields, RewriterBase &builder);
+  FailureOr<SmallVector<Value>>
+  materializeABI(Operation *callOp, StringRef symbolName,
+                 IREE::HAL::CallingConvention cConv, TypeRange resultTypes,
+                 ValueRange args, ArrayRef<StringRef> extraFields,
+                 RewriterBase &builder);
 
- private:
+private:
   Value getIndexValue(Location loc, int64_t value, OpBuilder &builder);
 
   Value castValueToType(Location loc, Value value, Type resultType,
@@ -415,7 +415,7 @@
   static llvm::sys::Mutex sMutex;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMCPU_DISPATCHABI_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMCPU_DISPATCHABI_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/ExpandF16OpToF32Pass.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/ExpandF16OpToF32Pass.cpp
index 5cf2fcf..c694da1 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/ExpandF16OpToF32Pass.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/ExpandF16OpToF32Pass.cpp
@@ -23,7 +23,7 @@
 /// operands, and then truncating the result back to f16.
 template <typename Op>
 struct ExpandF16OpToF32Pattern : public OpRewritePattern<Op> {
- public:
+public:
   using OpRewritePattern<Op>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(Op op,
@@ -65,11 +65,11 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createExpandF16OpToF32Pass() {
   return std::make_unique<ExpandF16OpToF32Pass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 18d64b5..46c0955 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -55,17 +55,20 @@
     llvm::cl::desc("disable thread distribution in codegen"),
     llvm::cl::init(false));
 
-static llvm::cl::list<int> mmt4dWorkgroupTileSizes(
-    "iree-codegen-llvm-mmt4d-workgroup-tile-sizes",
-    llvm::cl::desc("linalg.mmt4d workgroup tile size"), llvm::cl::ZeroOrMore);
+static llvm::cl::list<int>
+    mmt4dWorkgroupTileSizes("iree-codegen-llvm-mmt4d-workgroup-tile-sizes",
+                            llvm::cl::desc("linalg.mmt4d workgroup tile size"),
+                            llvm::cl::ZeroOrMore);
 
-static llvm::cl::list<int> mmt4dL1TileSizes(
-    "iree-codegen-llvm-mmt4d-l1-tile-size",
-    llvm::cl::desc("linalg.mmt4d L1 tile size"), llvm::cl::ZeroOrMore);
+static llvm::cl::list<int>
+    mmt4dL1TileSizes("iree-codegen-llvm-mmt4d-l1-tile-size",
+                     llvm::cl::desc("linalg.mmt4d L1 tile size"),
+                     llvm::cl::ZeroOrMore);
 
-static llvm::cl::list<int> mmt4dVectorSizes(
-    "iree-codegen-llvm-mmt4d-vector-size",
-    llvm::cl::desc("linalg.mmt4d vector tile size"), llvm::cl::ZeroOrMore);
+static llvm::cl::list<int>
+    mmt4dVectorSizes("iree-codegen-llvm-mmt4d-vector-size",
+                     llvm::cl::desc("linalg.mmt4d vector tile size"),
+                     llvm::cl::ZeroOrMore);
 
 static llvm::cl::opt<int> defaultWorkgroupTileSize(
     "iree-codegen-llvm-generic-ops-workgroup-size",
@@ -76,13 +79,15 @@
 // TODO(hanchung): Remove the flag. This is the flag for fastly falling back to
 // the previous snapshot.
 
-static llvm::cl::opt<bool> enableVectorPadding(
-    "iree-codegen-enable-vector-padding",
-    llvm::cl::desc("Enable padding for vectorization"), llvm::cl::init(true));
+static llvm::cl::opt<bool>
+    enableVectorPadding("iree-codegen-enable-vector-padding",
+                        llvm::cl::desc("Enable padding for vectorization"),
+                        llvm::cl::init(true));
 
-static llvm::cl::opt<bool> enableVectorPeeling(
-    "iree-codegen-enable-vector-peeling",
-    llvm::cl::desc("Enable peeling for vectorization"), llvm::cl::init(true));
+static llvm::cl::opt<bool>
+    enableVectorPeeling("iree-codegen-enable-vector-peeling",
+                        llvm::cl::desc("Enable peeling for vectorization"),
+                        llvm::cl::init(true));
 
 // Non-static options are used in other places.
 llvm::cl::opt<std::string> clCPUCodegenTransformDialectFileName(
@@ -129,18 +134,18 @@
 static llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                      const VectorPreProcStrategy &strategy) {
   switch (strategy) {
-    case VectorPreProcStrategy::Padding:
-      os << "Padding";
-      break;
-    case VectorPreProcStrategy::Peeling:
-      os << "Peeling";
-      break;
-    case VectorPreProcStrategy::Masking:
-      os << "Masking";
-      break;
-    case VectorPreProcStrategy::None:
-      os << "None";
-      break;
+  case VectorPreProcStrategy::Padding:
+    os << "Padding";
+    break;
+  case VectorPreProcStrategy::Peeling:
+    os << "Peeling";
+    break;
+  case VectorPreProcStrategy::Masking:
+    os << "Masking";
+    break;
+  case VectorPreProcStrategy::None:
+    os << "None";
+    break;
   }
   return os;
 }
@@ -155,9 +160,9 @@
   return os;
 }
 
-static llvm::raw_ostream &operator<<(
-    llvm::raw_ostream &os,
-    const mlir::iree_compiler::TileSizesListType &tileSizeList) {
+static llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const mlir::iree_compiler::TileSizesListType &tileSizeList) {
   os << "[";
   for (auto &tuple : tileSizeList) {
     os << "[" << tuple << "]";
@@ -174,7 +179,8 @@
                                SmallVector<int64_t> &ub) {
   auto getStaticValue = [](OpFoldResult ofr) -> int64_t {
     std::optional<int64_t> intVal = getConstantIntValue(ofr);
-    if (!intVal) return ShapedType::kDynamic;
+    if (!intVal)
+      return ShapedType::kDynamic;
     return intVal.value();
   };
   lb = llvm::map_to_vector(loopRange,
@@ -193,8 +199,8 @@
 
 /// Returns the vectorization pre-processing strategy (padding, peeling) for the
 /// given LinalgOp, depending on the op traits and the target architecture.
-static VectorPreProcStrategy getVectorPreProcStrategy(
-    linalg::LinalgOp linalgOp) {
+static VectorPreProcStrategy
+getVectorPreProcStrategy(linalg::LinalgOp linalgOp) {
   // Generic strategies.
 
   if (linalgOp.hasBufferSemantics()) {
@@ -274,7 +280,8 @@
 }
 static int64_t getVectorSize(func::FuncOp entryPointFn, ShapedType shapedType) {
   Type elementType = shapedType.getElementType();
-  if (!elementType.isIntOrFloat()) return 1;
+  if (!elementType.isIntOrFloat())
+    return 1;
   unsigned byteWidth = IREE::Util::getRoundedElementByteWidth(elementType);
   return getVectorSize(entryPointFn, byteWidth);
 }
@@ -284,10 +291,10 @@
 /// looking into all the operands.
 // TODO(diegocaballero): Refactor this logic to a method that computes the final
 // tile sizes for vectorization/unrolling in one shot.
-static SmallVector<int64_t> getMinTilingSizesForEachDim(
-    func::FuncOp entryPointFn, linalg::LinalgOp op,
-    const LinalgOpInfo &linalgOpInfo,
-    const TargetMLTransformInfo &targetMLTransInfo) {
+static SmallVector<int64_t>
+getMinTilingSizesForEachDim(func::FuncOp entryPointFn, linalg::LinalgOp op,
+                            const LinalgOpInfo &linalgOpInfo,
+                            const TargetMLTransformInfo &targetMLTransInfo) {
   unsigned numLoops = op.getNumLoops();
   SmallVector<int64_t> minTileSizes(numLoops, 1);
   auto inputOutputOpOperands = op->getOpOperands();
@@ -295,10 +302,12 @@
   for (auto [index, map] : llvm::enumerate(op.getIndexingMapsArray())) {
     // Check the fastest varying dimension of the operand. Set the vector size
     // of the corresponding loop to the vector size.
-    if (map.getNumResults() == 0) continue;
+    if (map.getNumResults() == 0)
+      continue;
     auto fastestVaryingDimExpr =
         map.getResults().back().dyn_cast<AffineDimExpr>();
-    if (!fastestVaryingDimExpr) continue;
+    if (!fastestVaryingDimExpr)
+      continue;
     unsigned fastestVaryingDim = fastestVaryingDimExpr.getPosition();
 
     // If the indexing map has result it has to be a shaped type.
@@ -353,18 +362,21 @@
                   // Ignore operands that are 0D tensors. These
                   // are not vector-loadable, so using these to
                   // get vector length would be a pessimization.
-                  if (!dispatchTensorType.getRank()) return nullptr;
+                  if (!dispatchTensorType.getRank())
+                    return nullptr;
                   return dispatchTensorType.getBoundElementType();
                 })
             .Case<ShapedType>([&](auto shapedType) -> Type {
               // Ignore operands that are 0D tensors. These
               // are not vector-loadable, so using these to
               // get vector length would be a pessimization.
-              if (!shapedType.getRank()) return nullptr;
+              if (!shapedType.getRank())
+                return nullptr;
               return shapedType.getElementType();
             })
             .Default([&](Type t) -> Type { return nullptr; });
-    if (!elementType || !elementType.isIntOrFloat()) return;
+    if (!elementType || !elementType.isIntOrFloat())
+      return;
     unsigned typeWidthInBytes =
         IREE::Util::getRoundedElementByteWidth(elementType);
     referenceTypeLengthInBytes =
@@ -375,10 +387,11 @@
 
 /// Returns the default tile sizes to use for the loops that are distributed at
 /// Flow level.
-static SmallVector<int64_t> getDefaultDistributedLoopTileSizes(
-    ArrayRef<int64_t> lbs, ArrayRef<int64_t> ubs,
-    ArrayRef<int64_t> minTileSizes, ArrayRef<int64_t> maxTileSizes,
-    ArrayRef<int64_t> vectorSizeHints) {
+static SmallVector<int64_t>
+getDefaultDistributedLoopTileSizes(ArrayRef<int64_t> lbs, ArrayRef<int64_t> ubs,
+                                   ArrayRef<int64_t> minTileSizes,
+                                   ArrayRef<int64_t> maxTileSizes,
+                                   ArrayRef<int64_t> vectorSizeHints) {
   assert(lbs.size() == ubs.size() && lbs.size() == minTileSizes.size() &&
          lbs.size() == maxTileSizes.size() &&
          "expected all vectors to be of equal size");
@@ -621,7 +634,8 @@
   // Final fix up of the tile sizes to make sure that they divide the problem
   // size to make it vectorizable.
   for (auto i : llvm::seq<unsigned>(0, distributedTileSizes.size())) {
-    if (!distributedTileSizes[i]) continue;
+    if (!distributedTileSizes[i])
+      continue;
     distributedTileSizes[i] =
         getMaxDistributionTileSize(lbs[i], ubs[i], distributedTileSizes[i],
                                    minTileSizes[i], allowIncompleteTile);
@@ -646,9 +660,10 @@
 
 /// Splits the tile sizes in `parallelSizes` into `reductionSizes` for the
 /// reduction loops.
-static void splitParallelAndReductionTiles(
-    linalg::LinalgOp op, SmallVectorImpl<int64_t> &parallelSizes,
-    SmallVectorImpl<int64_t> &reductionSizes) {
+static void
+splitParallelAndReductionTiles(linalg::LinalgOp op,
+                               SmallVectorImpl<int64_t> &parallelSizes,
+                               SmallVectorImpl<int64_t> &reductionSizes) {
   reductionSizes.assign(parallelSizes.begin(), parallelSizes.end());
   for (auto [index, iteratorType] :
        llvm::enumerate(op.getIteratorTypesArray())) {
@@ -667,7 +682,8 @@
   for (auto [index, valuePair] : llvm::enumerate(
            llvm::zip_equal(staticLoopRanges, op.getIteratorTypesArray()))) {
     auto [size, iterType] = valuePair;
-    if (!ShapedType::isDynamic(size)) continue;
+    if (!ShapedType::isDynamic(size))
+      continue;
     if (iterType == utils::IteratorType::parallel) {
       parallelSizes[index] = 1;
     } else {
@@ -681,10 +697,11 @@
                        << reductionSizes << "\n");
 }
 
-static void setVectorSizesForDynamicShapes(
-    linalg::LinalgOp op, VectorPreProcStrategy vecPreProcStrategy,
-    SmallVectorImpl<int64_t> &parallelSizes,
-    SmallVectorImpl<int64_t> &reductionSizes) {
+static void
+setVectorSizesForDynamicShapes(linalg::LinalgOp op,
+                               VectorPreProcStrategy vecPreProcStrategy,
+                               SmallVectorImpl<int64_t> &parallelSizes,
+                               SmallVectorImpl<int64_t> &reductionSizes) {
   // Masking doesn't need any dim set to 1.
   if (vecPreProcStrategy == VectorPreProcStrategy::Masking) {
     return;
@@ -740,10 +757,10 @@
 
 /// Sets the default configuration to use for an operation that implements the
 /// `PartitionableLoopsInterface`, given the `lbs` and `ubs` of all the loops.
-static LogicalResult setDefaultRootConfig(
-    func::FuncOp entryPointFn,
-    PartitionableLoopsInterface partitionableLoopsInterfaceOp,
-    ArrayRef<int64_t> lbs, ArrayRef<int64_t> ubs) {
+static LogicalResult
+setDefaultRootConfig(func::FuncOp entryPointFn,
+                     PartitionableLoopsInterface partitionableLoopsInterfaceOp,
+                     ArrayRef<int64_t> lbs, ArrayRef<int64_t> ubs) {
   assert(!getLoweringConfig(partitionableLoopsInterfaceOp) &&
          "expected lowering_config is not set");
   SmallVector<unsigned> partitionableLoops =
@@ -776,10 +793,11 @@
   return success();
 }
 
-static LogicalResult setMatmulPadRootConfig(
-    func::FuncOp entryPointFn, linalg::ContractionOpInterface op,
-    ArrayRef<int64_t> flowTileSizes, ArrayRef<int64_t> workgroupTileSizes,
-    int vectorSize) {
+static LogicalResult
+setMatmulPadRootConfig(func::FuncOp entryPointFn,
+                       linalg::ContractionOpInterface op,
+                       ArrayRef<int64_t> flowTileSizes,
+                       ArrayRef<int64_t> workgroupTileSizes, int vectorSize) {
   // The tiling for parallel dims and reduction dims should be separated.
   SmallVector<int64_t> parallelTileSizes(workgroupTileSizes.begin(),
                                          workgroupTileSizes.end());
@@ -790,7 +808,8 @@
   // the inner tiling sizes are not clamped. Because padding won't be applied
   // along those dimensions.
   for (const auto &[index, size] : llvm::enumerate(flowTileSizes)) {
-    if (!size) continue;
+    if (!size)
+      continue;
     parallelTileSizes[index] = std::min(parallelTileSizes[index], size);
   }
 
@@ -812,8 +831,8 @@
       DispatchLoweringPassPipeline::CPUDoubleTilingPadExpert);
 }
 
-static DispatchLoweringPassPipeline getNoPadTilingExpert(
-    VectorPreProcStrategy strategy) {
+static DispatchLoweringPassPipeline
+getNoPadTilingExpert(VectorPreProcStrategy strategy) {
   if (strategy == VectorPreProcStrategy::Peeling) {
     return DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert;
   }
@@ -833,7 +852,8 @@
     for (const auto &[idx, tileSize] : llvm::enumerate(tileSizeTuple)) {
       // Quantized cases are not fully evaluated yet, so it might go with NoPad
       // approach.
-      if (tileSize == 0 || shape[idx] == ShapedType::kDynamic) continue;
+      if (tileSize == 0 || shape[idx] == ShapedType::kDynamic)
+        continue;
       assert(shape[idx] % tileSize == 0);
       shape[idx] = tileSize;
     }
@@ -978,8 +998,9 @@
 
 /// Sets the lowering configuration for dispatch region with root op that
 /// implements the contraction operation interface.
-static LogicalResult setRootConfig(
-    func::FuncOp entryPointFn, linalg::ContractionOpInterface contractionOp) {
+static LogicalResult
+setRootConfig(func::FuncOp entryPointFn,
+              linalg::ContractionOpInterface contractionOp) {
   assert(!getLoweringConfig(contractionOp) &&
          "expected lowering_config is not set");
   auto linalgOp = cast<linalg::LinalgOp>(contractionOp.getOperation());
@@ -1122,8 +1143,8 @@
       DispatchLoweringPassPipeline::Mmt4dTilingExpert);
 }
 
-static SmallVector<int64_t> getLinalgExtDefaultWorkgroupTileSizes(
-    TilingInterface op) {
+static SmallVector<int64_t>
+getLinalgExtDefaultWorkgroupTileSizes(TilingInterface op) {
   unsigned numLoops = op.getLoopIteratorTypes().size();
   // Set all the distribution tile sizes to zero if thread distribution is
   // disabled.
@@ -1162,7 +1183,8 @@
   SmallVector<int64_t> innerTiles = op.getStaticTiles();
   ArrayRef<int64_t> dimPos = op.getInnerDimsPos();
   for (auto [pos, size] : llvm::zip_equal(dimPos, innerTiles)) {
-    if (distTileSizes[pos] == 0 || ShapedType::isDynamic(size)) continue;
+    if (distTileSizes[pos] == 0 || ShapedType::isDynamic(size))
+      continue;
     distTileSizes[pos] = distTileSizes[pos] / size;
     distTileSizes[pos] = std::max<int64_t>(distTileSizes[pos], 1);
   }
@@ -1182,10 +1204,10 @@
       DispatchLoweringPassPipeline::CPUDataTiling);
 }
 
-static LogicalResult setUnPackOpRootConfig(
-    func::FuncOp entryPointFn, tensor::UnPackOp op,
-    DispatchLoweringPassPipeline pipeline =
-        DispatchLoweringPassPipeline::CPUDataTiling) {
+static LogicalResult
+setUnPackOpRootConfig(func::FuncOp entryPointFn, tensor::UnPackOp op,
+                      DispatchLoweringPassPipeline pipeline =
+                          DispatchLoweringPassPipeline::CPUDataTiling) {
   SmallVector<int64_t> distTileSizes = getLinalgExtDefaultWorkgroupTileSizes(
       cast<TilingInterface>(op.getOperation()));
 
@@ -1193,7 +1215,8 @@
   SmallVector<int64_t> innerTiles = op.getStaticTiles();
   ArrayRef<int64_t> dimPos = op.getInnerDimsPos();
   for (auto [pos, size] : llvm::zip_equal(dimPos, innerTiles)) {
-    if (distTileSizes[pos] == 0 || ShapedType::isDynamic(size)) continue;
+    if (distTileSizes[pos] == 0 || ShapedType::isDynamic(size))
+      continue;
     distTileSizes[pos] = llvm::alignTo(distTileSizes[pos], size);
   }
 
@@ -1211,10 +1234,10 @@
 
 /// Sets the lowering configuration for dispatch region for linalg_ext.fft
 /// root op.
-static LogicalResult setRootConfig(
-    func::FuncOp entryPointFn, IREE::LinalgExt::FftOp fftOp,
-    DispatchLoweringPassPipeline pipeline =
-        DispatchLoweringPassPipeline::CPUDefault) {
+static LogicalResult
+setRootConfig(func::FuncOp entryPointFn, IREE::LinalgExt::FftOp fftOp,
+              DispatchLoweringPassPipeline pipeline =
+                  DispatchLoweringPassPipeline::CPUDefault) {
   assert(!getLoweringConfig(fftOp) && "expected lowering_config is not set");
   SmallVector<int64_t> workgroupTileSizes =
       getLinalgExtDefaultWorkgroupTileSizes(fftOp);
@@ -1288,10 +1311,11 @@
 
 /// Sets the default lowering configuration for a generic op to use
 /// CPUDoubleTilingExpert pipeline.
-static LogicalResult setDefaultGenericOpRootConfig(
-    func::FuncOp entryPointFn, linalg::GenericOp genericOp,
-    const LinalgOpInfo &linalgOpInfo,
-    const TargetMLTransformInfo &targetMLTransInfo) {
+static LogicalResult
+setDefaultGenericOpRootConfig(func::FuncOp entryPointFn,
+                              linalg::GenericOp genericOp,
+                              const LinalgOpInfo &linalgOpInfo,
+                              const TargetMLTransformInfo &targetMLTransInfo) {
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
   LLVM_DEBUG(KD_DBGS() << "Setting default generic op root configuration\n");
@@ -1362,13 +1386,15 @@
 }
 
 /// Set lowering info to be used by the transform dialect jitter.
-static LogicalResult setTransformStrategyRootConfig(
-    func::FuncOp entryPointFn, linalg::GenericOp genericOp,
-    const LinalgOpInfo &linalgOpInfo,
-    const TargetMLTransformInfo &targetMLTransInfo) {
+static LogicalResult
+setTransformStrategyRootConfig(func::FuncOp entryPointFn,
+                               linalg::GenericOp genericOp,
+                               const LinalgOpInfo &linalgOpInfo,
+                               const TargetMLTransformInfo &targetMLTransInfo) {
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
-  if (!clCPUEnableTransformDialectJit) return failure();
+  if (!clCPUEnableTransformDialectJit)
+    return failure();
   cpu::CPUModel cpuModel;
   if (failed(
           cpu::matchAndSetReductionStrategy(entryPointFn, genericOp, cpuModel)))
@@ -1383,10 +1409,11 @@
 
 /// Sets the lowering configuration for a generic op implementing a
 /// transposition to use CPUDoubleTilingExpert pipeline.
-static LogicalResult setTransposeLikeOpRootConfig(
-    func::FuncOp entryPointFn, linalg::GenericOp genericOp,
-    const LinalgOpInfo &linalgOpInfo,
-    const TargetMLTransformInfo &targetMLTransInfo) {
+static LogicalResult
+setTransposeLikeOpRootConfig(func::FuncOp entryPointFn,
+                             linalg::GenericOp genericOp,
+                             const LinalgOpInfo &linalgOpInfo,
+                             const TargetMLTransformInfo &targetMLTransInfo) {
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
   auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
@@ -1461,8 +1488,10 @@
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
   unsigned numLoops = genericOp.getNumLoops();
-  if (numLoops == 0) return failure();
-  if (!linalg::isElementwise(genericOp)) return failure();
+  if (numLoops == 0)
+    return failure();
+  if (!linalg::isElementwise(genericOp))
+    return failure();
 
   // Set the flow level tiling to the default.
   SmallVector<int64_t> minTileSizes = getMinTilingSizesForEachDim(
@@ -1542,10 +1571,10 @@
 
 /// Sets the lowering configuration for a generic op to use
 /// CPUDoubleTilingExpert pipeline.
-static LogicalResult setRootConfig(
-    func::FuncOp entryPointFn, linalg::GenericOp genericOp,
-    const LinalgOpInfo &linalgOpInfo,
-    const TargetMLTransformInfo &targetMLTransInfo) {
+static LogicalResult
+setRootConfig(func::FuncOp entryPointFn, linalg::GenericOp genericOp,
+              const LinalgOpInfo &linalgOpInfo,
+              const TargetMLTransformInfo &targetMLTransInfo) {
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
   // First, try to apply the transform dialect strategy, if defined.
@@ -1658,9 +1687,10 @@
 /// Main utility to compute the workgroup (vectorization/unrolling) tile sizes.
 /// Note that this only works for NHWC input and HWCF kernel/filter
 /// convolutions, where the shape is [N, OH, OW, OC, KH, KW, (IC)].
-static SmallVector<int64_t> getNhwcConvWorkgroupSizes(
-    func::FuncOp entryPointFn, linalg::ConvolutionOpInterface op,
-    int64_t vectorSize) {
+static SmallVector<int64_t>
+getNhwcConvWorkgroupSizes(func::FuncOp entryPointFn,
+                          linalg::ConvolutionOpInterface op,
+                          int64_t vectorSize) {
   bool isSupported = is2DConvOp(op) || is2DDepthConvOp(op) || is2DPoolingOp(op);
   (void)isSupported;
   assert(isSupported && "conv op is not supported");
@@ -1669,34 +1699,47 @@
   auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
 
   if (isX86(targetAttr)) {
-    if (is2DConvOp(op)) return {1, 1, 8, vectorSize, 1, 1, 8};
-    if (is2DDepthConvOp(op)) return {1, 1, 8, vectorSize, 1, 3};
-    if (is2DPoolingOp(op)) return {1, 1, 8, vectorSize, 1, 8};
+    if (is2DConvOp(op))
+      return {1, 1, 8, vectorSize, 1, 1, 8};
+    if (is2DDepthConvOp(op))
+      return {1, 1, 8, vectorSize, 1, 3};
+    if (is2DPoolingOp(op))
+      return {1, 1, 8, vectorSize, 1, 8};
     llvm_unreachable("unsupported conv");
   }
   if (isRISCV(targetAttr)) {
-    if (is2DConvOp(op)) return {1, 1, 8, vectorSize * 2, 1, 1, 8};
-    if (is2DDepthConvOp(op)) return {1, 1, 8, vectorSize, 1, 3};
-    if (is2DPoolingOp(op)) return {1, 1, 8, vectorSize * 2, 1, 8};
+    if (is2DConvOp(op))
+      return {1, 1, 8, vectorSize * 2, 1, 1, 8};
+    if (is2DDepthConvOp(op))
+      return {1, 1, 8, vectorSize, 1, 3};
+    if (is2DPoolingOp(op))
+      return {1, 1, 8, vectorSize * 2, 1, 8};
     llvm_unreachable("unsupported conv");
   }
   if (isAArch64(targetAttr)) {
-    if (is2DConvOp(op)) return {1, 1, 32, 64, 1, 1, 16};
-    if (is2DDepthConvOp(op)) return {1, 1, 4, 4, 1, 4};
-    if (is2DPoolingOp(op)) return {1, 1, 32, 64, 1, 16};
+    if (is2DConvOp(op))
+      return {1, 1, 32, 64, 1, 1, 16};
+    if (is2DDepthConvOp(op))
+      return {1, 1, 4, 4, 1, 4};
+    if (is2DPoolingOp(op))
+      return {1, 1, 32, 64, 1, 16};
     llvm_unreachable("unsupported conv");
   }
 
   // Get default hard-coded tile sizes if we couldn't compute anything
   // better.
-  if (is2DConvOp(op)) return {1, 1, vectorSize, vectorSize, 1, 1, vectorSize};
-  if (is2DDepthConvOp(op)) return {1, 1, vectorSize, vectorSize, 1, vectorSize};
-  if (is2DPoolingOp(op)) return {1, 1, vectorSize, vectorSize, 1, vectorSize};
+  if (is2DConvOp(op))
+    return {1, 1, vectorSize, vectorSize, 1, 1, vectorSize};
+  if (is2DDepthConvOp(op))
+    return {1, 1, vectorSize, vectorSize, 1, vectorSize};
+  if (is2DPoolingOp(op))
+    return {1, 1, vectorSize, vectorSize, 1, vectorSize};
   llvm_unreachable("unsupported conv");
 }
 
-static LogicalResult setConvInterfaceRootConfig(
-    func::FuncOp entryPointFn, linalg::ConvolutionOpInterface convOp) {
+static LogicalResult
+setConvInterfaceRootConfig(func::FuncOp entryPointFn,
+                           linalg::ConvolutionOpInterface convOp) {
   int64_t vectorSize = getVectorSize(
       entryPointFn, cast<ShapedType>(convOp->getResultTypes()[0]));
   SmallVector<int64_t> targetTileSizes =
@@ -1706,23 +1749,23 @@
   // they are in other layout format.
   Conv2DDimOrder order = getConv2DDimOrder(convOp);
   switch (order) {
-    case Conv2DDimOrder::Nhwc:
-      break;
-    case Conv2DDimOrder::Nchw:
-      SmallVector<int64_t> perm;
-      if (is2DConvOp(convOp)) {
-        // D.n, D.oh, D.ow,  D.f, D.kh, D.kw, D.c ->
-        // D.n,  D.f, D.oh, D.ow,  D.c, D.kh, D.kw
-        perm = {0, 3, 1, 2, 6, 4, 5};
-      } else if (is2DPoolingOp(convOp)) {
-        // D.n, D.oh, D.ow, D.c, D.kh, D.kw ->
-        // D.n, D.c, D.oh, D.ow, D.kh, D.kw
-        perm = {0, 3, 1, 2, 4, 5};
-      } else if (is2DDepthConvOp(convOp)) {
-        llvm_unreachable("Not implemented yet");
-      }
-      applyPermutationToVector(targetTileSizes, perm);
-      break;
+  case Conv2DDimOrder::Nhwc:
+    break;
+  case Conv2DDimOrder::Nchw:
+    SmallVector<int64_t> perm;
+    if (is2DConvOp(convOp)) {
+      // D.n, D.oh, D.ow,  D.f, D.kh, D.kw, D.c ->
+      // D.n,  D.f, D.oh, D.ow,  D.c, D.kh, D.kw
+      perm = {0, 3, 1, 2, 6, 4, 5};
+    } else if (is2DPoolingOp(convOp)) {
+      // D.n, D.oh, D.ow, D.c, D.kh, D.kw ->
+      // D.n, D.c, D.oh, D.ow, D.kh, D.kw
+      perm = {0, 3, 1, 2, 4, 5};
+    } else if (is2DDepthConvOp(convOp)) {
+      llvm_unreachable("Not implemented yet");
+    }
+    applyPermutationToVector(targetTileSizes, perm);
+    break;
   }
 
   return setConvRootConfig(entryPointFn,
@@ -1771,10 +1814,10 @@
 }
 
 /// Set default configuration for Linalg ops.
-static LogicalResult setRootConfig(
-    func::FuncOp entryPointFn, linalg::LinalgOp linalgOp,
-    DispatchLoweringPassPipeline pipeline =
-        DispatchLoweringPassPipeline::CPUDefault) {
+static LogicalResult
+setRootConfig(func::FuncOp entryPointFn, linalg::LinalgOp linalgOp,
+              DispatchLoweringPassPipeline pipeline =
+                  DispatchLoweringPassPipeline::CPUDefault) {
   auto partitionableLoopOp =
       cast<PartitionableLoopsInterface>(linalgOp.getOperation());
   SmallVector<int64_t> lbs(linalgOp.getNumLoops(), 0);
@@ -1790,10 +1833,10 @@
 
 /// Set the default configuration for operations that implement the
 /// `TiledOpInterface`.
-static LogicalResult setRootConfig(
-    func::FuncOp entryPointFn, TilingInterface tilingInterfaceOp,
-    DispatchLoweringPassPipeline pipeline =
-        DispatchLoweringPassPipeline::CPUDefault) {
+static LogicalResult
+setRootConfig(func::FuncOp entryPointFn, TilingInterface tilingInterfaceOp,
+              DispatchLoweringPassPipeline pipeline =
+                  DispatchLoweringPassPipeline::CPUDefault) {
   assert(!getLoweringConfig(tilingInterfaceOp) &&
          "expected lowering_config is not set");
   auto partitionableLoopOp =
@@ -1806,7 +1849,8 @@
       tilingInterfaceOp.getIterationDomain(builder);
   auto getStaticValue = [](OpFoldResult ofr) -> int64_t {
     std::optional<int64_t> intVal = getConstantIntValue(ofr);
-    if (!intVal) return ShapedType::kDynamic;
+    if (!intVal)
+      return ShapedType::kDynamic;
     return intVal.value();
   };
   auto lbs = llvm::map_to_vector(
@@ -1822,9 +1866,9 @@
 }
 
 /// Redirects to methods that set the configuration based on operation type.
-static LogicalResult setRootConfigImpl(
-    func::FuncOp entryPointFn, Operation *op,
-    const TargetMLTransformInfo &targetMLTransInfo) {
+static LogicalResult
+setRootConfigImpl(func::FuncOp entryPointFn, Operation *op,
+                  const TargetMLTransformInfo &targetMLTransInfo) {
   auto setRootConfigFn = [&](Operation *op) -> LogicalResult {
     return TypeSwitch<Operation *, LogicalResult>(op)
         .Case<linalg::GenericOp>([&](auto op) {
@@ -1888,14 +1932,15 @@
 ///   3. An operation that implements TilingInterface.
 /// If there are multiple operations meeting the same priority, the one closer
 /// to the end of the function is the root op.
-static FailureOr<Operation *> getRootOperation(
-    ArrayRef<Operation *> computeOps) {
+static FailureOr<Operation *>
+getRootOperation(ArrayRef<Operation *> computeOps) {
   Operation *rootOperation = nullptr;
   for (auto op : llvm::reverse(computeOps)) {
     if (auto linalgOp = dyn_cast<linalg::LinalgOp>(op)) {
       // Do not treat linalg ops that are all parallel as root operations in
       // this sweep.
-      if (linalgOp.getNumLoops() == linalgOp.getNumParallelLoops()) continue;
+      if (linalgOp.getNumLoops() == linalgOp.getNumParallelLoops())
+        continue;
 
       // All other linalg ops are root ops.
       rootOperation = op;
@@ -1936,7 +1981,8 @@
 static LogicalResult adjustTileSizesForPackOp(func::FuncOp entryPointFn,
                                               Operation *rootOp) {
   auto linalgOp = dyn_cast<linalg::LinalgOp>(rootOp);
-  if (!linalgOp) return success();
+  if (!linalgOp)
+    return success();
 
   // The transform dialect codegen has differnet logics and codegen flow. Ignore
   // the adjustment for it.
@@ -1950,7 +1996,8 @@
   bool hasChanged = false;
   auto res = entryPointFn.walk([&](tensor::PackOp packOp) -> WalkResult {
     // Multiple pack ops case is not supported.
-    if (hasChanged) return WalkResult::interrupt();
+    if (hasChanged)
+      return WalkResult::interrupt();
 
     hasChanged = true;
     LLVM_DEBUG(KD_DBGS() << "Find pack op candidate: " << packOp << "\n");
@@ -1962,7 +2009,8 @@
       ArrayRef<int64_t> innerTiles = packOp.getStaticInnerTiles();
       ArrayRef<int64_t> dimPos = packOp.getInnerDimsPos();
       for (auto [pos, size] : llvm::zip_equal(dimPos, innerTiles)) {
-        if (tileSizes[pos] == 0 || ShapedType::isDynamic(size)) continue;
+        if (tileSizes[pos] == 0 || ShapedType::isDynamic(size))
+          continue;
         tileSizes[pos] = tileSizes[pos] / size;
         tileSizes[pos] = std::max<int64_t>(tileSizes[pos], 1);
         LLVM_DEBUG(KD_DBGS() << "Scale # " << pos << " tile size to "
@@ -1972,7 +2020,8 @@
 
     return WalkResult::advance();
   });
-  if (res.wasInterrupted()) return failure();
+  if (res.wasInterrupted())
+    return failure();
 
   return setOpConfigAndEntryPointFnTranslation(entryPointFn, rootOp,
                                                tileSizesList, pipeline);
@@ -1981,7 +2030,8 @@
 static LogicalResult adjustTileSizesForUnPackOp(func::FuncOp entryPointFn,
                                                 Operation *rootOp) {
   auto linalgOp = dyn_cast<linalg::LinalgOp>(rootOp);
-  if (!linalgOp) return success();
+  if (!linalgOp)
+    return success();
 
   // The transform dialect codegen has differnet logics and codegen flow. Ignore
   // the adjustment for it.
@@ -1996,7 +2046,8 @@
   SmallVector<int64_t> alignedSizes(linalgOp.getNumLoops(), 1);
   for (OpOperand *opOperand : linalgOp.getDpsInputOperands()) {
     auto unpackOp = opOperand->get().getDefiningOp<tensor::UnPackOp>();
-    if (!unpackOp) continue;
+    if (!unpackOp)
+      continue;
 
     foundUnPackOp = true;
     auto idxMap = linalgOp.getMatchingIndexingMap(opOperand);
@@ -2007,15 +2058,18 @@
     SmallVector<int64_t> innerTiles = unpackOp.getStaticTiles();
     ArrayRef<int64_t> dimPos = unpackOp.getInnerDimsPos();
     for (auto [pos, size] : llvm::zip_equal(dimPos, innerTiles)) {
-      if (ShapedType::isDynamic(size)) continue;
+      if (ShapedType::isDynamic(size))
+        continue;
       auto dimExpr = idxMap.getResult(pos).dyn_cast<AffineDimExpr>();
-      if (!dimExpr) return failure();
+      if (!dimExpr)
+        return failure();
       int mappedPos = dimExpr.getPosition();
       alignedSizes[mappedPos] = std::lcm(alignedSizes[mappedPos], size);
     }
   }
 
-  if (!foundUnPackOp) return success();
+  if (!foundUnPackOp)
+    return success();
 
   LLVM_DEBUG(
       KD_DBGS() << "The tile sizes for each dimension should be aligned to "
@@ -2024,7 +2078,8 @@
   // Fixup for making tileSizes be multiple of inner_tile_sizes.
   for (SmallVectorImpl<int64_t> &tileSizes : tileSizesList) {
     for (auto idx : llvm::seq<int64_t>(0, tileSizes.size())) {
-      if (tileSizes[idx] == 0) continue;
+      if (tileSizes[idx] == 0)
+        continue;
       tileSizes[idx] = llvm::alignTo(tileSizes[idx], alignedSizes[idx]);
     }
   }
@@ -2040,8 +2095,9 @@
 }
 
 /// Sets the translation information to use for a dispatch region.
-static LogicalResult setTranslationInfoAndRootConfig(
-    func::FuncOp entryPointFn, ArrayRef<Operation *> computeOps) {
+static LogicalResult
+setTranslationInfoAndRootConfig(func::FuncOp entryPointFn,
+                                ArrayRef<Operation *> computeOps) {
   if (computeOps.empty()) {
     // No compute operations found. Allow to pass through without a config.
     return success();
@@ -2058,11 +2114,13 @@
 
   // Make sure that lowering_config is not preset on any compute ops.
   for (auto computeOp : computeOps) {
-    if (getLoweringConfig(computeOp)) return failure();
+    if (getLoweringConfig(computeOp))
+      return failure();
   }
 
   FailureOr<Operation *> rootOp = getRootOperation(computeOps);
-  if (failed(rootOp)) return failure();
+  if (failed(rootOp))
+    return failure();
   Operation *rootOperation = rootOp.value();
 
   // Handle the case with no known root operation.
@@ -2109,8 +2167,10 @@
       getAllEntryPoints(moduleOp);
   for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
     auto exportOp = exportOps.lookup(funcOp.getName());
-    if (!exportOp) continue;
-    if (getTranslationInfo(exportOp)) continue;
+    if (!exportOp)
+      continue;
+    if (getTranslationInfo(exportOp))
+      continue;
 
     // If using the transform dialect with a script file, intercept early.
     if (!clCPUCodegenTransformDialectFileName.empty()) {
@@ -2119,7 +2179,8 @@
       auto translationInfo = IREE::Codegen::TranslationInfoAttr::get(
           moduleOp.getContext(),
           IREE::Codegen::DispatchLoweringPassPipeline::TransformDialectCodegen);
-      if (failed(setTranslationInfo(funcOp, translationInfo))) return failure();
+      if (failed(setTranslationInfo(funcOp, translationInfo)))
+        return failure();
       continue;
     }
 
@@ -2136,5 +2197,5 @@
   return applyPatternsAndFoldGreedily(moduleOp, std::move(patterns));
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h
index 44a1b39..9333a03 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h
@@ -15,7 +15,7 @@
 
 LogicalResult initCPULaunchConfig(ModuleOp moduleOp);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignConstantOrdinals.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignConstantOrdinals.cpp
index cf0bc2e..637b074 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignConstantOrdinals.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignConstantOrdinals.cpp
@@ -23,11 +23,13 @@
 
     // Ignore non-LLVMCPU variants.
     // TODO(benvanik): a way to nest this in the pipeline via dynamic passes.
-    if (variantOp.getTarget().getBackend().getValue() != "llvm-cpu") return;
+    if (variantOp.getTarget().getBackend().getValue() != "llvm-cpu")
+      return;
 
     // Get a constant key -> ordinal mapping.
     auto keyOrdinals = variantOp.gatherConstantOrdinals();
-    if (keyOrdinals.empty()) return;
+    if (keyOrdinals.empty())
+      return;
 
     // Update placeholders to hold the concrete ordinal values.
     // Eventually MLIR or LLVM will inline them.
@@ -36,7 +38,8 @@
          llvm::make_early_inc_range(moduleOp.getOps<LLVM::GlobalOp>())) {
       auto keyAttr = globalOp->getAttr(
           IREE::HAL::ExecutableConstantBlockOp::getKeyAttrName());
-      if (!keyAttr) continue;
+      if (!keyAttr)
+        continue;
       auto it = keyOrdinals.find(keyAttr);
       if (it == keyOrdinals.end()) {
         globalOp.emitOpError()
@@ -52,12 +55,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<IREE::HAL::ExecutableVariantOp>>
 createLLVMCPUAssignConstantOrdinalsPass() {
   return std::make_unique<LLVMCPUAssignConstantOrdinalsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignImportOrdinals.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignImportOrdinals.cpp
index 3ee3ce6..963f022 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignImportOrdinals.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUAssignImportOrdinals.cpp
@@ -23,7 +23,8 @@
 
     // Ignore non-LLVMCPU variants.
     // TODO(benvanik): a way to nest this in the pipeline via dynamic passes.
-    if (variantOp.getTarget().getBackend().getValue() != "llvm-cpu") return;
+    if (variantOp.getTarget().getBackend().getValue() != "llvm-cpu")
+      return;
 
     auto *context = variantOp.getContext();
     auto unitAttr = UnitAttr::get(context);
@@ -40,11 +41,13 @@
     for (auto globalOp :
          llvm::make_early_inc_range(moduleOp.getOps<LLVM::GlobalOp>())) {
       auto keyAttr = globalOp->getAttrOfType<StringAttr>(importKeyAttr);
-      if (!keyAttr) continue;
+      if (!keyAttr)
+        continue;
       uniqueKeys.insert(keyAttr);
       ordinalGlobals[keyAttr].push_back(globalOp);
     }
-    if (uniqueKeys.empty()) return;
+    if (uniqueKeys.empty())
+      return;
     auto sortedKeys = uniqueKeys.takeVector();
     llvm::stable_sort(sortedKeys, [](auto lhs, auto rhs) {
       return lhs.getValue() < rhs.getValue();
@@ -75,12 +78,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<IREE::HAL::ExecutableVariantOp>>
 createLLVMCPUAssignImportOrdinalsPass() {
   return std::make_unique<LLVMCPUAssignImportOrdinalsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
index 7291ba9..ada489b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
@@ -29,12 +29,13 @@
           LLVMCPUCheckIRBeforeLLVMConversionPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 /// Returns success if the cummulative stack allocation size is less than the
 /// limit set by clMaxAllocationSizeInBytes.
 static LogicalResult checkStackAllocationSize(func::FuncOp funcOp) {
-  if (funcOp.getBody().empty()) return success();
+  if (funcOp.getBody().empty())
+    return success();
 
   SmallVector<memref::AllocaOp> allocaOps;
   funcOp.walk(
@@ -53,7 +54,8 @@
     int allocaSize = 1;
     auto allocaType = llvm::cast<ShapedType>(allocaOp.getType());
     for (auto dimSize : allocaType.getShape()) {
-      if (ShapedType::isDynamic(dimSize)) continue;
+      if (ShapedType::isDynamic(dimSize))
+        continue;
       allocaSize *= dimSize;
     }
     for (auto operand : allocaOp.getDynamicSizes()) {
@@ -98,5 +100,5 @@
   return std::make_unique<LLVMCPUCheckIRBeforeLLVMConversionPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUEmitVectorizationRemarks.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUEmitVectorizationRemarks.cpp
index fd4541c..de8b6d9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUEmitVectorizationRemarks.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUEmitVectorizationRemarks.cpp
@@ -17,7 +17,7 @@
     : LLVMCPUEmitVectorizationRemarksBase<LLVMCPUEmitVectorizationRemarksPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void LLVMCPUEmitVectorizationRemarksPass::runOnOperation() {
   auto funcOp = getOperation();
@@ -36,5 +36,5 @@
   return std::make_unique<LLVMCPUEmitVectorizationRemarksPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULinkExecutables.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULinkExecutables.cpp
index 148174c..79afd7a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULinkExecutables.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULinkExecutables.cpp
@@ -25,7 +25,8 @@
 
     auto sourceExecutableOps =
         llvm::to_vector<8>(moduleOp.getOps<IREE::HAL::ExecutableOp>());
-    if (sourceExecutableOps.size() <= 1) return;
+    if (sourceExecutableOps.size() <= 1)
+      return;
 
     // Guess a module name, if needed, to make the output files readable.
     auto moduleName = guessModuleName(moduleOp, "llvm_module");
@@ -62,12 +63,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createLLVMCPULinkExecutablesPass() {
   return std::make_unique<LLVMCPULinkExecutablesPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index 59eefd3..0bf6a85 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -40,7 +40,7 @@
 class LLVMCPULowerExecutableTargetPass
     : public LLVMCPULowerExecutableTargetBase<
           LLVMCPULowerExecutableTargetPass> {
- public:
+public:
   LLVMCPULowerExecutableTargetPass() = default;
   LLVMCPULowerExecutableTargetPass(
       const LLVMCPULowerExecutableTargetPass &pass) {}
@@ -64,7 +64,7 @@
 
   void runOnOperation() override;
 
- private:
+private:
   Option<bool> testLoweringConfiguration{
       *this, "test-lowering-configuration",
       llvm::cl::desc(
@@ -84,13 +84,14 @@
           "expected to work on the std.module op within the "
           "hal.executable.variant operation")};
 };
-}  // namespace
+} // namespace
 
 /// The pipeline parser doesnt like strings that have `'` or `"` in them. But it
 /// is needed for demarcating the option value. So just drop them before sending
 /// it one.
 static StringRef sanitizePipelineString(StringRef input) {
-  if (input.empty()) return input;
+  if (input.empty())
+    return input;
   // If first/last character is ' or ", drop them.
   if (input.front() == '\'' || input.front() == '"') {
     input = input.drop_front();
@@ -104,12 +105,14 @@
 /// Verify that valid configuration is set for all ops within the compiled
 /// module.
 template <typename F>
-static LogicalResult verifyLoweringConfiguration(
-    ModuleOp module, IREE::Codegen::TranslationInfoAttr translationInfo,
-    F verificationFn) {
+static LogicalResult
+verifyLoweringConfiguration(ModuleOp module,
+                            IREE::Codegen::TranslationInfoAttr translationInfo,
+                            F verificationFn) {
   auto walkResult = module.walk([&](Operation *op) -> WalkResult {
     IREE::Codegen::LoweringConfigAttr loweringConfig = getLoweringConfig(op);
-    if (!loweringConfig) return WalkResult::advance();
+    if (!loweringConfig)
+      return WalkResult::advance();
     TilingConfig tilingConfig(loweringConfig);
     return verificationFn(op, tilingConfig, translationInfo,
                           ArrayRef<int64_t>{});
@@ -149,8 +152,8 @@
   return rootLoweringConfig;
 }
 
-static TilingConfig getTilingConfigForPipeline(
-    IREE::HAL::ExecutableVariantOp variantOp) {
+static TilingConfig
+getTilingConfigForPipeline(IREE::HAL::ExecutableVariantOp variantOp) {
   auto maybeLoweringConfig = getRootLoweringConfig(variantOp);
   assert(succeeded(maybeLoweringConfig) &&
          "Pipeline requires a lowering config");
@@ -211,21 +214,21 @@
     if (translationInfo.has_value()) {
       LogicalResult verificationStatus = success();
       switch (translationInfo.value().getDispatchLoweringPassPipeline()) {
-        case IREE::Codegen::DispatchLoweringPassPipeline::CPUDoubleTilingExpert:
-        case IREE::Codegen::DispatchLoweringPassPipeline::
-            CPUDoubleTilingPadExpert:
-          verificationStatus = verifyLoweringConfiguration(
-              moduleOp, translationInfo.value(),
-              verifyDoubleTilingExpertPassPipelineConfig);
-          break;
-        case IREE::Codegen::DispatchLoweringPassPipeline::
-            CPUConvTileAndDecomposeExpert:
-          verificationStatus = verifyLoweringConfiguration(
-              moduleOp, translationInfo.value(),
-              verifyConvTileAndDecomposeExpertConfig);
-          break;
-        default:
-          break;
+      case IREE::Codegen::DispatchLoweringPassPipeline::CPUDoubleTilingExpert:
+      case IREE::Codegen::DispatchLoweringPassPipeline::
+          CPUDoubleTilingPadExpert:
+        verificationStatus = verifyLoweringConfiguration(
+            moduleOp, translationInfo.value(),
+            verifyDoubleTilingExpertPassPipelineConfig);
+        break;
+      case IREE::Codegen::DispatchLoweringPassPipeline::
+          CPUConvTileAndDecomposeExpert:
+        verificationStatus =
+            verifyLoweringConfiguration(moduleOp, translationInfo.value(),
+                                        verifyConvTileAndDecomposeExpertConfig);
+        break;
+      default:
+        break;
       }
       if (failed(verificationStatus)) {
         return signalPassFailure();
@@ -242,73 +245,73 @@
                                hasSMEFeature(target);
       if (!testLoweringConfiguration) {
         switch (translationInfo.value().getDispatchLoweringPassPipeline()) {
-          case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault:
-          case IREE::Codegen::DispatchLoweringPassPipeline::None:
-            addCPUDefaultPassPipeline(executableLoweringPipeline);
-            break;
-          case IREE::Codegen::DispatchLoweringPassPipeline::
-              CPUBufferOpsTileAndVectorize: {
-            TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
-            addCPUBufferOpsTileAndVectorizePipeline(
-                executableLoweringPipeline, tilingConfig, enableVectorMasking,
-                enableAArch64SSVE);
-            break;
-          }
-          case IREE::Codegen::DispatchLoweringPassPipeline::
-              CPUDoubleTilingExpert: {
-            TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
-            addMultiTilingExpertPassPipeline(
-                executableLoweringPipeline, tilingConfig,
-                /*enablePeeling=*/false, enableVectorMasking, lowerToAVX2);
-            break;
-          }
-          case IREE::Codegen::DispatchLoweringPassPipeline::
-              CPUDoubleTilingPadExpert: {
-            TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
-            addDoubleTilingPadExpertPassPipeline(
-                executableLoweringPipeline, tilingConfig, enableVectorMasking);
-            break;
-          }
-          case IREE::Codegen::DispatchLoweringPassPipeline::
-              CPUDoubleTilingPeelingExpert: {
-            TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
-            addMultiTilingExpertPassPipeline(
-                executableLoweringPipeline, tilingConfig,
-                /*enablePeeling=*/true, enableVectorMasking, lowerToAVX2,
-                enableAArch64SSVE);
-            break;
-          }
-          case IREE::Codegen::DispatchLoweringPassPipeline::
-              CPUConvTileAndDecomposeExpert: {
-            TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
-            addConvTileAndDecomposeExpertPassPipeline(
-                executableLoweringPipeline, tilingConfig, enableVectorMasking,
-                enableAArch64SSVE);
-            break;
-          }
-          case IREE::Codegen::DispatchLoweringPassPipeline::Mmt4dTilingExpert: {
-            TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
-            addMmt4dTilingExpertPassPipeline(executableLoweringPipeline,
-                                             tilingConfig, enableMicrokernels);
-            break;
-          }
-          case IREE::Codegen::DispatchLoweringPassPipeline::CPUDataTiling: {
-            TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
-            addCPUDataTilingPipeline(executableLoweringPipeline, tilingConfig);
-            break;
-          }
-          case IREE::Codegen::DispatchLoweringPassPipeline::VMVXDefault:
-            addVMVXDefaultPassPipeline(executableLoweringPipeline,
-                                       enableMicrokernels);
-            break;
-          // Transform-dialect pipelines.
-          case IREE::Codegen::DispatchLoweringPassPipeline::
-              TransformDialectCodegen:
-            addTransformDialectPasses(executableLoweringPipeline);
-            break;
-          default:
-            variantOp.emitOpError("Unsupported pipeline on CPU target.");
-            return signalPassFailure();
+        case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault:
+        case IREE::Codegen::DispatchLoweringPassPipeline::None:
+          addCPUDefaultPassPipeline(executableLoweringPipeline);
+          break;
+        case IREE::Codegen::DispatchLoweringPassPipeline::
+            CPUBufferOpsTileAndVectorize: {
+          TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
+          addCPUBufferOpsTileAndVectorizePipeline(
+              executableLoweringPipeline, tilingConfig, enableVectorMasking,
+              enableAArch64SSVE);
+          break;
+        }
+        case IREE::Codegen::DispatchLoweringPassPipeline::
+            CPUDoubleTilingExpert: {
+          TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
+          addMultiTilingExpertPassPipeline(
+              executableLoweringPipeline, tilingConfig,
+              /*enablePeeling=*/false, enableVectorMasking, lowerToAVX2);
+          break;
+        }
+        case IREE::Codegen::DispatchLoweringPassPipeline::
+            CPUDoubleTilingPadExpert: {
+          TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
+          addDoubleTilingPadExpertPassPipeline(
+              executableLoweringPipeline, tilingConfig, enableVectorMasking);
+          break;
+        }
+        case IREE::Codegen::DispatchLoweringPassPipeline::
+            CPUDoubleTilingPeelingExpert: {
+          TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
+          addMultiTilingExpertPassPipeline(
+              executableLoweringPipeline, tilingConfig,
+              /*enablePeeling=*/true, enableVectorMasking, lowerToAVX2,
+              enableAArch64SSVE);
+          break;
+        }
+        case IREE::Codegen::DispatchLoweringPassPipeline::
+            CPUConvTileAndDecomposeExpert: {
+          TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
+          addConvTileAndDecomposeExpertPassPipeline(
+              executableLoweringPipeline, tilingConfig, enableVectorMasking,
+              enableAArch64SSVE);
+          break;
+        }
+        case IREE::Codegen::DispatchLoweringPassPipeline::Mmt4dTilingExpert: {
+          TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
+          addMmt4dTilingExpertPassPipeline(executableLoweringPipeline,
+                                           tilingConfig, enableMicrokernels);
+          break;
+        }
+        case IREE::Codegen::DispatchLoweringPassPipeline::CPUDataTiling: {
+          TilingConfig tilingConfig = getTilingConfigForPipeline(variantOp);
+          addCPUDataTilingPipeline(executableLoweringPipeline, tilingConfig);
+          break;
+        }
+        case IREE::Codegen::DispatchLoweringPassPipeline::VMVXDefault:
+          addVMVXDefaultPassPipeline(executableLoweringPipeline,
+                                     enableMicrokernels);
+          break;
+        // Transform-dialect pipelines.
+        case IREE::Codegen::DispatchLoweringPassPipeline::
+            TransformDialectCodegen:
+          addTransformDialectPasses(executableLoweringPipeline);
+          break;
+        default:
+          variantOp.emitOpError("Unsupported pipeline on CPU target.");
+          return signalPassFailure();
         }
       }
     }
@@ -324,5 +327,5 @@
   return std::make_unique<LLVMCPULowerExecutableTargetPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerToUKernels.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerToUKernels.cpp
index c34216c..daf0993 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerToUKernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerToUKernels.cpp
@@ -32,12 +32,13 @@
   }
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 /// Returns `true` if an `outsOperand` value is initialized to zero.
 static bool isInitializedToZero(Value outsOperand) {
   auto fillOp = outsOperand.getDefiningOp<linalg::FillOp>();
-  if (!fillOp) return false;
+  if (!fillOp)
+    return false;
   Value fillVal = fillOp.getDpsInputOperand(0)->get();
   return matchPattern(fillVal, m_Zero()) ||
          matchPattern(fillVal, m_AnyZeroFloat());
@@ -51,9 +52,9 @@
 
 /// Returns the function name and attributes to use for a ukernel with given
 /// `ukernelName` on the target described by `targetAttr`.
-static FnNameAndDefAttrs getFnNameAndDefAttrs(
-    const char *ukernelName, RewriterBase &rewriter,
-    IREE::HAL::ExecutableTargetAttr targetAttr) {
+static FnNameAndDefAttrs
+getFnNameAndDefAttrs(const char *ukernelName, RewriterBase &rewriter,
+                     IREE::HAL::ExecutableTargetAttr targetAttr) {
   FnNameAndDefAttrs result;
   if (isVMVXBackend(targetAttr)) {
     result.name = std::string("vmvx.") + ukernelName;
@@ -83,8 +84,8 @@
 /// Matches an (linalg.fill -> )? linalg.mmt4d operation sequence and converts
 /// it into a iree_codegen.ukernel.mmt4d operation, that is later lowered
 /// into a call to the microkernel.
-static FailureOr<IREE::Codegen::UKernelOpInterface> matchDAGForUKernel(
-    RewriterBase &rewriter, linalg::Mmt4DOp op) {
+static FailureOr<IREE::Codegen::UKernelOpInterface>
+matchDAGForUKernel(RewriterBase &rewriter, linalg::Mmt4DOp op) {
   Value lhs = op.getDpsInputOperand(0)->get();
   Value rhs = op.getDpsInputOperand(1)->get();
   Value out = op.getDpsInitOperand(0)->get();
@@ -144,8 +145,8 @@
       genericMicroKernelOp.getOperation());
 }
 
-static FailureOr<IREE::Codegen::UKernelOpInterface> matchDAGForUKernel(
-    RewriterBase &rewriter, tensor::PackOp op) {
+static FailureOr<IREE::Codegen::UKernelOpInterface>
+matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op) {
   Value in = op.getSource();
   Value out = op.getDest();
   auto inType = llvm::cast<ShapedType>(in.getType());
@@ -256,8 +257,8 @@
       genericMicroKernelOp.getOperation());
 }
 
-static FailureOr<IREE::Codegen::UKernelOpInterface> matchDAGForUKernel(
-    RewriterBase &rewriter, tensor::UnPackOp op) {
+static FailureOr<IREE::Codegen::UKernelOpInterface>
+matchDAGForUKernel(RewriterBase &rewriter, tensor::UnPackOp op) {
   Value in = op.getSource();
   Value out = op.getDest();
   auto inType = llvm::cast<ShapedType>(in.getType());
@@ -333,8 +334,8 @@
       genericMicroKernelOp.getOperation());
 }
 
-static FailureOr<IREE::Codegen::UKernelOpInterface> matchDAGForUKernel(
-    RewriterBase &rewriter, IREE::Codegen::QueryTileSizesOp op) {
+static FailureOr<IREE::Codegen::UKernelOpInterface>
+matchDAGForUKernel(RewriterBase &rewriter, IREE::Codegen::QueryTileSizesOp op) {
   auto tensorType = op.getTensorType().dyn_cast<RankedTensorType>();
   if (!tensorType) {
     return rewriter.notifyMatchFailure(op,
@@ -406,7 +407,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void LLVMCPULowerToUKernelsPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -426,5 +427,5 @@
   return std::make_unique<LLVMCPULowerToUKernelsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMaterializeEncodingPass.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMaterializeEncodingPass.cpp
index 2e3a0fe..b854beb 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMaterializeEncodingPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMaterializeEncodingPass.cpp
@@ -29,61 +29,62 @@
 
 static MatmulTileParams chooseMatmulTileParamsGeneric() { return {8, 4, 8}; }
 
-static MatmulTileParams chooseMatmulTileParamsAArch64(
-    MatmulType type, ExecutableTargetAttr target) {
+static MatmulTileParams
+chooseMatmulTileParamsAArch64(MatmulType type, ExecutableTargetAttr target) {
   switch (type) {
-    case MatmulType::F32F32F32:
-      return {8, 1, 8};
-    case MatmulType::I8I8I32:
-      if (hasFeature(target, "+i8mm")) {
-        // Aim to use SMMLA.
-        return {8, 8, 8};
-      }
-      if (hasFeature(target, "+dotprod")) {
-        // Aim to use SDOT.
-        return {8, 4, 8};
-      }
-      return {8, 1, 8};
-    default:
-      assert(false);
-      return {};
+  case MatmulType::F32F32F32:
+    return {8, 1, 8};
+  case MatmulType::I8I8I32:
+    if (hasFeature(target, "+i8mm")) {
+      // Aim to use SMMLA.
+      return {8, 8, 8};
+    }
+    if (hasFeature(target, "+dotprod")) {
+      // Aim to use SDOT.
+      return {8, 4, 8};
+    }
+    return {8, 1, 8};
+  default:
+    assert(false);
+    return {};
   }
 }
 
-static MatmulTileParams chooseMatmulTileParamsX86_64(
-    MatmulType type, ExecutableTargetAttr target) {
+static MatmulTileParams
+chooseMatmulTileParamsX86_64(MatmulType type, ExecutableTargetAttr target) {
   switch (type) {
-    case MatmulType::F32F32F32:
-      if (hasAVX512fFeature(target)) return {16, 1, 16};
-      if (hasFeature(target, "+avx")) {
-        // Note: for good performance, most +avx users will also want to add
-        // +fma, but that's a local instruction selection detail and the tile
-        // layout is unaffected, as there are enough registers even with the
-        // need for intermediate product registers when +fma is not used.
-        return {8, 1, 8};
-      }
-      // SSE fallback.
-      return {8, 1, 4};
-    case MatmulType::I8I8I32:
-      if (hasFeature(target, "+avx512vnni")) {
-        // Aim to use VPDPWSSD. This is the same tile size as with VPMADDWD
-        // as the only difference is that VPDPWSSD accumulates. VPDPBUSD would
-        // call for {16, 4, 16} but we can't use it because of its unsigned LHS.
-        return {16, 2, 16};
-      }
-      if (hasFeature(target, "+avx512bw")) {
-        // Aim to use VPMADDWD (zmm).
-        return {16, 2, 16};
-      }
-      if (hasFeature(target, "+avx2")) {
-        // Aim to use VPMADDWD (ymm).
-        return {8, 2, 8};
-      }
-      // SSE fallback. Aim to use PMADDWD (xmm).
-      return {8, 2, 4};
-    default:
-      assert(false);
-      return {};
+  case MatmulType::F32F32F32:
+    if (hasAVX512fFeature(target))
+      return {16, 1, 16};
+    if (hasFeature(target, "+avx")) {
+      // Note: for good performance, most +avx users will also want to add
+      // +fma, but that's a local instruction selection detail and the tile
+      // layout is unaffected, as there are enough registers even with the
+      // need for intermediate product registers when +fma is not used.
+      return {8, 1, 8};
+    }
+    // SSE fallback.
+    return {8, 1, 4};
+  case MatmulType::I8I8I32:
+    if (hasFeature(target, "+avx512vnni")) {
+      // Aim to use VPDPWSSD. This is the same tile size as with VPMADDWD
+      // as the only difference is that VPDPWSSD accumulates. VPDPBUSD would
+      // call for {16, 4, 16} but we can't use it because of its unsigned LHS.
+      return {16, 2, 16};
+    }
+    if (hasFeature(target, "+avx512bw")) {
+      // Aim to use VPMADDWD (zmm).
+      return {16, 2, 16};
+    }
+    if (hasFeature(target, "+avx2")) {
+      // Aim to use VPMADDWD (ymm).
+      return {8, 2, 8};
+    }
+    // SSE fallback. Aim to use PMADDWD (xmm).
+    return {8, 2, 4};
+  default:
+    assert(false);
+    return {};
   }
 }
 
@@ -109,7 +110,7 @@
   void runOnOperation() override;
 };
 
-}  // namespace
+} // namespace
 
 void LLVMCPUMaterializeEncodingPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -120,7 +121,8 @@
       [targetAttr](
           RankedTensorType tensorType) -> FailureOr<MaterializeEncodingInfo> {
         std::optional<TensorEncoding> encoding = getEncoding(tensorType);
-        if (!encoding) return failure();
+        if (!encoding)
+          return failure();
 
         auto matmulType = getMatmulType(*encoding);
         auto matmulOperandRole = getMatmulOperandRole(*encoding);
@@ -164,5 +166,5 @@
   return std::make_unique<LLVMCPUMaterializeEncodingPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMmt4dVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMmt4dVectorLowering.cpp
index ec0c013..b3da29c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMmt4dVectorLowering.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUMmt4dVectorLowering.cpp
@@ -38,7 +38,7 @@
   }
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void LLVMCPUMmt4dVectorLoweringPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -46,11 +46,13 @@
 
   std::optional<int64_t> numLoops;
   funcOp.walk([&](vector::ContractionOp op) {
-    if (numLoops) return signalPassFailure();
+    if (numLoops)
+      return signalPassFailure();
     numLoops = op.getIndexingMapsArray()[0].getNumDims();
   });
   // No vector.contract op to optimize.
-  if (!numLoops) return;
+  if (!numLoops)
+    return;
 
   {
     // Fold consumer add ops into the contraction op itself.
@@ -179,5 +181,5 @@
   return std::make_unique<LLVMCPUMmt4dVectorLoweringPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPasses.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPasses.h
index fe7c861..884054b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPasses.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPasses.h
@@ -21,8 +21,8 @@
 class TilingConfig;
 
 /// Performs the final conversion to LLVM dialect.
-std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
-    bool reassociateFpReordering = false);
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertToLLVMPass(bool reassociateFpReordering = false);
 
 /// Checks CPU backend specific IR constraints (like no stack allocations)
 std::unique_ptr<OperationPass<ModuleOp>>
@@ -59,8 +59,8 @@
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUPeelPass();
 
 /// Pass to perform SplitReduction transformations of `LinalgOp`s.
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUSplitReductionPass(
-    bool enableReassociateFpReductions = false);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUSplitReductionPass(bool enableReassociateFpReductions = false);
 
 /// Synchronizes LLVM linkage with MLIR symbol visibility.
 std::unique_ptr<OperationPass<ModuleOp>>
@@ -72,12 +72,12 @@
     LLVMCPUTensorPadOption option = LLVMCPUTensorPadOption::ParallelDims);
 
 /// Pass to tile and fuse TilingInterface ops with given tilingLevel.
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTileAndFusePass(
-    int64_t tilingLevel = -1);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUTileAndFusePass(int64_t tilingLevel = -1);
 
 /// Pass to tile TilingInterface ops with given tilingLevel.
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTilePass(
-    int64_t tilingLevel = -1);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUTilePass(int64_t tilingLevel = -1);
 
 /// Replaces llvm.intr.fma with its unfused mul and add ops.
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUUnfuseFMAOpsPass();
@@ -97,8 +97,8 @@
   bool vectorizeGatherAccesses = false;
 };
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass();
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass(
-    const LLVMCPUVectorizationPassOptions &options);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUVectorizationPass(const LLVMCPUVectorizationPassOptions &options);
 
 /// A pass that converts certain vector.contract ops to custom kernels.
 std::unique_ptr<OperationPass<func::FuncOp>>
@@ -219,7 +219,7 @@
 /// Populates passes needed to link HAL executables across LLVMCPU targets.
 void buildLLVMCPULinkingPassPipeline(OpPassManager &passManager);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMCPU_PASSES_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMCPU_PASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp
index 90288a5..7917f3a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp
@@ -24,7 +24,8 @@
 // stages.
 void collectLoopsToPeel(RewriterBase &rewriter, Operation *op,
                         SmallVectorImpl<scf::ForOp> &loopsToPeel) {
-  if (!iree_compiler::getLoweringConfig(op)) return;
+  if (!iree_compiler::getLoweringConfig(op))
+    return;
 
   int maxNumLoopsToPeel = TypeSwitch<Operation *, int>(op)
                               .Case<linalg::LinalgOp>([](auto linalgOp) {
@@ -37,7 +38,8 @@
   for (int i = 0; i < maxNumLoopsToPeel; ++i) {
     op = op->getParentOfType<scf::ForOp>();
     auto loop = llvm::cast_or_null<scf::ForOp>(op);
-    if (!loop || iree_compiler::isTiledAndDistributedLoop(loop)) break;
+    if (!loop || iree_compiler::isTiledAndDistributedLoop(loop))
+      break;
     loopsToPeel.push_back(loop);
   }
 
@@ -45,7 +47,7 @@
 }
 
 class LLVMCPUPeelPass : public LLVMCPUPeelBase<LLVMCPUPeelPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<tensor::TensorDialect, linalg::LinalgDialect,
                     scf::SCFDialect>();
@@ -85,10 +87,10 @@
     return signalPassFailure();
   }
 }
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUPeelPass() {
   return std::make_unique<LLVMCPUPeelPass>();
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSplitReduction.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSplitReduction.cpp
index 9632e3a..05c2339 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSplitReduction.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSplitReduction.cpp
@@ -154,7 +154,7 @@
 /// Pass to splitReduce linalg operations.
 class LLVMCPUSplitReductionPass
     : public LLVMCPUSplitReductionBase<LLVMCPUSplitReductionPass> {
- public:
+public:
   LLVMCPUSplitReductionPass(bool fpReductionReordering) {
     this->enableFpReductionReordering = fpReductionReordering;
   }
@@ -199,12 +199,12 @@
     }
   }
 }
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUSplitReductionPass(
-    const bool enableFpReductionReordering) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUSplitReductionPass(const bool enableFpReductionReordering) {
   return std::make_unique<LLVMCPUSplitReductionPass>(
       enableFpReductionReordering);
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSynchronizeSymbolVisibility.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSynchronizeSymbolVisibility.cpp
index 8c81a91..aeb94c4 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSynchronizeSymbolVisibility.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUSynchronizeSymbolVisibility.cpp
@@ -18,13 +18,13 @@
                                      LLVM::Linkage linkage) {
   SymbolTable::Visibility visibility = op.getVisibility();
   switch (linkage) {
-    case LLVM::Linkage::Private:
-    case LLVM::Linkage::Internal:
-      visibility = SymbolTable::Visibility::Private;
-      break;
-    default:
-      visibility = SymbolTable::Visibility::Public;
-      break;
+  case LLVM::Linkage::Private:
+  case LLVM::Linkage::Internal:
+    visibility = SymbolTable::Visibility::Private;
+    break;
+  default:
+    visibility = SymbolTable::Visibility::Public;
+    break;
   }
   op.setVisibility(visibility);
 }
@@ -46,12 +46,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>>
 createLLVMCPUSynchronizeSymbolVisibilityPass() {
   return std::make_unique<LLVMCPUSynchronizeSymbolVisibilityPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTensorPad.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTensorPad.cpp
index b1edcb7..566aed9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTensorPad.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTensorPad.cpp
@@ -21,10 +21,10 @@
 namespace iree_compiler {
 namespace {
 class LLVMCPUTensorPadPass : public LLVMCPUTensorPadBase<LLVMCPUTensorPadPass> {
- private:
+private:
   LLVMCPUTensorPadOption option = LLVMCPUTensorPadOption::ParallelDims;
 
- public:
+public:
   explicit LLVMCPUTensorPadPass(LLVMCPUTensorPadOption option)
       : option(option) {}
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -41,16 +41,16 @@
   bool nofold;
   utils::IteratorType targetIterType;
   switch (option) {
-    case LLVMCPUTensorPadOption::ParallelDims:
-      LLVM_DEBUG(llvm::dbgs() << "padding parallel dims\n");
-      targetIterType = utils::IteratorType::parallel;
-      nofold = false;
-      break;
-    case LLVMCPUTensorPadOption::ReductionDims:
-      LLVM_DEBUG(llvm::dbgs() << "padding reduction dims\n");
-      targetIterType = utils::IteratorType::reduction;
-      nofold = true;
-      break;
+  case LLVMCPUTensorPadOption::ParallelDims:
+    LLVM_DEBUG(llvm::dbgs() << "padding parallel dims\n");
+    targetIterType = utils::IteratorType::parallel;
+    nofold = false;
+    break;
+  case LLVMCPUTensorPadOption::ReductionDims:
+    LLVM_DEBUG(llvm::dbgs() << "padding reduction dims\n");
+    targetIterType = utils::IteratorType::reduction;
+    nofold = true;
+    break;
   };
   SmallVector<linalg::LinalgOp> candidates;
   funcOp.walk([&](linalg::LinalgOp op) { candidates.push_back(op); });
@@ -114,11 +114,11 @@
     }
   }
 }
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTensorPadPass(
-    LLVMCPUTensorPadOption option) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUTensorPadPass(LLVMCPUTensorPadOption option) {
   return std::make_unique<LLVMCPUTensorPadPass>(option);
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTile.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTile.cpp
index dc19e35..2f1c06d 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTile.cpp
@@ -74,13 +74,15 @@
 
   for (auto computeOp : computeOps) {
     auto op = cast<TilingInterface>(computeOp);
-    if (op.getLoopIteratorTypes().empty()) continue;
+    if (op.getLoopIteratorTypes().empty())
+      continue;
 
     // For now do not tile `tensor.pad` operations. The `tensor.pad`
     // operations might be those introduced by the padding-based
     // codegeneration strategy. Those are not meant to be tiled again.
     // Need a better way for handling this, but this works for now.
-    if (isa<tensor::PadOp>(computeOp)) continue;
+    if (isa<tensor::PadOp>(computeOp))
+      continue;
 
     LLVM_DEBUG(llvm::dbgs() << "candidate: " << op << "\n");
     SmallVector<int64_t> tileSizes;
@@ -102,7 +104,8 @@
         });
     FailureOr<scf::SCFTilingResult> tiledResults =
         scf::tileUsingSCFForOp(rewriter, op, options);
-    if (failed(tiledResults)) continue;
+    if (failed(tiledResults))
+      continue;
     rewriter.replaceOp(op, tiledResults->replacements);
   }
 
@@ -118,12 +121,12 @@
     return signalPassFailure();
   }
 }
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTilePass(
-    int64_t tilingLevel) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUTilePass(int64_t tilingLevel) {
   return std::make_unique<LLVMCPUTilePass>(tilingLevel);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
index 28baf39..85e05d5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
@@ -59,9 +59,9 @@
 /// For IREEs use case we dont need this. So this folds away the `if` condition.
 /// Note this is a fairly hacky workaround, but the current pad operation
 /// semantics force us down this path.
-static FailureOr<tensor::PadOp> foldIfGeneratedFromPadding(
-    RewriterBase &rewriter, tensor::PadOp untiledPadOp,
-    tensor::PadOp tiledPadOp) {
+static FailureOr<tensor::PadOp>
+foldIfGeneratedFromPadding(RewriterBase &rewriter, tensor::PadOp untiledPadOp,
+                           tensor::PadOp tiledPadOp) {
   auto ifOp = dyn_cast<scf::IfOp>(tiledPadOp->getParentOp());
   if (!ifOp) {
     return failure();
@@ -150,7 +150,8 @@
     std::optional<scf::SCFFuseProducerOfSliceResult> fusedProducer =
         tileAndFuseProducerOfSlice(rewriter, candidateSliceOp,
                                    tilingResult->loops);
-    if (!fusedProducer) continue;
+    if (!fusedProducer)
+      continue;
 
     // Check if the fused producer has other uses that require the value
     // to be yielded from within the tiled loop.
@@ -189,7 +190,8 @@
   TilingInterface consumerOp;
   funcOp.walk<WalkOrder::PostOrder, ReverseIterator>([&](TilingInterface op) {
     // Find the next consumer op if it does not have loops.
-    if (op.getLoopIteratorTypes().empty()) return WalkResult::advance();
+    if (op.getLoopIteratorTypes().empty())
+      return WalkResult::advance();
     consumerOp = op;
     return WalkResult::interrupt();
   });
@@ -249,12 +251,12 @@
     return signalPassFailure();
   }
 }
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUTileAndFusePass(
-    int64_t tilingLevel) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUTileAndFusePass(int64_t tilingLevel) {
   return std::make_unique<LLVMCPUTileAndFusePass>(tilingLevel);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUUnfuseFMAOps.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUUnfuseFMAOps.cpp
index af174cd..916378a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUUnfuseFMAOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUUnfuseFMAOps.cpp
@@ -19,7 +19,7 @@
 // Rewrites llvm.intr.fma as its un-fuse version.
 // TODO(ataei): Upstream this pattern if needed ?
 class UnfusedFMAOpsPassConversion : public OpRewritePattern<LLVM::FMAOp> {
- public:
+public:
   using OpRewritePattern<LLVM::FMAOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(LLVM::FMAOp op,
@@ -33,7 +33,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 namespace {
 struct LLVMCPUUnfuseFMAOpsPass
@@ -43,7 +43,7 @@
   }
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void populateUnfusedFMAOpsPassPatterns(MLIRContext *context,
                                        RewritePatternSet &patterns) {
@@ -64,5 +64,5 @@
   return std::make_unique<LLVMCPUUnfuseFMAOpsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorLowering.cpp
index 45f88ae..5c711c9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorLowering.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorLowering.cpp
@@ -23,7 +23,8 @@
   bool res = false;
   funcOp.walk([&](vector::TransposeOp op) {
     auto srcGtOneDims = isTranspose2DSlice(op);
-    if (failed(srcGtOneDims)) return WalkResult::advance();
+    if (failed(srcGtOneDims))
+      return WalkResult::advance();
     VectorType srcType = op.getSourceVectorType();
     int64_t m = srcType.getDimSize(std::get<0>(srcGtOneDims.value()));
     int64_t n = srcType.getDimSize(std::get<1>(srcGtOneDims.value()));
@@ -40,7 +41,7 @@
 /// Pass to lower Vector ops before conversion to LLVM.
 class LLVMCPUVectorLoweringPass
     : public LLVMCPUVectorLoweringBase<LLVMCPUVectorLoweringPass> {
- public:
+public:
   using LLVMCPUVectorLoweringBase::LLVMCPUVectorLoweringBase;
   LLVMCPUVectorLoweringPass(const LLVMCPUVectorLoweringPassOptions &options) {
     this->splitVectorTransfersTo = options.splitVectorTransfersTo;
@@ -167,7 +168,7 @@
     (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
   }
 }
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorLoweringPass() {
   return std::make_unique<LLVMCPUVectorLoweringPass>();
@@ -176,5 +177,5 @@
     const LLVMCPUVectorLoweringPassOptions &options) {
   return std::make_unique<LLVMCPUVectorLoweringPass>(options);
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp
index de46976..971889a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp
@@ -113,8 +113,8 @@
 
 /// Tries to infer the vector sizes from an IR using ValueBounds analysis.
 /// Returns failure if vector sizes can't be inferred.
-static FailureOr<SmallVector<int64_t>> inferVectorSizesFromIR(
-    linalg::LinalgOp linalgOp) {
+static FailureOr<SmallVector<int64_t>>
+inferVectorSizesFromIR(linalg::LinalgOp linalgOp) {
   LLVM_DEBUG(VEC_DBGS() << "Inferring vector sizes for:\n" << linalgOp << "\n");
 
   SmallVector<int64_t> vectorSizes;
@@ -171,8 +171,9 @@
 
 // Give the canonical vector shape of a dispatch, returns the vector sizes for a
 // particular linalg op within that dispatch.
-static SmallVector<int64_t> getVectorSizes(
-    linalg::LinalgOp linalgOp, ArrayRef<int64_t> canonicalVectorShape) {
+static SmallVector<int64_t>
+getVectorSizes(linalg::LinalgOp linalgOp,
+               ArrayRef<int64_t> canonicalVectorShape) {
   // Try to infer the vector sizes from the IR. If it fails, try to get them
   // from the lowering config.
   auto inferredVectorSizes = inferVectorSizesFromIR(linalgOp);
@@ -202,7 +203,8 @@
   SmallVector<int64_t> vecSize(
       canonicalVectorShape.take_front(linalgOp.getNumLoops()));
   for (auto [idx, val] : llvm::enumerate(linalgOp.getStaticLoopRanges())) {
-    if (ShapedType::isDynamic(val)) continue;
+    if (ShapedType::isDynamic(val))
+      continue;
     vecSize[idx] = std::max(vecSize[idx], val);
   }
 
@@ -211,7 +213,7 @@
 
 class LLVMCPUVectorizationPass
     : public LLVMCPUVectorizationBase<LLVMCPUVectorizationPass> {
- public:
+public:
   using LLVMCPUVectorizationBase::LLVMCPUVectorizationBase;
   LLVMCPUVectorizationPass(const LLVMCPUVectorizationPassOptions &options) {
     this->enableVectorMasking.setValue(options.enableVectorMasking);
@@ -237,7 +239,8 @@
   IRRewriter rewriter(context);
   SmallVector<Operation *> candidates;
   funcOp.walk([&](Operation *op) {
-    if (isa<linalg::LinalgOp>(op)) candidates.push_back(op);
+    if (isa<linalg::LinalgOp>(op))
+      candidates.push_back(op);
     if (vectorizePadding && enableVectorMasking && isa<tensor::PadOp>(op))
       candidates.push_back(op);
   });
@@ -250,7 +253,8 @@
         auto ty = padOp.getResultType();
         // TODO(hanchung): Infer the vector sizes for pad op after
         // maskedVectorize method allows dynamic result shapes.
-        if (!ty.hasStaticShape()) continue;
+        if (!ty.hasStaticShape())
+          continue;
         vectorSizes.append(ty.getShape().begin(), ty.getShape().end());
       }
     }
@@ -297,14 +301,14 @@
   linalg::hoistRedundantVectorTransfers(funcOp);
   linalg::hoistRedundantVectorTransfersOnTensor(funcOp);
 }
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass() {
   return std::make_unique<LLVMCPUVectorizationPass>();
 }
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass(
-    const LLVMCPUVectorizationPassOptions &options) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMCPUVectorizationPass(const LLVMCPUVectorizationPassOptions &options) {
   return std::make_unique<LLVMCPUVectorizationPass>(options);
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index 22e5601..6831195 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -153,11 +153,13 @@
 //===---------------------------------------------------------------------===//
 
 static bool isValidInterchange(ArrayRef<int64_t> interchange, int numLoops) {
-  if (interchange.empty()) return true;
+  if (interchange.empty())
+    return true;
   llvm::SmallDenseSet<int64_t> s;
   s.insert(interchange.begin(), interchange.end());
   for (int i = 0; i < numLoops; ++i) {
-    if (!s.contains(i)) return false;
+    if (!s.contains(i))
+      return false;
   }
   return true;
 }
@@ -259,8 +261,10 @@
   SmallVector<int64_t> shape = linalgOp.getStaticLoopRanges();
   for (auto sizes : tilingConfig.getTileSizes()) {
     for (auto [i, size] : llvm::enumerate(sizes)) {
-      if (size == 1) shape[i] = 1;
-      if (shape[i] == -1 || size == 0) continue;
+      if (size == 1)
+        shape[i] = 1;
+      if (shape[i] == -1 || size == 0)
+        continue;
       if (shape[i] % size != 0) {
         shape[i] = -1;
       } else {
@@ -764,5 +768,5 @@
   variantPM.addPass(createLLVMCPUAssignImportOrdinalsPass());
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
index b03c91a..5e490c8 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
@@ -20,7 +20,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 namespace mlir {
 namespace iree_compiler {
@@ -34,5 +34,5 @@
   return TargetMLTransformInfo();
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
index 06ffd6f..912ec43 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
@@ -21,11 +21,11 @@
   unsigned defaultMaxTransposeUnrollFactor =
       std::numeric_limits<unsigned>::max();
 
-  static const TargetMLTransformInfo getTargetMLTransformInfo(
-      IREE::HAL::ExecutableTargetAttr targetAttr);
+  static const TargetMLTransformInfo
+  getTargetMLTransformInfo(IREE::HAL::ExecutableTargetAttr targetAttr);
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMCPU_TARGETMLTRANSFORMINFO_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMCPU_TARGETMLTRANSFORMINFO_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.cpp
index 5f597fd..1cbbd82 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.cpp
@@ -29,22 +29,22 @@
   //       [vector-parallel], [vector-reduction]]
   int numTileLevels = loweringConfig.getTileSizes().size();
   switch (numTileLevels) {
-    case 3:
-      tilingLevelToActualLevelMap[VectorReductionTiles] = 2;
-      [[fallthrough]];
-    case 2:
-      tilingLevelToActualLevelMap[VectorParallelTiles] = 1;
-      [[fallthrough]];
-    case 1:
-      tilingLevelToActualLevelMap[DistributionTiles] = 0;
-      break;
-    case MaxNumTileLevels:
-      for (int i = 0; i < MaxNumTileLevels; ++i) {
-        tilingLevelToActualLevelMap[i] = i;
-      }
-      break;
-    default:
-      break;
+  case 3:
+    tilingLevelToActualLevelMap[VectorReductionTiles] = 2;
+    [[fallthrough]];
+  case 2:
+    tilingLevelToActualLevelMap[VectorParallelTiles] = 1;
+    [[fallthrough]];
+  case 1:
+    tilingLevelToActualLevelMap[DistributionTiles] = 0;
+    break;
+  case MaxNumTileLevels:
+    for (int i = 0; i < MaxNumTileLevels; ++i) {
+      tilingLevelToActualLevelMap[i] = i;
+    }
+    break;
+  default:
+    break;
   }
 };
 
@@ -67,19 +67,19 @@
 /// configuration.
 SmallVector<int64_t> TilingConfig::getFusableLevels() {
   switch (getNumTilingLevels()) {
-    case 0:
-      return {};
-    case 1:
-      // Only distribution level.
-      return {0};
-    case 3:
-      // Distribution + vector parallel levels.
-      return {0, 1};
-    case 5:
-      // Distribution + cache parallel levels.
-      return {0, 1};
-    default:
-      llvm_unreachable("Unexpected number of tiling levels");
+  case 0:
+    return {};
+  case 1:
+    // Only distribution level.
+    return {0};
+  case 3:
+    // Distribution + vector parallel levels.
+    return {0, 1};
+  case 5:
+    // Distribution + cache parallel levels.
+    return {0, 1};
+  default:
+    llvm_unreachable("Unexpected number of tiling levels");
   }
 }
 
@@ -92,5 +92,5 @@
   return actualLevel;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.h
index 90c36b5..d3e96dd 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TileSizeSelection.h
@@ -24,7 +24,7 @@
 ///   4. [[distribution], [cache-parallel], [cache-reduction],
 ///       [vector-parallel], [vector-reduction]]
 class TilingConfig {
- public:
+public:
   TilingConfig(IREE::Codegen::LoweringConfigAttr lc);
 
   /// Returns the number of tiling levels of the configuration.
@@ -99,7 +99,7 @@
     return loweringConfig.getNativeVectorSizeVals();
   }
 
- private:
+private:
   /// Internal representation for all the supported tiling levels. All or just
   /// a subset of them may be available in a valid configuration.
   enum TilingLevel : unsigned {
@@ -123,7 +123,7 @@
       tilingLevelToActualLevelMap;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMCPU_TILESIZESELECTION_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMCPU_TILESIZESELECTION_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TransformExtensions/LLVMCPUExtensions.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/TransformExtensions/LLVMCPUExtensions.h
index d066640..60d16c6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TransformExtensions/LLVMCPUExtensions.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TransformExtensions/LLVMCPUExtensions.h
@@ -26,12 +26,12 @@
 // Hook to register LLVMCPU transformations to the transform dialect.
 class LLVMCPUExtensions
     : public transform::TransformDialectExtension<LLVMCPUExtensions> {
- public:
+public:
   LLVMCPUExtensions();
 };
-}  // namespace transform_dialect
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace transform_dialect
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMCPU_TRANSFORMEXTENSIONS_LLVMCPUEXTENSIONS_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMCPU_TRANSFORMEXTENSIONS_LLVMCPUEXTENSIONS_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.cpp
index ffe9358..58ded71 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.cpp
@@ -14,10 +14,11 @@
 namespace mlir {
 namespace iree_compiler {
 
-std::optional<StringRef> getCpuFeatures(
-    IREE::HAL::ExecutableTargetAttr targetAttr) {
+std::optional<StringRef>
+getCpuFeatures(IREE::HAL::ExecutableTargetAttr targetAttr) {
   auto cpuFeatures = getConfigStringAttr(targetAttr, "cpu_features");
-  if (!cpuFeatures) return std::nullopt;
+  if (!cpuFeatures)
+    return std::nullopt;
   return cpuFeatures->getValue();
 }
 
@@ -102,5 +103,5 @@
   return hasFeature(targetAttr, "+sme");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.h
index 45cb10a..06aaaee 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Utils.h
@@ -13,8 +13,8 @@
 namespace iree_compiler {
 
 /// Returns the CPU target features associated with the `targetAttr`, if set.
-std::optional<StringRef> getCpuFeatures(
-    IREE::HAL::ExecutableTargetAttr targetAttr);
+std::optional<StringRef>
+getCpuFeatures(IREE::HAL::ExecutableTargetAttr targetAttr);
 
 /// Methods to get target information.
 bool isX86(IREE::HAL::ExecutableTargetAttr targetAttr);
@@ -51,7 +51,7 @@
 /// Returns true if the 'targetAttr' contains '+sme' in its cpu features.
 bool hasSMEFeature(IREE::HAL::ExecutableTargetAttr targetAttr);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMCPU_UTILS_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMCPU_UTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp
index cbe7c43..e9217f4 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp
@@ -250,9 +250,9 @@
   const char *asmClobbers = nullptr;
 
   void validate() const {
-    assert(m0 * k0 == lhsRegSize * lhsRegs);  // number of elements of LHS
-    assert(n0 * k0 == rhsRegSize * rhsRegs);  // number of elements of RHS
-    assert(m0 * n0 == accRegSize * accRegs);  // number of elements of Accum
+    assert(m0 * k0 == lhsRegSize * lhsRegs); // number of elements of LHS
+    assert(n0 * k0 == rhsRegSize * rhsRegs); // number of elements of RHS
+    assert(m0 * n0 == accRegSize * accRegs); // number of elements of Accum
     assert(lhsType != ScalarType::None);
     assert(rhsType != ScalarType::None);
     assert(accType != ScalarType::None);
@@ -292,15 +292,15 @@
   kernel.lhsType = MMTKernel::ScalarType::I8;
   kernel.rhsType = MMTKernel::ScalarType::I8;
   kernel.accType = MMTKernel::ScalarType::I32;
-  kernel.m0 = 8;  // shape: 8x1x8, outer-product.
-  kernel.k0 = 1;  // note: we would have enough registers to widen to 12x1x8
-  kernel.n0 = 8;  // if needed.
-  kernel.lhsRegSize = 8;  // LHS NEON register type: int8x8
-  kernel.rhsRegSize = 8;  // RHS NEON register type: int8x8
-  kernel.accRegSize = 4;  // Accum NEON register type: int32x4
+  kernel.m0 = 8; // shape: 8x1x8, outer-product.
+  kernel.k0 = 1; // note: we would have enough registers to widen to 12x1x8
+  kernel.n0 = 8; // if needed.
+  kernel.lhsRegSize = 8; // LHS NEON register type: int8x8
+  kernel.rhsRegSize = 8; // RHS NEON register type: int8x8
+  kernel.accRegSize = 4; // Accum NEON register type: int32x4
   kernel.lhsRegs = 1;
   kernel.rhsRegs = 1;
-  kernel.accRegs = 16;  // = 8*8/4 for 8x8 accumulators, 4 per register
+  kernel.accRegs = 16; // = 8*8/4 for 8x8 accumulators, 4 per register
   kernel.asmImpl = R"ASM(
       // NEON does not have instructions to multiply int8 values and accumulate
       // into int32. This kernel sign-extends int8 to int16, then uses
@@ -337,15 +337,15 @@
   kernel.lhsType = MMTKernel::ScalarType::I8;
   kernel.rhsType = MMTKernel::ScalarType::I8;
   kernel.accType = MMTKernel::ScalarType::I32;
-  kernel.m0 = 8;  // shape: 8x8x1, matrix*vector
+  kernel.m0 = 8; // shape: 8x8x1, matrix*vector
   kernel.k0 = 8;
   kernel.n0 = 1;
-  kernel.lhsRegSize = 16;  // LHS NEON register type: int8x16
-  kernel.rhsRegSize = 8;   // RHS NEON register type: int8x8
-  kernel.accRegSize = 4;   // Accum NEON register type: int32x4
-  kernel.lhsRegs = 4;      // = 8x8/16 for 8x8 LHS elems, 16 per register
+  kernel.lhsRegSize = 16; // LHS NEON register type: int8x16
+  kernel.rhsRegSize = 8;  // RHS NEON register type: int8x8
+  kernel.accRegSize = 4;  // Accum NEON register type: int32x4
+  kernel.lhsRegs = 4;     // = 8x8/16 for 8x8 LHS elems, 16 per register
   kernel.rhsRegs = 1;
-  kernel.accRegs = 2;  // = 8/4 for 8 accumulators, 4 per register
+  kernel.accRegs = 2; // = 8/4 for 8 accumulators, 4 per register
   kernel.asmImpl = R"ASM(
     // This kernel multiplies int8 values into temporary int16 values in
     // registers v8--v15, then performs additions. We can't use
@@ -407,15 +407,15 @@
   kernel.lhsType = MMTKernel::ScalarType::I8;
   kernel.rhsType = MMTKernel::ScalarType::I8;
   kernel.accType = MMTKernel::ScalarType::I32;
-  kernel.m0 = 8;  // shape: 8x4x8. We would have enough registers to widen this
-  kernel.k0 = 4;  // to 12x4x8 if needed.
+  kernel.m0 = 8; // shape: 8x4x8. We would have enough registers to widen this
+  kernel.k0 = 4; // to 12x4x8 if needed.
   kernel.n0 = 8;
-  kernel.lhsRegSize = 16;  // LHS NEON register type: int8x16
-  kernel.rhsRegSize = 16;  // RHS NEON register type: int8x16
-  kernel.accRegSize = 4;   // Accum NEON register type: int32x4
-  kernel.lhsRegs = 2;      // = 8x4/16 for 8x4 LHS elems, 16 per register
-  kernel.rhsRegs = 2;      // = 8x4/16 for 8x4 RHS elems, 16 per register
-  kernel.accRegs = 16;     // = 8x8/4 for 8x8 Accum elems, 4 per register
+  kernel.lhsRegSize = 16; // LHS NEON register type: int8x16
+  kernel.rhsRegSize = 16; // RHS NEON register type: int8x16
+  kernel.accRegSize = 4;  // Accum NEON register type: int32x4
+  kernel.lhsRegs = 2;     // = 8x4/16 for 8x4 LHS elems, 16 per register
+  kernel.rhsRegs = 2;     // = 8x4/16 for 8x4 RHS elems, 16 per register
+  kernel.accRegs = 16;    // = 8x8/4 for 8x8 Accum elems, 4 per register
   kernel.asmImpl = R"ASM(
       // Note on the operands ordering: RHS before LHS, because we want
       // to multiply a 4x4 tile from RHS against a row-vector from LHS to
@@ -450,19 +450,19 @@
   kernel.lhsType = MMTKernel::ScalarType::I8;
   kernel.rhsType = MMTKernel::ScalarType::I8;
   kernel.accType = MMTKernel::ScalarType::I32;
-  kernel.m0 = 8;  // shape: 8x4x1.
+  kernel.m0 = 8; // shape: 8x4x1.
   kernel.k0 = 4;
   kernel.n0 = 1;
-  kernel.lhsRegSize = 16;  // LHS NEON register type: int8x16
-  kernel.rhsRegSize = 4;   // RHS NEON register type: int8x4. This is very small
-                           // and forces sub-optimal codegen. This needs to be
-                           // widened by peeling the surrounding loop, not by
-                           // increasing the k0 of this MMT, which would change
-                           // the data layout in an unwanted way.
-  kernel.accRegSize = 4;   // LHS NEON register type: int8x16
-  kernel.lhsRegs = 2;      // = 8x4/16 for 8x4 LHS elems, 16 per register
-  kernel.rhsRegs = 1;      // = 4/4 for 4 LHS elems, 4 per register
-  kernel.accRegs = 2;      // = 8/4 for 8 Accum elems, 4 per register
+  kernel.lhsRegSize = 16; // LHS NEON register type: int8x16
+  kernel.rhsRegSize = 4;  // RHS NEON register type: int8x4. This is very small
+                          // and forces sub-optimal codegen. This needs to be
+                          // widened by peeling the surrounding loop, not by
+                          // increasing the k0 of this MMT, which would change
+                          // the data layout in an unwanted way.
+  kernel.accRegSize = 4;  // LHS NEON register type: int8x16
+  kernel.lhsRegs = 2;     // = 8x4/16 for 8x4 LHS elems, 16 per register
+  kernel.rhsRegs = 1;     // = 4/4 for 4 LHS elems, 4 per register
+  kernel.accRegs = 2;     // = 8/4 for 8 Accum elems, 4 per register
   kernel.asmImpl = R"ASM(
       sdot $(acc:0).4s, $(lhs:0).16b, $(rhs:0).4b[0]
       sdot $(acc:1).4s, $(lhs:1).16b, $(rhs:0).4b[0]
@@ -479,15 +479,15 @@
   kernel.lhsType = MMTKernel::ScalarType::I8;
   kernel.rhsType = MMTKernel::ScalarType::I8;
   kernel.accType = MMTKernel::ScalarType::I32;
-  kernel.m0 = 8;  // shape: 8x8x8. We would have enough registers to widen this
-  kernel.k0 = 8;  // to 12x8x8 if needed.
+  kernel.m0 = 8; // shape: 8x8x8. We would have enough registers to widen this
+  kernel.k0 = 8; // to 12x8x8 if needed.
   kernel.n0 = 8;
-  kernel.lhsRegSize = 16;  // LHS NEON register type: int8x16
-  kernel.rhsRegSize = 16;  // RHS NEON register type: int8x16
-  kernel.accRegSize = 4;   // Accum NEON register type: int32x4
-  kernel.lhsRegs = 4;      // = 8x8/16 for 8x4 LHS elems, 16 per register
-  kernel.rhsRegs = 4;      // = 8x8/16 for 8x4 RHS elems, 16 per register
-  kernel.accRegs = 16;     // = 8x8/4 for 8x8 Accum elems, 4 per register
+  kernel.lhsRegSize = 16; // LHS NEON register type: int8x16
+  kernel.rhsRegSize = 16; // RHS NEON register type: int8x16
+  kernel.accRegSize = 4;  // Accum NEON register type: int32x4
+  kernel.lhsRegs = 4;     // = 8x8/16 for 8x4 LHS elems, 16 per register
+  kernel.rhsRegs = 4;     // = 8x8/16 for 8x4 RHS elems, 16 per register
+  kernel.accRegs = 16;    // = 8x8/4 for 8x8 Accum elems, 4 per register
   kernel.asmImpl = R"ASM(
       // What's with the horrendous shuffles (zip, uzp instructions) ?
       // The smmla instruction works with a 2x2 accumulator tile.
@@ -677,14 +677,14 @@
 // Constructs the mlir::Type corresponding to a scalar type.
 Type mlirType(MLIRContext *context, MMTKernel::ScalarType t) {
   switch (t) {
-    case MMTKernel::ScalarType::None:
-      break;
-    case MMTKernel::ScalarType::I8:
-      return IntegerType::get(context, 8, IntegerType::Signless);
-    case MMTKernel::ScalarType::I32:
-      return IntegerType::get(context, 32, IntegerType::Signless);
-    case MMTKernel::ScalarType::F32:
-      return FloatType::getF32(context);
+  case MMTKernel::ScalarType::None:
+    break;
+  case MMTKernel::ScalarType::I8:
+    return IntegerType::get(context, 8, IntegerType::Signless);
+  case MMTKernel::ScalarType::I32:
+    return IntegerType::get(context, 32, IntegerType::Signless);
+  case MMTKernel::ScalarType::F32:
+    return FloatType::getF32(context);
   }
   assert(false);
   return Type();
@@ -693,7 +693,7 @@
 // This class is a helper for patterns generating custom kernels based on
 // MMTKernel structs.
 class MMTKernelGenerator {
- public:
+public:
   MMTKernelGenerator(MLIRContext *context, MMTKernel kernel,
                      IREE::HAL::ExecutableTargetAttr target)
       : context(context), kernel(kernel), target(target) {
@@ -732,7 +732,7 @@
     return VectorType::get({kernel.accRegSize}, getAccType());
   }
 
- private:
+private:
   MLIRContext *const context;
   const MMTKernel kernel;
   const IREE::HAL::ExecutableTargetAttr target;
@@ -764,7 +764,7 @@
   }
   // Helper class to build the constraints string of an inline_asm op.
   class Constraints {
-   private:
+  private:
     // The LLVM inline asm syntax is documented here:
     // https://llvm.org/docs/LangRef.html#inline-assembler-expressions
     SmallVector<std::string> inputs;
@@ -772,20 +772,20 @@
     SmallVector<std::string> tiedInputs;
     SmallVector<std::string> clobbers;
 
-   public:
+  public:
     enum class Kind { Input, InputOutput };
     // Add a new constraint.
     void add(Kind kind, const std::string &constraintCode) {
       switch (kind) {
-        case Kind::Input:
-          inputs.push_back(constraintCode);
-          return;
-        case Kind::InputOutput:
-          // An input represented by a number `i` is a tied input, tied to the
-          // i-th output.
-          tiedInputs.push_back(llvm::itostr(outputs.size()));
-          outputs.push_back(std::string("=") + constraintCode);
-          return;
+      case Kind::Input:
+        inputs.push_back(constraintCode);
+        return;
+      case Kind::InputOutput:
+        // An input represented by a number `i` is a tied input, tied to the
+        // i-th output.
+        tiedInputs.push_back(llvm::itostr(outputs.size()));
+        outputs.push_back(std::string("=") + constraintCode);
+        return;
       }
       assert(false);
     }
@@ -902,10 +902,10 @@
 ///                 [...]
 ///
 class MMTCustomKernelPattern : public OpRewritePattern<vector::ContractionOp> {
- private:
+private:
   MMTKernel kernel;
 
- public:
+public:
   MMTCustomKernelPattern(MLIRContext *context, MMTKernel kernel)
       : OpRewritePattern<vector::ContractionOp>(context), kernel(kernel) {}
 
@@ -1018,7 +1018,7 @@
 /// It matches the same patterns as MMT_8x4x8_i8i8i32_Aarch64Dotprod_InlineAsm
 struct MMT_8x4x8_i8i8i32_Aarch64Dotprod_Intrinsics
     : public OpRewritePattern<vector::ContractionOp> {
- public:
+public:
   using OpRewritePattern<vector::ContractionOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(vector::ContractionOp contractionOp,
@@ -1040,7 +1040,8 @@
     Value inLhs = getUnpromotedInput(I8Type, I32Type, lhs);
     Value inRhs = getUnpromotedInput(I8Type, I32Type, rhs);
 
-    if (!inLhs || !inRhs) return failure();
+    if (!inLhs || !inRhs)
+      return failure();
 
     auto loc = contractionOp.getLoc();
 
@@ -1117,7 +1118,7 @@
 
 class VectorContractCustomKernelsPass
     : public VectorContractCustomKernelsBase<VectorContractCustomKernelsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<vector::VectorDialect, LLVM::LLVMDialect,
                     arm_neon::ArmNeonDialect>();
@@ -1133,11 +1134,11 @@
     }
   }
 
- private:
+private:
   IREE::HAL::ExecutableTargetAttr target;
 };
 
-}  // namespace
+} // namespace
 
 void populateVectorContractCustomKernelsPatterns(
     IREE::HAL::ExecutableTargetAttr target, RewritePatternSet &patterns) {
@@ -1178,5 +1179,5 @@
   return std::make_unique<VectorContractCustomKernelsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/VerifyLinalgTransformLegality.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/VerifyLinalgTransformLegality.cpp
index 9610f2c..660d4e3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/VerifyLinalgTransformLegality.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/VerifyLinalgTransformLegality.cpp
@@ -20,7 +20,7 @@
     : VerifyLinalgTransformLegalityBase<VerifyLinalgTransformLegalityPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void VerifyLinalgTransformLegalityPass::runOnOperation() {
   auto moduleOp = getOperation();
@@ -42,5 +42,5 @@
   return std::make_unique<VerifyLinalgTransformLegalityPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
index 63308ca..9691407 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
@@ -36,7 +36,8 @@
     if (addressOfOp.getGlobal(symbolTableCollection).getAddrSpace() == 3)
       addressOfOps.push_back(addressOfOp);
   });
-  if (addressOfOps.size() == 0) return;
+  if (addressOfOps.size() == 0)
+    return;
   OpBuilder builder(moduleOp);
   builder.setInsertionPoint(&moduleOp.front());
   auto type =
@@ -102,7 +103,8 @@
   LogicalResult matchAndRewrite(MathOpTy mathOp,
                                 PatternRewriter &rewriter) const override {
     auto vecType = llvm::dyn_cast<VectorType>(mathOp.getType());
-    if (!vecType) return failure();
+    if (!vecType)
+      return failure();
     Location loc = mathOp.getLoc();
     Value newVector = rewriter.create<arith::ConstantOp>(
         loc, vecType, rewriter.getZeroAttr(vecType));
@@ -134,7 +136,8 @@
 
   LogicalResult matchAndRewrite(memref::AllocOp allocOp,
                                 PatternRewriter &rewriter) const override {
-    if (!hasSharedMemoryAddressSpace(allocOp.getType())) return failure();
+    if (!hasSharedMemoryAddressSpace(allocOp.getType()))
+      return failure();
     ArrayRef<int64_t> shape = allocOp.getType().getShape();
     if (llvm::any_of(shape,
                      [](int64_t dim) { return dim == ShapedType::kDynamic; })) {
@@ -207,8 +210,8 @@
 /// InterfaceBindingOp and kernel argument index.
 /// For instance if the kernel has (set, bindings) A(0, 1), B(1, 5), C(0, 6) it
 /// will return the mapping [A, 0], [C, 1], [B, 2]
-static llvm::SmallDenseMap<SetBinding, size_t> getKernelArgMapping(
-    Operation *funcOp) {
+static llvm::SmallDenseMap<SetBinding, size_t>
+getKernelArgMapping(Operation *funcOp) {
   llvm::SetVector<SetBinding> usedBindingSet;
   funcOp->walk([&](IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
     usedBindingSet.insert(
@@ -217,7 +220,8 @@
   auto sparseBindings = usedBindingSet.takeVector();
   std::sort(sparseBindings.begin(), sparseBindings.end(),
             [](SetBinding lhs, SetBinding rhs) {
-              if (lhs.first == rhs.first) return lhs.second.ult(rhs.second);
+              if (lhs.first == rhs.first)
+                return lhs.second.ult(rhs.second);
               return lhs.first.ult(rhs.first);
             });
   llvm::SmallDenseMap<SetBinding, size_t> mapBindingArgIndex;
@@ -228,17 +232,18 @@
 }
 
 class ConvertFunc : public ConvertToLLVMPattern {
- public:
+public:
   explicit ConvertFunc(MLIRContext *context, LLVMTypeConverter &converter)
       : ConvertToLLVMPattern(mlir::func::FuncOp::getOperationName(), context,
                              converter, 100) {}
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     auto funcOp = cast<func::FuncOp>(op);
     FunctionType fnType = funcOp.getFunctionType();
     (void)fnType;
-    if (!funcOp.isPublic()) return failure();
+    if (!funcOp.isPublic())
+      return failure();
 
     // illegal FuncOp must have 0 inputs.
     assert(fnType.getNumInputs() == 0 && fnType.getNumResults() == 0);
@@ -262,7 +267,8 @@
     });
     llvmInputTypes.resize(argMapping.size() + numConstants,
                           rewriter.getI32Type());
-    if (!llvmInputTypes.empty()) signatureConverter.addInputs(llvmInputTypes);
+    if (!llvmInputTypes.empty())
+      signatureConverter.addInputs(llvmInputTypes);
 
     // Construct newFunc with all attributes except return type & symbol name.
     SmallVector<NamedAttribute> funcAttrs;
@@ -297,7 +303,7 @@
 };
 
 class ConvertIREEBindingSubspanOp : public ConvertToLLVMPattern {
- public:
+public:
   explicit ConvertIREEBindingSubspanOp(MLIRContext *context,
                                        LLVMTypeConverter &converter)
       : ConvertToLLVMPattern(
@@ -323,12 +329,13 @@
     return allReadOnly;
   }
 
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     // Bail until nested under an LLVMFuncOp.
     auto llvmFuncOp = op->getParentOfType<LLVM::LLVMFuncOp>();
-    if (!llvmFuncOp) return failure();
+    if (!llvmFuncOp)
+      return failure();
     assert(llvmFuncOp.getNumArguments() > 0);
 
     auto argMapping = getKernelArgMapping(llvmFuncOp);
@@ -441,18 +448,19 @@
 };
 
 class ConvertIREEConstantOp : public ConvertToLLVMPattern {
- public:
+public:
   explicit ConvertIREEConstantOp(MLIRContext *context,
                                  LLVMTypeConverter &converter)
       : ConvertToLLVMPattern(
             IREE::HAL::InterfaceConstantLoadOp::getOperationName(), context,
             converter) {}
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     // Bail until nested under an LLVMFuncOp.
     auto llvmFuncOp = op->getParentOfType<LLVM::LLVMFuncOp>();
-    if (!llvmFuncOp) return failure();
+    if (!llvmFuncOp)
+      return failure();
     assert(llvmFuncOp.getNumArguments() > 0);
 
     auto argMapping = getKernelArgMapping(llvmFuncOp);
@@ -473,9 +481,9 @@
     : public OpConversionPattern<InterfaceOpTy> {
   using OpConversionPattern<InterfaceOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      InterfaceOpTy op, typename InterfaceOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(InterfaceOpTy op, typename InterfaceOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     int32_t index = static_cast<int32_t>(op.getDimension().getSExtValue());
     std::array<gpu::Dimension, 3> dimAttr{gpu::Dimension::x, gpu::Dimension::y,
                                           gpu::Dimension::z};
@@ -484,7 +492,7 @@
   }
 };
 
-}  // anonymous namespace
+} // anonymous namespace
 
 void populateLLVMConversionPatterns(MLIRContext *context,
                                     RewritePatternSet &patterns,
@@ -538,5 +546,5 @@
       });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
index 6dffc1f..5a61d77 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
@@ -34,7 +34,7 @@
 void populateGpuMemorySpaceAttributeConversions(
     TypeConverter &typeConverter, const MemorySpaceMapping &mapping);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMGPU_COMMON_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMGPU_COMMON_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
index 3851be9..552da71 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
@@ -72,14 +72,14 @@
     populateGpuMemorySpaceAttributeConversions(
         converter, [](gpu::AddressSpace space) -> unsigned {
           switch (space) {
-            case gpu::AddressSpace::Global:
-              return static_cast<unsigned>(
-                  NVVM::NVVMMemorySpace::kGlobalMemorySpace);
-            case gpu::AddressSpace::Workgroup:
-              return static_cast<unsigned>(
-                  NVVM::NVVMMemorySpace::kSharedMemorySpace);
-            case gpu::AddressSpace::Private:
-              return 0;
+          case gpu::AddressSpace::Global:
+            return static_cast<unsigned>(
+                NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+          case gpu::AddressSpace::Workgroup:
+            return static_cast<unsigned>(
+                NVVM::NVVMMemorySpace::kSharedMemorySpace);
+          case gpu::AddressSpace::Private:
+            return 0;
           }
           llvm_unreachable("unknown address space enum value");
           return 0;
@@ -144,7 +144,8 @@
       populateFuncToLLVMFuncOpConversionPattern(converter, llvmPatterns);
       configureGpuToNVVMConversionLegality(target);
       target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp funcOp) {
-        if (isEntryPoint(funcOp)) return false;
+        if (isEntryPoint(funcOp))
+          return false;
         return true;
       });
       if (failed(applyPartialConversion(m, target, std::move(llvmPatterns)))) {
@@ -155,11 +156,11 @@
   }
 };
 
-}  // anonymous namespace
+} // anonymous namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createConvertToNVVMPass() {
   return std::make_unique<ConvertToNVVMPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
index ed661ad..a02c312 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
@@ -53,12 +53,12 @@
     populateGpuMemorySpaceAttributeConversions(
         converter, [](gpu::AddressSpace space) {
           switch (space) {
-            case gpu::AddressSpace::Global:
-              return 1;
-            case gpu::AddressSpace::Workgroup:
-              return 3;
-            case gpu::AddressSpace::Private:
-              return 5;
+          case gpu::AddressSpace::Global:
+            return 1;
+          case gpu::AddressSpace::Workgroup:
+            return 3;
+          case gpu::AddressSpace::Private:
+            return 5;
           }
           llvm_unreachable("unknown address space enum value");
           return 0;
@@ -113,7 +113,8 @@
       populateFuncToLLVMFuncOpConversionPattern(converter, llvmPatterns);
       configureGpuToROCDLConversionLegality(target);
       target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp funcOp) {
-        if (isEntryPoint(funcOp)) return false;
+        if (isEntryPoint(funcOp))
+          return false;
         return true;
       });
       if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
@@ -122,11 +123,11 @@
   }
 };
 
-}  // anonymous namespace
+} // anonymous namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createConvertToROCDLPass() {
   return std::make_unique<ConvertToROCDLPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp
index 62382ab..d34a8fa 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp
@@ -43,8 +43,9 @@
       ldMatrixOp.getTranspose(), ldMatrixOp.getNumTiles());
 }
 
-SmallVector<OpFoldResult> getLdMatrixOpViewSizeForEachDim(
-    RewriterBase &rewriter, nvgpu::LdMatrixOp ldMatrixOp) {
+SmallVector<OpFoldResult>
+getLdMatrixOpViewSizeForEachDim(RewriterBase &rewriter,
+                                nvgpu::LdMatrixOp ldMatrixOp) {
   Location loc = ldMatrixOp.getLoc();
   auto extractStridedMetadataOp =
       rewriter.create<memref::ExtractStridedMetadataOp>(
@@ -65,8 +66,8 @@
   return finalSizes;
 }
 
-static void populateExtractAddressComputationGPUPatterns(
-    RewritePatternSet &patterns) {
+static void
+populateExtractAddressComputationGPUPatterns(RewritePatternSet &patterns) {
   populateExtractAddressComputationPatterns(patterns);
   patterns.add<StoreLoadLikeOpRewriter<
       nvgpu::LdMatrixOp,
@@ -85,7 +86,7 @@
           ExtractAddressComputationGPUPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void ExtractAddressComputationGPUPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
@@ -99,5 +100,5 @@
 std::unique_ptr<Pass> createExtractAddressComputationGPUPass() {
   return std::make_unique<ExtractAddressComputationGPUPass>();
 }
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
index 1d7038b..05310ae 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -55,19 +55,19 @@
     llvm::cl::init(""));
 
 /// Flag to force using WMMA tensorcore operations.
-llvm::cl::opt<bool> clGPUUseWMMA(
-    "iree-codegen-llvmgpu-use-wmma",
-    llvm::cl::desc("force use of wmma operations for tensorcore"),
-    llvm::cl::init(false));
+llvm::cl::opt<bool>
+    clGPUUseWMMA("iree-codegen-llvmgpu-use-wmma",
+                 llvm::cl::desc("force use of wmma operations for tensorcore"),
+                 llvm::cl::init(false));
 
 /// Flag used to toggle using mma.sync vs wmma when targetting tensorcore.
-llvm::cl::opt<bool> clGPUUseMMASync(
-    "iree-codegen-llvmgpu-use-mma-sync",
-    llvm::cl::desc("force use mma sync instead of wmma ops"),
-    llvm::cl::init(false));
+llvm::cl::opt<bool>
+    clGPUUseMMASync("iree-codegen-llvmgpu-use-mma-sync",
+                    llvm::cl::desc("force use mma sync instead of wmma ops"),
+                    llvm::cl::init(false));
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 namespace {
 
@@ -88,7 +88,7 @@
 
 // Simt codegen does not do software pipelining.
 constexpr unsigned softwarePipelineDepthSimt = 0;
-}  // namespace
+} // namespace
 
 /// Return the best combination of tile size and wg size. It will then used to
 /// pick the best size aligned with the shape dimension.
@@ -109,9 +109,9 @@
 
 /// Return the best combination of tile size and wg size when using tensorcore
 /// operations.
-static void getTensorCoreConfig(
-    SmallVectorImpl<TileWorkgroupSizePair> &tileSizes, Type elementType,
-    int64_t M, int64_t N, int64_t K) {
+static void
+getTensorCoreConfig(SmallVectorImpl<TileWorkgroupSizePair> &tileSizes,
+                    Type elementType, int64_t M, int64_t N, int64_t K) {
   // Based on early analysis we found that 128x256x32_3 gives acceptable
   // performance across many of the large matrix sizes for f16 and fp32. This
   // needs to be refined into a better startegy based on empircal data but this
@@ -164,12 +164,14 @@
 static TargetInfo getTargetInfo(func::FuncOp entryPoint) {
   TargetInfo info;
   // TODO: fill out target info for other vendors.
-  if (!isCudaTarget(entryPoint)) return info;
+  if (!isCudaTarget(entryPoint))
+    return info;
   // All the cuda target are assumed to have warp support.
   info.hasWarpShuffle = true;
   StringRef targetName = getTargetArch(entryPoint);
   // If no target name is set assume all the features are off.
-  if (targetName == "") return info;
+  if (targetName == "")
+    return info;
   if (!StringRef(targetName).starts_with("sm_")) {
     entryPoint.emitError("unknown target name ") << targetName;
     return info;
@@ -191,7 +193,8 @@
                                const TargetInfo &targetInfo) {
   // Limit tensor core pipeline to matmul as not all combinations of transpose
   // are supported upstream.
-  if (!targetInfo.hasTF32TensorCore) return false;
+  if (!targetInfo.hasTF32TensorCore)
+    return false;
   if (!(isa<linalg::MatmulOp>(op) || isa<linalg::BatchMatmulOp>(op))) {
     assert(linalg::isaContractionOpInterface(op));
     // If this is not a named op matmul check some properties to make sure that
@@ -200,22 +203,27 @@
     // should be a reduce.
     Region &body = op->getRegion(0);
     Region::OpIterator it = body.op_begin();
-    if (it == body.op_end() || !isa<arith::MulFOp>(*(it++))) return false;
-    if (it == body.op_end() || !isa<arith::AddFOp>(*(it++))) return false;
-    if (it == body.op_end() || !isa<linalg::YieldOp>(*(it++))) return false;
+    if (it == body.op_end() || !isa<arith::MulFOp>(*(it++)))
+      return false;
+    if (it == body.op_end() || !isa<arith::AddFOp>(*(it++)))
+      return false;
+    if (it == body.op_end() || !isa<linalg::YieldOp>(*(it++)))
+      return false;
     AffineMap outputMap = op.getMatchingIndexingMap(op.getDpsInitOperand(0));
-    if (outputMap.getNumResults() != outputMap.getNumDims() - 1) return false;
+    if (outputMap.getNumResults() != outputMap.getNumDims() - 1)
+      return false;
     OpBuilder b(op);
     for (unsigned i = 0, e = outputMap.getNumResults(); i < e - 1; i++) {
-      if (outputMap.getResult(i) != b.getAffineDimExpr(i)) return false;
+      if (outputMap.getResult(i) != b.getAffineDimExpr(i))
+        return false;
     }
   }
   return true;
 }
 
 /// Decides which tensorcore operations to use.
-static IREE::Codegen::DispatchLoweringPassPipeline getTensorCorePipeline(
-    Type elementType) {
+static IREE::Codegen::DispatchLoweringPassPipeline
+getTensorCorePipeline(Type elementType) {
   // Currently mma.sync is on by default for fp16 only.
   IREE::Codegen::DispatchLoweringPassPipeline codegenPipeline =
       IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulTensorCore;
@@ -274,7 +282,7 @@
         }
 
         tileSizes.emplace_back(
-            std::move(workgroupTileSizes));  // Workgroup level.
+            std::move(workgroupTileSizes)); // Workgroup level.
         return setOpConfigAndEntryPointFnTranslation(
             entryPoint, op, tileSizes, pipeline, workgroupSize,
             /*subgroupSize=*/std::nullopt, softwarePipelineDepth);
@@ -377,7 +385,8 @@
   int64_t tileK = config.tileSize[2];
   // Since specialization doesn't work for K loop and peeling is not enabled yet
   // we pick a tileK size that is aligned on the K size.
-  if (ShapedType::isDynamic(sizeK)) tileK = 1;
+  if (ShapedType::isDynamic(sizeK))
+    tileK = 1;
   while (sizeK % tileK != 0) {
     tileK >>= 1;
   }
@@ -451,15 +460,16 @@
       break;
     }
   }
-  tileSizes.emplace_back(std::move(workgroupTileSizes));  // Workgroup level
+  tileSizes.emplace_back(std::move(workgroupTileSizes)); // Workgroup level
   return setOpConfigAndEntryPointFnTranslation(
       entryPoint, op, tileSizes,
       IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute,
       workgroupSize);
 }
 
-static SmallVector<int64_t> getDefaultWorkgroupTileSizesForPackUnPack(
-    TilingInterface op, int64_t defaultSize) {
+static SmallVector<int64_t>
+getDefaultWorkgroupTileSizesForPackUnPack(TilingInterface op,
+                                          int64_t defaultSize) {
   unsigned numLoops = op.getLoopIteratorTypes().size();
   auto partitionedLoops = cast<PartitionableLoopsInterface>(op.getOperation())
                               .getPartitionableLoops(kNumMaxParallelDims);
@@ -486,7 +496,8 @@
   SmallVector<int64_t> innerTiles = packOp.getStaticTiles();
   ArrayRef<int64_t> dimPos = packOp.getInnerDimsPos();
   for (auto [pos, size] : llvm::zip_equal(dimPos, innerTiles)) {
-    if (tileSizes[pos] == 0 || ShapedType::isDynamic(size)) continue;
+    if (tileSizes[pos] == 0 || ShapedType::isDynamic(size))
+      continue;
     tileSizes[pos] = tileSizes[pos] / size;
     tileSizes[pos] = std::max<int64_t>(tileSizes[pos], 1);
   }
@@ -551,7 +562,7 @@
              shape.back() % (workgroupSize[0] * vectorSize) != 0) {
         vectorSize /= 2;
       }
-      if (vectorSize == 1)  // assume there is fastpath + slowpath
+      if (vectorSize == 1) // assume there is fastpath + slowpath
         vectorSize = 4;
       int64_t problemSize = std::accumulate(
           shape.begin(), shape.end(), 1,
@@ -568,7 +579,8 @@
         int64_t id = 0;
         for (int64_t dim : llvm::reverse(shape)) {
           // Unit loops are already skipped.
-          if (dim == 1) continue;
+          if (dim == 1)
+            continue;
           if (dim < flatWG) {
             skipInnerTiling++;
             workgroupSize[id] = dim;
@@ -578,7 +590,8 @@
           }
           flatWG = flatWG / dim;
           id++;
-          if (flatWG <= 1 || id >= workgroupSize.size()) break;
+          if (flatWG <= 1 || id >= workgroupSize.size())
+            break;
         }
         break;
       }
@@ -610,7 +623,8 @@
         workgroupTileSizes[depth - 1] = 0;
         skipInnerTiling--;
         id++;
-        if (id >= workgroupSize.size()) break;
+        if (id >= workgroupSize.size())
+          break;
         continue;
       }
       workgroupTileSizes[depth - 1] = workgroupSize[id] * vectorSize;
@@ -623,7 +637,7 @@
     // is the most inner dimension.
     workgroupTileSizes.append(linalgOp.getNumReductionLoops(), 4);
   }
-  tileSizes.emplace_back(std::move(workgroupTileSizes));  // Workgroup level
+  tileSizes.emplace_back(std::move(workgroupTileSizes)); // Workgroup level
   return setOpConfigAndEntryPointFnTranslation(entryPoint, op, tileSizes,
                                                passPipeline, workgroupSize);
 }
@@ -638,7 +652,8 @@
       auto expr = dim.dyn_cast<AffineDimExpr>();
       if (expr && expr.getPosition() == d) {
         auto type = llvm::cast<ShapedType>(op->getOperand(mapIdx).getType());
-        if (type.isDynamicDim(dimIdx)) return std::nullopt;
+        if (type.isDynamicDim(dimIdx))
+          return std::nullopt;
         return type.getDimSize(dimIdx);
       }
     }
@@ -686,19 +701,24 @@
 static LogicalResult setWarpReductionConfig(func::FuncOp entryPoint,
                                             linalg::LinalgOp op,
                                             const TargetInfo &targetInfo) {
-  if (!targetInfo.hasWarpShuffle) return failure();
-  if (!isa<linalg::GenericOp>(op)) return failure();
+  if (!targetInfo.hasWarpShuffle)
+    return failure();
+  if (!isa<linalg::GenericOp>(op))
+    return failure();
   // TODO(thomasraoux): Enable dynamic shape.
   bool hasDynamicShape = false;
   entryPoint.walk([&hasDynamicShape](linalg::LinalgOp op) {
-    if (op.hasDynamicShape()) hasDynamicShape = true;
+    if (op.hasDynamicShape())
+      hasDynamicShape = true;
   });
-  if (hasDynamicShape) return failure();
+  if (hasDynamicShape)
+    return failure();
   SmallVector<unsigned> reductionDims;
   op.getReductionDims(reductionDims);
   if (reductionDims.size() != 1 || reductionDims[0] != op.getNumLoops() - 1)
     return failure();
-  if (op.getRegionOutputArgs().size() != 1) return failure();
+  if (op.getRegionOutputArgs().size() != 1)
+    return failure();
 
   // Only support projected permutation, this could be extended to projected
   // permutated with broadcast.
@@ -713,29 +733,35 @@
     SmallVector<Operation *> combinerOps;
     if (matchReduction(op.getRegionOutputArgs(), i, combinerOps) &&
         combinerOps.size() == 1) {
-      if (foundSingleReductionOutput) return failure();
+      if (foundSingleReductionOutput)
+        return failure();
       foundSingleReductionOutput = true;
       continue;
     }
     if (!op.getMatchingIndexingMap(op.getDpsInitOperand(i)).isIdentity())
       return failure();
   }
-  if (!foundSingleReductionOutput) return failure();
+  if (!foundSingleReductionOutput)
+    return failure();
 
   std::optional<int64_t> dimSize = getLinalgDimSize(op, reductionDims[0]);
-  if (!dimSize || *dimSize % cudaWarpSize != 0) return failure();
+  if (!dimSize || *dimSize % cudaWarpSize != 0)
+    return failure();
 
   const Type elementType =
       llvm::cast<ShapedType>(op.getDpsInitOperand(0)->get().getType())
           .getElementType();
-  if (!elementType.isIntOrFloat()) return failure();
+  if (!elementType.isIntOrFloat())
+    return failure();
   unsigned bitWidth = elementType.getIntOrFloatBitWidth();
   // Reduction distribution only supports 8/16/32 bit types now.
-  if (bitWidth != 32 && bitWidth != 16 && bitWidth != 8) return failure();
+  if (bitWidth != 32 && bitWidth != 16 && bitWidth != 8)
+    return failure();
 
   const unsigned largestLoadSizeInBits = 128;
   unsigned vectorSize = largestLoadSizeInBits / bitWidth;
-  while ((*dimSize / vectorSize) % cudaWarpSize != 0) vectorSize /= 2;
+  while ((*dimSize / vectorSize) % cudaWarpSize != 0)
+    vectorSize /= 2;
 
   // TODO: Add reduction tiling to handle larger reductions.
   const int64_t maxWorkgroupSize = 1024;
@@ -755,8 +781,8 @@
   SmallVector<int64_t> reductionTileSizes(numLoops, 0);
   reductionTileSizes.push_back(groupSize * vectorSize);
   TileSizesListType tileSizes;
-  tileSizes.emplace_back(std::move(workgroupTileSizes));  // Workgroup level
-  tileSizes.emplace_back(std::move(reductionTileSizes));  // reduction level
+  tileSizes.emplace_back(std::move(workgroupTileSizes)); // Workgroup level
+  tileSizes.emplace_back(std::move(reductionTileSizes)); // reduction level
   return setOpConfigAndEntryPointFnTranslation(
       entryPoint, op, tileSizes,
       IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUWarpReduction,
@@ -845,7 +871,8 @@
       // Handle 4 elements per thread for the innermost dimension. We need
       // this for vectorized load.
       chosenTileSize = 4;
-      if (inputDim % (dim * chosenTileSize) != 0) continue;
+      if (inputDim % (dim * chosenTileSize) != 0)
+        continue;
     } else {
       for (int64_t t = residualTilingFactor; t >= 1; t >>= 1)
         if (inputDim % (dim * t) == 0) {
@@ -923,7 +950,7 @@
   int64_t residualThreads = subgroupSize;
   int64_t residualTilingFactor = bestTilingFactor;
 
-  SmallVector<int64_t, 3> workgroupSize(3, 1);  // (X, Y, Z)
+  SmallVector<int64_t, 3> workgroupSize(3, 1); // (X, Y, Z)
   SmallVector<int64_t> workgroupTileSizes(4, 1);
 
   if (isNCHW) {
@@ -1048,8 +1075,10 @@
 
   for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
     auto exportOp = exportOps.lookup(funcOp.getName());
-    if (!exportOp) continue;
-    if (getTranslationInfo(exportOp)) continue;
+    if (!exportOp)
+      continue;
+    if (getTranslationInfo(exportOp))
+      continue;
     SmallVector<Operation *> computeOps = getComputeOps(funcOp);
     Operation *rootOperation = nullptr;
     // Find the root operation. linalg.generic and linalg.fill are not root
@@ -1082,7 +1111,8 @@
       continue;
     }
 
-    if (failed(setRootConfig(funcOp, rootOperation))) continue;
+    if (failed(setRootConfig(funcOp, rootOperation)))
+      continue;
 
     // Propogate the configuration to the other ops.
     // TODO(ravishankarm, thomasraoux): This is a very specific use (and
@@ -1093,7 +1123,8 @@
     if (IREE::Codegen::LoweringConfigAttr config =
             getLoweringConfig(rootOperation)) {
       for (auto op : computeOps) {
-        if (op == rootOperation) continue;
+        if (op == rootOperation)
+          continue;
         setLoweringConfig(op, config);
       }
     }
@@ -1101,5 +1132,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.h
index d085245..b3f1a17 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.h
@@ -14,6 +14,6 @@
 
 LogicalResult initGPULaunchConfig(ModuleOp moduleOp);
 
-}  // namespace iree_compiler
-}  // namespace mlir
-#endif  // IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_
+} // namespace iree_compiler
+} // namespace mlir
+#endif // IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp
index 6c29141..7ce7750 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp
@@ -82,12 +82,12 @@
     });
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>>
 createLLVMGPUCastAddressSpaceFunction() {
   return std::make_unique<LLVMGPUCastAddressSpaceFunctionPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp
index 61313e3..ec3e3f1 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp
@@ -39,7 +39,7 @@
 class LLVMGPULowerExecutableTargetPass
     : public LLVMGPULowerExecutableTargetBase<
           LLVMGPULowerExecutableTargetPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     // clang-format off
     registry
@@ -65,7 +65,7 @@
 
   void runOnOperation() override;
 
- private:
+private:
   Option<bool> testLoweringConfiguration{
       *this, "test-lowering-configuration",
       llvm::cl::desc(
@@ -75,25 +75,28 @@
           "for lit tests. Not for general usage"),
       llvm::cl::init(false)};
 };
-}  // namespace
+} // namespace
 
 /// Verify that valid configuration is set for all ops within the compiled
 /// module.
 template <typename F>
-static LogicalResult verifyLoweringConfiguration(
-    ModuleOp module, IREE::Codegen::TranslationInfoAttr translationInfo,
-    ArrayRef<int64_t> workgroupSize, F verificationFn) {
+static LogicalResult
+verifyLoweringConfiguration(ModuleOp module,
+                            IREE::Codegen::TranslationInfoAttr translationInfo,
+                            ArrayRef<int64_t> workgroupSize, F verificationFn) {
   auto walkResult = module.walk([&](Operation *op) -> WalkResult {
     IREE::Codegen::LoweringConfigAttr loweringConfig = getLoweringConfig(op);
-    if (!loweringConfig) return WalkResult::advance();
+    if (!loweringConfig)
+      return WalkResult::advance();
     return verificationFn(op, loweringConfig, translationInfo, workgroupSize);
   });
   return failure(walkResult.wasInterrupted());
 }
 
-static LogicalResult verifyEntryPoint(
-    ModuleOp moduleOp, IREE::Codegen::TranslationInfoAttr translationInfo,
-    IREE::HAL::ExecutableExportOp exportOp) {
+static LogicalResult
+verifyEntryPoint(ModuleOp moduleOp,
+                 IREE::Codegen::TranslationInfoAttr translationInfo,
+                 IREE::HAL::ExecutableExportOp exportOp) {
   std::optional<mlir::ArrayAttr> workgroupSizeAttr =
       exportOp.getWorkgroupSize();
 
@@ -150,43 +153,42 @@
 
   if (!testLoweringConfiguration && translationInfo.has_value()) {
     switch (translationInfo.value().getDispatchLoweringPassPipeline()) {
-      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute:
-        addGPUSimpleDistributePassPipeline(executableLoweringPipeline);
-        break;
-      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize:
-        addGPUVectorizationPassPipeline(executableLoweringPipeline);
-        break;
-      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt:
-        addGPUMatmulSimtPassPipeline(executableLoweringPipeline);
-        break;
-      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulTensorCore:
-        addGPUMatmulTensorCorePassPipeline(
-            executableLoweringPipeline,
-            translationInfo.value().getSoftwarePipelineDepth());
-        break;
-      case IREE::Codegen::DispatchLoweringPassPipeline::
-          LLVMGPUMatmulTensorCoreMmaSync:
-        addGPUMatmulTensorCoreMmaSyncPassPipeline(
-            executableLoweringPipeline,
-            translationInfo.value().getSoftwarePipelineDepth());
-        break;
-      case IREE::Codegen::DispatchLoweringPassPipeline::
-          LLVMGPUTransposeSharedMem:
-        addGPUTransposePassPipeline(executableLoweringPipeline);
-        break;
-      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUWarpReduction:
-        addGPUWarpReductionPassPipeline(executableLoweringPipeline);
-        break;
-      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUPackUnPack:
-        addGPUPackUnPackPasses(executableLoweringPipeline);
-        break;
-      // Transform-dialect pipelines.
-      case IREE::Codegen::DispatchLoweringPassPipeline::TransformDialectCodegen:
-        addGPUTransformDialectPasses(executableLoweringPipeline);
-        break;
-      default:
-        variantOp.emitOpError("Unsupported pipeline on GPU target.");
-        return signalPassFailure();
+    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute:
+      addGPUSimpleDistributePassPipeline(executableLoweringPipeline);
+      break;
+    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize:
+      addGPUVectorizationPassPipeline(executableLoweringPipeline);
+      break;
+    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt:
+      addGPUMatmulSimtPassPipeline(executableLoweringPipeline);
+      break;
+    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulTensorCore:
+      addGPUMatmulTensorCorePassPipeline(
+          executableLoweringPipeline,
+          translationInfo.value().getSoftwarePipelineDepth());
+      break;
+    case IREE::Codegen::DispatchLoweringPassPipeline::
+        LLVMGPUMatmulTensorCoreMmaSync:
+      addGPUMatmulTensorCoreMmaSyncPassPipeline(
+          executableLoweringPipeline,
+          translationInfo.value().getSoftwarePipelineDepth());
+      break;
+    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUTransposeSharedMem:
+      addGPUTransposePassPipeline(executableLoweringPipeline);
+      break;
+    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUWarpReduction:
+      addGPUWarpReductionPassPipeline(executableLoweringPipeline);
+      break;
+    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUPackUnPack:
+      addGPUPackUnPackPasses(executableLoweringPipeline);
+      break;
+    // Transform-dialect pipelines.
+    case IREE::Codegen::DispatchLoweringPassPipeline::TransformDialectCodegen:
+      addGPUTransformDialectPasses(executableLoweringPipeline);
+      break;
+    default:
+      variantOp.emitOpError("Unsupported pipeline on GPU target.");
+      return signalPassFailure();
     }
   }
 
@@ -200,5 +202,5 @@
   return std::make_unique<LLVMGPULowerExecutableTargetPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp
index 0346f54..49d603e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp
@@ -30,7 +30,8 @@
     needBarrier = true;
   } else {
     for (Operation &op : entryBlock->getOperations()) {
-      if (&op == alloc) break;
+      if (&op == alloc)
+        break;
       if (op.getNumRegions() != 0) {
         needBarrier = true;
         break;
@@ -41,7 +42,8 @@
       }
     }
   }
-  if (!needBarrier) return;
+  if (!needBarrier)
+    return;
   OpBuilder builder(alloc);
   // TODO: make it a option if needed.
   bool hasAsyncCopies = true;
@@ -60,7 +62,7 @@
 struct LLVMGPUPackSharedMemoryAllocPass
     : public LLVMGPUPackSharedMemoryAllocBase<
           LLVMGPUPackSharedMemoryAllocPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<nvgpu::NVGPUDialect>();
   }
@@ -79,7 +81,8 @@
     SmallVector<AliasGroup> aliasGroups;
     analyseAllocsForPacking(funcOp, allocs, aliasGroups);
     // If there is 1 or less alias group there is nothing to do.
-    if (aliasGroups.size() <= 1) return;
+    if (aliasGroups.size() <= 1)
+      return;
 
     // Pack all the allocations into one i8 alloc.
     // We may need to add extra barriers to make sure we are done writting or
@@ -94,12 +97,12 @@
     packAllocs(builder, funcOp, aliasGroups);
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createLLVMGPUPackSharedMemoryAlloc() {
   return std::make_unique<LLVMGPUPackSharedMemoryAllocPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPasses.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPasses.h
index 2688f4e..ecab5de 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPasses.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPasses.h
@@ -93,8 +93,8 @@
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMGPUTensorPadPass();
 
 /// Perform tiling and distribution to threads.
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMGPUTileAndDistribute(
-    bool distributeToWarp = false);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMGPUTileAndDistribute(bool distributeToWarp = false);
 
 /// Lower vector ops before convertion to LLVM.
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMGPUVectorLoweringPass();
@@ -104,10 +104,11 @@
     GPUTensorCoreType tensorCoreType = GPUTensorCoreType::WMMA);
 
 /// Lowering calling vectorization patterns.
-LogicalResult verifyGPUMatmulPipeline(
-    Operation *op, IREE::Codegen::LoweringConfigAttr loweringConfig,
-    IREE::Codegen::TranslationInfoAttr translationInfo,
-    ArrayRef<int64_t> workgroupSize);
+LogicalResult
+verifyGPUMatmulPipeline(Operation *op,
+                        IREE::Codegen::LoweringConfigAttr loweringConfig,
+                        IREE::Codegen::TranslationInfoAttr translationInfo,
+                        ArrayRef<int64_t> workgroupSize);
 
 //------------------------------------------------------------------------------
 // Test passes
@@ -115,7 +116,7 @@
 
 std::unique_ptr<OperationPass<ModuleOp>> createTestLLVMGPULegalizePass();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMGPU_PASSES_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMGPU_PASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp
index a65160f..fdbdc28 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp
@@ -51,11 +51,13 @@
                                          bool useMmaSyncShape) {
   auto unrollOrder = [](Operation *op) -> std::optional<SmallVector<int64_t>> {
     auto contract = dyn_cast<vector::ContractionOp>(op);
-    if (!contract) return std::nullopt;
+    if (!contract)
+      return std::nullopt;
     return gpuMmaUnrollOrder(contract);
   };
   auto getNativeShape = [useMmaSyncShape](Operation *op) {
-    if (useMmaSyncShape) return getMmaNativeVectorSize(op);
+    if (useMmaSyncShape)
+      return getMmaNativeVectorSize(op);
     return getWmmaNativeVectorSize(op);
   };
   vector::populateVectorUnrollPatterns(
@@ -156,15 +158,15 @@
     }
   }
 
- private:
+private:
   GPUTensorCoreType tensorCoreType;
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createLLVMGPUTensorCoreVectorizationPass(GPUTensorCoreType tensorCoreType) {
   return std::make_unique<LLVMGPUTensorCoreVectorizationPass>(tensorCoreType);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
index 8cc54eb..a374ed3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
@@ -23,8 +23,9 @@
 
 namespace {
 
-static FailureOr<SmallVector<int64_t>> getPaddedShapeFromTensorLoad(
-    IREE::Flow::DispatchTensorLoadOp tensorLoad, ArrayRef<int64_t> origShape) {
+static FailureOr<SmallVector<int64_t>>
+getPaddedShapeFromTensorLoad(IREE::Flow::DispatchTensorLoadOp tensorLoad,
+                             ArrayRef<int64_t> origShape) {
   // Determine the padded shape from the load.
   SmallVector<int64_t> paddedShape(origShape.begin(), origShape.end());
   for (const auto &[index, size] :
@@ -37,16 +38,17 @@
               presburger::BoundType::UB, size.get<Value>(),
               /*dim=*/std::nullopt,
               /*stopCondition=*/nullptr, /*closedUB=*/true);
-      if (failed(upperBound)) return failure();
+      if (failed(upperBound))
+        return failure();
       paddedShape[index] = *upperBound;
     }
   }
   return paddedShape;
 }
 
-static FailureOr<SmallVector<Value>> rewriteAsPaddedOp(
-    IRRewriter &rewriter, linalg::LinalgOp linalgOp,
-    linalg::LinalgOp &paddedOp) {
+static FailureOr<SmallVector<Value>>
+rewriteAsPaddedOp(IRRewriter &rewriter, linalg::LinalgOp linalgOp,
+                  linalg::LinalgOp &paddedOp) {
   Location loc = linalgOp.getLoc();
 
   IRRewriter::InsertionGuard g(rewriter);
@@ -67,7 +69,8 @@
     }
     FailureOr<SmallVector<int64_t>> maybePaddedShape =
         getPaddedShapeFromTensorLoad(tensorLoad, linalgOp.getShape(&opOperand));
-    if (failed(maybePaddedShape)) return failure();
+    if (failed(maybePaddedShape))
+      return failure();
     auto paddedShape = *maybePaddedShape;
 
     Value paddingValue = rewriter.create<arith::ConstantOp>(
@@ -103,7 +106,8 @@
         llvm::cast<RankedTensorType>(paddedResult.getType()).getRank();
     SmallVector<OpFoldResult> offsets(rank, rewriter.getIndexAttr(0));
     SmallVector<OpFoldResult> sizes;
-    for (OpFoldResult v : reifiedResultShapes[resultNumber]) sizes.push_back(v);
+    for (OpFoldResult v : reifiedResultShapes[resultNumber])
+      sizes.push_back(v);
     SmallVector<OpFoldResult> strides(rank, rewriter.getIndexAttr(1));
     paddedSubviewResults.push_back(rewriter.create<tensor::ExtractSliceOp>(
         loc, paddedResult, offsets, sizes, strides));
@@ -127,7 +131,8 @@
 
   FailureOr<SmallVector<int64_t>> maybePaddedShape =
       getPaddedShapeFromTensorLoad(tensorLoad, op.getDestType().getShape());
-  if (failed(maybePaddedShape)) return failure();
+  if (failed(maybePaddedShape))
+    return failure();
   auto paddedShape = *maybePaddedShape;
 
   // Pad to the shape that makes tensor.unpack ops produce full tiles.
@@ -216,11 +221,11 @@
     });
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMGPUTensorPadPass() {
   return std::make_unique<LLVMGPUTensorPadPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
index 22430e4..03bda5e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
@@ -101,8 +101,9 @@
 
 /// Return the tile size associated to one thread or warp based on the number of
 /// element in the group.
-static SmallVector<Value> calculateDistributedTileSize(
-    ArrayRef<int64_t> numElements, OpBuilder &builder, Operation *operation) {
+static SmallVector<Value>
+calculateDistributedTileSize(ArrayRef<int64_t> numElements, OpBuilder &builder,
+                             Operation *operation) {
   SmallVector<int64_t> blockTileSize = getTileSizes(operation, 0);
   SmallVector<Value> tileSizesVal;
   // Use partitionedLoop to know what loop needs to be distributed.
@@ -124,18 +125,21 @@
   unsigned idIdx = 0;
   std::reverse(distributedDim.begin(), distributedDim.end());
   for (unsigned depth : partitionedLoops) {
-    if (depth >= blockTileSize.size()) continue;
+    if (depth >= blockTileSize.size())
+      continue;
     tileSizesVal[depth] = builder.create<arith::ConstantIndexOp>(
         operation->getLoc(),
         llvm::divideCeil(blockTileSize[depth], distributedDim[idIdx++]));
-    if (idIdx == kNumMaxParallelDims) break;
+    if (idIdx == kNumMaxParallelDims)
+      break;
   }
   return tileSizesVal;
 }
 
 /// Patterns for warp level tiling.
-static void populateTilingToWarpPatterns(
-    RewritePatternSet &patterns, SmallVectorImpl<int64_t> &workgroupSize) {
+static void
+populateTilingToWarpPatterns(RewritePatternSet &patterns,
+                             SmallVectorImpl<int64_t> &workgroupSize) {
   std::array<int64_t, 3> warpPerWorkgroup = {
       workgroupSize[0] / kWarpSize, workgroupSize[1], workgroupSize[2]};
 
@@ -168,18 +172,19 @@
 }
 
 /// Patterns for thread level tiling.
-static void populateTilingToInvocationPatterns(
-    RewritePatternSet &patterns, SmallVectorImpl<int64_t> &workgroupSize) {
+static void
+populateTilingToInvocationPatterns(RewritePatternSet &patterns,
+                                   SmallVectorImpl<int64_t> &workgroupSize) {
   linalg::TileSizeComputationFunction getInnerTileSizeFn =
       [&](OpBuilder &builder, Operation *operation) {
         return calculateDistributedTileSize(workgroupSize, builder, operation);
       };
-  auto getThreadProcInfoFn = [&workgroupSize](
-                                 OpBuilder &builder, Location loc,
-                                 ArrayRef<Range> parallelLoopRanges) {
-    return getGPUThreadIdsAndCounts(builder, loc, parallelLoopRanges.size(),
-                                    workgroupSize);
-  };
+  auto getThreadProcInfoFn =
+      [&workgroupSize](OpBuilder &builder, Location loc,
+                       ArrayRef<Range> parallelLoopRanges) {
+        return getGPUThreadIdsAndCounts(builder, loc, parallelLoopRanges.size(),
+                                        workgroupSize);
+      };
   linalg::LinalgLoopDistributionOptions invocationDistributionOptions;
   invocationDistributionOptions.procInfo = getThreadProcInfoFn;
 
@@ -206,11 +211,11 @@
 namespace {
 struct LLVMGPUTileAndDistributePass
     : public LLVMGPUTileAndDistributeBase<LLVMGPUTileAndDistributePass> {
- private:
+private:
   // Distribute the workloads to warp if true otherwise distribute to threads.
   bool distributeToWarp = false;
 
- public:
+public:
   LLVMGPUTileAndDistributePass(bool distributeToWarp)
       : distributeToWarp(distributeToWarp) {}
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -219,7 +224,8 @@
   void runOnOperation() override {
     MLIRContext *context = &getContext();
     auto funcOp = getOperation();
-    if (!isEntryPoint(funcOp)) return;
+    if (!isEntryPoint(funcOp))
+      return;
 
     // Promote C matrix and propagate the potential  fill producer into the temp
     // allocation. This needs to be done before reduction tiling.
@@ -316,12 +322,12 @@
     });
   }
 };
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMGPUTileAndDistribute(
-    bool distributeToWarp) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMGPUTileAndDistribute(bool distributeToWarp) {
   return std::make_unique<LLVMGPUTileAndDistributePass>(distributeToWarp);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp
index a62c661..473b284 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp
@@ -62,11 +62,11 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createLLVMGPUVectorLoweringPass() {
   return std::make_unique<LLVMGPUVectorLoweringPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
index f02c0b4..49a199e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
@@ -43,7 +43,7 @@
     : public LLVMGPUVectorToGPUBase<LLVMGPUVectorToGPUPass> {
   LLVMGPUVectorToGPUPass(GPUTensorCoreType tensorCoreType)
       : tensorCoreType(tensorCoreType) {}
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<gpu::GPUDialect, nvgpu::NVGPUDialect, affine::AffineDialect,
                     memref::MemRefDialect>();
   }
@@ -98,15 +98,15 @@
     }
   }
 
- private:
+private:
   GPUTensorCoreType tensorCoreType;
 };
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createLLVMGPUVectorToGPU(
-    GPUTensorCoreType tensorCoreType) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createLLVMGPUVectorToGPU(GPUTensorCoreType tensorCoreType) {
   return std::make_unique<LLVMGPUVectorToGPUPass>(tensorCoreType);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
index 37e0cca..b2a0fa0 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
@@ -37,9 +37,9 @@
 namespace mlir {
 namespace iree_compiler {
 
-static llvm::cl::opt<unsigned> logSwizzleTile(
-    "iree-codegen-log-swizzle-tile", llvm::cl::desc("log swizzle tile value"),
-    llvm::cl::init(0));
+static llvm::cl::opt<unsigned>
+    logSwizzleTile("iree-codegen-log-swizzle-tile",
+                   llvm::cl::desc("log swizzle tile value"), llvm::cl::init(0));
 
 static FailureOr<Value> gpuAllocationFn(OpBuilder &builder, Location loc,
                                         MemRefType memRefType,
@@ -68,7 +68,8 @@
   if (hasSharedMemoryAddressSpace(llvm::cast<MemRefType>(to.getType()))) {
     needsBarrier = true;
   }
-  if (needsBarrier) builder.create<gpu::BarrierOp>(loc);
+  if (needsBarrier)
+    builder.create<gpu::BarrierOp>(loc);
   Operation *copy = builder.create<memref::CopyOp>(loc, from, to);
   if (needsBarrier) {
     setMarker(copy, getCopyToWorkgroupMemoryMarker());
@@ -91,8 +92,9 @@
       createEraseHALDescriptorTypeFromMemRefPass());
 }
 
-static void tileAndDistributeToWorkgroup(
-    OpPassManager &pm, bool useWARForCooperativeMatrixCodegen = false) {
+static void
+tileAndDistributeToWorkgroup(OpPassManager &pm,
+                             bool useWARForCooperativeMatrixCodegen = false) {
   pm.addPass(createTileAndDistributeToWorkgroupsPass(
       /*maxWorkgroupParallelDims=*/1,
       linalg::DistributionMethod::CyclicNumProcsEqNumIters));
@@ -535,7 +537,8 @@
   // debug info well.
   pm.addPass(createStripDebugInfoPass());
   // Cast address spaces of all function arguments to generic
-  if (!useROCM) pm.addPass(createLLVMGPUCastAddressSpaceFunction());
+  if (!useROCM)
+    pm.addPass(createLLVMGPUCastAddressSpaceFunction());
   if (useROCM) {
     // convert to ROCDL.
     pm.addPass(createConvertToROCDLPass());
@@ -588,5 +591,5 @@
   });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
index 7573767..f6d1c20 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
@@ -89,7 +89,8 @@
 
   IREE::HAL::ExecutableExportOp exportOp;
   state.getTopLevel()->walk([&](IREE::HAL::ExecutableExportOp op) {
-    if (op.getSymName() == target.getName()) exportOp = op;
+    if (op.getSymName() == target.getName())
+      exportOp = op;
   });
   if (!exportOp) {
     state.getTopLevel()->emitOpError("no IREE::HAL::ExecutableExportOp found");
@@ -103,7 +104,8 @@
       mlir::transform::gpu::mapNestedForallToThreadsImpl(
           rewriter, transformOp, target, getWorkgroupDims(), getWarpDims(),
           true);
-  if (!diag.succeeded()) return diag;
+  if (!diag.succeeded())
+    return diag;
   auto newAttr = rewriter.getIndexArrayAttr(getWorkgroupDims());
   rewriter.startRootUpdate(exportOp);
   exportOp->setAttr(exportOp.getWorkgroupSizeAttrName(), newAttr);
@@ -137,8 +139,9 @@
 /// Return success if any replacement occurred, failure otherwise.
 // TODO: this is currently brittle, what we really need here is a scope-aware
 // SCCP.
-static LogicalResult replaceAllUsesOfLaneWithin(
-    RewriterBase &b, vector::WarpExecuteOnLane0Op executeOp) {
+static LogicalResult
+replaceAllUsesOfLaneWithin(RewriterBase &b,
+                           vector::WarpExecuteOnLane0Op executeOp) {
   OpBuilder::InsertionGuard g(b);
   b.setInsertionPoint(executeOp);
   Value zero = b.create<arith::ConstantIndexOp>(executeOp.getLoc(), 0);
@@ -146,7 +149,8 @@
   Value laneId = executeOp.getLaneid();
   bool applied = false;
   for (Operation *user : llvm::make_early_inc_range(laneId.getUsers())) {
-    if (!executeOp->isProperAncestor(user)) continue;
+    if (!executeOp->isProperAncestor(user))
+      continue;
     b.startRootUpdate(user);
     user->replaceUsesOfWith(laneId, zero);
     b.finalizeRootUpdate(user);
@@ -166,14 +170,16 @@
       !ifOp.getElseRegion().empty())
     return failure();
   auto pred = ifOp.getCondition().getDefiningOp<arith::CmpIOp>();
-  if (!pred) return failure();
+  if (!pred)
+    return failure();
   auto EQ = arith::CmpIPredicate::eq;
   auto SLT = arith::CmpIPredicate::slt;
   auto SLE = arith::CmpIPredicate::sle;
   auto ULT = arith::CmpIPredicate::ult;
   auto ULE = arith::CmpIPredicate::ule;
   if (auto threadIdOp = pred.getLhs().getDefiningOp<gpu::ThreadIdOp>()) {
-    if (threadIdOp.getDimension() != gpu::Dimension::x) return failure();
+    if (threadIdOp.getDimension() != gpu::Dimension::x)
+      return failure();
     if (pred.getPredicate() == EQ && isConstantIntValue(pred.getRhs(), 0))
       return threadIdOp;
     if (pred.getPredicate() == SLE && isConstantIntValue(pred.getRhs(), 0))
@@ -190,7 +196,8 @@
   auto UGT = arith::CmpIPredicate::ugt;
   auto UGE = arith::CmpIPredicate::uge;
   if (auto threadIdOp = pred.getRhs().getDefiningOp<gpu::ThreadIdOp>()) {
-    if (threadIdOp.getDimension() != gpu::Dimension::x) return failure();
+    if (threadIdOp.getDimension() != gpu::Dimension::x)
+      return failure();
     if (pred.getPredicate() == EQ && isConstantIntValue(pred.getLhs(), 0))
       return threadIdOp;
     if (pred.getPredicate() == SGE && isConstantIntValue(pred.getLhs(), 0))
@@ -209,13 +216,15 @@
   vector::WarpExecuteOnLane0Op warpOp;
 };
 
-static FailureOr<VectorDistributionResult> rewriteScfIfAsWarpExecuteOnLane0(
-    RewriterBase &rewriter, Location loc, scf::IfOp ifOp,
-    int64_t workgroupSizeX, int64_t warpSize) {
+static FailureOr<VectorDistributionResult>
+rewriteScfIfAsWarpExecuteOnLane0(RewriterBase &rewriter, Location loc,
+                                 scf::IfOp ifOp, int64_t workgroupSizeX,
+                                 int64_t warpSize) {
   // Bail if cond is not `if (threadIdx.x == 0)`.
   FailureOr<gpu::ThreadIdOp> maybeThreadIdxxOp =
       isThreadIdxxZeroPredicate(ifOp);
-  if (failed(maybeThreadIdxxOp)) return failure();
+  if (failed(maybeThreadIdxxOp))
+    return failure();
 
   // All the code below will be executed on a single warp given a
   // fixed (threadIdxy, threadIdxz). Note, we reuse
@@ -269,12 +278,15 @@
 }
 
 // TODO: Refactor in a generic util that can be reused.
-static HAL::ExecutableExportOp getExecutableExportOpForFunc(
-    HAL::ExecutableVariantOp halExecutableVariantOp, func::FuncOp funcOp) {
-  if (!halExecutableVariantOp || !funcOp) return {};
+static HAL::ExecutableExportOp
+getExecutableExportOpForFunc(HAL::ExecutableVariantOp halExecutableVariantOp,
+                             func::FuncOp funcOp) {
+  if (!halExecutableVariantOp || !funcOp)
+    return {};
   HAL::ExecutableExportOp exportOp;
   halExecutableVariantOp->walk([&](HAL::ExecutableExportOp op) {
-    if (op.getSymName() != funcOp.getName()) return WalkResult::advance();
+    if (op.getSymName() != funcOp.getName())
+      return WalkResult::advance();
     exportOp = op;
     return WalkResult::interrupt();
   });
@@ -347,9 +359,8 @@
     // nullptr.
     results.assign(1, nullptr);
     return mlir::emitSilenceableFailure(
-        target,
-        "scf::ifOp needs to be predicated on threadIdx.x == 0 "
-        "--- the transform is not applied");
+        target, "scf::ifOp needs to be predicated on threadIdx.x == 0 "
+                "--- the transform is not applied");
   }
 
   results.push_back(vectorDistributionResult->warpOp);
@@ -412,12 +423,13 @@
 /// until MultiDimReduction distribution is supported.
 class InsertElementToBroadcast final
     : public OpRewritePattern<vector::InsertElementOp> {
- public:
+public:
   using OpRewritePattern<vector::InsertElementOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(vector::InsertElementOp insertOp,
                                 PatternRewriter &rewriter) const override {
-    if (insertOp.getDestVectorType().getNumElements() != 1) return failure();
+    if (insertOp.getDestVectorType().getNumElements() != 1)
+      return failure();
     rewriter.replaceOpWithNewOp<vector::BroadcastOp>(
         insertOp, insertOp.getDestVectorType(), insertOp.getSource());
     return success();
@@ -447,14 +459,16 @@
                                 PatternRewriter &rewriter) const override {
     OpOperand *operand = getWarpResult(
         warpOp, [](Operation *op) { return isa<memref::LoadOp>(op); });
-    if (!operand) return failure();
+    if (!operand)
+      return failure();
     auto load = operand->get().getDefiningOp<memref::LoadOp>();
     unsigned operandIndex = operand->getOperandNumber();
     Value distributedVal = warpOp.getResult(operandIndex);
 
     SmallVector<Value> indices(load.getIndices().begin(),
                                load.getIndices().end());
-    if (!indices.empty()) return failure();
+    if (!indices.empty())
+      return failure();
 
     OpBuilder::InsertionGuard g(rewriter);
     rewriter.setInsertionPointAfter(warpOp);
@@ -492,18 +506,20 @@
     if (!iree_compiler::hasSharedMemoryAddressSpace(alloc.getType()))
       return failure();
     auto warpParent = alloc->getParentOfType<vector::WarpExecuteOnLane0Op>();
-    if (!warpParent) return failure();
+    if (!warpParent)
+      return failure();
     alloc->moveBefore(warpParent);
     // Conservatively move the dealloc after the warpOp. This may
     // extend the liverange of the allocation but is always correct.
     for (Operation *user : alloc->getUsers()) {
-      if (isa<memref::DeallocOp>(user)) user->moveAfter(warpParent);
+      if (isa<memref::DeallocOp>(user))
+        user->moveAfter(warpParent);
     }
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 static void populateMultiReductionLoweringPatterns(Operation *target,
                                                    RewritePatternSet &patterns,
@@ -518,7 +534,8 @@
 static AffineMap simpleDistributionFunction(Value val) {
   AffineMap map = AffineMap::get(val.getContext());
   auto vecType = llvm::dyn_cast<VectorType>(val.getType());
-  if (!vecType) return map;
+  if (!vecType)
+    return map;
   // Create a map (d0, d1) -> (d1) to distribute along the inner
   // dimension. Once we support n-d distribution we can add more
   // complex cases.
@@ -866,16 +883,20 @@
 /// Returns `true` if the op is defines the parallel region that is subject to
 /// barrier synchronization.
 static bool isParallelRegionBoundary(Operation *op) {
-  if (op->hasAttr("__parallel_region_boundary_for_test")) return true;
+  if (op->hasAttr("__parallel_region_boundary_for_test"))
+    return true;
 
   // We consider functions inside executable variants that have the same symbol
   // name as an export symbol.
   auto func = dyn_cast<FunctionOpInterface>(op);
-  if (!func) return false;
+  if (!func)
+    return false;
   auto parent = op->getParentOfType<ModuleOp>();
-  if (!parent) return false;
+  if (!parent)
+    return false;
   auto variant = parent->getParentOfType<HAL::ExecutableVariantOp>();
-  if (!variant) return false;
+  if (!variant)
+    return false;
   WalkResult result = variant.walk([&](HAL::ExecutableExportOp exportOp) {
     if (exportOp.getSymNameAttr() == func.getNameAttr())
       return WalkResult::interrupt();
@@ -916,15 +937,18 @@
 /// it could extract the effect information from the op, otherwise returns
 /// 'false' and conservatively populates the list with all possible effects
 /// associated with no particular value or symbol.
-static bool collectEffects(
-    Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
-    bool ignoreBarriers = true) {
+static bool
+collectEffects(Operation *op,
+               SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
+               bool ignoreBarriers = true) {
   // Skip over barriers to avoid infinite recursion (those barriers would ask
   // this barrier again).
-  if (ignoreBarriers && isa<gpu::BarrierOp>(op)) return true;
+  if (ignoreBarriers && isa<gpu::BarrierOp>(op))
+    return true;
 
   // Skip over ops that we know have no effects.
-  if (isKnownNoEffectsOpWithoutInterface(op)) return true;
+  if (isKnownNoEffectsOpWithoutInterface(op))
+    return true;
 
   // Collect effect instances the operation. Note that the implementation of
   // getEffects erases all effect instances that have the type other than the
@@ -940,7 +964,8 @@
     for (auto &region : op->getRegions()) {
       for (auto &block : region) {
         for (auto &innerOp : block)
-          if (!collectEffects(&innerOp, effects, ignoreBarriers)) return false;
+          if (!collectEffects(&innerOp, effects, ignoreBarriers))
+            return false;
       }
     }
     return true;
@@ -961,7 +986,8 @@
 bool getEffectsBefore(Operation *op,
                       SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                       bool stopAtBarrier) {
-  if (!op->getBlock()) return true;
+  if (!op->getBlock())
+    return true;
 
   // If there is a non-structured control flow, bail.
   Region *region = op->getBlock()->getParent();
@@ -980,12 +1006,14 @@
         else
           continue;
       }
-      if (!collectEffects(it, effects)) return false;
+      if (!collectEffects(it, effects))
+        return false;
     }
   }
 
   // Stop if reached the parallel region boundary.
-  if (isParallelRegionBoundary(op->getParentOp())) return true;
+  if (isParallelRegionBoundary(op->getParentOp()))
+    return true;
 
   // Otherwise, keep collecting above the parent operation.
   if (!getEffectsBefore(op->getParentOp(), effects, stopAtBarrier))
@@ -1016,7 +1044,8 @@
   bool conservative = false;
   if (!hasSingleExecutionBody(op->getParentOp()))
     op->getParentOp()->walk([&](Operation *in) {
-      if (conservative) return WalkResult::interrupt();
+      if (conservative)
+        return WalkResult::interrupt();
       if (!collectEffects(in, effects)) {
         conservative = true;
         return WalkResult::interrupt();
@@ -1036,7 +1065,8 @@
 bool getEffectsAfter(Operation *op,
                      SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                      bool stopAtBarrier) {
-  if (!op->getBlock()) return true;
+  if (!op->getBlock())
+    return true;
 
   // If there is a non-structured control flow, bail.
   Region *region = op->getBlock()->getParent();
@@ -1050,17 +1080,21 @@
     for (Operation *it = op->getNextNode(); it != nullptr;
          it = it->getNextNode()) {
       if (isa<gpu::BarrierOp>(it)) {
-        if (stopAtBarrier) return true;
+        if (stopAtBarrier)
+          return true;
         continue;
       }
-      if (!collectEffects(it, effects)) return false;
+      if (!collectEffects(it, effects))
+        return false;
     }
 
   // Stop if reached the parallel region boundary.
-  if (isParallelRegionBoundary(op->getParentOp())) return true;
+  if (isParallelRegionBoundary(op->getParentOp()))
+    return true;
 
   // Otherwise, keep collecting below the parent operation.
-  if (!getEffectsAfter(op->getParentOp(), effects, stopAtBarrier)) return false;
+  if (!getEffectsAfter(op->getParentOp(), effects, stopAtBarrier))
+    return false;
 
   // If the op is loop-like, collect effects from the leading operations until
   // we hit a barrier because they can executed after the current operation by
@@ -1077,7 +1111,8 @@
   // operation `op2` at iteration `i-1` and the side effects must be ordered
   // appropriately.
   if (isSequentialLoopLike(op->getParentOp())) {
-    if (isa<gpu::BarrierOp>(op->getBlock()->front())) return true;
+    if (isa<gpu::BarrierOp>(op->getBlock()->front()))
+      return true;
 
     bool exact = collectEffects(&op->getBlock()->front(), effects);
     return getEffectsAfter(&op->getBlock()->front(), effects,
@@ -1090,7 +1125,8 @@
   bool conservative = false;
   if (!hasSingleExecutionBody(op->getParentOp()))
     op->getParentOp()->walk([&](Operation *in) {
-      if (conservative) return WalkResult::interrupt();
+      if (conservative)
+        return WalkResult::interrupt();
       if (!collectEffects(in, effects)) {
         conservative = true;
         return WalkResult::interrupt();
@@ -1105,7 +1141,8 @@
 static Value getBase(Value v) {
   while (true) {
     Operation *definingOp = v.getDefiningOp();
-    if (!definingOp) break;
+    if (!definingOp)
+      break;
 
     bool shouldContinue =
         TypeSwitch<Operation *, bool>(v.getDefiningOp())
@@ -1123,7 +1160,8 @@
               return true;
             })
             .Default([](Operation *) { return false; });
-    if (!shouldContinue) break;
+    if (!shouldContinue)
+      break;
   }
   return v;
 }
@@ -1195,7 +1233,8 @@
       }
 
       std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(user, v);
-      if (!knownCaptureStatus || *knownCaptureStatus) return true;
+      if (!knownCaptureStatus || *knownCaptureStatus)
+        return true;
     }
   }
 
@@ -1264,11 +1303,14 @@
   bool isArg[] = {isFunctionArgument(first), isFunctionArgument(second)};
 
   // Distinct bases (allocations) cannot have been passed as an argument.
-  if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0])) return false;
+  if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
+    return false;
 
   // Non-captured base distinct values cannot conflict with another base value.
-  if (isDistinct[0] && !maybeCaptured(first)) return false;
-  if (isDistinct[1] && !maybeCaptured(second)) return false;
+  if (isDistinct[0] && !maybeCaptured(first))
+    return false;
+  if (isDistinct[1] && !maybeCaptured(second))
+    return false;
 
   // Otherwise, conservatively assume aliasing.
   DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
@@ -1309,13 +1351,14 @@
 /// effect, there is no conflict since we are only expected to see the
 /// allocation happening in the same thread and it cannot be accessed from
 /// another thread without capture (which we do handle in alias analysis).
-static bool haveConflictingEffects(
-    ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
-    ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
+static bool
+haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
+                       ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
   for (const MemoryEffects::EffectInstance &before : beforeEffects) {
     for (const MemoryEffects::EffectInstance &after : afterEffects) {
       // If cannot alias, definitely no conflict.
-      if (!mayAlias(before, after)) continue;
+      if (!mayAlias(before, after))
+        continue;
 
       // Read/read is not a conflict.
       if (isa<MemoryEffects::Read>(before.getEffect()) &&
@@ -1340,7 +1383,8 @@
       //      conflicts.
       //   2. either the program is ill-formed and we are in undefined behavior
       //      territory.
-      if (isa<MemoryEffects::Free>(before.getEffect())) continue;
+      if (isa<MemoryEffects::Free>(before.getEffect()))
+        continue;
 
       // Other kinds of effects create a conflict, e.g. read-after-write.
       LLVM_DEBUG(
@@ -1372,7 +1416,7 @@
 /// Parallel Constructs" by Moses et.al. in PPoPP 2023 and implementation in
 /// Polygeist.
 class BarrierElimination final : public OpRewritePattern<gpu::BarrierOp> {
- public:
+public:
   using OpRewritePattern<gpu::BarrierOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(gpu::BarrierOp barrier,
@@ -1417,7 +1461,7 @@
     return failure();
   }
 };
-}  // namespace
+} // namespace
 
 void transform_dialect::EliminateGpuBarriersOp::build(OpBuilder &builder,
                                                       OperationState &state,
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.h
index c59a897..c3e89ea 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.h
@@ -22,12 +22,12 @@
 class ForallOp;
 class IfOp;
 class ForOp;
-}  // namespace scf
+} // namespace scf
 
 namespace vector {
 class VectorDialect;
 class WarpExecuteOnLane0Op;
-}  // namespace vector
+} // namespace vector
 
 namespace iree_compiler {
 
@@ -40,21 +40,22 @@
 // Hook to register LLVMGPU transformations to the transform dialect.
 class LLVMGPUExtensions
     : public transform::TransformDialectExtension<LLVMGPUExtensions> {
- public:
+public:
   LLVMGPUExtensions();
 };
-}  // namespace transform_dialect
-}  // namespace IREE
+} // namespace transform_dialect
+} // namespace IREE
 
 /// Transformation to convert scf.forall to gpu distribution.
-FailureOr<SmallVector<OpFoldResult>> rewriteForallToGpu(
-    scf::ForallOp forallOp, const SmallVector<int64_t> &globalWorkgroupSizes,
-    RewriterBase &rewriter, bool syncAfterDistribute = true);
+FailureOr<SmallVector<OpFoldResult>>
+rewriteForallToGpu(scf::ForallOp forallOp,
+                   const SmallVector<int64_t> &globalWorkgroupSizes,
+                   RewriterBase &rewriter, bool syncAfterDistribute = true);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 #define GET_OP_CLASSES
 #include "iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.h.inc"
 
-#endif  // IREE_COMPILER_CODEGEN_LLVMGPU_TRANSFORMEXTENSIONS_LLVMGPUEXTENSIONS_H_
+#endif // IREE_COMPILER_CODEGEN_LLVMGPU_TRANSFORMEXTENSIONS_LLVMGPUEXTENSIONS_H_
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPULayoutAnalysisAndDistribution.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPULayoutAnalysisAndDistribution.cpp
index 0671b2b..b733f43 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPULayoutAnalysisAndDistribution.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPULayoutAnalysisAndDistribution.cpp
@@ -24,8 +24,8 @@
 static constexpr int warpSize = 32;
 static constexpr int maxTensorDims = 2;
 namespace DimType {
-static constexpr int Batch0 = 0;  // Batch dimension for tensor dim 0
-static constexpr int Batch1 = 1;  // Batch dimension for tensor dim 1
+static constexpr int Batch0 = 0; // Batch dimension for tensor dim 0
+static constexpr int Batch1 = 1; // Batch dimension for tensor dim 1
 static constexpr int LaneIdZ = 2;
 static constexpr int LaneIdY = 3;
 static constexpr int LaneIdX = 4;
@@ -33,28 +33,28 @@
 static constexpr int VecIdY = 6;
 static constexpr int VecIdX = 7;
 static constexpr int NumDims = 8;
-}  // namespace DimType
+} // namespace DimType
 
 static std::string typeToString(int i) {
   switch (i) {
-    case DimType::Batch0:
-      return "Batch0";
-    case DimType::Batch1:
-      return "Batch1";
-    case DimType::LaneIdZ:
-      return "LaneIdZ";
-    case DimType::LaneIdY:
-      return "LaneIdY";
-    case DimType::LaneIdX:
-      return "LaneIdX";
-    case DimType::VecIdZ:
-      return "VecIdZ";
-    case DimType::VecIdY:
-      return "VecIdY";
-    case DimType::VecIdX:
-      return "VecIdX";
-    default:
-      return "";
+  case DimType::Batch0:
+    return "Batch0";
+  case DimType::Batch1:
+    return "Batch1";
+  case DimType::LaneIdZ:
+    return "LaneIdZ";
+  case DimType::LaneIdY:
+    return "LaneIdY";
+  case DimType::LaneIdX:
+    return "LaneIdX";
+  case DimType::VecIdZ:
+    return "VecIdZ";
+  case DimType::VecIdY:
+    return "VecIdY";
+  case DimType::VecIdX:
+    return "VecIdX";
+  default:
+    return "";
   }
 }
 
@@ -95,58 +95,54 @@
 
 enum class MMAMatrixType { AMatrix, BMatrix, CMatrix };
 
-static std::array<Dimension, 3> getMMADimensions(MMAType mmaType,
-                                                 MMAMatrixType matrixType,
-                                                 int dim) {
+static std::array<Dimension, 3>
+getMMADimensions(MMAType mmaType, MMAMatrixType matrixType, int dim) {
   switch (mmaType) {
-    case MMAType::M16N8K16:
-      switch (matrixType) {
-        case MMAMatrixType::AMatrix:
-          if (dim == 0)
-            return {{{DimType::LaneIdY, 8},
-                     {DimType::VecIdZ, 2},
-                     {DimType::LaneIdZ, 1}}};
-          return {{{DimType::VecIdX, 2},
-                   {DimType::LaneIdX, 4},
-                   {DimType::VecIdY, 2}}};
-        case MMAMatrixType::BMatrix:
-          if (dim == 0)
-            return {{{DimType::LaneIdY, 8},
-                     {DimType::LaneIdZ, 1},
-                     {DimType::VecIdZ, 1}}};
-          return {{{DimType::VecIdX, 2},
-                   {DimType::LaneIdX, 4},
-                   {DimType::VecIdY, 2}}};
-        case MMAMatrixType::CMatrix:
-          if (dim == 0)
-            return {{{DimType::LaneIdY, 8},
-                     {DimType::VecIdY, 2},
-                     {DimType::LaneIdZ, 1}}};
-          return {{{DimType::VecIdX, 2},
-                   {DimType::LaneIdX, 4},
-                   {DimType::VecIdZ, 1}}};
-      }
-      return {};
-    default:
-      return {};
+  case MMAType::M16N8K16:
+    switch (matrixType) {
+    case MMAMatrixType::AMatrix:
+      if (dim == 0)
+        return {{{DimType::LaneIdY, 8},
+                 {DimType::VecIdZ, 2},
+                 {DimType::LaneIdZ, 1}}};
+      return {
+          {{DimType::VecIdX, 2}, {DimType::LaneIdX, 4}, {DimType::VecIdY, 2}}};
+    case MMAMatrixType::BMatrix:
+      if (dim == 0)
+        return {{{DimType::LaneIdY, 8},
+                 {DimType::LaneIdZ, 1},
+                 {DimType::VecIdZ, 1}}};
+      return {
+          {{DimType::VecIdX, 2}, {DimType::LaneIdX, 4}, {DimType::VecIdY, 2}}};
+    case MMAMatrixType::CMatrix:
+      if (dim == 0)
+        return {{{DimType::LaneIdY, 8},
+                 {DimType::VecIdY, 2},
+                 {DimType::LaneIdZ, 1}}};
+      return {
+          {{DimType::VecIdX, 2}, {DimType::LaneIdX, 4}, {DimType::VecIdZ, 1}}};
+    }
+    return {};
+  default:
+    return {};
   }
 }
 
 static std::array<int, 2> getMMACanonicalShape(MMAType mmaType,
                                                MMAMatrixType matrixType) {
   switch (mmaType) {
-    case MMAType::M16N8K16:
-      switch (matrixType) {
-        case MMAMatrixType::AMatrix:
-          return {16, 16};
-        case MMAMatrixType::BMatrix:
-          return {8, 16};
-        case MMAMatrixType::CMatrix:
-          return {16, 8};
-      }
-      return {};
-    default:
-      return {};
+  case MMAType::M16N8K16:
+    switch (matrixType) {
+    case MMAMatrixType::AMatrix:
+      return {16, 16};
+    case MMAMatrixType::BMatrix:
+      return {8, 16};
+    case MMAMatrixType::CMatrix:
+      return {16, 8};
+    }
+    return {};
+  default:
+    return {};
   }
 }
 
@@ -186,18 +182,18 @@
   AffineExpr dimScale = builder.getAffineConstantExpr(1);
   for (const auto &dimType : order[i]) {
     switch (dimType) {
-      case DimType::LaneIdX:
-        dim = dim + dimScale * d0;
-        break;
-      case DimType::LaneIdY:
-        dim = dim + dimScale * d1;
-        break;
-      case DimType::LaneIdZ:
-        dim = dim + dimScale * d2;
-        break;
-      default:
-        dim = dim + dimScale * builder.getAffineConstantExpr(state[dimType]);
-        break;
+    case DimType::LaneIdX:
+      dim = dim + dimScale * d0;
+      break;
+    case DimType::LaneIdY:
+      dim = dim + dimScale * d1;
+      break;
+    case DimType::LaneIdZ:
+      dim = dim + dimScale * d2;
+      break;
+    default:
+      dim = dim + dimScale * builder.getAffineConstantExpr(state[dimType]);
+      break;
     }
     dimScale = dimScale * builder.getAffineConstantExpr(shape[dimType]);
   }
@@ -230,7 +226,8 @@
                           ArrayRef<int64_t> cShape) {
   if ((aShape[0] % 16 == 0) && (aShape[1] % 16 == 0) && (cShape[0] % 16 == 0) &&
       (cShape[1] % 8 == 0)) {
-    if ((bShape[0] % 16 == 0) && (bShape[1] % 8 == 0)) return MMAType::M16N8K16;
+    if ((bShape[0] % 16 == 0) && (bShape[1] % 8 == 0))
+      return MMAType::M16N8K16;
   }
   return MMAType::NONE;
 }
@@ -262,7 +259,8 @@
   ArrayRef<int64_t> bShape = bType.getShape();
   ArrayRef<int64_t> cShape = cType.getShape();
   MMAType mmaType = getMMAType(aShape, bShape, cShape);
-  if (mmaType == MMAType::NONE) return;
+  if (mmaType == MMAType::NONE)
+    return;
   // Set layouts for A, B and C
   auto setLayout = [&](Value matrix, MMAMatrixType matrixType,
                        llvm::StringRef name) {
@@ -292,10 +290,13 @@
 static void propagateLayoutToReduceBroadcastTranspose(
     vector::MultiDimReductionOp reductionOp, vector::BroadcastOp broadcastOp,
     vector::TransposeOp transposeOp, DenseMap<Value, Layout> &layoutMap) {
-  if (!broadcastOp) return;
-  if (!transposeOp) return;
+  if (!broadcastOp)
+    return;
+  if (!transposeOp)
+    return;
   Value reductionSrc = reductionOp.getSource();
-  if (!layoutMap.count(reductionSrc)) return;
+  if (!layoutMap.count(reductionSrc))
+    return;
   // Get the reduction dims
   auto reductionDims =
       llvm::to_vector(reductionOp.getReductionDims().getAsRange<IntegerAttr>());
@@ -305,7 +306,8 @@
   // Don't support dim-1 broadcasted dims
   llvm::SetVector<int64_t> dimOneBroadcastedDims =
       broadcastOp.computeBroadcastedUnitDims();
-  if (dimOneBroadcastedDims.size() > 0) return;
+  if (dimOneBroadcastedDims.size() > 0)
+    return;
   Value broadcastSource = broadcastOp.getSource();
   Value broadcastResult = broadcastOp.getResult();
   int64_t broadcastSourceRank =
@@ -314,27 +316,32 @@
       llvm::cast<VectorType>(broadcastResult.getType()).getRank();
   int64_t rankDiff = broadcastResultRank - broadcastSourceRank;
   llvm::SetVector<int64_t> broadcastedDims;
-  for (int64_t i = 0; i < rankDiff; i++) broadcastedDims.insert(i);
+  for (int64_t i = 0; i < rankDiff; i++)
+    broadcastedDims.insert(i);
   ArrayRef<int64_t> broadcastShape =
       llvm::cast<ShapedType>(broadcastResult.getType()).getShape();
   ArrayRef<int64_t> srcShape =
       llvm::cast<ShapedType>(reductionSrc.getType()).getShape();
   // Check that the same number of dims are reduced and broadcasted
-  if (reductionDims.size() != broadcastedDims.size()) return;
+  if (reductionDims.size() != broadcastedDims.size())
+    return;
   // Check that transpose(reductionDim) == broadcastDim
   // and that the shapes match
   for (IntegerAttr dimAttr : reductionDims) {
     int64_t dim = dimAttr.getInt();
     int64_t transposedDim = perm[dim];
-    if (!broadcastedDims.contains(transposedDim)) return;
-    if (srcShape[dim] != broadcastShape[transposedDim]) return;
+    if (!broadcastedDims.contains(transposedDim))
+      return;
+    if (srcShape[dim] != broadcastShape[transposedDim])
+      return;
   }
   Value transposedResult = transposeOp.getResult();
   layoutMap.try_emplace(transposedResult, layoutMap.at(reductionSrc));
   layoutMap.at(transposedResult).debugPrint("transposed");
   // Propagate 2D layout to 1D accumulator
   Value acc = reductionOp.getAcc();
-  if (layoutMap.count(acc)) return;
+  if (layoutMap.count(acc))
+    return;
   Layout accLayout = layoutMap.at(reductionSrc);
   accLayout.rank = 1;
   layoutMap.try_emplace(acc, accLayout);
@@ -363,7 +370,8 @@
                                  DenseMap<Value, Layout> &layoutMap) {
   for (auto argIndex : llvm::enumerate(forOp.getRegionIterArgs())) {
     BlockArgument &arg = argIndex.value();
-    if (!layoutMap.count(arg)) continue;
+    if (!layoutMap.count(arg))
+      continue;
     OpOperand &operand = forOp.getOpOperandForRegionIterArg(arg);
     Value result = forOp.getResult(argIndex.index());
     Layout newLayout = layoutMap.at(arg);
@@ -380,11 +388,13 @@
   // Find an operand with a layout
   int i;
   for (i = 0; i < numOperands; i++) {
-    if (layoutMap.count(operands[i])) break;
+    if (layoutMap.count(operands[i]))
+      break;
   }
   // Propagate layout to others
   for (int j = 0; j < numOperands; j++) {
-    if (j == i) continue;
+    if (j == i)
+      continue;
     if (!layoutMap.count(operands[j])) {
       layoutMap.try_emplace(operands[j], layoutMap.at(operands[i]));
       layoutMap.at(operands[j]).debugPrint("binary/unary operand");
@@ -417,10 +427,11 @@
 }
 
 /// Get indices of transfer op after distribution.
-static SmallVector<Value> getDistributedIndices(
-    OpBuilder &rewriter, Location loc, Layout &layout,
-    std::array<int, DimType::NumDims> &state, ArrayRef<Value> indices,
-    AffineMap permutationMap, const std::array<Value, 3> &threadIds) {
+static SmallVector<Value>
+getDistributedIndices(OpBuilder &rewriter, Location loc, Layout &layout,
+                      std::array<int, DimType::NumDims> &state,
+                      ArrayRef<Value> indices, AffineMap permutationMap,
+                      const std::array<Value, 3> &threadIds) {
   AffineExpr row = layout.computeDim(0, state, rewriter);
   AffineMap rowMap = AffineMap::get(3, 0, row, rewriter.getContext());
   std::array<Value, 2> laneOffsets;
@@ -434,7 +445,8 @@
   int64_t laneDim = 0;
   for (AffineExpr expr : permutationMap.getResults()) {
     auto dimExpr = expr.dyn_cast<AffineDimExpr>();
-    if (!dimExpr) continue;
+    if (!dimExpr)
+      continue;
     unsigned pos = dimExpr.getPosition();
     newIndices[pos] = rewriter.create<arith::AddIOp>(
         loc, laneOffsets[laneDim++], newIndices[pos]);
@@ -451,7 +463,8 @@
           llvm::cast<MemRefType>(readOp.getSource().getType())))
     return false;
   // TODO: Can be any 16bits type.
-  if (!readOp.getVectorType().getElementType().isF16()) return false;
+  if (!readOp.getVectorType().getElementType().isF16())
+    return false;
   bool compatibleLayout = layout.order.back()[0] == DimType::VecIdX &&
                           layout.shape[DimType::VecIdX] == 2 &&
                           layout.order.back()[1] == DimType::LaneIdX &&
@@ -467,7 +480,8 @@
   }
   auto exprX = map.getResult(0).dyn_cast<AffineDimExpr>();
   auto exprY = map.getResult(1).dyn_cast<AffineDimExpr>();
-  if (!exprX || !exprY) return false;
+  if (!exprX || !exprY)
+    return false;
   return exprX.getPosition() > exprY.getPosition();
 }
 
@@ -520,7 +534,8 @@
   }
   for (AffineExpr expr : permutationMap.getResults()) {
     auto dimExpr = expr.dyn_cast<AffineDimExpr>();
-    if (!dimExpr) continue;
+    if (!dimExpr)
+      continue;
     unsigned pos = dimExpr.getPosition();
     newIndices[pos] = rewriter.create<arith::AddIOp>(
         loc, vectorOffsets[laneDim], newIndices[pos]);
@@ -542,7 +557,8 @@
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(readOp);
   Value result = readOp.getResult();
-  if (!layoutMap.count(result)) return;
+  if (!layoutMap.count(result))
+    return;
   Value source = readOp.getSource();
   Location loc = readOp.getLoc();
   SmallVector<Value> indices = readOp.getIndices();
@@ -611,14 +627,19 @@
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(contractOp);
   Value lhs = contractOp.getLhs();
-  if (!layoutMap.count(lhs)) return;
-  if (!simdToSimtMap.count(lhs)) return;
+  if (!layoutMap.count(lhs))
+    return;
+  if (!simdToSimtMap.count(lhs))
+    return;
   Type elementType = llvm::cast<ShapedType>(lhs.getType()).getElementType();
   Value rhs = contractOp.getRhs();
-  if (!layoutMap.count(rhs)) return;
-  if (!simdToSimtMap.count(rhs)) return;
+  if (!layoutMap.count(rhs))
+    return;
+  if (!simdToSimtMap.count(rhs))
+    return;
   Value acc = contractOp.getAcc();
-  if (!simdToSimtMap.count(acc)) return;
+  if (!simdToSimtMap.count(acc))
+    return;
   Location loc = contractOp.getLoc();
   Value contractResult = contractOp.getResult();
   Layout lhsLayout = layoutMap.at(lhs);
@@ -672,8 +693,10 @@
       rewriter.create<gpu::ThreadIdOp>(loc, gpu::Dimension::x),
       rewriter.create<gpu::ThreadIdOp>(loc, gpu::Dimension::y),
       rewriter.create<gpu::ThreadIdOp>(loc, gpu::Dimension::z)};
-  if (!layoutMap.count(vector)) return;
-  if (!simdToSimtMap.count(vector)) return;
+  if (!layoutMap.count(vector))
+    return;
+  if (!simdToSimtMap.count(vector))
+    return;
   Layout layout = layoutMap.at(vector);
   std::array<int, DimType::NumDims> state;
   for (int b0 = 0; b0 < layout.shape[DimType::Batch0]; b0++) {
@@ -718,7 +741,8 @@
 
 static int getLaneIdIndex(std::array<int, 4> &order) {
   for (int i = 0; i < 4; i++) {
-    if (isLaneId(order[i])) return i;
+    if (isLaneId(order[i]))
+      return i;
   }
   return -1;
 }
@@ -726,7 +750,8 @@
 static int isSingleLaneIdReduced(std::array<int, 4> &order) {
   int count{0};
   for (int i = 0; i < 4; i++) {
-    if (isLaneId(order[i])) count++;
+    if (isLaneId(order[i]))
+      count++;
   }
   return count == 1;
 }
@@ -734,7 +759,8 @@
 static int getVecSizes(std::array<int, 4> &order, const Layout &layout) {
   int size = 1;
   for (int i = 0; i < 4; i++) {
-    if (isVectorId(i)) size *= layout.shape[i];
+    if (isVectorId(i))
+      size *= layout.shape[i];
   }
   return size;
 }
@@ -772,13 +798,13 @@
 /// Dim 2 of the SIMT vector maps to vy * VZ + vz
 /// Dim 3 of the SIMT vector maps to vx
 /// where VZ is the shape of the VectorZ dimension of the layout.
-static SmallVector<int64_t> getIndicesFromState(
-    std::array<int, DimType::NumDims> &state, Layout &layout) {
-  SmallVector<int64_t> indices{
-      state[DimType::Batch0], state[DimType::Batch1],
-      state[DimType::VecIdY] * layout.shape[DimType::VecIdZ] +
-          state[DimType::VecIdZ],
-      state[DimType::VecIdX]};
+static SmallVector<int64_t>
+getIndicesFromState(std::array<int, DimType::NumDims> &state, Layout &layout) {
+  SmallVector<int64_t> indices{state[DimType::Batch0], state[DimType::Batch1],
+                               state[DimType::VecIdY] *
+                                       layout.shape[DimType::VecIdZ] +
+                                   state[DimType::VecIdZ],
+                               state[DimType::VecIdX]};
   return indices;
 }
 
@@ -791,22 +817,28 @@
   rewriter.setInsertionPoint(reductionOp);
   Value source = reductionOp.getSource();
   Type elementType = llvm::cast<ShapedType>(source.getType()).getElementType();
-  if (!layoutMap.count(source)) return;
-  if (!simdToSimtMap.count(source)) return;
-  if (!broadcastOp) return;
-  if (!transposeOp) return;
+  if (!layoutMap.count(source))
+    return;
+  if (!simdToSimtMap.count(source))
+    return;
+  if (!broadcastOp)
+    return;
+  if (!transposeOp)
+    return;
   Location loc = reductionOp.getLoc();
   Layout layout = layoutMap.at(source);
   auto reductionDims =
       llvm::to_vector(reductionOp.getReductionDims().getAsRange<IntegerAttr>());
   vector::CombiningKind combiningKind = reductionOp.getKind();
   // Only support reduction on one dimension
-  if (reductionDims.size() > 1) return;
+  if (reductionDims.size() > 1)
+    return;
   int reductionDim = reductionDims[0].getInt();
   std::array<int, 4> reductionOrder = layout.order[reductionDim];
   std::array<int, 4> parallelOrder = layout.order[!reductionDim];
   Value acc = reductionOp.getAcc();
-  if (!simdToSimtMap.count(acc)) return;
+  if (!simdToSimtMap.count(acc))
+    return;
   SmallVector<int64_t> vecShape{
       layout.shape[DimType::Batch0], layout.shape[DimType::Batch1],
       layout.shape[DimType::VecIdZ] * layout.shape[DimType::VecIdY],
@@ -815,20 +847,21 @@
   Value output = rewriter.create<arith::ConstantOp>(
       loc, vecType, rewriter.getZeroAttr(vecType));
 
-  if (!isSingleLaneIdReduced(reductionOrder)) return;
+  if (!isSingleLaneIdReduced(reductionOrder))
+    return;
   int dimIndex = getLaneIdIndex(reductionOrder);
   int dimType = reductionOrder[dimIndex];
   int offset{0};
   switch (dimType) {
-    case DimType::LaneIdX:
-      offset = 1;
-      break;
-    case DimType::LaneIdY:
-      offset = layout.shape[DimType::LaneIdX];
-      break;
-    case DimType::LaneIdZ:
-      offset = layout.shape[DimType::LaneIdX] * layout.shape[DimType::LaneIdY];
-      break;
+  case DimType::LaneIdX:
+    offset = 1;
+    break;
+  case DimType::LaneIdY:
+    offset = layout.shape[DimType::LaneIdX];
+    break;
+  case DimType::LaneIdZ:
+    offset = layout.shape[DimType::LaneIdX] * layout.shape[DimType::LaneIdY];
+    break;
   }
 
   bodyType loopBody = [&](std::array<int, DimType::NumDims> &state) {
@@ -855,7 +888,8 @@
                                               SmallVector<int64_t>{index});
       if (!isFP32) {
         index = !index;
-        if (index) return;
+        if (index)
+          return;
       }
       result = !result ? tmp
                        : makeArithReduction(rewriter, loc, combiningKind,
@@ -899,7 +933,8 @@
     iterate(0, reductionOrder, state, layout, broadcastResult);
 
     // Reset reduction state
-    for (int type : reductionOrder) state[type] = 0;
+    for (int type : reductionOrder)
+      state[type] = 0;
   };
 
   std::array<int, DimType::NumDims> state;
@@ -1005,7 +1040,8 @@
   auto loop = cast<scf::ForOp>(yieldOp->getParentOp());
   auto yieldOperands = llvm::to_vector(yieldOp.getOperands());
   for (const auto &operand : llvm::enumerate(yieldOp.getOperands())) {
-    if (!simdToSimtMap.count(operand.value())) continue;
+    if (!simdToSimtMap.count(operand.value()))
+      continue;
     // Replace the yield of old value with the for op argument to make it easier
     // to remove the dead code.
     yieldOperands[operand.index()] = loop.getIterOperands()[operand.index()];
@@ -1023,10 +1059,12 @@
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(constantOp);
   Value constant = constantOp.getResult();
-  if (!layoutMap.count(constant)) return;
+  if (!layoutMap.count(constant))
+    return;
   auto attr = llvm::cast<DenseElementsAttr>(constantOp.getValue());
   // Only handle splat values for now
-  if (!attr.isSplat()) return;
+  if (!attr.isSplat())
+    return;
   Layout layout = layoutMap.at(constant);
   Type elementType =
       llvm::cast<VectorType>(constant.getType()).getElementType();
@@ -1046,13 +1084,16 @@
                                   DenseMap<Value, Value> &simdToSimtMap,
                                   RewriterBase &rewriter,
                                   llvm::SetVector<Operation *> &ops) {
-  if (!OpTrait::hasElementwiseMappableTraits(op)) return;
-  if (op->getNumResults() != 1) return;
+  if (!OpTrait::hasElementwiseMappableTraits(op))
+    return;
+  if (op->getNumResults() != 1)
+    return;
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(op);
   SmallVector<Value> newOperands;
   for (auto operand : op->getOperands()) {
-    if (!simdToSimtMap.count(operand)) return;
+    if (!simdToSimtMap.count(operand))
+      return;
     newOperands.push_back(simdToSimtMap.at(operand));
   }
   SmallVector<Type> resultTypes{newOperands.front().getType()};
@@ -1114,10 +1155,12 @@
       numMismatchedVecDims++;
       vecDim = dimType;
     }
-    if (isBatchId(dimType)) batchDim = dimType;
+    if (isBatchId(dimType))
+      batchDim = dimType;
   }
   // Only support single vector mismatched dim
-  if (numMismatchedVecDims > 1) return Value{};
+  if (numMismatchedVecDims > 1)
+    return Value{};
   // Assumes target layout vector dim > current layout vector dim
   int ratio = ((float)targetLayout.shape[vecDim] / currentLayout.shape[vecDim]);
 
@@ -1160,16 +1203,19 @@
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(op);
   Value source = op.getSource();
-  if (!layoutMap.count(source) || !simdToSimtMap.count(source)) return;
+  if (!layoutMap.count(source) || !simdToSimtMap.count(source))
+    return;
   Layout currentLayout = layoutMap.at(source);
   Value result = op.getResult();
-  if (!layoutMap.count(result)) return;
+  if (!layoutMap.count(result))
+    return;
   Layout targetLayout = layoutMap.at(result);
 
   Value resolvedResult = simdToSimtMap.at(source);
   // For row and col of the vector, resolve layout differences
   for (int i = 0; i < currentLayout.order.size(); i++) {
-    if (!resolvedResult) return;
+    if (!resolvedResult)
+      return;
     // Check which dimension(s) are mismatched.
     SmallVector<int> mismatchedDims;
     for (auto dimType : currentLayout.order[i]) {
@@ -1177,7 +1223,8 @@
         mismatchedDims.push_back(dimType);
       }
     }
-    if (mismatchedDims.empty()) continue;
+    if (mismatchedDims.empty())
+      continue;
     // If any of the mismatched dims are laneId, this layout conflict cannot be
     // resolved.
     if (llvm::any_of(mismatchedDims,
@@ -1223,7 +1270,7 @@
   }
 }
 
-}  // namespace
+} // namespace
 
 static bool isMatmulTransposeB(vector::ContractionOp contractOp) {
   // Set up the parallel/reduction structure in right form.
@@ -1246,7 +1293,8 @@
   DenseMap<Value, Layout> layoutMap;
   funcOp.walk([&](Operation *op) {
     if (auto contractOp = dyn_cast<vector::ContractionOp>(op)) {
-      if (!isMatmulTransposeB(contractOp)) return WalkResult::advance();
+      if (!isMatmulTransposeB(contractOp))
+        return WalkResult::advance();
       Value lhs = contractOp.getLhs();
       Value rhs = contractOp.getRhs();
       Value acc = contractOp.getAcc();
@@ -1311,4 +1359,4 @@
   eraseOps(opsToErase, rewriter);
 }
 
-}  // namespace mlir::iree_compiler
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
index 32b208e..80c8808 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
@@ -27,7 +27,7 @@
 namespace mlir {
 namespace iree_compiler {
 
-static bool isContiguousStore(Operation* write) {
+static bool isContiguousStore(Operation *write) {
   if (auto transferWrite = dyn_cast<vector::TransferWriteOp>(write)) {
     if (!transferWrite.getPermutationMap().isMinorIdentity() ||
         !transferWrite.isDimInBounds(0) || transferWrite.getMask()) {
@@ -43,7 +43,7 @@
   return false;
 }
 
-static bool isContiguousRead(Operation* read) {
+static bool isContiguousRead(Operation *read) {
   if (auto transferRead = dyn_cast<vector::TransferReadOp>(read)) {
     if (!transferRead.isDimInBounds(0) ||
         !transferRead.getPermutationMap().isMinorIdentity()) {
@@ -59,7 +59,7 @@
   return false;
 }
 
-static Value getMemrefOperand(Operation* op) {
+static Value getMemrefOperand(Operation *op) {
   if (auto transferWrite = dyn_cast<vector::TransferWriteOp>(op)) {
     return transferWrite.getSource();
   }
@@ -79,9 +79,10 @@
   vector::CreateMaskOp maskOp;
   vector::ExtractOp maybeExtractOp;
 };
-static MaskResult getMask(Operation* op) {
+static MaskResult getMask(Operation *op) {
   auto transferRead = dyn_cast<vector::TransferReadOp>(op);
-  if (!transferRead || !transferRead.getMask()) return MaskResult{};
+  if (!transferRead || !transferRead.getMask())
+    return MaskResult{};
   vector::ExtractOp maybeExtractOp =
       transferRead.getMask().getDefiningOp<vector::ExtractOp>();
   auto maskOp =
@@ -105,9 +106,10 @@
   return MaskResult{maskOp, maybeExtractOp};
 }
 
-static Value getMaskValue(RewriterBase& rewriter, Operation* op) {
+static Value getMaskValue(RewriterBase &rewriter, Operation *op) {
   MaskResult maskResult = getMask(op);
-  if (!maskResult.maskOp) return Value();
+  if (!maskResult.maskOp)
+    return Value();
   Value count = maskResult.maskOp->getOperands().back();
   vector::ExtractOp maybeExtractOp = maskResult.maybeExtractOp;
   if (maybeExtractOp) {
@@ -126,7 +128,7 @@
   return count;
 }
 
-static Value getValueStored(Operation* writeOp) {
+static Value getValueStored(Operation *writeOp) {
   if (auto transferWrite = dyn_cast<vector::TransferWriteOp>(writeOp)) {
     return transferWrite.getValue();
   }
@@ -136,7 +138,7 @@
   return Value();
 }
 
-static Operation::operand_range getIndices(Operation* op) {
+static Operation::operand_range getIndices(Operation *op) {
   if (auto vectorReadOp = dyn_cast<vector::LoadOp>(op))
     return vectorReadOp.getIndices();
   if (auto vectorStoreOp = dyn_cast<vector::StoreOp>(op))
@@ -185,13 +187,14 @@
   return true;
 }
 
-void createAsyncGroups(RewriterBase& rewriter, func::FuncOp funcOp,
+void createAsyncGroups(RewriterBase &rewriter, func::FuncOp funcOp,
                        bool useMMASync) {
   LDBG("Start asyncGroups: useMMASync=" << useMMASync);
-  llvm::SmallSetVector<Operation*, 16> copyToSharedMem;
+  llvm::SmallSetVector<Operation *, 16> copyToSharedMem;
   // Look for all the copy that can be converted to async copy ops.
-  funcOp.walk([&](Operation* writeOp) {
-    if (!isContiguousStore(writeOp)) return WalkResult::advance();
+  funcOp.walk([&](Operation *writeOp) {
+    if (!isContiguousStore(writeOp))
+      return WalkResult::advance();
     LDBG("--candidate writeOp: " << *writeOp);
     Value vectorVal = getValueStored(writeOp);
     if (llvm::cast<VectorType>(vectorVal.getType()).getRank() != 1) {
@@ -204,7 +207,7 @@
       LDBG("----address space is not workgroup -> Skip");
       return WalkResult::advance();
     }
-    Operation* readOp = vectorVal.getDefiningOp();
+    Operation *readOp = vectorVal.getDefiningOp();
     if (readOp == nullptr || !isContiguousRead(readOp)) {
       LDBG("----no contiguous readOp defining the writeOp -> Skip");
       return WalkResult::advance();
@@ -245,13 +248,13 @@
   });
 
   while (!copyToSharedMem.empty()) {
-    SmallVector<Operation*> group;
-    Operation* writeOp = *copyToSharedMem.begin();
+    SmallVector<Operation *> group;
+    Operation *writeOp = *copyToSharedMem.begin();
     LDBG("--START a group from: " << *writeOp);
     // Start a group with the first write.
     copyToSharedMem.remove(writeOp);
     group.push_back(writeOp);
-    Operation* nextNode = writeOp;
+    Operation *nextNode = writeOp;
     // Look in the next nodes for more copies to add to the same group.
     while ((nextNode = nextNode->getNextNode())) {
       // Ignore ops without side effects
@@ -261,7 +264,7 @@
         continue;
       // ignore read from a different address space.
       if (isa<vector::TransferReadOp, vector::LoadOp>(nextNode)) {
-        Operation* readOp = nextNode;
+        Operation *readOp = nextNode;
         Value memrefOperand = getMemrefOperand(readOp);
         if (!hasSharedMemoryAddressSpace(
                 llvm::cast<MemRefType>(memrefOperand.getType()))) {
@@ -280,12 +283,12 @@
     }
     // emit the group.
     SmallVector<Value> tokens;
-    for (Operation* writeOp : group) {
+    for (Operation *writeOp : group) {
       rewriter.setInsertionPoint(writeOp);
       Value vectorVal = getValueStored(writeOp);
       auto vectorType = llvm::cast<VectorType>(vectorVal.getType());
       int64_t numElements = vectorType.getNumElements();
-      Operation* readOp = vectorVal.getDefiningOp();
+      Operation *readOp = vectorVal.getDefiningOp();
       Value storeBase = getMemrefOperand(writeOp);
       Value loadBase = getMemrefOperand(readOp);
       Value mask = getMaskValue(rewriter, readOp);
@@ -304,15 +307,16 @@
     rewriter.create<nvgpu::DeviceAsyncWaitOp>(funcOp.getLoc(), groupToken,
                                               nullptr);
     // Clean up old stores.
-    for (Operation* writeOp : group) rewriter.eraseOp(writeOp);
+    for (Operation *writeOp : group)
+      rewriter.eraseOp(writeOp);
   }
 }
 
-void reorderTranspose(RewriterBase& rewriter, func::FuncOp funcOp) {
+void reorderTranspose(RewriterBase &rewriter, func::FuncOp funcOp) {
   SmallVector<vector::TransposeOp> transposeOps;
-  funcOp.walk([&](Operation* op) {
+  funcOp.walk([&](Operation *op) {
     if (auto transposeOp = dyn_cast<vector::TransposeOp>(op)) {
-      Operation* definingOp = transposeOp.getVector().getDefiningOp();
+      Operation *definingOp = transposeOp.getVector().getDefiningOp();
       if (OpTrait::hasElementwiseMappableTraits(definingOp)) {
         transposeOps.push_back(transposeOp);
       }
@@ -322,7 +326,7 @@
 
   for (auto transposeOp : transposeOps) {
     OpBuilder::InsertionGuard g(rewriter);
-    Operation* op = transposeOp.getVector().getDefiningOp();
+    Operation *op = transposeOp.getVector().getDefiningOp();
     rewriter.setInsertionPoint(op);
     SmallVector<int64_t> perm;
     transposeOp.getTransp(perm);
@@ -333,12 +337,12 @@
       transposedOperands.push_back(transposed);
     }
     SmallVector<Type> resultTypes{transposedOperands.front().getType()};
-    Operation* newOp =
+    Operation *newOp =
         rewriter.create(op->getLoc(), op->getName().getIdentifier(),
                         transposedOperands, resultTypes, op->getAttrs());
     rewriter.replaceAllUsesWith(transposeOp.getResult(), newOp->getResult(0));
   }
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h
index 31c4b1c..2c96986 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h
@@ -25,7 +25,7 @@
 /// Function to reorder transposes and elementwise ops.
 void reorderTranspose(RewriterBase &rewriter, func::FuncOp funcOp);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 #endif
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp
index e1a4541..315f4f2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp
@@ -35,47 +35,48 @@
     Operation *op, IREE::Codegen::DispatchLoweringPassPipelineAttr pipeline,
     Type inputElementType, SmallVector<int64_t> &instructionShape) {
   switch (pipeline.getValue()) {
-    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt:
-      // SIMT Pipeline / CUDA Cores
-      instructionShape = {1, 1, 1};
-      break;
-    case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulTensorCore:
-      // Tensor Core Pipeline / WMMA API
-      if (inputElementType.isF16() || inputElementType.isBF16()) {
-        instructionShape = {16, 16, 16};
-      } else if (inputElementType.isF32()) {
-        instructionShape = {16, 16, 8};
-      } else {
-        return op->emitError(
-            "Expected f16, bf16 or f32 for Tensor Core (WMMA) pipeline");
-      }
-      break;
-    case IREE::Codegen::DispatchLoweringPassPipeline::
-        LLVMGPUMatmulTensorCoreMmaSync:
-      // Tensor Core Pipeline / MMA.SYNC
-      if (inputElementType.isF16() || inputElementType.isBF16()) {
-        instructionShape = {16, 8, 16};
-      } else if (inputElementType.isF32()) {
-        instructionShape = {16, 8, 8};
-      } else {
-        return op->emitError(
-            "Expected f16, bf16 or f32 for Tensor Core (MMA.SYNC) pipeline");
-      }
-      break;
-    default:
+  case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt:
+    // SIMT Pipeline / CUDA Cores
+    instructionShape = {1, 1, 1};
+    break;
+  case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulTensorCore:
+    // Tensor Core Pipeline / WMMA API
+    if (inputElementType.isF16() || inputElementType.isBF16()) {
+      instructionShape = {16, 16, 16};
+    } else if (inputElementType.isF32()) {
+      instructionShape = {16, 16, 8};
+    } else {
       return op->emitError(
-          "Expected matmul SIMT, TensorCore(WMMA), or TensorCore(MMA.SYNC), "
-          "compilation pipeline");
+          "Expected f16, bf16 or f32 for Tensor Core (WMMA) pipeline");
+    }
+    break;
+  case IREE::Codegen::DispatchLoweringPassPipeline::
+      LLVMGPUMatmulTensorCoreMmaSync:
+    // Tensor Core Pipeline / MMA.SYNC
+    if (inputElementType.isF16() || inputElementType.isBF16()) {
+      instructionShape = {16, 8, 16};
+    } else if (inputElementType.isF32()) {
+      instructionShape = {16, 8, 8};
+    } else {
+      return op->emitError(
+          "Expected f16, bf16 or f32 for Tensor Core (MMA.SYNC) pipeline");
+    }
+    break;
+  default:
+    return op->emitError(
+        "Expected matmul SIMT, TensorCore(WMMA), or TensorCore(MMA.SYNC), "
+        "compilation pipeline");
   }
   return success();
 }
 
 /// Verifies launch configuration for matmul and batchmatmul on a GPU for CUDA
 /// and Tensor Core pipelines.
-LogicalResult verifyGPUMatmulPipeline(
-    Operation *op, IREE::Codegen::LoweringConfigAttr loweringConfig,
-    IREE::Codegen::TranslationInfoAttr translationInfo,
-    ArrayRef<int64_t> workgroupSize) {
+LogicalResult
+verifyGPUMatmulPipeline(Operation *op,
+                        IREE::Codegen::LoweringConfigAttr loweringConfig,
+                        IREE::Codegen::TranslationInfoAttr translationInfo,
+                        ArrayRef<int64_t> workgroupSize) {
   // Only verify batched and unbatched matmul.
   if (!isa<linalg::MatmulOp, linalg::BatchMatmulOp>(op)) {
     return success();
@@ -228,5 +229,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/PassDetail.h b/compiler/src/iree/compiler/Codegen/PassDetail.h
index 5119b35..fea75f0 100644
--- a/compiler/src/iree/compiler/Codegen/PassDetail.h
+++ b/compiler/src/iree/compiler/Codegen/PassDetail.h
@@ -20,7 +20,7 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/Codegen/Passes.h.inc"
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CONVERSIONS_PASS_DETAIL_H_
+#endif // IREE_COMPILER_CONVERSIONS_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Codegen/Passes.cpp b/compiler/src/iree/compiler/Codegen/Passes.cpp
index 2656a06..102984a 100644
--- a/compiler/src/iree/compiler/Codegen/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/Passes.cpp
@@ -26,7 +26,7 @@
 namespace {
 #define GEN_PASS_REGISTRATION
 #include "iree/compiler/Codegen/Passes.h.inc"
-}  // namespace
+} // namespace
 
 void registerCodegenPasses() {
   // Generated.
@@ -85,5 +85,5 @@
   passManager.addNestedPass<func::FuncOp>(createRematerializeParallelOpsPass());
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Passes.h b/compiler/src/iree/compiler/Codegen/Passes.h
index a6c23c8..3b47fa8 100644
--- a/compiler/src/iree/compiler/Codegen/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Passes.h
@@ -13,7 +13,7 @@
 /// Registers all conversion passes in this directory.
 void registerCodegenPasses();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_PASSES_H_
+#endif // IREE_COMPILER_CODEGEN_PASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/AMDConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/AMDConfig.cpp
index d1b0b68..227ab51 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/AMDConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/AMDConfig.cpp
@@ -86,7 +86,8 @@
     // Use the result type in case of larger bitwidth for accumulators.
     auto type = cast<ShapedType>(convOp->getResult(0).getType());
     const int bitwidth = type.getElementTypeBitWidth();
-    if (bitwidth > 32) return failure();
+    if (bitwidth > 32)
+      return failure();
     const int multipler = 32 / bitwidth;
     bool hasPaddedInput = convOp.image().getDefiningOp<tensor::PadOp>();
     const int bestTilingFactor = (hasPaddedInput ? 16 : 32) * multipler;
@@ -97,6 +98,6 @@
   return failure();
 }
 
-}  // namespace detail
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace detail
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp
index 79c53d7..0747513 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp
@@ -54,7 +54,8 @@
     // Use the result type in case of larger bitwidth for accumulators.
     auto type = cast<ShapedType>(convOp->getResult(0).getType());
     const int bitwidth = type.getElementTypeBitWidth();
-    if (bitwidth > 32) return failure();
+    if (bitwidth > 32)
+      return failure();
     const int multipler = 32 / bitwidth;
     linalg::detail::ConvolutionDimensions convDims;
     linalg::detail::isConvolutionInterfaceImpl(rootOp, &convDims);
@@ -66,6 +67,6 @@
   return failure();
 }
 
-}  // namespace detail
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace detail
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/AppleConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/AppleConfig.cpp
index a9c357c..7f0cd9d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/AppleConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/AppleConfig.cpp
@@ -54,7 +54,8 @@
     // Use the result type in case of larger bitwidth for accumulators.
     auto type = cast<ShapedType>(convOp->getResult(0).getType());
     const int bitwidth = type.getElementTypeBitWidth();
-    if (bitwidth > 32) return failure();
+    if (bitwidth > 32)
+      return failure();
     const int multipler = 32 / bitwidth;
     const int bestTilingFactor = 16 * multipler;
     return setConvOpConfig(cast<linalg::LinalgOp>(rootOp), subgroupSize,
@@ -64,6 +65,6 @@
   return failure();
 }
 
-}  // namespace detail
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace detail
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
index 5a4955c..e9814ca 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
@@ -82,14 +82,15 @@
   OpBuilder builder(moduleOp.getContext());
   auto variable =
       builder.create<spirv::GlobalVariableOp>(loc, type, name, set, binding);
-  if (alias) variable->setAttr("aliased", builder.getUnitAttr());
+  if (alias)
+    variable->setAttr("aliased", builder.getUnitAttr());
   symbolTable->insert(variable, moduleOp.getBody()->begin());
   return variable;
 }
 
 /// Returns the (set, binding) pair for the given interface op.
-std::pair<int32_t, int32_t> getInterfaceSetAndBinding(
-    IREE::HAL::InterfaceBindingSubspanOp op) {
+std::pair<int32_t, int32_t>
+getInterfaceSetAndBinding(IREE::HAL::InterfaceBindingSubspanOp op) {
   return {op.getSet().getSExtValue(), op.getBinding().getSExtValue()};
 }
 
@@ -114,7 +115,8 @@
 
     func.walk([&](Operation *op) {
       auto subspanOp = dyn_cast<IREE::HAL::InterfaceBindingSubspanOp>(op);
-      if (!subspanOp || subspanOp.use_empty()) return;
+      if (!subspanOp || subspanOp.use_empty())
+        return;
       subspanOps.emplace_back(subspanOp);
       setBindings.emplace_back(getInterfaceSetAndBinding(subspanOp));
       setBindingTypes[setBindings.back()].insert(subspanOp.getType());
@@ -158,7 +160,7 @@
   return interfaceToResourceVars;
 }
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // Conversion patterns
@@ -171,9 +173,9 @@
     : public OpConversionPattern<IREE::HAL::InterfaceConstantLoadOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceConstantLoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceConstantLoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(#1519): this conversion should look up the entry point information
     // to get the total push constant count.
     auto variantOp = loadOp->getParentOfType<IREE::HAL::ExecutableVariantOp>();
@@ -204,9 +206,9 @@
     : public OpConversionPattern<InterfaceOpTy> {
   using OpConversionPattern<InterfaceOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      InterfaceOpTy op, typename InterfaceOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(InterfaceOpTy op, typename InterfaceOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     int32_t index = static_cast<int32_t>(op.getDimension().getSExtValue());
     auto i32Type = rewriter.getIntegerType(32);
     Value spirvBuiltin =
@@ -240,9 +242,10 @@
       : OpConversionPattern(typeConverter, context, benefit),
         interfaceToResourceVars(interfaceToResourceVars) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp subspanOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp subspanOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (subspanOp.use_empty()) {
       rewriter.eraseOp(subspanOp);
       return success();
@@ -270,7 +273,7 @@
     return success();
   }
 
- private:
+private:
   const InterfaceResourceMap &interfaceToResourceVars;
 };
 
@@ -278,9 +281,9 @@
 template <typename OpTy>
 struct FoldAsNoOp final : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(op, adaptor.getOperands());
     return success();
   }
@@ -308,9 +311,9 @@
 struct RemoveIdentityConversionCast final
     : public OpConversionPattern<UnrealizedConversionCastOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      UnrealizedConversionCastOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(UnrealizedConversionCastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (op->getNumOperands() == 1 && op->getNumResults() == 1 &&
         adaptor.getOperands().front().getType() ==
             op->getResultTypes().front()) {
@@ -332,7 +335,7 @@
 /// GPU processor ID ops into SPIR-V global variables, loop/standard ops into
 /// corresponding SPIR-V ops.
 class ConvertToSPIRVPass : public ConvertToSPIRVBase<ConvertToSPIRVPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<spirv::SPIRVDialect>();
   }
@@ -341,7 +344,8 @@
       : enableFastMath(enableFastMath), indexBits(indexBits) {}
 
   LogicalResult initializeOptions(StringRef options) override {
-    if (failed(Pass::initializeOptions(options))) return failure();
+    if (failed(Pass::initializeOptions(options)))
+      return failure();
     // Use pass option if present.
     enableFastMath |= enableFastMathOption;
     indexBits = indexBitsOption;
@@ -350,14 +354,14 @@
 
   void runOnOperation() override;
 
- private:
+private:
   // Enable fast math when doing type conversion by assuming no NaN or infinite
   // values.
   bool enableFastMath;
   // Use 64 bits for index widths.
   unsigned indexBits;
 };
-}  // namespace
+} // namespace
 
 void ConvertToSPIRVPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -367,11 +371,13 @@
       getAllEntryPoints(moduleOp);
   for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
     auto exportOp = exportOps.lookup(funcOp.getName());
-    if (!exportOp) continue;
+    if (!exportOp)
+      continue;
     // TODO(ravishankarm): This needs to be removed after ConvertToGPU is
     // deprecated. All passes must set the `workgroup_size` on the
     // `hal.executable.export` directly and not on the function.
-    if (funcOp->hasAttr(spirv::getEntryPointABIAttrName())) continue;
+    if (funcOp->hasAttr(spirv::getEntryPointABIAttrName()))
+      continue;
     SmallVector<int64_t> workgroupSize = getWorkgroupSize(exportOp);
     if (workgroupSize.empty()) {
       exportOp.emitOpError(
@@ -382,7 +388,8 @@
     auto workgroupSize32 = llvm::map_to_vector(
         workgroupSize, [](int64_t v) { return static_cast<int32_t>(v); });
     std::optional<int> subgroupSize32;
-    if (subgroupSize) subgroupSize32 = *subgroupSize;
+    if (subgroupSize)
+      subgroupSize32 = *subgroupSize;
     funcOp->setAttr(
         spirv::getEntryPointABIAttrName(),
         spirv::getEntryPointABIAttr(context, workgroupSize32, subgroupSize32));
@@ -494,7 +501,8 @@
 
   SmallVector<func::FuncOp, 1> functions;
   for (func::FuncOp fn : moduleOp.getOps<func::FuncOp>()) {
-    if (!fn.isPublic()) continue;
+    if (!fn.isPublic())
+      continue;
     functions.push_back(fn);
   }
 
@@ -514,8 +522,10 @@
   Dialect *spvDialect = spvModule->getDialect();
   for (Operation &op : llvm::make_early_inc_range(*moduleOp.getBody())) {
     // Skip the newly created spirv.module itself.
-    if (&op == spvModule) continue;
-    if (op.getDialect() == spvDialect) op.moveBefore(body, body->end());
+    if (&op == spvModule)
+      continue;
+    if (op.getDialect() == spvDialect)
+      op.moveBefore(body, body->end());
   }
 }
 
@@ -523,10 +533,10 @@
 // Pass entry point and registration
 //===----------------------------------------------------------------------===//
 
-std::unique_ptr<OperationPass<ModuleOp>> createConvertToSPIRVPass(
-    bool enableFastMath, unsigned indexBits) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertToSPIRVPass(bool enableFastMath, unsigned indexBits) {
   return std::make_unique<ConvertToSPIRVPass>(enableFastMath, indexBits);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
index eea389d..22b469f 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
@@ -65,7 +65,8 @@
 // Check if the given linalg op is fused with another op that may result
 // in too much shared memory usage.
 static bool fusedOpMayUseExtraSharedMemory(linalg::LinalgOp matmul) {
-  if (matmul->getNumResults() != 1) return true;
+  if (matmul->getNumResults() != 1)
+    return true;
 
   func::FuncOp entryPoint = matmul->getParentOfType<func::FuncOp>();
 
@@ -115,7 +116,8 @@
       // Handle `vectorSize` elements per thread for the innermost dimension.
       // We need this for the best utilization of memory.
       chosenTileSize = vectorSize;
-      if (inputDim % (dim * chosenTileSize) != 0) continue;
+      if (inputDim % (dim * chosenTileSize) != 0)
+        continue;
     } else {
       for (int64_t t = residualTilingFactor; t >= 1; t >>= 1)
         if (inputDim % (dim * t) == 0) {
@@ -177,7 +179,8 @@
   // Restrict to pure 4-D input/output shapes for now. This excludes convolution
   // ops with 1- or 3-D window sizes. It also excludes 2-D-window convolution
   // ops like `linalg.depthwise_conv_2d_nhwc_hwcm`.
-  if (inputShape.size() != 4 || outputShape.size() != 4) return failure();
+  if (inputShape.size() != 4 || outputShape.size() != 4)
+    return failure();
 
   linalg::detail::ConvolutionDimensions convDims;
   linalg::detail::isConvolutionInterfaceImpl(linalgOp, &convDims);
@@ -241,7 +244,8 @@
 
   // We use `vectorSize` as the tile size along IC dimension. If smaller than
   // 4, it will be unrolled into size 1.
-  if (ic && !(*ic % vectorSize == 0 || *ic < 4)) return failure();
+  if (ic && !(*ic % vectorSize == 0 || *ic < 4))
+    return failure();
 
   // The core idea is to distribute the convolution dimensions to the workgroup
   // Z/Y/X dimensions, with each thread in a workgroup handling multiple vector
@@ -251,7 +255,7 @@
   int64_t residualThreads = subgroupSize;
   int64_t residualTilingFactor = bestTilingFactor;
 
-  SmallVector<int64_t, 3> workgroupSize(3, 1);  // (X, Y, Z)
+  SmallVector<int64_t, 3> workgroupSize(3, 1); // (X, Y, Z)
   SmallVector<int64_t> workgroupTileSizes(4, 0);
 
   const bool isNCHW = ocIndex < ohIndex;
@@ -298,7 +302,7 @@
   }
 
   SmallVector<int64_t> threadTileSizes(4, 0);
-  threadTileSizes[0] = 1;  // Tile along the N dimension with size 1
+  threadTileSizes[0] = 1; // Tile along the N dimension with size 1
   for (int i = 1; i <= 3; ++i) {
     threadTileSizes[i] = workgroupTileSizes[i] / workgroupSize[3 - i];
   }
@@ -331,7 +335,7 @@
                                                pipeline, workgroupSize);
 }
 
-}  // namespace detail
+} // namespace detail
 
 //===----------------------------------------------------------------------===//
 // Matmul Default Configuration
@@ -347,12 +351,14 @@
   auto lhsShape = llvm::cast<ShapedType>(lhs->get().getType()).getShape();
   auto rhsShape = llvm::cast<ShapedType>(rhs->get().getType()).getShape();
 
-  auto lhsLoopIndices = llvm::map_to_vector(
-      llvm::seq<int>(0, lhsShape.size()),
-      [&](int i) { return op.getMatchingIndexingMap(lhs).getDimPosition(i); });
-  auto rhsLoopIndices = llvm::map_to_vector(
-      llvm::seq<int>(0, rhsShape.size()),
-      [&](int i) { return op.getMatchingIndexingMap(rhs).getDimPosition(i); });
+  auto lhsLoopIndices =
+      llvm::map_to_vector(llvm::seq<int>(0, lhsShape.size()), [&](int i) {
+        return op.getMatchingIndexingMap(lhs).getDimPosition(i);
+      });
+  auto rhsLoopIndices =
+      llvm::map_to_vector(llvm::seq<int>(0, rhsShape.size()), [&](int i) {
+        return op.getMatchingIndexingMap(rhs).getDimPosition(i);
+      });
 
   // Figure out what dimension each loop corresponds to.
   int bIndex = -1, mIndex = -1, nIndex = -1, kIndex = -1;
@@ -369,15 +375,18 @@
     } else if (inLHS) {
       // For cases where we have two parallel dimensions only accessed by
       // the LHS, treat the outer one of them as the batch dimension.
-      if (mIndex >= 0 && bIndex < 0) bIndex = mIndex;
+      if (mIndex >= 0 && bIndex < 0)
+        bIndex = mIndex;
       mIndex = i;
     } else if (inRHS) {
       // For cases where we have two parallel dimensions only accessed by
       // the RHS, treat the outer one of them as the batch dimension.
-      if (nIndex >= 0 && bIndex < 0) bIndex = nIndex;
+      if (nIndex >= 0 && bIndex < 0)
+        bIndex = nIndex;
       nIndex = i;
     }
-    if (lastParallelDim) *lastParallelDim = i;
+    if (lastParallelDim)
+      *lastParallelDim = i;
   }
 
   LLVM_DEBUG({
@@ -463,13 +472,15 @@
                      int64_t elementBits, bool promoteC) {
   int64_t paddingBits = detail::bankConflictReductionPaddingBits / elementBits;
   int64_t count = (mTileSize + nTileSize) * (kTileSize + paddingBits);
-  if (promoteC) count += mTileSize * (nTileSize + paddingBits);
+  if (promoteC)
+    count += mTileSize * (nTileSize + paddingBits);
   return (elementBits / 8) * count;
 }
 
 int64_t getMultiBufferMemoryUsage(int64_t singleBufferBytes, unsigned depth,
                                   unsigned storeStage) {
-  if (depth == 0) return singleBufferBytes;
+  if (depth == 0)
+    return singleBufferBytes;
   return singleBufferBytes * (storeStage == 1 ? depth : depth + 1);
 };
 
@@ -481,7 +492,8 @@
                                const int64_t subgroupSize, int64_t vectorSize) {
   const int64_t totalThreads = wgSize[0] * wgSize[1] * wgSize[2];
   LLVM_DEBUG(llvm::dbgs() << "initial total thread = " << totalThreads << "\n");
-  if (totalThreads <= subgroupSize) return false;
+  if (totalThreads <= subgroupSize)
+    return false;
 
   const bool canVectorLoadLHS = canPerformVectorAccessUsingAllThreads(
       {mTileSize, kTileSize}, totalThreads, vectorSize);
@@ -491,7 +503,8 @@
   LLVM_DEBUG(llvm::dbgs() << "RHS vector load: " << canVectorLoadRHS << "\n");
 
   // If we can perform vector load of neither, just don't use shared memory.
-  if (!canVectorLoadLHS && !canVectorLoadRHS) return false;
+  if (!canVectorLoadLHS && !canVectorLoadRHS)
+    return false;
 
   // If we can only perform vector load of one operands, adjust the tiling
   // scheme to see if we can make both work. Increase K to load more data for
@@ -499,12 +512,15 @@
   if (canVectorLoadLHS && !canVectorLoadRHS) {
     for (const int scale : {2, 4}) {
       const int64_t newKTileSize = kTileSize * scale;
-      if (dimMNKSize[2] % newKTileSize != 0) continue;
+      if (dimMNKSize[2] % newKTileSize != 0)
+        continue;
       const int64_t newMTileSize = mTileSize / scale;
       const int64_t newWgMDim = wgSize[1] / scale;
-      if (newMTileSize == 0 || newWgMDim == 0) continue;
+      if (newMTileSize == 0 || newWgMDim == 0)
+        continue;
       const int64_t newCount = wgSize[0] * newWgMDim * wgSize[2];
-      if (newCount <= subgroupSize) continue;
+      if (newCount <= subgroupSize)
+        continue;
       if (!canPerformVectorAccessUsingAllThreads({newMTileSize, newKTileSize},
                                                  newCount, vectorSize) ||
           !canPerformVectorAccessUsingAllThreads({newKTileSize, nTileSize},
@@ -606,19 +622,23 @@
   auto rhsType = llvm::cast<ShapedType>(rhs->get().getType());
   auto elementBits =
       static_cast<int>(lhsType.getElementType().getIntOrFloatBitWidth());
-  if (!llvm::is_contained({8, 16, 32}, elementBits)) return failure();
+  if (!llvm::is_contained({8, 16, 32}, elementBits))
+    return failure();
 
   ArrayRef<int64_t> lhsShape = lhsType.getShape();
   ArrayRef<int64_t> rhsShape = rhsType.getShape();
-  if (llvm::any_of(lhsShape, ShapedType::isDynamic)) return failure();
-  if (llvm::any_of(rhsShape, ShapedType::isDynamic)) return failure();
+  if (llvm::any_of(lhsShape, ShapedType::isDynamic))
+    return failure();
+  if (llvm::any_of(rhsShape, ShapedType::isDynamic))
+    return failure();
 
   assert(llvm::is_contained({2u, 3u}, op.getNumParallelLoops()));
 
   int lastParallelDim = -1;
   const auto [bIndex, mIndex, nIndex, kIndex] =
       getMatmulBMNKIndex(op, &lastParallelDim);
-  if (mIndex < 0 || nIndex < 0 || kIndex < 0) return failure();
+  if (mIndex < 0 || nIndex < 0 || kIndex < 0)
+    return failure();
   const bool isBM = bIndex >= 0;
 
   SmallVector<int64_t> loopRanges = op.getStaticLoopRanges();
@@ -658,11 +678,12 @@
   int64_t residualThreads = bestX * bestY;
   int64_t residualTilingFactor = (bestThreadM + bestThreadK) * bestThreadN;
 
-  SmallVector<int64_t, 3> workgroupSize(3, 1);  // (X, Y, Z)
+  SmallVector<int64_t, 3> workgroupSize(3, 1); // (X, Y, Z)
   SmallVector<int64_t> workgroupTileSizes(numLoops, 0);
   SmallVector<int64_t> reductionTileSizes(numLoops, 0);
 
-  if (isBM) workgroupTileSizes[bIndex] = 1;
+  if (isBM)
+    workgroupTileSizes[bIndex] = 1;
 
   if (!tileMatmulNToWorkgroupX(dimN, bestThreadN, residualThreads, bestX,
                                residualTilingFactor, workgroupSize[0],
@@ -752,14 +773,15 @@
       CodeGenPipeline::SPIRVBaseVectorize, workgroupSize);
 }
 
-}  // namespace detail
+} // namespace detail
 
 //===----------------------------------------------------------------------===//
 // Cooperative Matrix Default Configuration
 //===----------------------------------------------------------------------===//
 
 bool isCooperativeMatrixFusable(linalg::GenericOp genericOp) {
-  if (genericOp.getNumLoops() != genericOp.getNumParallelLoops()) return false;
+  if (genericOp.getNumLoops() != genericOp.getNumParallelLoops())
+    return false;
 
   // Look at fused elementwise ops to make sure they are allowed by the
   // cooperative matrix spec.
@@ -782,7 +804,8 @@
   // classes.
   for (Value input : genericOp.getInputs()) {
     if (llvm::isa<TensorType>(input.getType())) {
-      if (matchPattern(input, m_Constant())) return false;
+      if (matchPattern(input, m_Constant()))
+        return false;
       continue;
     }
 
@@ -792,7 +815,8 @@
       input = subviewOp.getViewSource();
     }
     if (auto toMemrefOp = input.getDefiningOp<bufferization::ToMemrefOp>()) {
-      if (matchPattern(toMemrefOp.getTensor(), m_Constant())) return false;
+      if (matchPattern(toMemrefOp.getTensor(), m_Constant()))
+        return false;
     }
   }
 
@@ -802,46 +826,50 @@
 bool needToPrmoteCForCooperativeMatrix(linalg::LinalgOp matmulOp) {
   assert(matmulOp.hasTensorSemantics());
   Value result = matmulOp.getOperation()->getResult(0);
-  if (!result.hasOneUse()) return true;  // Be conservative.
+  if (!result.hasOneUse())
+    return true; // Be conservative.
   Operation *user = *result.getUsers().begin();
-  if (isa<IREE::Flow::DispatchTensorStoreOp>(user)) return false;
+  if (isa<IREE::Flow::DispatchTensorStoreOp>(user))
+    return false;
   if (auto genericOp = dyn_cast<linalg::GenericOp>(user)) {
     return !isCooperativeMatrixFusable(genericOp);
   }
-  return true;  // Be conservative.
+  return true; // Be conservative.
 }
 
 struct CooperativeMatrixSize {
-  int64_t mSize;       // Native cooperative matrix size along M dimension
-  int64_t nSize;       // Native cooperative matrix size along N dimension
-  int64_t kSize;       // Native cooperative matrix size along K dimension
-  int64_t mWarpCount;  // # subgroups along M dimension
-  int64_t nWarpCount;  // # subgroups along N dimension
-  int64_t mTileCount;  // # tiles per subgroup along M dimension
-  int64_t nTileCount;  // # tiles per subgroup along N dimension
-  int64_t kTileCount;  // # tiles along K dimension
+  int64_t mSize;      // Native cooperative matrix size along M dimension
+  int64_t nSize;      // Native cooperative matrix size along N dimension
+  int64_t kSize;      // Native cooperative matrix size along K dimension
+  int64_t mWarpCount; // # subgroups along M dimension
+  int64_t nWarpCount; // # subgroups along N dimension
+  int64_t mTileCount; // # tiles per subgroup along M dimension
+  int64_t nTileCount; // # tiles per subgroup along N dimension
+  int64_t kTileCount; // # tiles along K dimension
 };
 
 /// Returns the cooperative matrix (M, N, K) sizes that are supported by the
 /// target environment and match the given parameters.
-static std::optional<CooperativeMatrixSize> getCooperativeMatrixSize(
-    spirv::ResourceLimitsAttr resourceLimits,
-    const unsigned numSubgroupsPerWorkgroup,
-    const unsigned numMNTilesPerSubgroup, Type aType, Type bType, Type cType,
-    int64_t m, int64_t n, int64_t k) {
+static std::optional<CooperativeMatrixSize>
+getCooperativeMatrixSize(spirv::ResourceLimitsAttr resourceLimits,
+                         const unsigned numSubgroupsPerWorkgroup,
+                         const unsigned numMNTilesPerSubgroup, Type aType,
+                         Type bType, Type cType, int64_t m, int64_t n,
+                         int64_t k) {
   auto properties = resourceLimits.getCooperativeMatrixPropertiesNv()
                         .getAsRange<spirv::CooperativeMatrixPropertiesNVAttr>();
   for (auto property : properties) {
     if (property.getAType() != aType || property.getBType() != bType ||
         property.getCType() != cType || property.getResultType() != cType ||
         property.getScope().getValue() != spirv::Scope::Subgroup) {
-      continue;  // Cannot use this cooperative matrix configuration
+      continue; // Cannot use this cooperative matrix configuration
     }
 
     const unsigned matmulM = property.getMSize();
     const unsigned matmulN = property.getNSize();
     const unsigned matmulK = property.getKSize();
-    if (m % matmulM != 0 || n % matmulN != 0 || k % matmulK != 0) continue;
+    if (m % matmulM != 0 || n % matmulN != 0 || k % matmulK != 0)
+      continue;
 
     uint64_t nTotalTileCount = n / matmulN;
     uint64_t mTotalTileCount = m / matmulM;
@@ -934,7 +962,8 @@
     return failure();
   }
 
-  if (op.hasDynamicShape()) return failure();
+  if (op.hasDynamicShape())
+    return failure();
 
   Value lhs = op.getDpsInputOperand(0)->get();
   Value rhs = op.getDpsInputOperand(1)->get();
@@ -943,7 +972,8 @@
   int lastParallelDim = -1;
   const auto [bIndex, mIndex, nIndex, kIndex] =
       getMatmulBMNKIndex(op, &lastParallelDim);
-  if (mIndex < 0 || nIndex < 0 || kIndex < 0) return failure();
+  if (mIndex < 0 || nIndex < 0 || kIndex < 0)
+    return failure();
   const bool isBM = bIndex >= 0;
 
   SmallVector<int64_t> loopRanges = op.getStaticLoopRanges();
@@ -971,7 +1001,8 @@
       limits, numSubgroupsPerWorkgroup, numMNTilesPerSubgroup,
       getElementType(lhs), getElementType(rhs), getElementType(init), dimM,
       dimN, dimK);
-  if (!coopMatSize) return failure();
+  if (!coopMatSize)
+    return failure();
 
   auto pipeline = CodeGenPipeline::SPIRVCooperativeMatrixVectorize;
 
@@ -987,18 +1018,21 @@
                                        coopMatSize->mWarpCount, 1};
 
   SmallVector<int64_t> vectorSizes(kIndex + 1, 0);
-  if (isBM) vectorSizes[bIndex] = 1;
+  if (isBM)
+    vectorSizes[bIndex] = 1;
   vectorSizes[mIndex] = coopMatSize->mSize;
   vectorSizes[nIndex] = coopMatSize->nSize;
   vectorSizes[kIndex] = coopMatSize->kSize;
 
   SmallVector<int64_t> subgroupTileSizes(lastParallelDim + 1, 0);
-  if (isBM) subgroupTileSizes[bIndex] = 1;
+  if (isBM)
+    subgroupTileSizes[bIndex] = 1;
   subgroupTileSizes[mIndex] = coopMatSize->mTileCount * vectorSizes[mIndex];
   subgroupTileSizes[nIndex] = coopMatSize->nTileCount * vectorSizes[nIndex];
 
   SmallVector<int64_t> workgroupTileSizes(lastParallelDim + 1, 0);
-  if (isBM) workgroupTileSizes[bIndex] = 1;
+  if (isBM)
+    workgroupTileSizes[bIndex] = 1;
   workgroupTileSizes[mIndex] =
       coopMatSize->mWarpCount * subgroupTileSizes[mIndex];
   workgroupTileSizes[nIndex] =
@@ -1047,7 +1081,7 @@
       workgroupSize, subgroupSize, pipelineDepth, storeStage);
 }
 
-}  // namespace detail
+} // namespace detail
 
 //===----------------------------------------------------------------------===//
 // FFT Default Configuration
@@ -1116,10 +1150,12 @@
   LLVM_DEBUG(llvm::dbgs() << "trying to deduce config as reduction...\n");
   auto funcOp = op->getParentOfType<FunctionOpInterface>();
   auto walkResult = funcOp.walk([](linalg::LinalgOp op) {
-    if (op.hasDynamicShape()) return WalkResult::interrupt();
+    if (op.hasDynamicShape())
+      return WalkResult::interrupt();
     return WalkResult::advance();
   });
-  if (walkResult.wasInterrupted()) return failure();
+  if (walkResult.wasInterrupted())
+    return failure();
 
   // This pipeline eventually generates non-uniform group shuffle ops, which
   // requires special capability.
@@ -1130,7 +1166,8 @@
   op.getReductionDims(reductionDims);
   if (reductionDims.size() != 1 || reductionDims[0] != op.getNumLoops() - 1)
     return failure();
-  if (op.getRegionOutputArgs().size() != 1) return failure();
+  if (op.getRegionOutputArgs().size() != 1)
+    return failure();
 
   // Only support projected permutation for now. This could be extended to
   // projected permutated with broadcast.
@@ -1146,29 +1183,35 @@
     SmallVector<Operation *> combinerOps;
     if (matchReduction(op.getRegionOutputArgs(), i, combinerOps) &&
         combinerOps.size() == 1) {
-      if (foundSingleReductionOutput) return failure();
+      if (foundSingleReductionOutput)
+        return failure();
       foundSingleReductionOutput = true;
       continue;
     }
     if (!op.getMatchingIndexingMap(op.getDpsInitOperand(i)).isIdentity())
       return failure();
   }
-  if (!foundSingleReductionOutput) return failure();
+  if (!foundSingleReductionOutput)
+    return failure();
 
   const int subgroupSize = targetEnv.getResourceLimits().getSubgroupSize();
   std::optional<int64_t> dimSize = op.getStaticLoopRanges()[reductionDims[0]];
-  if (!dimSize || *dimSize % subgroupSize != 0) return failure();
+  if (!dimSize || *dimSize % subgroupSize != 0)
+    return failure();
 
   const Type elementType =
       llvm::cast<ShapedType>(op.getOutputs()[0].getType()).getElementType();
-  if (!elementType.isIntOrFloat()) return failure();
+  if (!elementType.isIntOrFloat())
+    return failure();
   unsigned bitWidth = elementType.getIntOrFloatBitWidth();
   // Reduction distribution only supports 8/16/32 bit types now.
-  if (bitWidth != 32 && bitWidth != 16 && bitWidth != 8) return failure();
+  if (bitWidth != 32 && bitWidth != 16 && bitWidth != 8)
+    return failure();
 
   // Let each thread handle `vectorSize` elements.
   unsigned vectorSize = kMaxVectorNumBits / bitWidth;
-  while ((*dimSize / vectorSize) % subgroupSize != 0) vectorSize /= 2;
+  while ((*dimSize / vectorSize) % subgroupSize != 0)
+    vectorSize /= 2;
 
   // TODO: Add reduction tiling to handle larger reductions.
   const int64_t maxWorkgroupSize =
@@ -1202,8 +1245,8 @@
   reductionTileSizes.push_back(groupSize * vectorSize);
 
   TileSizesListType tileSizes;
-  tileSizes.emplace_back(std::move(workgroupTileSizes));  // Workgroup level
-  tileSizes.emplace_back(std::move(reductionTileSizes));  // reduction level
+  tileSizes.emplace_back(std::move(workgroupTileSizes)); // Workgroup level
+  tileSizes.emplace_back(std::move(reductionTileSizes)); // reduction level
   if (failed(setOpConfigAndEntryPointFnTranslation(
           op->getParentOfType<func::FuncOp>(), op, tileSizes,
           CodeGenPipeline::SPIRVSubgroupReduce, workgroupSize))) {
@@ -1226,7 +1269,8 @@
 /// Returns a small tiling factor for the given reduction `dimSize`.
 /// Returns 0 to avoid tiling.
 static int getReductionTilingFactor(int64_t dimSize) {
-  if (dimSize % 4 == 0) return 4;
+  if (dimSize % 4 == 0)
+    return 4;
 
   // Try to find the smallest prime factor as the tiling factor. As a trade off
   // between generated code size and compilation time, only look at prime
@@ -1234,10 +1278,11 @@
   static constexpr std::array<int, 15> primeNumbers = {
       2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47};
   for (int n : primeNumbers) {
-    if (dimSize % n == 0) return n;
+    if (dimSize % n == 0)
+      return n;
   }
 
-  return 1;  // Otherwise just tile with size 1.
+  return 1; // Otherwise just tile with size 1.
 }
 
 static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits,
@@ -1337,8 +1382,10 @@
     if (isa<linalg::GenericOp>(linalgOp.getOperation())) {
       SmallVector<int64_t> ranges = linalgOp.getStaticLoopRanges();
       for (int64_t i = 0, e = workgroupTileSizes.size(); i < e; i++) {
-        if (workgroupTileSizes[i] != 0) break;
-        if (ranges[i] != 1) workgroupTileSizes[i] = 1;
+        if (workgroupTileSizes[i] != 0)
+          break;
+        if (ranges[i] != 1)
+          workgroupTileSizes[i] = 1;
       }
     }
     // Scan from the innermost shape dimension and try to deduce the
@@ -1347,7 +1394,8 @@
     for (auto shapeDim : llvm::reverse(partitionedLoops)) {
       int64_t loopBound = loopBounds[shapeDim];
       // Skip dynamic dimensions.
-      if (ShapedType::isDynamic(loopBound)) continue;
+      if (ShapedType::isDynamic(loopBound))
+        continue;
 
       // Try to find some power of two that can devide the current shape dim
       // size. This vector keeps the candidate tile sizes.
@@ -1370,10 +1418,12 @@
 
       for (int64_t candidate : candidates) {
         if (loopBound % candidate != 0) {
-          if (!lossFactor) continue;
+          if (!lossFactor)
+            continue;
           // Skip this candidate if it causes many threads to be idle.
           int64_t idleThreads = candidate - (loopBound % candidate);
-          if (idleThreads > candidate / *lossFactor) continue;
+          if (idleThreads > candidate / *lossFactor)
+            continue;
         }
         // If the workload is too small and we cannot distribute to more than 2
         // workgroups, try a smaller tile size to increase parallelism.
@@ -1398,7 +1448,8 @@
           assert(numThreads % (candidate / vectorSize) == 0);
           numThreads /= candidate / vectorSize;
         } else {
-          if (wgDim == 0) vectorizable = false;
+          if (wgDim == 0)
+            vectorizable = false;
           threadTileSizes[shapeDim] = 1;
           workgroupSize[wgDim] = candidate;
           assert(numThreads % candidate == 0);
@@ -1409,7 +1460,8 @@
       }
 
       // Stop if we have distributed all threads.
-      if (numThreads == 1) break;
+      if (numThreads == 1)
+        break;
       wgDim++;
     }
     return numThreads;
@@ -1425,7 +1477,8 @@
     int64_t lossFactor = 32;
 
     for (; lossFactor >= 1; lossFactor >>= 1) {
-      if (distributeToThreads(numThreads, lossFactor) == 1) break;
+      if (distributeToThreads(numThreads, lossFactor) == 1)
+        break;
     }
   }
 
@@ -1486,28 +1539,28 @@
   // First try to find a proper CodeGen configuration to tile and vectorize for
   // the current target architecture.
   switch (targetEnv.getVendorID()) {
-    case spirv::Vendor::AMD:
-      if (succeeded(detail::setAMDCodeGenConfig(targetEnv, rootOp)))
-        return success();
-      break;
-    case spirv::Vendor::Apple:
-      if (succeeded(detail::setAppleCodeGenConfig(targetEnv, rootOp)))
-        return success();
-      break;
-    case spirv::Vendor::ARM:
-      if (succeeded(detail::setMaliCodeGenConfig(targetEnv, rootOp)))
-        return success();
-      break;
-    case spirv::Vendor::NVIDIA:
-      if (succeeded(detail::setNVIDIACodeGenConfig(targetEnv, rootOp)))
-        return success();
-      break;
-    case spirv::Vendor::Qualcomm:
-      if (succeeded(detail::setAdrenoCodeGenConfig(targetEnv, rootOp)))
-        return success();
-      break;
-    default:
-      break;
+  case spirv::Vendor::AMD:
+    if (succeeded(detail::setAMDCodeGenConfig(targetEnv, rootOp)))
+      return success();
+    break;
+  case spirv::Vendor::Apple:
+    if (succeeded(detail::setAppleCodeGenConfig(targetEnv, rootOp)))
+      return success();
+    break;
+  case spirv::Vendor::ARM:
+    if (succeeded(detail::setMaliCodeGenConfig(targetEnv, rootOp)))
+      return success();
+    break;
+  case spirv::Vendor::NVIDIA:
+    if (succeeded(detail::setNVIDIACodeGenConfig(targetEnv, rootOp)))
+      return success();
+    break;
+  case spirv::Vendor::Qualcomm:
+    if (succeeded(detail::setAdrenoCodeGenConfig(targetEnv, rootOp)))
+      return success();
+    break;
+  default:
+    break;
   }
 
   // Otherwise fallback to use a default configuration that tiles and
@@ -1527,7 +1580,8 @@
         }
         auto result =
             detail::setMatmulOpConfig(limits, op, workgroupXY, threadMNK);
-        if (succeeded(result)) return success();
+        if (succeeded(result))
+          return success();
 
         // If unsuccessful, try to tile and distribute.
         return setDefaultOpConfig(limits, op);
@@ -1542,7 +1596,8 @@
           const int subgroupSize = 32;
           auto result = detail::setConvOpConfig(cast<linalg::LinalgOp>(*op),
                                                 subgroupSize, bestTilingFactor);
-          if (succeeded(result)) return success();
+          if (succeeded(result))
+            return success();
         }
 
         // If unsuccessful, try to tile and distribute/vectorize.
@@ -1550,7 +1605,8 @@
       })
       .Case<linalg::GenericOp>([&](linalg::GenericOp op) {
         LLVM_DEBUG(llvm::dbgs() << "figuring configuration for generic op\n");
-        if (succeeded(setReductionConfig(targetEnv, op))) return success();
+        if (succeeded(setReductionConfig(targetEnv, op)))
+          return success();
 
         // If a generic op has reduction iterator types, it can be treated as a
         // root op for configuration as well. Use the default configuration,
@@ -1592,7 +1648,8 @@
   Operation *computeOp = computeOps.back();
   spirv::ResourceLimitsAttr limits = targetEnv.getResourceLimits();
   // If there are still no root op, check for any linalg.generic op.
-  if (succeeded(setDefaultOpConfig(limits, computeOp))) return success();
+  if (succeeded(setDefaultOpConfig(limits, computeOp)))
+    return success();
 
   // Check if the op configuration was set.
   return computeOp->emitOpError(
@@ -1613,7 +1670,8 @@
 
   for (auto funcOp : module.getOps<func::FuncOp>()) {
     auto exportOp = exportOps.lookup(funcOp.getName());
-    if (!exportOp) continue;
+    if (!exportOp)
+      continue;
 
     if (failed(setConfigForKernel(targetEnv, exportOp, funcOp))) {
       return failure();
@@ -1623,5 +1681,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h
index f08f19b..d23517d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.h
@@ -102,7 +102,7 @@
 LogicalResult setNVIDIACodeGenConfig(const spirv::TargetEnv &targetEnv,
                                      Operation *rootOp);
 
-}  // namespace detail
+} // namespace detail
 
 /// Returns true if the given `linalgOp` is a (batch) matmul op.
 bool isMatmulOrBatchMatmul(linalg::LinalgOp linalgOp);
@@ -110,15 +110,15 @@
 /// Given the linalg `op` with `lhsShape` and `rhsShape`, tries to treat as a
 /// (batch) matmul like op and deduce the index of the loop corresponding to
 /// B/M/N/K dimension respectively. Returns -1 as the index if unable to deduce.
-std::tuple<int, int, int, int> getMatmulBMNKIndex(
-    linalg::LinalgOp op, int *lastParallelDim = nullptr);
+std::tuple<int, int, int, int>
+getMatmulBMNKIndex(linalg::LinalgOp op, int *lastParallelDim = nullptr);
 
 /// Attaches the `translation_info` attribute to entry points in `moduleOp` and
 /// `lowering_config` attributes to all root ops in `moduleOp`'s region.
 /// These attributes are used to drive the CodeGen pipeline.
 LogicalResult initSPIRVLaunchConfig(ModuleOp moduleOp);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_SPIRV_KERNELCONFIG_H_
+#endif // IREE_COMPILER_CODEGEN_SPIRV_KERNELCONFIG_H_
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/MaliConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/MaliConfig.cpp
index e2e8f6b..4b251ab 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/MaliConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/MaliConfig.cpp
@@ -56,7 +56,8 @@
     // Use the result type in case of larger bitwidth for accumulators.
     auto type = cast<ShapedType>(convOp->getResult(0).getType());
     const int bitwidth = type.getElementTypeBitWidth();
-    if (bitwidth > 32) return failure();
+    if (bitwidth > 32)
+      return failure();
     const int multipler = 32 / bitwidth;
     bool hasPaddedInput = convOp.image().getDefiningOp<tensor::PadOp>();
     const int bestTilingFactor = (hasPaddedInput ? 8 : 16) * multipler;
@@ -67,6 +68,6 @@
   return failure();
 }
 
-}  // namespace detail
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace detail
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp
index 9fa5574..1a06506 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp
@@ -95,6 +95,6 @@
   return failure();
 }
 
-}  // namespace detail
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace detail
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
index 16e2321..6b62b94 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/Passes.cpp
@@ -98,7 +98,8 @@
 
   bool needsBarrier = hasSharedMemoryAddressSpace(fromType) ||
                       hasSharedMemoryAddressSpace(toType);
-  if (needsBarrier) builder.create<gpu::BarrierOp>(loc);
+  if (needsBarrier)
+    builder.create<gpu::BarrierOp>(loc);
   Operation *copy = builder.create<memref::CopyOp>(loc, from, to);
   if (needsBarrier) {
     setMarker(copy, getCopyToWorkgroupMemoryMarker());
@@ -138,9 +139,9 @@
   nestedModulePM.addPass(createCSEPass());
 }
 
-static void addSPIRVBufferizePasses(
-    OpPassManager &passManager,
-    BufferizationOptions::AllocationFn allocationFn) {
+static void
+addSPIRVBufferizePasses(OpPassManager &passManager,
+                        BufferizationOptions::AllocationFn allocationFn) {
   // Resolve dim ops first so that we don't have compute Linalg ops lingering on
   // becuase of dim op usage. This avoids bufferizing those compute ops just for
   // their shape dimensions.
@@ -602,5 +603,5 @@
   });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVAnnotateWinogradLoops.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVAnnotateWinogradLoops.cpp
index 1a809ee..f779b6f 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVAnnotateWinogradLoops.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVAnnotateWinogradLoops.cpp
@@ -15,7 +15,7 @@
 
 class SPIRVAnnotateWinogradLoopsPass final
     : public SPIRVAnnotateWinogradLoopsBase<SPIRVAnnotateWinogradLoopsPass> {
- public:
+public:
   SPIRVAnnotateWinogradLoopsPass() = default;
   SPIRVAnnotateWinogradLoopsPass(const SPIRVAnnotateWinogradLoopsPass &pass) =
       default;
@@ -24,24 +24,26 @@
     func::FuncOp funcOp = getOperation();
     SmallVector<scf::ForOp> forOps;
     funcOp.walk([&](scf::ForOp forOp) {
-      if (!isTiledAndDistributedLoop(forOp)) forOps.push_back(forOp);
+      if (!isTiledAndDistributedLoop(forOp))
+        forOps.push_back(forOp);
     });
 
     MLIRContext *context = &getContext();
     OpBuilder builder(context);
     const char *attrName = getSPIRVDistributeAttrName();
     for (auto [index, forOp] : llvm::enumerate(forOps)) {
-      if (index > kNumGPUDims) break;
+      if (index > kNumGPUDims)
+        break;
       forOp->setAttr(attrName, builder.getIndexAttr(index));
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSPIRVAnnotateWinogradLoopsPass() {
   return std::make_unique<SPIRVAnnotateWinogradLoopsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVBreakDownLargeVector.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVBreakDownLargeVector.cpp
index f57d306..35e9b05 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVBreakDownLargeVector.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVBreakDownLargeVector.cpp
@@ -40,12 +40,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSPIRVBreakDownLargeVectorPass() {
   return std::make_unique<SPIRVBreakDownLargeVectorPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVCreateFastSlowPath.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVCreateFastSlowPath.cpp
index 244ad51..ce1514b 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVCreateFastSlowPath.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVCreateFastSlowPath.cpp
@@ -55,18 +55,21 @@
   // Find the anchor tensor.pad op, from which we get the conditions for
   // switching between the fast and slow path.
   auto padOps = llvm::to_vector(body->getOps<tensor::PadOp>());
-  if (llvm::size(padOps) != 1) return;
+  if (llvm::size(padOps) != 1)
+    return;
   tensor::PadOp padOp = *padOps.begin();
 
   // If all padding sizes are zero, we don't need to do anything.
   SmallVector<OpFoldResult> lowPads = padOp.getMixedLowPad();
   SmallVector<OpFoldResult> highPads = padOp.getMixedHighPad();
-  if (llvm::all_of(lowPads, isZero) && llvm::all_of(highPads, isZero)) return;
+  if (llvm::all_of(lowPads, isZero) && llvm::all_of(highPads, isZero))
+    return;
 
   IRRewriter rewriter(funcOp.getContext());
   rewriter.setInsertionPoint(body->getTerminator());
   SmallVector<Operation *, 16> allOps;
-  for (Operation &op : body->without_terminator()) allOps.push_back(&op);
+  for (Operation &op : body->without_terminator())
+    allOps.push_back(&op);
 
   BackwardSliceOptions options;
   options.filter = [](Operation *op) { return true; };
@@ -93,7 +96,8 @@
 
   SmallVector<Operation *> cloneOps;
   for (Operation *op : allOps) {
-    if (!padSizeOps.contains(op)) cloneOps.push_back(op);
+    if (!padSizeOps.contains(op))
+      cloneOps.push_back(op);
   }
 
   // Build the scf.if op itself. Clone all ops other than those used for
@@ -113,13 +117,15 @@
   };
   auto elseBuilder = [&](OpBuilder &builder, Location loc) {
     IRMapping bvm;
-    for (Operation *op : cloneOps) builder.clone(*op, bvm);
+    for (Operation *op : cloneOps)
+      builder.clone(*op, bvm);
     builder.create<scf::YieldOp>(loc);
   };
   rewriter.create<scf::IfOp>(padOp.getLoc(), ifCond, thenBuilder, elseBuilder);
 
   // All of these ops have been cloned to both regions. Erease them now.
-  for (Operation *op : llvm::reverse(cloneOps)) rewriter.eraseOp(op);
+  for (Operation *op : llvm::reverse(cloneOps))
+    rewriter.eraseOp(op);
 }
 
 namespace {
@@ -147,12 +153,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSPIRVCreateFastSlowPathPass() {
   return std::make_unique<SPIRVCreateFastSlowPathPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVDistribute.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVDistribute.cpp
index 7a94686..1866f83 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVDistribute.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVDistribute.cpp
@@ -35,7 +35,8 @@
     // Only distribute if we see the marker attribute.
     auto numDimAttr =
         forOp->getAttrOfType<IntegerAttr>(getSPIRVDistributeAttrName());
-    if (!numDimAttr) return failure();
+    if (!numDimAttr)
+      return failure();
 
     Location loc = forOp.getLoc();
     auto indexType = rewriter.getIndexType();
@@ -82,11 +83,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createSPIRVDistributePass() {
   return std::make_unique<SPIRVDistributePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp
index 328c4ab..7a96fb8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp
@@ -46,9 +46,9 @@
     : OpConversionPattern<IREE::HAL::InterfaceBindingSubspanOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type newResultTy = getTypeConverter()->convertType(op.getType());
     if (!newResultTy)
       return rewriter.notifyMatchFailure(
@@ -75,9 +75,11 @@
 // Tries to flatten `type` to a 1-D vector type. Returns `nullptr` on failure.
 static VectorType flattenVectorType(Type type) {
   auto vecTy = llvm::dyn_cast<VectorType>(type);
-  if (!vecTy) return nullptr;
+  if (!vecTy)
+    return nullptr;
 
-  if (vecTy.isScalable() || vecTy.getRank() <= 1) return nullptr;
+  if (vecTy.isScalable() || vecTy.getRank() <= 1)
+    return nullptr;
 
   int64_t totalElements = vecTy.getNumElements();
   return VectorType::get(llvm::ArrayRef(totalElements), vecTy.getElementType());
@@ -102,7 +104,8 @@
 
   LogicalResult matchAndRewrite(Operation *op,
                                 PatternRewriter &rewriter) const override {
-    if (!OpTrait::hasElementwiseMappableTraits(op)) return failure();
+    if (!OpTrait::hasElementwiseMappableTraits(op))
+      return failure();
 
     auto newResultTypes = llvm::to_vector_of<Type, 2>(
         llvm::map_range(op->getResultTypes(), flattenVectorType));
@@ -115,7 +118,8 @@
     auto operands = llvm::to_vector_of<Value, 2>(op->getOperands());
     for (Value &operand : operands) {
       VectorType newOperandTy = flattenVectorType(operand.getType());
-      if (!newOperandTy) return failure();
+      if (!newOperandTy)
+        return failure();
 
       operand = rewriter.createOrFold<vector::ShapeCastOp>(loc, newOperandTy,
                                                            operand);
@@ -140,8 +144,9 @@
 // Helper functions
 //===----------------------------------------------------------------------===//
 
-static void populateIreeI64EmulationPatterns(
-    arith::WideIntEmulationConverter &converter, RewritePatternSet &patterns) {
+static void
+populateIreeI64EmulationPatterns(arith::WideIntEmulationConverter &converter,
+                                 RewritePatternSet &patterns) {
   patterns.add<ConvertHalInterfaceBindingSubspan>(converter,
                                                   patterns.getContext());
 }
@@ -165,7 +170,8 @@
 
   void runOnOperation() override {
     ModuleOp op = getOperation();
-    if (supportsI64(op)) return;
+    if (supportsI64(op))
+      return;
 
     arith::WideIntEmulationConverter typeConverter(32);
     memref::populateMemRefWideIntEmulationConversions(typeConverter);
@@ -216,7 +222,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // Public interface
@@ -226,5 +232,5 @@
   return std::make_unique<SPIRVEmulateI64Pass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
index dc381a4..7eb3073 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
@@ -36,10 +36,12 @@
 bool is1DStaticShapedStorageBuffer(
     IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
   auto type = llvm::dyn_cast<MemRefType>(subspanOp.getType());
-  if (!type) return false;
+  if (!type)
+    return false;
   auto attr = llvm::dyn_cast_if_present<IREE::HAL::DescriptorTypeAttr>(
       type.getMemorySpace());
-  if (!attr) return false;
+  if (!attr)
+    return false;
   return type.hasStaticShape() && type.getRank() == 1 &&
          attr.getValue() == IREE::HAL::DescriptorType::StorageBuffer;
 }
@@ -58,8 +60,9 @@
 ///  hal.interface.binding.subspan set(0) binding(0) offset(%offset)
 ///      : memref<?xf32>{%c16}
 /// ```
-IREE::HAL::InterfaceBindingSubspanOp rewriteStorageBufferSubspanOp(
-    RewriterBase &rewriter, IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
+IREE::HAL::InterfaceBindingSubspanOp
+rewriteStorageBufferSubspanOp(RewriterBase &rewriter,
+                              IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
   assert(is1DStaticShapedStorageBuffer(subspanOp));
   LLVM_DEBUG({
     llvm::dbgs() << "Rewriting subspan op: ";
@@ -94,7 +97,7 @@
   return newOp;
 }
 
-}  // namespace
+} // namespace
 
 void EraseStorageBufferStaticShapePass::runOnOperation() {
   func::FuncOp funcOp = getOperation();
@@ -131,5 +134,5 @@
   return std::make_unique<EraseStorageBufferStaticShapePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp
index 3e51150..71350d2 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp
@@ -39,7 +39,7 @@
 /// - then convert to SPIRV dialect.
 class SPIRVLowerExecutableTargetPass
     : public SPIRVLowerExecutableTargetBase<SPIRVLowerExecutableTargetPass> {
- public:
+public:
   SPIRVLowerExecutableTargetPass() = default;
   SPIRVLowerExecutableTargetPass(const SPIRVLowerExecutableTargetPass &pass) {}
 
@@ -55,7 +55,7 @@
 
   void runOnOperation() override;
 
- private:
+private:
   Option<bool> testLoweringConfiguration{
       *this, "test-lowering-configuration",
       llvm::cl::desc("Flag used for lit-testing the configuration set for root "
@@ -63,25 +63,28 @@
                      "to true for lit tests; not for general usage"),
       llvm::cl::init(false)};
 };
-}  // namespace
+} // namespace
 
 /// Verify that valid configuration is set for all ops within the compiled
 /// module.
 template <typename F>
-static LogicalResult verifyLoweringConfiguration(
-    ModuleOp module, IREE::Codegen::TranslationInfoAttr translationInfo,
-    ArrayRef<int64_t> workgroupSize, F verificationFn) {
+static LogicalResult
+verifyLoweringConfiguration(ModuleOp module,
+                            IREE::Codegen::TranslationInfoAttr translationInfo,
+                            ArrayRef<int64_t> workgroupSize, F verificationFn) {
   auto walkResult = module.walk([&](Operation *op) -> WalkResult {
     IREE::Codegen::LoweringConfigAttr loweringConfig = getLoweringConfig(op);
-    if (!loweringConfig) return WalkResult::advance();
+    if (!loweringConfig)
+      return WalkResult::advance();
     return verificationFn(op, loweringConfig, translationInfo, workgroupSize);
   });
   return failure(walkResult.wasInterrupted());
 }
 
-static LogicalResult verifyEntryPoint(
-    ModuleOp moduleOp, IREE::Codegen::TranslationInfoAttr translationInfo,
-    IREE::HAL::ExecutableExportOp exportOp) {
+static LogicalResult
+verifyEntryPoint(ModuleOp moduleOp,
+                 IREE::Codegen::TranslationInfoAttr translationInfo,
+                 IREE::HAL::ExecutableExportOp exportOp) {
   if (translationInfo.getDispatchLoweringPassPipeline() ==
       CodeGenPipeline::TransformDialectCodegen) {
     // Transform dialect encodes configuration into the schedule directly.
@@ -102,20 +105,20 @@
   }
 
   switch (translationInfo.getDispatchLoweringPassPipeline()) {
-    case CodeGenPipeline::SPIRVBaseVectorize:
-      return verifyLoweringConfiguration(moduleOp, translationInfo,
-                                         workgroupSizes,
-                                         verifySPIRVBaseVectorizePassPipeline);
-    case CodeGenPipeline::SPIRVMatmulPromoteVectorize:
-      return verifyLoweringConfiguration(
-          moduleOp, translationInfo, workgroupSizes,
-          verifySPIRVMatmulPromoteVectorizePassPipeline);
-    case CodeGenPipeline::SPIRVCooperativeMatrixVectorize:
-      return verifyLoweringConfiguration(
-          moduleOp, translationInfo, workgroupSizes,
-          verifySPIRVCooperativeMatrixVectorizePassPipeline);
-    default:
-      break;
+  case CodeGenPipeline::SPIRVBaseVectorize:
+    return verifyLoweringConfiguration(moduleOp, translationInfo,
+                                       workgroupSizes,
+                                       verifySPIRVBaseVectorizePassPipeline);
+  case CodeGenPipeline::SPIRVMatmulPromoteVectorize:
+    return verifyLoweringConfiguration(
+        moduleOp, translationInfo, workgroupSizes,
+        verifySPIRVMatmulPromoteVectorizePassPipeline);
+  case CodeGenPipeline::SPIRVCooperativeMatrixVectorize:
+    return verifyLoweringConfiguration(
+        moduleOp, translationInfo, workgroupSizes,
+        verifySPIRVCooperativeMatrixVectorizePassPipeline);
+  default:
+    break;
   }
   return success();
 }
@@ -163,34 +166,34 @@
 
   if (!testLoweringConfiguration && translationInfo.has_value()) {
     switch (translationInfo.value().getDispatchLoweringPassPipeline()) {
-      case CodeGenPipeline::SPIRVBaseDistribute:
-        addSPIRVBaseDistributePassPipeline(pipeline);
-        break;
-      case CodeGenPipeline::SPIRVBaseVectorize:
-        addSPIRVBaseVectorizePassPipeline(pipeline);
-        break;
-      case CodeGenPipeline::SPIRVCooperativeMatrixVectorize:
-        addSPIRVCooperativeMatrixVectorizePassPipeline(
-            pipeline, translationInfo.value().getSoftwarePipelineDepth(),
-            translationInfo.value().getSoftwarePipelineStoreStage());
-        break;
-      case CodeGenPipeline::SPIRVMatmulPromoteVectorize:
-        addSPIRVMatmulPromoteVectorizePassPipeline(
-            pipeline, translationInfo.value().getSoftwarePipelineDepth(),
-            translationInfo.value().getSoftwarePipelineStoreStage());
-        break;
-      case CodeGenPipeline::SPIRVSubgroupReduce:
-        addSPIRVSubgroupReducePassPipeline(pipeline);
-        break;
-      case CodeGenPipeline::SPIRVWinogradVectorize:
-        addSPIRVWinogradVectorizePassPipeline(pipeline);
-        break;
-      case CodeGenPipeline::TransformDialectCodegen:
-        addSPIRVTransformDialectPassPipeline(pipeline);
-        break;
-      default:
-        variantOp.emitOpError("Unsupported pipeline on GPU target.");
-        return signalPassFailure();
+    case CodeGenPipeline::SPIRVBaseDistribute:
+      addSPIRVBaseDistributePassPipeline(pipeline);
+      break;
+    case CodeGenPipeline::SPIRVBaseVectorize:
+      addSPIRVBaseVectorizePassPipeline(pipeline);
+      break;
+    case CodeGenPipeline::SPIRVCooperativeMatrixVectorize:
+      addSPIRVCooperativeMatrixVectorizePassPipeline(
+          pipeline, translationInfo.value().getSoftwarePipelineDepth(),
+          translationInfo.value().getSoftwarePipelineStoreStage());
+      break;
+    case CodeGenPipeline::SPIRVMatmulPromoteVectorize:
+      addSPIRVMatmulPromoteVectorizePassPipeline(
+          pipeline, translationInfo.value().getSoftwarePipelineDepth(),
+          translationInfo.value().getSoftwarePipelineStoreStage());
+      break;
+    case CodeGenPipeline::SPIRVSubgroupReduce:
+      addSPIRVSubgroupReducePassPipeline(pipeline);
+      break;
+    case CodeGenPipeline::SPIRVWinogradVectorize:
+      addSPIRVWinogradVectorizePassPipeline(pipeline);
+      break;
+    case CodeGenPipeline::TransformDialectCodegen:
+      addSPIRVTransformDialectPassPipeline(pipeline);
+      break;
+    default:
+      variantOp.emitOpError("Unsupported pipeline on GPU target.");
+      return signalPassFailure();
     }
   }
 
@@ -210,5 +213,5 @@
   return std::make_unique<SPIRVLowerExecutableTargetPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
index 5640575..226cdba 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
@@ -22,47 +22,47 @@
 namespace iree_compiler {
 namespace {
 
-std::optional<spirv::StorageClass> mapHALDescriptorTypeForVulkan(
-    Attribute attr) {
+std::optional<spirv::StorageClass>
+mapHALDescriptorTypeForVulkan(Attribute attr) {
   if (auto dtAttr =
           llvm::dyn_cast_if_present<IREE::HAL::DescriptorTypeAttr>(attr)) {
     switch (dtAttr.getValue()) {
-      case IREE::HAL::DescriptorType::UniformBuffer:
-        return spirv::StorageClass::Uniform;
-      case IREE::HAL::DescriptorType::StorageBuffer:
-        return spirv::StorageClass::StorageBuffer;
-      default:
-        return std::nullopt;
+    case IREE::HAL::DescriptorType::UniformBuffer:
+      return spirv::StorageClass::Uniform;
+    case IREE::HAL::DescriptorType::StorageBuffer:
+      return spirv::StorageClass::StorageBuffer;
+    default:
+      return std::nullopt;
     }
   }
   if (auto gpuAttr = llvm::dyn_cast_if_present<gpu::AddressSpaceAttr>(attr)) {
     switch (gpuAttr.getValue()) {
-      case gpu::AddressSpace::Workgroup:
-        return spirv::StorageClass::Workgroup;
-      default:
-        return std::nullopt;
+    case gpu::AddressSpace::Workgroup:
+      return spirv::StorageClass::Workgroup;
+    default:
+      return std::nullopt;
     }
   };
   return spirv::mapMemorySpaceToVulkanStorageClass(attr);
 }
 
-std::optional<spirv::StorageClass> mapHALDescriptorTypeForOpenCL(
-    Attribute attr) {
+std::optional<spirv::StorageClass>
+mapHALDescriptorTypeForOpenCL(Attribute attr) {
   if (auto dtAttr =
           llvm::dyn_cast_if_present<IREE::HAL::DescriptorTypeAttr>(attr)) {
     switch (dtAttr.getValue()) {
-      case IREE::HAL::DescriptorType::UniformBuffer:
-        return spirv::StorageClass::Uniform;
-      case IREE::HAL::DescriptorType::StorageBuffer:
-        return spirv::StorageClass::CrossWorkgroup;
+    case IREE::HAL::DescriptorType::UniformBuffer:
+      return spirv::StorageClass::Uniform;
+    case IREE::HAL::DescriptorType::StorageBuffer:
+      return spirv::StorageClass::CrossWorkgroup;
     }
   }
   if (auto gpuAttr = llvm::dyn_cast_if_present<gpu::AddressSpaceAttr>(attr)) {
     switch (gpuAttr.getValue()) {
-      case gpu::AddressSpace::Workgroup:
-        return spirv::StorageClass::Workgroup;
-      default:
-        return std::nullopt;
+    case gpu::AddressSpace::Workgroup:
+      return spirv::StorageClass::Workgroup;
+    default:
+      return std::nullopt;
     }
   };
   return spirv::mapMemorySpaceToOpenCLStorageClass(attr);
@@ -104,12 +104,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSPIRVMapMemRefStorageClassPass() {
   return std::make_unique<SPIRVMapMemRefStorageClassPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVPasses.h b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVPasses.h
index 9abfc72..1eb7e61 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVPasses.h
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVPasses.h
@@ -56,8 +56,8 @@
 /// This pass converts remaining interface ops into SPIR-V global variables,
 /// GPU processor ID ops into SPIR-V global variables, loop/standard ops into
 /// corresponding SPIR-V ops.
-std::unique_ptr<OperationPass<ModuleOp>> createConvertToSPIRVPass(
-    bool enableFastMath = false, unsigned indexWidth = 32);
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertToSPIRVPass(bool enableFastMath = false, unsigned indexWidth = 32);
 
 /// Annotates the innermost Winograd loops with the spirv distribute attribute.
 std::unique_ptr<OperationPass<func::FuncOp>>
@@ -107,8 +107,9 @@
 
 /// Pass to promote Linalg ops with buffer semantics to use workgroup memory
 /// and then tile to invocations.
-std::unique_ptr<OperationPass<func::FuncOp>> createSPIRVTileAndPromotePass(
-    bool promoteCMatrix = false, bool skipThreadLevel = false);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createSPIRVTileAndPromotePass(bool promoteCMatrix = false,
+                              bool skipThreadLevel = false);
 
 /// Pass to tile Linalg ops with tensor semantics to invocations.
 std::unique_ptr<OperationPass<func::FuncOp>> createSPIRVTilePass();
@@ -159,7 +160,7 @@
     IREE::Codegen::TranslationInfoAttr translationInfo,
     ArrayRef<int64_t> workgroupSize);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_SPIRV_PASSES_H_
+#endif // IREE_COMPILER_CODEGEN_SPIRV_PASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTile.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTile.cpp
index bbbd078..c5bb074 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTile.cpp
@@ -46,8 +46,9 @@
 //===----------------------------------------------------------------------===//
 
 /// Collects computation ops which we will use as anchor to tile and fuse.
-static FailureOr<IREE::Codegen::LoweringConfigAttr> collectComputeOps(
-    func::FuncOp funcOp, SmallVectorImpl<Operation *> &computeOps) {
+static FailureOr<IREE::Codegen::LoweringConfigAttr>
+collectComputeOps(func::FuncOp funcOp,
+                  SmallVectorImpl<Operation *> &computeOps) {
   // If there are `scf.if` ops which have linalg ops, we have both a fast and
   // slow paths for padding handling. Then we need to scan both regions to
   // discover such computation ops so that we can tile and fuse both regions.
@@ -66,7 +67,8 @@
   if (ifOps.empty()) {
     computeOps = getComputeOps(funcOp);
     for (Operation *op : computeOps) {
-      if (auto config = getLoweringConfig(op)) configs.push_back(config);
+      if (auto config = getLoweringConfig(op))
+        configs.push_back(config);
     }
     if (computeOps.size() > 1) {
       // Only keep the last compute ops.
@@ -80,7 +82,8 @@
 
     ifOps.front()->walk([&configs](Operation *op) {
       if (isa<linalg::LinalgOp, TilingInterface>(op)) {
-        if (auto config = getLoweringConfig(op)) configs.push_back(config);
+        if (auto config = getLoweringConfig(op))
+          configs.push_back(config);
       }
     });
 
@@ -297,7 +300,7 @@
 namespace {
 
 class SPIRVTilePass final : public SPIRVTileBase<SPIRVTilePass> {
- public:
+public:
   SPIRVTilePass() = default;
   SPIRVTilePass(const SPIRVTilePass &pass) = default;
 
@@ -309,7 +312,8 @@
     SmallVector<Operation *> computeOps;
     FailureOr<IREE::Codegen::LoweringConfigAttr> loweringConfig =
         collectComputeOps(funcOp, computeOps);
-    if (failed(loweringConfig)) return signalPassFailure();
+    if (failed(loweringConfig))
+      return signalPassFailure();
     assert(computeOps.size() <= 2);
 
     // Now tile the last computation op to invocations and fuse all operand
@@ -346,7 +350,7 @@
 
     concretizePadShape(funcOp);
 
-    {  // Downsize n-D (n > 1) convolutions to 1-D.
+    { // Downsize n-D (n > 1) convolutions to 1-D.
       RewritePatternSet patterns(context);
       linalg::populateDecomposeConvolutionPatterns(patterns);
       // Downsizing creates consecutive extract/insert slice ops. Merge them.
@@ -367,11 +371,11 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createSPIRVTilePass() {
   return std::make_unique<SPIRVTilePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp
index 742125a..e84b16b 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp
@@ -115,7 +115,7 @@
 /// buffer semantics.
 class SPIRVTileAndDistributePass
     : public SPIRVTileAndDistributeBase<SPIRVTileAndDistributePass> {
- public:
+public:
   SPIRVTileAndDistributePass() = default;
   SPIRVTileAndDistributePass(const SPIRVTileAndDistributePass &pass) = default;
 
@@ -127,7 +127,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 //====---------------------------------------------------------------------===//
 // Main pass implementation
@@ -136,14 +136,17 @@
 void SPIRVTileAndDistributePass::runOnOperation() {
   MLIRContext *context = &getContext();
   func::FuncOp funcOp = getOperation();
-  if (!isEntryPoint(funcOp)) return;
+  if (!isEntryPoint(funcOp))
+    return;
 
   auto threadTileComputeFn = getSPIRVTileSizeComputeFn(funcOp, 1);
-  if (failed(threadTileComputeFn)) return signalPassFailure();
+  if (failed(threadTileComputeFn))
+    return signalPassFailure();
   auto reductionTileComputeFn = getSPIRVTileSizeComputeFn(funcOp, 2);
-  if (failed(reductionTileComputeFn)) return signalPassFailure();
+  if (failed(reductionTileComputeFn))
+    return signalPassFailure();
 
-  {  // Tile and distribute to invocations.
+  { // Tile and distribute to invocations.
     RewritePatternSet invocationTilingPatterns(context);
     populateTilingToInvocationPatterns(invocationTilingPatterns,
                                        *threadTileComputeFn);
@@ -184,7 +187,7 @@
     });
   }
 
-  {  // Tile reduction dimensions.
+  { // Tile reduction dimensions.
     RewritePatternSet reductionTilingPatterns(context);
     populateTilingReductionPatterns(reductionTilingPatterns,
                                     *reductionTileComputeFn);
@@ -220,5 +223,5 @@
   return std::make_unique<SPIRVTileAndDistributePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndPromote.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndPromote.cpp
index 637ecdc..bdd208f 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndPromote.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndPromote.cpp
@@ -118,7 +118,7 @@
 
 class SPIRVTileAndPromotePass final
     : public SPIRVTileAndPromoteBase<SPIRVTileAndPromotePass> {
- public:
+public:
   SPIRVTileAndPromotePass(bool promoteCMatrix, bool skipThreadLevel)
       : promoteCMatrix(promoteCMatrix), skipThreadLevel(skipThreadLevel) {}
 
@@ -127,7 +127,8 @@
   }
 
   LogicalResult initializeOptions(StringRef options) override {
-    if (failed(Pass::initializeOptions(options))) return failure();
+    if (failed(Pass::initializeOptions(options)))
+      return failure();
     // Consider pass option too
     promoteCMatrix |= this->promoteC;
     skipThreadLevel |= this->skipThread;
@@ -136,7 +137,7 @@
 
   void runOnOperation() override;
 
- private:
+private:
   /// Promotes C matrix to shared memory when necessary and returns success if
   /// no error happens.
   LogicalResult doPromoteCMatrix(func::FuncOp funcOp) const;
@@ -147,29 +148,33 @@
   bool skipThreadLevel = false;
 };
 
-}  // namespace
+} // namespace
 
 void SPIRVTileAndPromotePass::runOnOperation() {
   MLIRContext *context = &getContext();
   func::FuncOp funcOp = getOperation();
   FailureOr<IREE::HAL::ExecutableExportOp> exportOp = getEntryPoint(funcOp);
-  if (failed(exportOp)) return;
+  if (failed(exportOp))
+    return;
 
   auto threadTileComputeFn = getSPIRVTileSizeComputeFn(funcOp, 1);
-  if (failed(threadTileComputeFn)) return signalPassFailure();
+  if (failed(threadTileComputeFn))
+    return signalPassFailure();
   auto reductionTileComputeFn = getSPIRVTileSizeComputeFn(funcOp, 2);
-  if (failed(reductionTileComputeFn)) return signalPassFailure();
+  if (failed(reductionTileComputeFn))
+    return signalPassFailure();
 
   // Promote C matrix and propagate the potential fill producer into the
   // allocation. This needs to be done before reduction tiling.
-  if (failed(doPromoteCMatrix(funcOp))) return signalPassFailure();
+  if (failed(doPromoteCMatrix(funcOp)))
+    return signalPassFailure();
 
   StringLiteral markerAttrName =
       IREE::LinalgExt::LinalgTransforms::kLinalgTransformMarker;
   auto workgroupMarker = StringAttr::get(context, getWorkgroupMemoryMarker());
   auto kTiledMarker = StringAttr::get(context, getWorkgroupKTiledMarker());
 
-  {  // Tile reduction dimensions.
+  { // Tile reduction dimensions.
     RewritePatternSet patterns(context);
     IREE::LinalgExt::LinalgTransformationFilter filter(
         // Going through C matrix promotion we will have the marker..
@@ -237,7 +242,8 @@
     // that there are no subview ops), clear markers to enable following steps.
     funcOp.walk([&](linalg::LinalgOp linalgOp) {
       auto marker = linalgOp->getAttrOfType<StringAttr>(markerAttrName);
-      if (!marker) return WalkResult::advance();
+      if (!marker)
+        return WalkResult::advance();
       if (marker.getValue() == promoteBothMarker)
         linalgOp->removeAttr(markerAttrName);
       return WalkResult::advance();
@@ -258,7 +264,7 @@
     }
   });
 
-  if (!skipThreadLevel) {  // Tile and distribute to invocations.
+  if (!skipThreadLevel) { // Tile and distribute to invocations.
     RewritePatternSet tilingPatterns(context);
     IREE::LinalgExt::LinalgTransformationFilter filter({workgroupMarker},
                                                        std::nullopt);
@@ -290,15 +296,17 @@
   }
 }
 
-LogicalResult SPIRVTileAndPromotePass::doPromoteCMatrix(
-    func::FuncOp funcOp) const {
+LogicalResult
+SPIRVTileAndPromotePass::doPromoteCMatrix(func::FuncOp funcOp) const {
   MLIRContext *context = funcOp.getContext();
-  if (!promoteCMatrix) return success();
+  if (!promoteCMatrix)
+    return success();
 
   SmallVector<Operation *> computeOps = getComputeOps(funcOp);
   SmallVector<Operation *> linalgOps;
   for (Operation *op : computeOps) {
-    if (isa<linalg::FillOp>(op)) continue;  // Don't care
+    if (isa<linalg::FillOp>(op))
+      continue; // Don't care
     if (auto linalgOp = dyn_cast<linalg::LinalgOp>(op)) {
       linalgOps.push_back(linalgOp);
     } else {
@@ -311,7 +319,8 @@
   }
 
   // If there are no fused elementwise ops, we can avoid promoting C matrix.
-  if (linalgOps.size() <= 1) return success();
+  if (linalgOps.size() <= 1)
+    return success();
 
   auto matmulOp = cast<linalg::LinalgOp>(linalgOps.front());
   auto genericOp = cast<linalg::GenericOp>(*linalgOps.back());
@@ -330,7 +339,8 @@
 
   // If the fused elementwise ops are allowed to use cooperative types, we can
   // also avoid promoting C matrix.
-  if (isCooperativeMatrixFusable(genericOp)) return success();
+  if (isCooperativeMatrixFusable(genericOp))
+    return success();
 
   // Finally do promote C matrix.
   RewritePatternSet patterns(context);
@@ -353,11 +363,11 @@
   return success();
 }
 
-std::unique_ptr<OperationPass<func::FuncOp>> createSPIRVTileAndPromotePass(
-    bool promoteCMatrix, bool skipThreadLevel) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createSPIRVTileAndPromotePass(bool promoteCMatrix, bool skipThreadLevel) {
   return std::make_unique<SPIRVTileAndPromotePass>(promoteCMatrix,
                                                    skipThreadLevel);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
index ff3b85a..27ced4d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
@@ -61,7 +61,7 @@
 /// Gets the chosen hardware cooperative op size attached to the given `op`
 /// as CodeGen lowering configuration.
 static SmallVector<int64_t> getTargetCooperativeOpSize(linalg::LinalgOp op) {
-  return getTileSizes(op, 3);  // For native vector sizes
+  return getTileSizes(op, 3); // For native vector sizes
 }
 
 /// Deduces required subgroup counts along all workgroup tiled dimensions.
@@ -75,8 +75,10 @@
 
   SmallVector<int64_t> subgroupCounts;
   for (int i = 0, e = workgroupTileSizes.size(); i < e; ++i) {
-    if (subgroupTileSizes[i] == 0) continue;
-    if (linalg::isReductionIterator(op.getIteratorTypesArray()[i])) continue;
+    if (subgroupTileSizes[i] == 0)
+      continue;
+    if (linalg::isReductionIterator(op.getIteratorTypesArray()[i]))
+      continue;
     assert(workgroupTileSizes[i] % subgroupTileSizes[i] == 0);
     subgroupCounts.push_back(workgroupTileSizes[i] / subgroupTileSizes[i]);
   }
@@ -94,15 +96,15 @@
     ArrayRef<int64_t> subgroupTileSizes, RewritePatternSet &patterns) {
   MLIRContext *context = patterns.getContext();
 
-  auto getSubgroupProcInfoFn = [subgroupCounts, subgroupSize](
-                                   OpBuilder &builder, Location loc,
-                                   ArrayRef<Range> parallelLoopRanges) {
-    auto counts = llvm::to_vector<3>(subgroupCounts);
-    // `getSubgroupIdsAndCounts` assumes we follow GPU (X, Y, Z) order.
-    std::reverse(counts.begin(), counts.end());
-    return getSubgroupIdsAndCounts(builder, loc, subgroupSize,
-                                   parallelLoopRanges.size(), counts);
-  };
+  auto getSubgroupProcInfoFn =
+      [subgroupCounts, subgroupSize](OpBuilder &builder, Location loc,
+                                     ArrayRef<Range> parallelLoopRanges) {
+        auto counts = llvm::to_vector<3>(subgroupCounts);
+        // `getSubgroupIdsAndCounts` assumes we follow GPU (X, Y, Z) order.
+        std::reverse(counts.begin(), counts.end());
+        return getSubgroupIdsAndCounts(builder, loc, subgroupSize,
+                                       parallelLoopRanges.size(), counts);
+      };
 
   linalg::LinalgLoopDistributionOptions distributionOptions;
   distributionOptions.procInfo = getSubgroupProcInfoFn;
@@ -150,16 +152,18 @@
 }
 
 template <typename ExtOpTy>
-std::optional<SmallVector<int64_t>> getExtOpVectorShape(
-    ExtOpTy op, ArrayRef<int64_t> nativeShape) {
+std::optional<SmallVector<int64_t>>
+getExtOpVectorShape(ExtOpTy op, ArrayRef<int64_t> nativeShape) {
   auto insert =
       op.getOperand().template getDefiningOp<vector::InsertStridedSliceOp>();
-  if (!insert) return std::nullopt;
+  if (!insert)
+    return std::nullopt;
 
   VectorType sliceType = insert.getSourceVectorType();
   for (Operation *users : op->getUsers()) {
     auto extract = dyn_cast<vector::ExtractStridedSliceOp>(users);
-    if (!extract) return std::nullopt;
+    if (!extract)
+      return std::nullopt;
     auto vecType = llvm::cast<VectorType>(extract.getResult().getType());
     if (!llvm::equal(sliceType.getShape(), vecType.getShape()))
       return std::nullopt;
@@ -170,8 +174,8 @@
 
 /// Returns vector shape matching native cooperative op sizes for unrolling
 /// high-D vectors.
-std::optional<SmallVector<int64_t>> getCooperativeOpVectorShape(
-    Operation *op, ArrayRef<int64_t> nativeShape) {
+std::optional<SmallVector<int64_t>>
+getCooperativeOpVectorShape(Operation *op, ArrayRef<int64_t> nativeShape) {
   // Unroll vector.contract ops according to native cooperative matrix size.
   if (auto contractOp = dyn_cast<vector::ContractionOp>(op)) {
     return llvm::to_vector(nativeShape);
@@ -180,7 +184,7 @@
   // Unroll elementwise ops according to native cooperative matrix size.
   if (OpTrait::hasElementwiseMappableTraits(op) && op->getNumResults() == 1) {
     if (auto vecType = llvm::dyn_cast<VectorType>(op->getResultTypes()[0]))
-      return llvm::to_vector(nativeShape.drop_back());  // Drop K dim size
+      return llvm::to_vector(nativeShape.drop_back()); // Drop K dim size
   }
 
   // Unrolling vector.contract generates vector.{insert|extract}_strided_slice
@@ -216,9 +220,11 @@
     VectorType sliceType;
     for (Operation *users : sourceOp->getUsers()) {
       auto extract = dyn_cast<vector::ExtractStridedSliceOp>(users);
-      if (!extract) return std::nullopt;
+      if (!extract)
+        return std::nullopt;
       auto vecType = llvm::cast<VectorType>(extract.getResult().getType());
-      if (sliceType && sliceType != vecType) return std::nullopt;
+      if (sliceType && sliceType != vecType)
+        return std::nullopt;
       sliceType = vecType;
     }
     return llvm::to_vector(sliceType.getShape());
@@ -254,7 +260,7 @@
 // or replace unrolling.
 class CombineContractTranspose final
     : public OpRewritePattern<vector::ContractionOp> {
- public:
+public:
   using OpRewritePattern<vector::ContractionOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(vector::ContractionOp op,
@@ -286,7 +292,8 @@
       newSources.push_back(tranposeOp.getVector());
       foundTranspose = true;
     }
-    if (!foundTranspose) return failure();
+    if (!foundTranspose)
+      return failure();
 
     Value res = rewriter.create<vector::ContractionOp>(
         loc, newSources[0], newSources[1], newSources[2],
@@ -302,7 +309,7 @@
 
 class SPIRVTileToCooperativeOpsPass final
     : public SPIRVTileToCooperativeOpsBase<SPIRVTileToCooperativeOpsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<gpu::GPUDialect, linalg::LinalgDialect,
                     vector::VectorDialect>();
@@ -371,7 +378,7 @@
 class SPIRVVectorizeToCooperativeOpsPass final
     : public SPIRVVectorizeToCooperativeOpsBase<
           SPIRVVectorizeToCooperativeOpsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<gpu::GPUDialect, linalg::LinalgDialect,
                     vector::VectorDialect>();
@@ -472,7 +479,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSPIRVTileToCooperativeOpsPass() {
@@ -484,5 +491,5 @@
   return std::make_unique<SPIRVVectorizeToCooperativeOpsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
index b35aa1f..ab48475 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
@@ -20,7 +20,7 @@
 namespace {
 struct SPIRVVectorToGPUSubgroupMMAPass final
     : public SPIRVVectorToGPUSubgroupMMABase<SPIRVVectorToGPUSubgroupMMAPass> {
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<affine::AffineDialect, gpu::GPUDialect,
                     memref::MemRefDialect>();
   }
@@ -50,7 +50,7 @@
     }
 
     // Make sure we actually generate GPU subgroup mma ops.
-    WalkResult result = funcOp.walk([](Operation* op) {
+    WalkResult result = funcOp.walk([](Operation *op) {
       return isa<gpu::SubgroupMmaComputeOp>(op) ? WalkResult::interrupt()
                                                 : WalkResult::advance();
     });
@@ -60,12 +60,12 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createSPIRVVectorToGPUSubgroupMMAOpsPass() {
   return std::make_unique<SPIRVVectorToGPUSubgroupMMAPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp
index 4ec70d1..6fe6800 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp
@@ -52,7 +52,8 @@
 
 int getComputeVectorSize(int64_t size) {
   for (int i : {4, 3, 2}) {
-    if (size % i == 0) return i;
+    if (size % i == 0)
+      return i;
   }
   return 1;
 }
@@ -67,10 +68,13 @@
     // 128-bit chunks. This helps with memory access performance. Such vector
     // sizes are not native in SPIR-V though; this relies on following passes to
     // bitcast them to 32-bit 4-element vectors to be valid.
-    if (bitwidth <= 8 && size % 16 == 0) return 16;
-    if (bitwidth <= 16 && size % 8 == 0) return 8;
+    if (bitwidth <= 8 && size % 16 == 0)
+      return 16;
+    if (bitwidth <= 16 && size % 8 == 0)
+      return 8;
   }
-  if (bitwidth <= 32 && size % 4 == 0) return 4;
+  if (bitwidth <= 32 && size % 4 == 0)
+    return 4;
   return size % 2 == 0 ? 2 : 1;
 }
 
@@ -107,7 +111,8 @@
             })
             .Default([](Operation *) { return nullptr; });
 
-    if (!source) break;
+    if (!source)
+      break;
     op = source;
   }
 
@@ -117,7 +122,8 @@
 /// Returns true when |op| has the i32 element type that is likely to be result
 /// of a zero/sign extension from i8.
 bool mayExtI8ToI32(Value op) {
-  if (!getElementTypeOrSelf(op.getType()).isInteger(32)) return false;
+  if (!getElementTypeOrSelf(op.getType()).isInteger(32))
+    return false;
 
   // Look through vector operations created by vector unrolling patterns,
   // hoping to find a zero/sign extension op. Note that we do not need to find
@@ -143,13 +149,15 @@
 /// Succeeds when |contract| is a i32 matmul whose LHS and RHS operands may be
 /// result of zero/sign extension of i8 inputs.
 LogicalResult detectI8ToI32Matmul(vector::ContractionOp contract) {
-  if (contract.getKind() != vector::CombiningKind::ADD) return failure();
+  if (contract.getKind() != vector::CombiningKind::ADD)
+    return failure();
 
   if (!mayExtI8ToI32(contract.getLhs()) || !mayExtI8ToI32(contract.getRhs()))
     return failure();
 
   ArrayRef<Attribute> iteratorTypes = contract.getIteratorTypes().getValue();
-  if (iteratorTypes.size() != 3) return failure();
+  if (iteratorTypes.size() != 3)
+    return failure();
 
   return success(vector::isParallelIterator(iteratorTypes[0]) &&
                  vector::isParallelIterator(iteratorTypes[1]) &&
@@ -205,7 +213,7 @@
 
 SmallVector<int64_t> getNativeVectorShapeImpl(vector::ReductionOp op) {
   VectorType srcVectorType = op.getSourceVectorType();
-  assert(srcVectorType.getRank() == 1);  // Guaranteed by semantics
+  assert(srcVectorType.getRank() == 1); // Guaranteed by semantics
   int64_t vectorSize = getComputeVectorSize(srcVectorType.getDimSize(0));
   return {vectorSize};
 }
@@ -224,8 +232,8 @@
   return nativeSize;
 }
 
-std::optional<SmallVector<int64_t>> getNativeVectorShape(
-    Operation *op, bool targetSupportsDotProd) {
+std::optional<SmallVector<int64_t>>
+getNativeVectorShape(Operation *op, bool targetSupportsDotProd) {
   if (OpTrait::hasElementwiseMappableTraits(op) && op->getNumResults() == 1) {
     if (auto vecType = llvm::dyn_cast<VectorType>(op->getResultTypes()[0])) {
       SmallVector<int64_t> nativeSize(vecType.getRank(), 1);
@@ -274,7 +282,8 @@
     // attribute. This may be preferred in tests.
     targetEnvAttr =
         fn->getAttrOfType<spirv::TargetEnvAttr>(spirv::getTargetEnvAttrName());
-    if (!targetEnvAttr) return false;
+    if (!targetEnvAttr)
+      return false;
   }
 
   spirv::TargetEnv targetEnv(targetEnvAttr);
@@ -283,16 +292,19 @@
 
   // Query all the dot prod capabilities except for the packed one -- none of
   // the vectorization patterns need it.
-  if (!targetEnv.allows(spirv::Capability::DotProduct)) return false;
-  if (!targetEnv.allows(spirv::Capability::DotProductInput4x8Bit)) return false;
-  if (!targetEnv.allows(spirv::Capability::DotProductInputAll)) return false;
+  if (!targetEnv.allows(spirv::Capability::DotProduct))
+    return false;
+  if (!targetEnv.allows(spirv::Capability::DotProductInput4x8Bit))
+    return false;
+  if (!targetEnv.allows(spirv::Capability::DotProductInputAll))
+    return false;
 
   return true;
 }
 
 /// Vectorizes Linalg ops on buffer semantics.
 class SPIRVVectorizePass : public SPIRVVectorizeBase<SPIRVVectorizePass> {
- public:
+public:
   SPIRVVectorizePass() = default;
   SPIRVVectorizePass(const SPIRVVectorizePass &pass) = default;
 
@@ -337,7 +349,8 @@
         op.emitOpError("should not remain after vectorization");
         return WalkResult::interrupt();
       });
-      if (result.wasInterrupted()) return signalPassFailure();
+      if (result.wasInterrupted())
+        return signalPassFailure();
     }
 
     // Special peephole optimizations to clean up IR before further processing.
@@ -371,7 +384,8 @@
     // batch dimension. Try to drop that to map to matmul dimensions better.
     SmallVector<vector::ContractionOp> contractOps;
     funcOp.walk([&](vector::ContractionOp op) {
-      if (op.getIteratorTypes().size() > 3) contractOps.push_back(op);
+      if (op.getIteratorTypes().size() > 3)
+        contractOps.push_back(op);
     });
     for (vector::ContractionOp op : contractOps) {
       OpBuilder builder(op);
@@ -641,11 +655,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>> createSPIRVVectorizePass() {
   return std::make_unique<SPIRVVectorizePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
index f3c4353..1989bd9 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
@@ -45,7 +45,8 @@
                                    SmallVectorImpl<Operation *> &uses) {
   assert(uses.empty() && "expected uses to be empty");
   for (Operation *userOp : value.getUsers()) {
-    if (isa<memref::DeallocOp, memref::AssumeAlignmentOp>(userOp)) continue;
+    if (isa<memref::DeallocOp, memref::AssumeAlignmentOp>(userOp))
+      continue;
 
     if (!isa<gpu::SubgroupMmaLoadMatrixOp, gpu::SubgroupMmaStoreMatrixOp,
              vector::TransferReadOp, vector::TransferWriteOp>(userOp)) {
@@ -83,8 +84,8 @@
 }
 
 // Calculates the vector bit count we want to use based on the memref uses.
-static unsigned calculateMemRefVectorNumBits(
-    SmallVectorImpl<Operation *> &uses) {
+static unsigned
+calculateMemRefVectorNumBits(SmallVectorImpl<Operation *> &uses) {
   unsigned minBits = kMaxVectorNumBits;
   for (Operation *op : uses) {
     if (isa<gpu::SubgroupMmaLoadMatrixOp, gpu::SubgroupMmaStoreMatrixOp>(op)) {
@@ -92,10 +93,12 @@
       continue;
     }
     auto transferOp = dyn_cast<VectorTransferOpInterface>(op);
-    if (!transferOp) return 0;
+    if (!transferOp)
+      return 0;
     std::optional<unsigned> transferSize =
         getBitWidth(transferOp.getVectorType());
-    if (!transferSize) return 0;
+    if (!transferSize)
+      return 0;
     minBits = std::min(minBits, *transferSize);
   }
 
@@ -109,7 +112,8 @@
       memrefVal = storeOp.getDstMemref();
       stride = storeOp.getLeadDimension().getSExtValue();
     }
-    if (!memrefVal) continue;
+    if (!memrefVal)
+      continue;
 
     // GPU subgroup MMA ops do not care about the memref element type. But we
     // still need to make sure we can load/store with good strides.
@@ -118,10 +122,12 @@
     auto memrefType = llvm::cast<MemRefType>(memrefVal.getType());
     std::optional<unsigned> elementBits =
         getBitWidth(memrefType.getElementType());
-    if (!elementBits) return 0;
+    if (!elementBits)
+      return 0;
     int64_t strideBits = stride * *elementBits;
     // Make sure the stride is aligned with the planned vector bitwidth.
-    if (strideBits % minBits != 0) return 0;
+    if (strideBits % minBits != 0)
+      return 0;
   }
 
   return minBits;
@@ -165,7 +171,8 @@
 
   if (getUsesIfAllTransferOp(value, uses)) {
     unsigned vectorBits = calculateMemRefVectorNumBits(uses);
-    if (!vectorBits) return 0;
+    if (!vectorBits)
+      return 0;
     unsigned vectorSize = vectorBits / elementNumBits;
     LLVM_DEBUG(llvm::dbgs() << "vectorBits=" << vectorBits << "\n");
     LLVM_DEBUG(llvm::dbgs() << "elementNumBits=" << elementNumBits << "\n");
@@ -190,7 +197,7 @@
 /// logic is to vectorize memref only if it is used by vector transfer
 /// read/write ops.
 class MemRefUsageAnalysis {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(MemRefUsageAnalysis)
 
   explicit MemRefUsageAnalysis(mlir::Operation *);
@@ -210,7 +217,7 @@
     return transferOps.count(op);
   }
 
- private:
+private:
   void analyzeMemRefValue(Value value);
 
   // The mapping from a MemRef value to the number of bits of the vector this
@@ -244,44 +251,45 @@
 
 template <typename OpTy>
 class MemRefConversionPattern : public OpConversionPattern<OpTy> {
- public:
+public:
   MemRefConversionPattern<OpTy>(MLIRContext *context,
                                 const MemRefUsageAnalysis &memrefUsageAnalysis)
       : OpConversionPattern<OpTy>::OpConversionPattern(context),
         memrefUsageAnalysis(memrefUsageAnalysis) {}
 
- protected:
-  std::optional<MemRefType> getVectorizedMemRefType(
-      ConversionPatternRewriter &rewriter, Value memRefValue) const;
+protected:
+  std::optional<MemRefType>
+  getVectorizedMemRefType(ConversionPatternRewriter &rewriter,
+                          Value memRefValue) const;
 
   /// Adjusts indices for vector transfer / GPU MMA load/store ops to index into
   /// vector memref.
-  FailureOr<SmallVector<Value>> adjustIndices(
-      MemRefType scalarMemrefType, MemRefType vectorMemrefType,
-      ValueRange indices, ConversionPatternRewriter &rewriter,
-      Location loc) const;
+  FailureOr<SmallVector<Value>>
+  adjustIndices(MemRefType scalarMemrefType, MemRefType vectorMemrefType,
+                ValueRange indices, ConversionPatternRewriter &rewriter,
+                Location loc) const;
 
   const MemRefUsageAnalysis &memrefUsageAnalysis;
 };
 
 class ProcessFunctionArgument final
     : public MemRefConversionPattern<func::FuncOp> {
- public:
+public:
   using MemRefConversionPattern::MemRefConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      func::FuncOp funcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override;
+  LogicalResult
+  matchAndRewrite(func::FuncOp funcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override;
 };
 
 class ProcessTransferRead final
     : public MemRefConversionPattern<vector::TransferReadOp> {
- public:
+public:
   using MemRefConversionPattern::MemRefConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      vector::TransferReadOp read, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(vector::TransferReadOp read, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!memrefUsageAnalysis.shouldConvertTransfer(read)) {
       return rewriter.notifyMatchFailure(
           read, "cannot be vectorized per memref usage analysis");
@@ -296,7 +304,8 @@
     auto vectorMemrefType =
         llvm::dyn_cast<MemRefType>(adaptor.getSource().getType());
     auto readVectorType = read.getVectorType();
-    if (!scalarMemrefType || !vectorMemrefType) return failure();
+    if (!scalarMemrefType || !vectorMemrefType)
+      return failure();
 
     std::optional<unsigned> vectorMemrefElemSize =
         getBitWidth(vectorMemrefType.getElementType());
@@ -304,7 +313,8 @@
 
     auto indices = adjustIndices(scalarMemrefType, vectorMemrefType,
                                  adaptor.getIndices(), rewriter, loc);
-    if (failed(indices)) return failure();
+    if (failed(indices))
+      return failure();
 
     // If the transfer_read can be replaced by a load after vectorization use
     // LoadOp and cast back to the original type.
@@ -329,12 +339,12 @@
 
 class ProcessTransferWrite final
     : public MemRefConversionPattern<vector::TransferWriteOp> {
- public:
+public:
   using MemRefConversionPattern::MemRefConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      vector::TransferWriteOp write, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(vector::TransferWriteOp write, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!memrefUsageAnalysis.shouldConvertTransfer(write)) {
       return rewriter.notifyMatchFailure(
           write, "cannot be vectorized per memref usage analysis");
@@ -349,7 +359,8 @@
     auto vectorMemrefType =
         llvm::dyn_cast<MemRefType>(adaptor.getSource().getType());
     auto writeVectorType = write.getVectorType();
-    if (!scalarMemrefType || !vectorMemrefType) return failure();
+    if (!scalarMemrefType || !vectorMemrefType)
+      return failure();
 
     std::optional<unsigned> vectorMemrefElemSize =
         getBitWidth(vectorMemrefType.getElementType());
@@ -357,7 +368,8 @@
 
     auto indices = adjustIndices(scalarMemrefType, vectorMemrefType,
                                  adaptor.getIndices(), rewriter, loc);
-    if (failed(indices)) return failure();
+    if (failed(indices))
+      return failure();
 
     // If the transfer_write can be replaced by a store after vectorization cast
     // the original value and use StoreOp.
@@ -410,7 +422,8 @@
   Type vectorType = VectorType::get(vectorNumElements, scalarType);
   auto newShape = llvm::to_vector<2>(type.getShape());
   unsigned ratio = vectorNumBits / type.getElementTypeBitWidth();
-  if (newShape.back() % ratio != 0) return {};
+  if (newShape.back() % ratio != 0)
+    return {};
   newShape.back() = newShape.back() / ratio;
 
   MemRefLayoutAttrInterface layout = {};
@@ -443,7 +456,8 @@
       getBitWidth(vectorMemrefType.getElementType());
   std::optional<unsigned> scalarMemrefElemSize =
       getBitWidth(scalarMemrefType.getElementType());
-  if (!vectorMemrefElemSize || !scalarMemrefElemSize) return failure();
+  if (!vectorMemrefElemSize || !scalarMemrefElemSize)
+    return failure();
 
   MLIRContext *context = rewriter.getContext();
   AffineExpr sym0, sym1;
@@ -459,14 +473,15 @@
 }
 
 class ProcessAlloc final : public MemRefConversionPattern<memref::AllocOp> {
- public:
+public:
   using MemRefConversionPattern::MemRefConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::AllocOp alloc, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::AllocOp alloc, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto memrefType = getVectorizedMemRefType(rewriter, alloc.getResult());
-    if (!memrefType) return failure();
+    if (!memrefType)
+      return failure();
     rewriter.replaceOpWithNewOp<memref::AllocOp>(alloc, *memrefType,
                                                  alloc.getDynamicSizes());
     return success();
@@ -475,14 +490,16 @@
 
 class ProcessInterfaceBindingSubspan final
     : public MemRefConversionPattern<IREE::HAL::InterfaceBindingSubspanOp> {
- public:
+public:
   using MemRefConversionPattern::MemRefConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp subspanOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp subspanOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto memrefType = llvm::dyn_cast<MemRefType>(subspanOp.getType());
-    if (!memrefType) return failure();
+    if (!memrefType)
+      return failure();
 
     // This should be guaranteed by the analysis step. But just double check.
     assert(memrefType.getRank() > 0 &&
@@ -506,9 +523,9 @@
     : public MemRefConversionPattern<gpu::SubgroupMmaLoadMatrixOp> {
   using MemRefConversionPattern::MemRefConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      gpu::SubgroupMmaLoadMatrixOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(gpu::SubgroupMmaLoadMatrixOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto scalarMemrefType =
         llvm::dyn_cast<MemRefType>(loadOp.getSrcMemref().getType());
     auto vectorMemrefType =
@@ -517,7 +534,8 @@
     Location loc = loadOp.getLoc();
     auto indices = adjustIndices(scalarMemrefType, vectorMemrefType,
                                  adaptor.getIndices(), rewriter, loc);
-    if (failed(indices)) return failure();
+    if (failed(indices))
+      return failure();
 
     // Compute how many bits the mma op stride corresponds to for the scalar
     // memref, and rescale it to vector memref.
@@ -539,9 +557,9 @@
     : public MemRefConversionPattern<gpu::SubgroupMmaStoreMatrixOp> {
   using MemRefConversionPattern::MemRefConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      gpu::SubgroupMmaStoreMatrixOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(gpu::SubgroupMmaStoreMatrixOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto scalarMemrefType =
         llvm::dyn_cast<MemRefType>(storeOp.getDstMemref().getType());
     auto vectorMemrefType =
@@ -550,7 +568,8 @@
     Location loc = storeOp.getLoc();
     auto indices = adjustIndices(scalarMemrefType, vectorMemrefType,
                                  adaptor.getIndices(), rewriter, loc);
-    if (failed(indices)) return failure();
+    if (failed(indices))
+      return failure();
 
     // Compute how many bits the mma op stride corresponds to for the scalar
     // memref, and rescale it to vector memref.
@@ -570,12 +589,12 @@
 
 template <typename OpT>
 class PassThroughConversion : public OpConversionPattern<OpT> {
- public:
+public:
   using OpConversionPattern<OpT>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      OpT op, typename OpT::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpT op, typename OpT::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.updateRootInPlace(op,
                                [&] { op->setOperands(adaptor.getOperands()); });
     return success();
@@ -638,7 +657,8 @@
   LogicalResult matchAndRewrite(vector::LoadOp loadOp,
                                 PatternRewriter &rewriter) const override {
     VectorType vectorType = loadOp.getType();
-    if (vectorType.getRank() > 1) return failure();
+    if (vectorType.getRank() > 1)
+      return failure();
 
     Location loc = loadOp.getLoc();
     if (vectorType.getRank() == 0) {
@@ -727,10 +747,10 @@
 
   void runOnOperation() override;
 
- private:
+private:
   MemRefUsageAnalysis *memrefUsageAnalysis = nullptr;
 };
-}  // namespace
+} // namespace
 
 LogicalResult ProcessFunctionArgument::matchAndRewrite(
     func::FuncOp funcOp, OpAdaptor adaptor,
@@ -822,5 +842,5 @@
   return std::make_unique<SPIRVVectorizeLoadStorePass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Utils.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/Utils.cpp
index 6f1b3af..d0938fd 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/Utils.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/Utils.cpp
@@ -29,11 +29,14 @@
 
 spirv::TargetEnvAttr getSPIRVTargetEnvAttr(Operation *op) {
   auto variant = op->getParentOfType<IREE::HAL::ExecutableVariantOp>();
-  if (!variant) return nullptr;
+  if (!variant)
+    return nullptr;
   IREE::HAL::ExecutableTargetAttr targetAttr = variant.getTarget();
-  if (!targetAttr) return nullptr;
+  if (!targetAttr)
+    return nullptr;
   auto config = targetAttr.getConfiguration();
-  if (!config) return nullptr;
+  if (!config)
+    return nullptr;
   return config.getAs<spirv::TargetEnvAttr>(spirv::getTargetEnvAttrName());
 }
 
@@ -42,11 +45,14 @@
   llvm::StringMap<IREE::HAL::ExecutableExportOp> exportOps =
       getAllEntryPoints(moduleOp);
   auto exportOp = exportOps.lookup(funcOp.getName());
-  if (!exportOp) return std::nullopt;
-  if (auto size = exportOp.getSubgroupSize()) return size->getSExtValue();
+  if (!exportOp)
+    return std::nullopt;
+  if (auto size = exportOp.getSubgroupSize())
+    return size->getSExtValue();
 
   spirv::TargetEnvAttr target = getSPIRVTargetEnvAttr(funcOp);
-  if (!target) return std::nullopt;
+  if (!target)
+    return std::nullopt;
   return target.getResourceLimits().getSubgroupSize();
 }
 
@@ -61,10 +67,11 @@
   return config->getTileSizeVals(tilingLevel);
 }
 
-FailureOr<linalg::TileSizeComputationFunction> getSPIRVTileSizeComputeFn(
-    func::FuncOp funcOp, int tilingLevel) {
+FailureOr<linalg::TileSizeComputationFunction>
+getSPIRVTileSizeComputeFn(func::FuncOp funcOp, int tilingLevel) {
   auto tileSizes = getSPIRVTileSize(funcOp, tilingLevel);
-  if (failed(tileSizes)) return failure();
+  if (failed(tileSizes))
+    return failure();
   linalg::TileSizeComputationFunction computeFn =
       [tileSizes](OpBuilder &builder, Operation *op) {
         auto range = llvm::map_range(*tileSizes, [&](int64_t size) -> Value {
@@ -76,9 +83,8 @@
 }
 
 template <typename GPUIdOp, typename GPUCountOp>
-static linalg::ProcInfo getGPUProcessorIdAndCountImpl(OpBuilder &builder,
-                                                      Location loc,
-                                                      unsigned dim) {
+static linalg::ProcInfo
+getGPUProcessorIdAndCountImpl(OpBuilder &builder, Location loc, unsigned dim) {
   assert(dim < kNumGPUDims && "processor index out of range!");
 
   std::array<gpu::Dimension, kNumGPUDims> dimAttr{
@@ -90,8 +96,9 @@
 }
 
 template <typename GPUIdOp, typename GPUCountOp>
-static SmallVector<linalg::ProcInfo, 2> getGPUProcessorIdsAndCountsImpl(
-    OpBuilder &builder, Location loc, unsigned numDims) {
+static SmallVector<linalg::ProcInfo, 2>
+getGPUProcessorIdsAndCountsImpl(OpBuilder &builder, Location loc,
+                                unsigned numDims) {
   SmallVector<linalg::ProcInfo, 2> procInfo(numDims);
   for (unsigned i = 0; i < numDims; ++i) {
     procInfo[numDims - 1 - i] =
@@ -113,5 +120,5 @@
 getGPUProcessorIdsAndCounts<gpu::ThreadIdOp, gpu::BlockDimOp>(
     OpBuilder &builder, Location loc, unsigned numDims);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Utils.h b/compiler/src/iree/compiler/Codegen/SPIRV/Utils.h
index 33d5fff..1c57a2a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/Utils.h
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/Utils.h
@@ -39,17 +39,16 @@
 
 /// Returns the functor to compute tile sizes at the given `tilingLevel` for
 /// compute ops in `funcOp`.
-FailureOr<linalg::TileSizeComputationFunction> getSPIRVTileSizeComputeFn(
-    func::FuncOp funcOp, int tilingLevel);
+FailureOr<linalg::TileSizeComputationFunction>
+getSPIRVTileSizeComputeFn(func::FuncOp funcOp, int tilingLevel);
 
 /// Generate the operations that compute the processor ID and number of
 /// processors. Used as the callback needed for LinalgDistributionOptions.
 template <typename GPUIdOp, typename GPUCountOp>
-SmallVector<linalg::ProcInfo, 2> getGPUProcessorIdsAndCounts(OpBuilder &builder,
-                                                             Location loc,
-                                                             unsigned numDims);
+SmallVector<linalg::ProcInfo, 2>
+getGPUProcessorIdsAndCounts(OpBuilder &builder, Location loc, unsigned numDims);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  //  IREE_COMPILER_CODEGEN_SPIRV_UTILS_H_
+#endif //  IREE_COMPILER_CODEGEN_SPIRV_UTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/Verifiers.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/Verifiers.cpp
index 066abab..83aceab 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/Verifiers.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/Verifiers.cpp
@@ -36,7 +36,8 @@
            << stringifyEnum(CodeGenPipeline::SPIRVMatmulPromoteVectorize);
   }
 
-  if (!isa<linalg::MatmulOp, linalg::BatchMatmulOp>(op)) return success();
+  if (!isa<linalg::MatmulOp, linalg::BatchMatmulOp>(op))
+    return success();
 
   LLVM_DEBUG({
     llvm::dbgs() << "verifying op: " << *op << "\n";
@@ -53,7 +54,8 @@
 
   auto funcOp = op->getParentOfType<func::FuncOp>();
   const std::optional<int> subgroupSize = getSPIRVSubgroupSize(funcOp);
-  if (!subgroupSize) return funcOp->emitError("failed to query subgroup size");
+  if (!subgroupSize)
+    return funcOp->emitError("failed to query subgroup size");
   const int maxThreads = limits.getMaxComputeWorkgroupInvocations();
   const auto maxWorkGroupSize = llvm::map_to_vector<3>(
       limits.getMaxComputeWorkgroupSize().getAsValueRange<IntegerAttr>(),
@@ -157,7 +159,8 @@
 
   auto funcOp = op->getParentOfType<func::FuncOp>();
   const std::optional<int> subgroupSize = getSPIRVSubgroupSize(funcOp);
-  if (!subgroupSize) return funcOp->emitError("failed to query subgroup size");
+  if (!subgroupSize)
+    return funcOp->emitError("failed to query subgroup size");
   const int maxThreads = limits.getMaxComputeWorkgroupInvocations();
   const auto maxWorkGroupSize = llvm::map_to_vector<3>(
       limits.getMaxComputeWorkgroupSize().getAsValueRange<IntegerAttr>(),
@@ -358,5 +361,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.cpp
index b84f795..2bd0a9a 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.cpp
@@ -148,15 +148,16 @@
   int64_t reductionSize = captures.reductionOpSizes.back();
   if (cpuModel.model == CPUModel::kDefaultCPU) {
     if (captures.reductionOutputElementalTypeBitWidth == 32) {
-      if (reductionSize == 32) return ReductionConfig{/*vectorSize=*/32};
+      if (reductionSize == 32)
+        return ReductionConfig{/*vectorSize=*/32};
     }
   }
   return failure();
 }
 
-static ReductionConfig getReductionConfig(
-    const transform_ext::MatchedReductionCaptures &captures,
-    const CPUModel &cpuModel) {
+static ReductionConfig
+getReductionConfig(const transform_ext::MatchedReductionCaptures &captures,
+                   const CPUModel &cpuModel) {
   return ReductionConfig{16};
 }
 
@@ -168,7 +169,8 @@
   transform_ext::MatcherContext matcherContext;
   makeReductionMatcher(matcherContext, reduction, captures,
                        /*mustMatchEntireFunc=*/true);
-  if (!matchPattern(op, *reduction)) return failure();
+  if (!matchPattern(op, *reduction))
+    return failure();
 
   // 2. Construct the configuration and the strategy builder.
   // TODO: Generalize along the HW axis.
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.h
index 58e4e97..741c1bc 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/Common.h
@@ -25,8 +25,8 @@
 /// the variant op.
 // TODO: pass control to LowerVectorsOp once the builder allows it.
 std::pair<Value, Value> buildCommonTrailingStrategy(
-    ImplicitLocOpBuilder& b, Value variantH,
-    const vector::LowerVectorsOptions& lowerVectorsOpts);
+    ImplicitLocOpBuilder &b, Value variantH,
+    const vector::LowerVectorsOptions &lowerVectorsOpts);
 
 //===----------------------------------------------------------------------===//
 // Higher-level problem-specific strategy creation APIs, these should favor
@@ -52,9 +52,9 @@
 /// ModuleOp after the `entryPoint` func::FuncOp.
 LogicalResult matchAndSetReductionStrategy(func::FuncOp entryPoint,
                                            linalg::LinalgOp op,
-                                           const CPUModel& cpuModel);
-}  // namespace cpu
-}  // namespace iree_compiler
-}  // namespace mlir
+                                           const CPUModel &cpuModel);
+} // namespace cpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_CPU_COMMON_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_CPU_COMMON_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.cpp
index b90b569..41f3374 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.cpp
@@ -69,7 +69,8 @@
            {maybeLeadingHBlock, strategy.captures.maybeLeadingRank},
            {gridReductionH, strategy.captures.reductionRank},
            {maybeTiledTrailingHBlock, strategy.captures.maybeTrailingRank}}) {
-    if (rank == 0) continue;
+    if (rank == 0)
+      continue;
     SmallVector<int64_t> tileSizes(rank - 1, 0);
     tileSizes.push_back(strategy.getVectorSize());
     buildTileFuseToScfFor(b, variantH, val, {},
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.h
index b3aac41..4992ee2 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/CPU/ReductionStrategy.h
@@ -28,7 +28,7 @@
 
 /// A simple CPU ReductionStrategy.
 class ReductionStrategy : public iree_compiler::AbstractReductionStrategy {
- public:
+public:
   ReductionStrategy(const transform_ext::MatchedReductionCaptures &captures,
                     const ReductionConfig &reductionConfig);
 
@@ -37,7 +37,7 @@
 
   int64_t getVectorSize() const { return vectorSize; }
 
- private:
+private:
   /// Compute the small strategy based on the problem size.
   void configure(const ReductionConfig &config);
 
@@ -54,8 +54,8 @@
 void buildReductionStrategy(ImplicitLocOpBuilder &b, Value variantH,
                             const ReductionStrategy &strategy);
 
-}  // namespace cpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace cpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_CPU_REDUCTION_STRATEGY_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_CPU_REDUCTION_STRATEGY_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/AbstractReductionStrategy.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/AbstractReductionStrategy.h
index 1c4ccd6..1ef887e 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/AbstractReductionStrategy.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/AbstractReductionStrategy.h
@@ -27,7 +27,7 @@
   SmallVector<int64_t> workgroupTileSizes;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_COMMON_ABSTRACT_REDUCTION_STRATEGY_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_COMMON_ABSTRACT_REDUCTION_STRATEGY_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.cpp
index ec7d0a9..f7a1ff7 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.cpp
@@ -59,7 +59,8 @@
                                          /*numHandles=*/N);
   assert(matchOp->getNumResults() == N && "Unexpected number of results");
   std::array<Value, N> a;
-  for (int64_t i = 0; i < N; ++i) a[i] = matchOp->getResult(i);
+  for (int64_t i = 0; i < N; ++i)
+    a[i] = matchOp->getResult(i);
   return std::tuple_cat(a);
 }
 
@@ -75,16 +76,18 @@
   return ((val + multiple - 1) / multiple) * multiple;
 }
 
-FailureOr<int64_t> mlir::iree_compiler::maxDivisorOfValueBelowLimit(
-    int64_t value, int64_t limit) {
+FailureOr<int64_t>
+mlir::iree_compiler::maxDivisorOfValueBelowLimit(int64_t value, int64_t limit) {
   // Conservatively return failure when `limit` is greater than 1024 to avoid
   // prohibitively long compile time overheads.
   // TODO: approximate with a faster implementation based on a few desirable
   // primes.
-  if (limit > 1024) return failure();
+  if (limit > 1024)
+    return failure();
   // If either value or limit is <= 0, the loop is skipped and we fail.
   for (int64_t i = std::min(value, limit); i > 1; --i)
-    if (value % i == 0) return i;
+    if (value % i == 0)
+      return i;
   return failure();
 }
 
@@ -118,8 +121,10 @@
 /// Prints `handles` in order. Prints the whole IR if `handles` is empty.
 void mlir::iree_compiler::buildPrint(ImplicitLocOpBuilder &b,
                                      ValueRange handles) {
-  if (handles.empty()) b.create<PrintOp>();
-  for (auto h : handles) b.create<PrintOp>(h);
+  if (handles.empty())
+    b.create<PrintOp>();
+  for (auto h : handles)
+    b.create<PrintOp>(h);
 }
 
 /// Create an ApplyPatternsOp that performs a set of key canonicalizations and
@@ -135,7 +140,8 @@
     b.create<IREE::transform_dialect::ApplyFoldFillIntoPadPatternsOp>(loc);
     b.create<transform::ApplyForLoopCanonicalizationPatternsOp>(loc);
     b.create<transform::ApplyCanonicalizationPatternsOp>(loc);
-    if (populatePatternsFn) populatePatternsFn(b, loc);
+    if (populatePatternsFn)
+      populatePatternsFn(b, loc);
   });
   b.create<IREE::transform_dialect::ApplyLoopIndependentCodeMotionOp>(funcH);
   b.create<IREE::transform_dialect::ApplyCommonSubexpressionEliminationOp>(
@@ -150,8 +156,9 @@
 /// This is used as a normalization operation that replaces conditionals, either
 /// in C++ or in transform IR.
 /// This can be thought of as a control-flow -> data-dependent conversion.
-std::pair<Value, Value> mlir::iree_compiler::buildSelectFirstNonEmpty(
-    ImplicitLocOpBuilder &b, Value handle1, Value handle2) {
+std::pair<Value, Value>
+mlir::iree_compiler::buildSelectFirstNonEmpty(ImplicitLocOpBuilder &b,
+                                              Value handle1, Value handle2) {
   auto anyOpType = transform::AnyOpType::get(b.getContext());
   auto selector = b.create<TakeFirstOp>(anyOpType, anyOpType,
                                         ArrayRef<Value>{handle1, handle2});
@@ -363,16 +370,16 @@
   /// was not re-matched.
   Value trailingEltwiseH;
 };
-}  // namespace
+} // namespace
 
 /// Builds transform IR requesting to bubble up the "expand_shape" operation
 /// produced as parent of reduction splitting if necessary for fusion of the
 /// leading elementwise operation.
 // TODO: consider passing a problem-specific struct to control information.
-static ReductionSplitResult createBubbleExpand(
-    ImplicitLocOpBuilder &b, Value variantH,
-    SplitReductionOp splitReductionTransformOp, bool hasLeadingEltwise,
-    bool hasTrailingEltwise) {
+static ReductionSplitResult
+createBubbleExpand(ImplicitLocOpBuilder &b, Value variantH,
+                   SplitReductionOp splitReductionTransformOp,
+                   bool hasLeadingEltwise, bool hasTrailingEltwise) {
   ReductionSplitResult result;
   if (!hasLeadingEltwise) {
     result.splitFillH = splitReductionTransformOp.getFillOp();
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.h
index 66a7ef1..f4eac88 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/Common/Common.h
@@ -111,9 +111,10 @@
 
 /// Build transform IR to perform multi-level tile and fuse into an scf.for op.
 /// Note: fusion is currently unsupported.
-TileToScfForAndFuseResult buildTileFuseToScfFor(
-    ImplicitLocOpBuilder &b, Value variantH, Value rootH, ValueRange opsHToFuse,
-    ArrayRef<OpFoldResult> tileSizes, bool canonicalize = true);
+TileToScfForAndFuseResult
+buildTileFuseToScfFor(ImplicitLocOpBuilder &b, Value variantH, Value rootH,
+                      ValueRange opsHToFuse, ArrayRef<OpFoldResult> tileSizes,
+                      bool canonicalize = true);
 
 /// Result of the combined transform performing tiling, fusion and
 /// distribution to parallel constructs.
@@ -243,7 +244,7 @@
 /// Build transform IR that applies memory optimizations.
 Value buildMemoryOptimizations(ImplicitLocOpBuilder &b, Value funcH);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_COMMON_COMMON_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_COMMON_COMMON_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.cpp
index 1865128..981fee0 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.cpp
@@ -91,10 +91,11 @@
   }
 }
 
-ArrayAttr AbstractGemmLikeStrategy::getZeroPadAttrFromElementalTypes(
-    OpBuilder &b) const {
+ArrayAttr
+AbstractGemmLikeStrategy::getZeroPadAttrFromElementalTypes(OpBuilder &b) const {
   SmallVector<Attribute> paddingValues;
-  for (Type t : paddingValueTypes) paddingValues.push_back(b.getZeroAttr(t));
+  for (Type t : paddingValueTypes)
+    paddingValues.push_back(b.getZeroAttr(t));
   return b.getArrayAttr(paddingValues);
 }
 
@@ -102,8 +103,8 @@
 // Validation of support for the configured strategy.
 //===--------------------------------------------------------------------===//
 
-LogicalResult AbstractGemmLikeStrategy::validate(
-    const GPUModel &gpuModel) const {
+LogicalResult
+AbstractGemmLikeStrategy::validate(const GPUModel &gpuModel) const {
   if (totalNumThreads() != totalNumWarps() * kCudaWarpSize) {
     llvm::errs() << "Number of threads specified by warps must match total "
                     "number of threads\n";
@@ -211,7 +212,8 @@
   os << "- block tile sizes: {";
   bool isFirst = true;
   for (int64_t blockTileSize : blockTileSizes) {
-    if (!isFirst) os << ", ";
+    if (!isFirst)
+      os << ", ";
     os << blockTileSize;
     isFirst = false;
   }
@@ -221,7 +223,8 @@
   os << "- number of threads: {";
   isFirst = true;
   for (int64_t numThreadsForDim : numThreads) {
-    if (!isFirst) os << ", ";
+    if (!isFirst)
+      os << ", ";
     os << numThreadsForDim;
     isFirst = false;
   }
@@ -230,7 +233,8 @@
   os << "- number of warps: {";
   isFirst = true;
   for (int64_t numWarpsForDim : numWarps) {
-    if (!isFirst) os << ", ";
+    if (!isFirst)
+      os << ", ";
     os << numWarpsForDim;
     isFirst = false;
   }
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.h
index 0e60a67..726a0db 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/AbstractGemmLikeStrategy.h
@@ -60,13 +60,15 @@
   /// Common values based on derived quantities.
   int64_t totalNumThreads() const {
     int64_t res = 1;
-    for (auto v : numThreads) res *= v;
+    for (auto v : numThreads)
+      res *= v;
     return res;
   }
 
   int64_t totalNumWarps() const {
     int64_t res = 1;
-    for (auto v : numWarps) res *= v;
+    for (auto v : numWarps)
+      res *= v;
     return res;
   }
 
@@ -156,8 +158,8 @@
   constexpr static int64_t kMinMmaSyncMinK = 4;
 };
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_ABSTRACT_GEMM_LIKE_STRATEGY_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_ABSTRACT_GEMM_LIKE_STRATEGY_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.cpp
index be92764..de6dc3a 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.cpp
@@ -86,7 +86,8 @@
   assert((bitWidth & bitWidth - 1) == 0 && "bitWidth must be a power of 2");
   int64_t factor;
   for (factor = scaleUpByBitWidth(1, bitWidth); factor > 1; factor >>= 1)
-    if (numWarpsToUse % factor == 0) break;
+    if (numWarpsToUse % factor == 0)
+      break;
   numWarpsToUse /= factor;
   // Try to scale to using 128b elements in warp shuffles.
   return std::max(numWarpsToUse / 4, int64_t(1));
@@ -186,7 +187,8 @@
         Attribute mappingAttr, int64_t maxVectorSize) {
   // Poor man's handling of optionality in C++. Will need to be converted to
   // proper transform dialect filters or handling of emptiness.
-  if (rank == 0) return;
+  if (rank == 0)
+    return;
 
   // Compute split point to guarantee we form a maximal chunk divisible by
   // numThreads * vectorSize.
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.h
index ce2a0fd..46d1700 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Common.h
@@ -89,9 +89,9 @@
 /// the variant op.
 // TODO: abstract away AbstractReductionStrategy, this is supposed to be
 // retargetable.
-std::pair<Value, Value> buildCommonTrailingStrategy(
-    ImplicitLocOpBuilder &b, Value variantH,
-    ArrayRef<int64_t> numThreadsInBlock);
+std::pair<Value, Value>
+buildCommonTrailingStrategy(ImplicitLocOpBuilder &b, Value variantH,
+                            ArrayRef<int64_t> numThreadsInBlock);
 
 //===----------------------------------------------------------------------===//
 // Mid-level problem-specific strategy builder APIs, follow MLIR-style builders.
@@ -149,9 +149,10 @@
 
 /// Distribute the explicit copies involved in a matmul operation
 /// `paddedMatmulOpH`.
-std::tuple<Value, Value, Value> buildDistributeMatmulCopies(
-    ImplicitLocOpBuilder &b, Value variantH, Value paddedMatmulOpH,
-    const AbstractGemmLikeStrategy &strategy);
+std::tuple<Value, Value, Value>
+buildDistributeMatmulCopies(ImplicitLocOpBuilder &b, Value variantH,
+                            Value paddedMatmulOpH,
+                            const AbstractGemmLikeStrategy &strategy);
 
 /// Specific pattern to perform masked vectorization of copies give as
 /// parameters, cleanup and vectorize the rest.
@@ -195,8 +196,8 @@
                                            Operation *op,
                                            const GPUModel &gpuModel);
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_COMMON_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_COMMON_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.cpp
index ef81fd4..364e1ec 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.cpp
@@ -44,7 +44,8 @@
       alignment, sizes.back(), elementalBitWidth);
   LDBG("--maxVectorSize: " << maxVectorSize);
   int64_t numElements = 1;
-  for (auto s : sizes) numElements *= s;
+  for (auto s : sizes)
+    numElements *= s;
   LDBG("--numElements: " << numElements);
 
   int64_t actualVectorSize = maxVectorSize;
@@ -59,7 +60,8 @@
     for (; actualVectorSize >= 1; actualVectorSize /= 2) {
       LDBG("--step totalNumThreads * actualVectorSize: "
            << totalNumThreads * actualVectorSize);
-      if (numElements % (totalNumThreads * actualVectorSize) != 0) continue;
+      if (numElements % (totalNumThreads * actualVectorSize) != 0)
+        continue;
       break;
     }
     LDBG("--numElements: " << numElements);
@@ -99,7 +101,7 @@
 }
 
 iree_compiler::gpu::MappingInfo iree_compiler::gpu::CopyMapping::getMappingInfo(
-    MLIRContext* ctx, int totalNumThreads, int64_t alignment,
+    MLIRContext *ctx, int totalNumThreads, int64_t alignment,
     ArrayRef<int64_t> copySizes, bool favorPredication,
     int64_t elementalBitWidth) {
   assert(copySizes.size() == 2 && "only 2-D copy supported for now");
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.h
index 80330f3..3918bec 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/CopyMapping.h
@@ -36,9 +36,10 @@
   /// the required aligned for a cp.async, thae conversion to cp.async will be
   /// skipped.
   /// Asserts that `elementalBitWidth` divides `numContiguousElements`.
-  static int64_t maxContiguousElementsToTransfer(
-      int64_t alignment, int64_t numContiguousElements,
-      int64_t elementalBitWidth = 32);
+  static int64_t
+  maxContiguousElementsToTransfer(int64_t alignment,
+                                  int64_t numContiguousElements,
+                                  int64_t elementalBitWidth = 32);
 
   /// Compute the number of threads to use to perform a copy of `sizes`
   /// elements of `elementalBitWidth`.
@@ -53,9 +54,10 @@
   /// the copy, even if it means reducing the granularity of the transfer.
   /// Otherwise, the implementation will come up with a best-effort predicated
   /// mapping that respects the maximal vector transfer size.
-  static FailureOr<CopyMapping> numThreadsForCopy(
-      int totalNumThreads, int64_t alignment, ArrayRef<int64_t> sizes,
-      bool favorPredication, int64_t elementalBitWidth = 32);
+  static FailureOr<CopyMapping>
+  numThreadsForCopy(int totalNumThreads, int64_t alignment,
+                    ArrayRef<int64_t> sizes, bool favorPredication,
+                    int64_t elementalBitWidth = 32);
 
   /// Greedily compute the MappingInfo to use to perform a copy of `sizes`
   /// elements of bitwidth `elementalBitWidth`.
@@ -76,8 +78,8 @@
                                     int64_t elementalBitWidth = 32);
 };
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_COPY_MAPPING_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_COPY_MAPPING_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MappingInfo.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MappingInfo.h
index d975756..703c53d 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MappingInfo.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MappingInfo.h
@@ -26,8 +26,8 @@
   LLVM_DUMP_METHOD void dump() const;
 };
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_MAPPING_INFO_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_MAPPING_INFO_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.cpp
index 4cb9244..a0b7a90 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.cpp
@@ -78,7 +78,8 @@
 
 LogicalResult MatmulStrategy::validate(const GPUModel &gpuModel) const {
   // Validate the parent strategy.
-  if (failed(AbstractGemmLikeStrategy::validate(gpuModel))) return failure();
+  if (failed(AbstractGemmLikeStrategy::validate(gpuModel)))
+    return failure();
 
   return success();
 }
@@ -188,7 +189,8 @@
 
   if (strategy.useAsyncCopies) {
     // Step 10. Multi-buffering.
-    if (strategy.pipelineDepth > 1) buildMultiBuffering(b, funcH, strategy);
+    if (strategy.pipelineDepth > 1)
+      buildMultiBuffering(b, funcH, strategy);
 
     // Step 11. Convert to async copies.
     // TODO: avoid consuming handles and returning here.
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.h
index f9b2fc0..365b19d 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/MatmulTensorCoreStrategy.h
@@ -28,7 +28,7 @@
 struct GPUModel;
 
 class MatmulStrategy : public AbstractGemmLikeStrategy {
- public:
+public:
   MatmulStrategy(MLIRContext *context,
                  const transform_ext::MatchedMatmulCaptures &captures)
       : AbstractGemmLikeStrategy(), ctx(context), captures(captures) {
@@ -169,8 +169,8 @@
   LLVM_DUMP_METHOD void dump() const;
 };
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_TENSOR_CORE_MATMUL_STRATEGY_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_TENSOR_CORE_MATMUL_STRATEGY_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.cpp
index dc0b691..93232d6 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.cpp
@@ -78,8 +78,9 @@
 
 void iree_compiler::gpu::PadStrategy::configure(GPUModel gpuModel) {}
 
-static std::tuple<Value, Value> buildPadStrategyBlockDistribution(
-    ImplicitLocOpBuilder &b, Value variantH, const PadStrategy &strategy) {
+static std::tuple<Value, Value>
+buildPadStrategyBlockDistribution(ImplicitLocOpBuilder &b, Value variantH,
+                                  const PadStrategy &strategy) {
   // Step 1. Call the matcher. Note that this is the same matcher as used to
   // trigger this compilation path, so it must always apply.
   b.create<RegisterMatchCallbacksOp>();
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.h
index 1058a20..da8ad4b 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/PadStrategy.h
@@ -22,7 +22,7 @@
 
 /// Simple padding strategy.
 class PadStrategy {
- public:
+public:
   PadStrategy(MLIRContext *context,
               const transform_ext::MatchedPadCaptures &captures,
               const PadConfig &config)
@@ -57,8 +57,8 @@
   bool useAsyncCopies = false;
 };
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_PAD_STRATEGY_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_PAD_STRATEGY_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.cpp
index c664c0e..cacbb2d 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.cpp
@@ -80,7 +80,8 @@
   // in a crash in the associated upstream util.
   // TODO: More generally fix PadDynamicAlloc and the associated upstream util.
   bool hasTrailingElementwise = (captures.maybeTrailingRank > 0);
-  if (failed(maybeDivisor) && hasTrailingElementwise) maybeDivisor = 1;
+  if (failed(maybeDivisor) && hasTrailingElementwise)
+    maybeDivisor = 1;
 
   // If the captured dimension has no satisfactory divisor, just tile the last
   // parallel dimension by 2 * kCudaWarpSize.
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.h
index 89c12de..b04cd10 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/SmallReductionStrategy.h
@@ -42,7 +42,7 @@
 // TODO: Add another strategy based on segmented scans, which can allow us
 // to force sizes that don't divide properly into warp shuffles.
 class SmallReductionStrategy : public AbstractReductionStrategy {
- public:
+public:
   SmallReductionStrategy(
       const transform_ext::MatchedReductionCaptures &captures,
       const ReductionConfig &reductionConfig);
@@ -57,14 +57,14 @@
     return res;
   }
 
- private:
+private:
   /// Compute the small strategy based on the problem size and the
   /// `maxNumThreadsToUse`.
   void configure(const ReductionConfig &reductionConfig);
 };
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_SMALL_REDUCTION_STRATEGY_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_SMALL_REDUCTION_STRATEGY_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.cpp
index 36be8e6..19ab093 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.cpp
@@ -102,7 +102,8 @@
     // TODO: splitting here also requires the post-bufferization privatization
     // analysis (see #11715).
     for (vectorSize = maxVectorSize; vectorSize > 1; vectorSize >>= 1)
-      if (reductionDimensionSize % vectorSize == 0) break;
+      if (reductionDimensionSize % vectorSize == 0)
+        break;
     // Pad to the next multiple of the warp size above
     // `reductionDimensionSize / vectorSize` but below `maxNumThreadsToUse`.
     numThreadsXInBlock = std::min(
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.h
index e3ef2bb..f11fbab 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/StagedReductionStrategy.h
@@ -51,7 +51,7 @@
 // TODO: Split to ensure 4 on most of the problem and use a 1-epilogue. This is
 // best done if we can encode the future stride to ensure the 4 is aligned.
 class StagedReductionStrategy : public AbstractReductionStrategy {
- public:
+public:
   StagedReductionStrategy(
       const transform_ext::MatchedReductionCaptures &captures,
       const ReductionConfig &reductionConfig);
@@ -65,7 +65,7 @@
 
   int64_t getVectorSize() const { return vectorSize; }
 
- private:
+private:
   /// Compute the staged strategy based on the reductionDimensionSize, the
   /// `maxNumThreadsToUse` and the `vectorSize`.
   /// The latter 2 numbers control the tradeoff between parallelism and shared
@@ -83,8 +83,8 @@
   int64_t numThreadsXInBlock;
 };
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_STAGED_REDUCTION_STRATEGY_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_STAGED_REDUCTION_STRATEGY_H_
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.cpp b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.cpp
index 784f874..42d0083 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.cpp
@@ -126,9 +126,9 @@
 // TODO: Lift some of the strategy sizing logic as hints and/or heuristics to
 // also work properly in the dynamic case.
 // TODO: Support more HW configs and make it more pluggable.
-static ReductionConfig getReductionConfig(
-    const transform_ext::MatchedReductionCaptures &captures,
-    const GPUModel &gpuModel) {
+static ReductionConfig
+getReductionConfig(const transform_ext::MatchedReductionCaptures &captures,
+                   const GPUModel &gpuModel) {
   auto maybeHardcodedConfiguration =
       applyKnownGoodReductionConfigurations(captures, gpuModel);
   if (succeeded(maybeHardcodedConfiguration))
@@ -256,12 +256,14 @@
   return failure();
 }
 
-static int64_t selectLargestFailsafeValueIfNeeded(
-    int64_t value, int64_t limit, ArrayRef<int64_t> thresholds,
-    ArrayRef<int64_t> failSafeValues) {
+static int64_t
+selectLargestFailsafeValueIfNeeded(int64_t value, int64_t limit,
+                                   ArrayRef<int64_t> thresholds,
+                                   ArrayRef<int64_t> failSafeValues) {
   for (auto [threshold, failSafeValue] :
        llvm::zip(thresholds, failSafeValues)) {
-    if (limit < threshold && value > failSafeValue) return failSafeValue;
+    if (limit < threshold && value > failSafeValue)
+      return failSafeValue;
   }
   return value;
 }
@@ -309,11 +311,13 @@
 
 /// The configurations below have been determined empirically.
 // TODO: Significantly improve these heuristics.
-static MatmulStrategy getMatmulConfig(
-    MLIRContext *context, const transform_ext::MatchedMatmulCaptures &captures,
-    const GPUModel &gpuModel) {
+static MatmulStrategy
+getMatmulConfig(MLIRContext *context,
+                const transform_ext::MatchedMatmulCaptures &captures,
+                const GPUModel &gpuModel) {
   MatmulStrategy strategy(context, captures);
-  if (strategy.cliOptionsSpecified) return strategy;
+  if (strategy.cliOptionsSpecified)
+    return strategy;
 
   auto maybeHardcodedConfiguration =
       applyKnownGoodMatmulConfigurations(captures, gpuModel);
diff --git a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.h b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.h
index 86364ed..0b44483 100644
--- a/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.h
+++ b/compiler/src/iree/compiler/Codegen/TransformStrategies/GPU/Strategies.h
@@ -82,8 +82,8 @@
 void buildStagedReductionStrategy(ImplicitLocOpBuilder &b, Value variantH,
                                   const StagedReductionStrategy &strategy);
 
-}  // namespace gpu
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace gpu
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_STRATEGIES_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORM_DIALECT_STRATEGIES_GPU_STRATEGIES_H_
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp b/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp
index 5ab6a2c..55719c8 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp
@@ -43,7 +43,8 @@
   // Check if any of the dimensions is a ForOp or ParallelOp induction variable.
   for (auto dim : minOp.getDimOperands()) {
     auto ivArg = llvm::dyn_cast<BlockArgument>(dim);
-    if (!ivArg) continue;
+    if (!ivArg)
+      continue;
     Operation *containingOp = ivArg.getOwner()->getParentOp();
     auto forOp = dyn_cast_or_null<scf::ForOp>(containingOp);
     if (forOp && forOp.getInductionVar() == dim) {
@@ -54,7 +55,8 @@
       break;
     }
     auto parallelOp = dyn_cast_or_null<scf::ParallelOp>(containingOp);
-    if (!parallelOp) continue;
+    if (!parallelOp)
+      continue;
     for (auto [index, inductionVar] :
          llvm::enumerate(parallelOp.getInductionVars())) {
       if (inductionVar == dim) {
@@ -65,9 +67,11 @@
         break;
       }
     }
-    if (iv) break;
+    if (iv)
+      break;
   }
-  if (!iv) return false;
+  if (!iv)
+    return false;
   // Calculate the affine map representing `%ub - %iv`.
   AffineExpr ivDim;
   AffineExpr ubDim;
@@ -93,9 +97,11 @@
   // `dividend` or equal to `%ub - %iv`.
   for (AffineExpr result : minOp.getAffineMap().getResults()) {
     if (auto cst = result.dyn_cast<AffineConstantExpr>()) {
-      if (cst.getValue() <= 0 || cst.getValue() % dividend != 0) return false;
+      if (cst.getValue() <= 0 || cst.getValue() % dividend != 0)
+        return false;
     } else {
-      if (diffExp != result) return false;
+      if (diffExp != result)
+        return false;
     }
   }
   // Now check that for every value of the induction variable `%ub - %iv` is
@@ -118,11 +124,13 @@
   affine::canonicalizeMapAndOperands(&modMap, &ops);
   modMap = simplifyAffineMap(modMap);
   auto cst = modMap.getResult(0).dyn_cast<AffineConstantExpr>();
-  if (cst) return (cst.getValue() == 0);
+  if (cst)
+    return (cst.getValue() == 0);
   // If the map doesn't fold to 0 but simplifies to (d0 %n) with d0 an
   // affine.min, check if all the results of the affine.min's map are divisible
   // by `dividend`.
-  if (modMap.getResult(0) != mod) return false;
+  if (modMap.getResult(0) != mod)
+    return false;
   assert(ops.size() == 1);
   auto minOp = ops[0].getDefiningOp<affine::AffineMinOp>();
   return (minOp && affineMinOpDivisible(minOp, dividend));
@@ -144,10 +152,12 @@
       constantResult = cst.getValue();
     }
   }
-  if (constantResult == 0) return {};
+  if (constantResult == 0)
+    return {};
   // If afine.min map's results are all positive and divisible by
   // `constantResult` then it can be replaced by `constantResult`.
-  if (affineMinOpDivisible(minOp, constantResult)) return constantResult;
+  if (affineMinOpDivisible(minOp, constantResult))
+    return constantResult;
   return {};
 }
 
@@ -156,11 +166,12 @@
     : public mlir::OpRewritePattern<mlir::affine::AffineMinOp> {
   using OpRewritePattern<mlir::affine::AffineMinOp>::OpRewritePattern;
 
-  mlir::LogicalResult matchAndRewrite(
-      mlir::affine::AffineMinOp minOp,
-      mlir::PatternRewriter &rewriter) const override {
+  mlir::LogicalResult
+  matchAndRewrite(mlir::affine::AffineMinOp minOp,
+                  mlir::PatternRewriter &rewriter) const override {
     std::optional<int64_t> cst = foldAffineMin(minOp);
-    if (!cst) return failure();
+    if (!cst)
+      return failure();
     rewriter.replaceOpWithNewOp<arith::ConstantOp>(minOp,
                                                    rewriter.getIndexAttr(*cst));
     return success();
@@ -199,7 +210,7 @@
     (void)applyOpPatternsAndFold(minOps, frozenPatterns);
   }
 };
-}  // namespace
+} // namespace
 
 void populateAffineMinSCFCanonicalizationPattern(RewritePatternSet &patterns) {
   patterns.add<AffineMinDistributedSCFCanonicalizationPattern>(
@@ -210,5 +221,5 @@
   return std::make_unique<AffineMinDistributedSCFCanonicalizationPass>();
 });
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/RemoveSingleIterationLoop.cpp b/compiler/src/iree/compiler/Codegen/Transforms/RemoveSingleIterationLoop.cpp
index 2181402..db4062e 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/RemoveSingleIterationLoop.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/RemoveSingleIterationLoop.cpp
@@ -54,7 +54,8 @@
       for (unsigned dimIdx = 0; dimIdx < dims.size(); ++dimIdx) {
         Value dim = dims[dimIdx];
         auto minMax = getMinMaxExpr(dim, dims, symbols);
-        if (!minMax) continue;
+        if (!minMax)
+          continue;
         AffineExpr dimExpr = getAffineDimExpr(dimIdx, expr.getContext());
         LLVM_DEBUG(DBGS() << "Subst: " << dim << " @ " << dimExpr << "\n");
         LLVM_DEBUG(DBGS() << "Before: " << expr << "\n");
@@ -71,7 +72,8 @@
       for (unsigned symIdx = 0; symIdx < symbols.size(); ++symIdx) {
         Value sym = symbols[symIdx];
         auto minMax = getMinMaxExpr(sym, dims, symbols);
-        if (!minMax) continue;
+        if (!minMax)
+          continue;
         AffineExpr symExpr = getAffineSymbolExpr(symIdx, expr.getContext());
         LLVM_DEBUG(DBGS() << "Subst: " << sym << " @ " << symExpr << "\n");
         LLVM_DEBUG(DBGS() << "Before: " << expr << "\n");
@@ -128,7 +130,8 @@
   AffineMap simplifiedMap = substituteMin(map, dims, symbols, getMinMax);
   assert(simplifiedMap.getNumResults() == 1);
   if (auto cst = simplifiedMap.getResult(0).dyn_cast<AffineConstantExpr>()) {
-    if (cst.getValue() > 0) return true;
+    if (cst.getValue() > 0)
+      return true;
   }
   return false;
 }
@@ -153,7 +156,8 @@
   AffineMap simplifiedMap = substituteMin(map, dims, symbols, getMinMax);
   assert(simplifiedMap.getNumResults() == 1);
   if (auto cst = simplifiedMap.getResult(0).dyn_cast<AffineConstantExpr>()) {
-    if (cst.getValue() >= 0) return true;
+    if (cst.getValue() >= 0)
+      return true;
   }
   return false;
 }
@@ -184,16 +188,16 @@
     return success();
   }
 
- private:
+private:
   GetMinMaxExprFn getMinMax;
 };
 
-}  // namespace
+} // namespace
 
 void populateRemoveSingleIterationLoopPattern(RewritePatternSet &patterns,
                                               GetMinMaxExprFn getMinMaxFn) {
   patterns.add<SimplifyTrivialLoops>(patterns.getContext(), getMinMaxFn);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
index f64351e..e30e9f2 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
@@ -34,9 +34,11 @@
 static bool sliceFilter(Operation *op, ValueRange nonIndexComputationOperands,
                         Operation *baseOp) {
   for (auto val : nonIndexComputationOperands) {
-    if (op == val.getDefiningOp()) return false;
+    if (op == val.getDefiningOp())
+      return false;
   }
-  if (op->isProperAncestor(baseOp)) return false;
+  if (op->isProperAncestor(baseOp))
+    return false;
   return !isa<IREE::HAL::InterfaceConstantLoadOp>(op);
 }
 
@@ -84,26 +86,29 @@
   return clonedVals;
 }
 
-SliceAndDynamicDims cloneOffsetsSizesAndStrides(
-    OpBuilder &builder, IREE::Flow::DispatchTensorStoreOp storeOp) {
+SliceAndDynamicDims
+cloneOffsetsSizesAndStrides(OpBuilder &builder,
+                            IREE::Flow::DispatchTensorStoreOp storeOp) {
   return cloneOffsetsSizesAndStridesImpl(
       builder, storeOp, ValueRange{storeOp.getValue(), storeOp.getTarget()},
       storeOp.getMixedOffsets(), storeOp.getMixedSizes(),
       storeOp.getMixedStrides(), storeOp.getTargetDims());
 }
 
-SliceAndDynamicDims cloneOffsetsSizesAndStrides(
-    OpBuilder &builder, IREE::Flow::DispatchTensorLoadOp loadOp) {
+SliceAndDynamicDims
+cloneOffsetsSizesAndStrides(OpBuilder &builder,
+                            IREE::Flow::DispatchTensorLoadOp loadOp) {
   return cloneOffsetsSizesAndStridesImpl(
       builder, loadOp, ValueRange{loadOp.getSource()}, loadOp.getMixedOffsets(),
       loadOp.getMixedSizes(), loadOp.getMixedStrides(), loadOp.getSourceDims());
 }
 
 template <typename AllocLikeOpType>
-std::optional<Value> hoistOneStaticallyBoundAllocation(
-    func::FuncOp funcOp, OpBuilder &builder, Location loc,
-    MemRefType allocLikeType, ValueRange dynamicSizes,
-    std::optional<uint64_t> alignment) {
+std::optional<Value>
+hoistOneStaticallyBoundAllocation(func::FuncOp funcOp, OpBuilder &builder,
+                                  Location loc, MemRefType allocLikeType,
+                                  ValueRange dynamicSizes,
+                                  std::optional<uint64_t> alignment) {
   IntegerAttr alignmentAttr =
       alignment ? builder.getI64IntegerAttr(alignment.value()) : nullptr;
   // For static case just create a new allocation in the entry block of the same
@@ -170,8 +175,9 @@
 }
 
 template <typename AllocLikeOpType>
-std::optional<Value> hoistOneStaticallyBoundAllocation(
-    func::FuncOp funcOp, OpBuilder &builder, AllocLikeOpType allocLikeOp) {
+std::optional<Value>
+hoistOneStaticallyBoundAllocation(func::FuncOp funcOp, OpBuilder &builder,
+                                  AllocLikeOpType allocLikeOp) {
   OpBuilder::InsertionGuard guard(builder);
   builder.setInsertionPoint(allocLikeOp);
   return hoistOneStaticallyBoundAllocation<AllocLikeOpType>(
@@ -195,7 +201,8 @@
 
   // Collect all allocLikes that are hoistable.
   funcOp.walk([&](AllocLikeOpType allocLikeOp) {
-    if (allocLikeOp->getBlock() == &funcOp.getBody().front()) return;
+    if (allocLikeOp->getBlock() == &funcOp.getBody().front())
+      return;
     if (allocLikeOp.getDynamicSizes().empty()) {
       allocLikeOps.push_back(allocLikeOp);
       return;
@@ -214,7 +221,8 @@
     SmallVector<memref::DeallocOp> deallocOps;
     for (Operation *user : allocLikeOp->getUsers()) {
       auto dealloc = dyn_cast<memref::DeallocOp>(user);
-      if (dealloc) deallocOps.push_back(dealloc);
+      if (dealloc)
+        deallocOps.push_back(dealloc);
     }
 
     LLVM_DEBUG({
@@ -226,7 +234,8 @@
     });
     std::optional<Value> replacement =
         hoistOneStaticallyBoundAllocation(funcOp, rewriter, allocLikeOp);
-    if (!replacement) continue;
+    if (!replacement)
+      continue;
     LLVM_DEBUG({
       llvm::dbgs() << "Replacement : ";
       replacement->dump();
@@ -234,20 +243,23 @@
     Value replacementVal = replacement.value();
     rewriter.replaceOp(allocLikeOp, replacementVal);
 
-    for (memref::DeallocOp deallocOp : deallocOps) rewriter.eraseOp(deallocOp);
+    for (memref::DeallocOp deallocOp : deallocOps)
+      rewriter.eraseOp(deallocOp);
   }
 }
 
 /// Explicit instantiations for `hoistStaticallyBoundAllocationsInFunc` and
 /// dependent functions.
-template std::optional<Value> hoistOneStaticallyBoundAllocation<
-    memref::AllocOp>(func::FuncOp funcOp, OpBuilder &builder, Location loc,
-                     MemRefType allocLikeType, ValueRange dynamicSizes,
-                     std::optional<uint64_t> alignment);
-template std::optional<Value> hoistOneStaticallyBoundAllocation<
-    memref::AllocaOp>(func::FuncOp funcOp, OpBuilder &builder, Location loc,
-                      MemRefType allocLikeType, ValueRange dynamicSizes,
-                      std::optional<uint64_t> alignment);
+template std::optional<Value>
+hoistOneStaticallyBoundAllocation<memref::AllocOp>(
+    func::FuncOp funcOp, OpBuilder &builder, Location loc,
+    MemRefType allocLikeType, ValueRange dynamicSizes,
+    std::optional<uint64_t> alignment);
+template std::optional<Value>
+hoistOneStaticallyBoundAllocation<memref::AllocaOp>(
+    func::FuncOp funcOp, OpBuilder &builder, Location loc,
+    MemRefType allocLikeType, ValueRange dynamicSizes,
+    std::optional<uint64_t> alignment);
 template std::optional<Value>
 hoistOneStaticallyBoundAllocation<memref::AllocOp>(func::FuncOp funcOp,
                                                    OpBuilder &builder,
@@ -255,10 +267,12 @@
 template std::optional<Value>
 hoistOneStaticallyBoundAllocation<memref::AllocaOp>(
     func::FuncOp funcOp, OpBuilder &builder, memref::AllocaOp allocLikeOp);
-template void hoistStaticallyBoundAllocationsInFunc<memref::AllocOp>(
-    RewriterBase &rewriter, func::FuncOp funcOp);
-template void hoistStaticallyBoundAllocationsInFunc<memref::AllocaOp>(
-    RewriterBase &rewriter, func::FuncOp funcOp);
+template void
+hoistStaticallyBoundAllocationsInFunc<memref::AllocOp>(RewriterBase &rewriter,
+                                                       func::FuncOp funcOp);
+template void
+hoistStaticallyBoundAllocationsInFunc<memref::AllocaOp>(RewriterBase &rewriter,
+                                                        func::FuncOp funcOp);
 
 //===---------------------------------------------------------------------===//
 // Lowering `flow.dispatch.workgroup_count_from_slice` operation.
@@ -443,11 +457,13 @@
                                 PatternRewriter &rewriter) const override {
     std::optional<Value> reshapeSrc =
         getStaticReshapeOpSrc<TensorReshapeOp>(reshapeOp);
-    if (!reshapeSrc) return failure();
+    if (!reshapeSrc)
+      return failure();
 
     auto loadOp =
         reshapeSrc->template getDefiningOp<IREE::Flow::DispatchTensorLoadOp>();
-    if (!loadOp) return failure();
+    if (!loadOp)
+      return failure();
 
     // Make sure we are loading the full incoming subspan. Otherwise we cannot
     // simply adjust the subspan's resultant type later.
@@ -458,7 +474,8 @@
     auto subspanOp =
         loadOp.getSource()
             .template getDefiningOp<IREE::HAL::InterfaceBindingSubspanOp>();
-    if (!subspanOp) return failure();
+    if (!subspanOp)
+      return failure();
     assert(subspanOp.getDynamicDims().empty());
 
     auto tensorAccess =
@@ -524,12 +541,14 @@
                   cast<tensor::CollapseShapeOp>(reshapeOp))
             : getStaticReshapeOpSrc<tensor::ExpandShapeOp>(
                   cast<tensor::ExpandShapeOp>(reshapeOp));
-    if (!reshapeSrc) return failure();
+    if (!reshapeSrc)
+      return failure();
 
     auto subspanOp =
         storeOp.getTarget()
             .template getDefiningOp<IREE::HAL::InterfaceBindingSubspanOp>();
-    if (!subspanOp) return failure();
+    if (!subspanOp)
+      return failure();
     assert(subspanOp.getDynamicDims().empty());
 
     auto tensorAccess =
@@ -555,7 +574,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 void populateReshapeToInterfaceTensorPatterns(RewritePatternSet &patterns) {
   patterns.insert<FoldReshapeIntoInterfaceTensorLoad<tensor::CollapseShapeOp>,
@@ -616,7 +635,7 @@
     return eraseAlignmentOnlyDeadOp(rewriter, op);
   }
 };
-}  // namespace
+} // namespace
 
 void populateRemoveDeadMemAllocPatterns(RewritePatternSet &patterns) {
   patterns.insert<RemoveDeadMemAllocs>(patterns.getContext());
@@ -646,7 +665,8 @@
         // Skip the whole analysis if any user is a subview.
         // TODO: This could be extended if needed by recursively merging
         // liveness.
-        if (isa<memref::SubViewOp>(user)) return;
+        if (isa<memref::SubViewOp>(user))
+          return;
         if (group.liveness.count(user)) {
           aliasGroups.push_back(i);
           break;
@@ -684,12 +704,14 @@
   LLVM_DEBUG({
     for (size_t i = 0; i < groups.size(); i++) {
       llvm::dbgs() << "Alias group " << i << ":\n";
-      for (Operation *op : groups[i].allocs) op->dump();
+      for (Operation *op : groups[i].allocs)
+        op->dump();
     }
   });
 
   for (size_t i = 0; i < groups.size(); i++) {
-    if (groups[i].allocs.empty()) continue;
+    if (groups[i].allocs.empty())
+      continue;
     aliasGroups.push_back(std::move(groups[i].allocs));
   }
 }
@@ -704,7 +726,8 @@
 
 void packAllocs(OpBuilder &builder, func::FuncOp funcOp,
                 ArrayRef<AliasGroup> aliasGroups) {
-  if (aliasGroups.empty()) return;
+  if (aliasGroups.empty())
+    return;
   DataLayout dataLayout = DataLayout::closest(funcOp);
   builder.setInsertionPointToStart(&(*funcOp.getBody().begin()));
   int64_t maxAlloc = 0;
@@ -738,5 +761,5 @@
   }
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.h b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.h
index 8452886..8f2c95a 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.h
+++ b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.h
@@ -34,10 +34,12 @@
   SmallVector<OpFoldResult> strides;
   SmallVector<Value> dynamicDims;
 };
-SliceAndDynamicDims cloneOffsetsSizesAndStrides(
-    OpBuilder &builder, IREE::Flow::DispatchTensorStoreOp storeOp);
-SliceAndDynamicDims cloneOffsetsSizesAndStrides(
-    OpBuilder &builder, IREE::Flow::DispatchTensorLoadOp loadOp);
+SliceAndDynamicDims
+cloneOffsetsSizesAndStrides(OpBuilder &builder,
+                            IREE::Flow::DispatchTensorStoreOp storeOp);
+SliceAndDynamicDims
+cloneOffsetsSizesAndStrides(OpBuilder &builder,
+                            IREE::Flow::DispatchTensorLoadOp loadOp);
 
 /// Creates an allocation in the entry block of the function if the size is
 /// statically bounded. For a static allocation, it returns an allocation
@@ -46,10 +48,11 @@
 /// dynamic shape of the allocation. Returns std::nullopt if the method
 /// couldnt creat an allocation in the entry block.
 template <typename AllocLikeOpType>
-std::optional<Value> hoistOneStaticallyBoundAllocation(
-    func::FuncOp funcOp, OpBuilder &builder, Location loc,
-    MemRefType allocaType, ValueRange dynamicSizes,
-    std::optional<uint64_t> alignment);
+std::optional<Value>
+hoistOneStaticallyBoundAllocation(func::FuncOp funcOp, OpBuilder &builder,
+                                  Location loc, MemRefType allocaType,
+                                  ValueRange dynamicSizes,
+                                  std::optional<uint64_t> alignment);
 
 /// Hoists `allocaOp` to the entry block of the function if the size is
 /// statically bounded. For a static allocation, it returns an allocation
@@ -58,8 +61,9 @@
 /// dynamic shape of the allocation. The method returns a value, but
 /// does not replace the uses of the `allocaOp`.
 template <typename AllocLikeOpType>
-std::optional<Value> hoistOneStaticallyBoundAllocation(
-    func::FuncOp funcOp, OpBuilder &builder, AllocLikeOpType allocaOp);
+std::optional<Value>
+hoistOneStaticallyBoundAllocation(func::FuncOp funcOp, OpBuilder &builder,
+                                  AllocLikeOpType allocaOp);
 
 /// Traverse funcOp and try to hoist every AllocaOp to the entry block of the
 /// function if the size is statically bounded.
@@ -144,7 +148,7 @@
     ArrayRef<OpFoldResult> workgroupCount,
     int maxWorkgroupParallelDims = kNumMaxParallelDims);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_TRANSFORMS_TRANSFORMS_H_
+#endif // IREE_COMPILER_CODEGEN_TRANSFORMS_TRANSFORMS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.cpp
index 8ba4384..26ae6b4 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.cpp
@@ -35,40 +35,41 @@
 std::optional<TensorEncoding> getEncoding(RankedTensorType tensorType) {
   auto encodingAttr =
       llvm::dyn_cast_if_present<EncodingAttr>(tensorType.getEncoding());
-  if (!encodingAttr) return std::nullopt;
+  if (!encodingAttr)
+    return std::nullopt;
   return encodingAttr.getEncoding().getValue();
 }
 
 std::optional<MatmulType> getMatmulType(TensorEncoding encoding) {
   switch (encoding) {
-    case TensorEncoding::MATMUL_F32F32F32_LHS:
-    case TensorEncoding::MATMUL_F32F32F32_RHS:
-    case TensorEncoding::MATMUL_F32F32F32_RESULT:
-      return MatmulType::F32F32F32;
-    case TensorEncoding::MATMUL_I8I8I32_LHS:
-    case TensorEncoding::MATMUL_I8I8I32_RHS:
-    case TensorEncoding::MATMUL_I8I8I32_RESULT:
-      return MatmulType::I8I8I32;
-    default:
-      return std::nullopt;
+  case TensorEncoding::MATMUL_F32F32F32_LHS:
+  case TensorEncoding::MATMUL_F32F32F32_RHS:
+  case TensorEncoding::MATMUL_F32F32F32_RESULT:
+    return MatmulType::F32F32F32;
+  case TensorEncoding::MATMUL_I8I8I32_LHS:
+  case TensorEncoding::MATMUL_I8I8I32_RHS:
+  case TensorEncoding::MATMUL_I8I8I32_RESULT:
+    return MatmulType::I8I8I32;
+  default:
+    return std::nullopt;
   }
 }
 
 std::optional<MatmulOperandRole> getMatmulOperandRole(TensorEncoding encoding) {
   switch (encoding) {
-    case TensorEncoding::MATMUL_F32F32F32_LHS:
-    case TensorEncoding::MATMUL_I8I8I32_LHS:
-      return MatmulOperandRole::LHS;
-    case TensorEncoding::MATMUL_F32F32F32_RHS:
-    case TensorEncoding::MATMUL_I8I8I32_RHS:
-      return MatmulOperandRole::RHS;
-    case TensorEncoding::MATMUL_F32F32F32_RESULT:
-    case TensorEncoding::MATMUL_I8I8I32_RESULT:
-      return MatmulOperandRole::RESULT;
-    default:
-      return std::nullopt;
+  case TensorEncoding::MATMUL_F32F32F32_LHS:
+  case TensorEncoding::MATMUL_I8I8I32_LHS:
+    return MatmulOperandRole::LHS;
+  case TensorEncoding::MATMUL_F32F32F32_RHS:
+  case TensorEncoding::MATMUL_I8I8I32_RHS:
+    return MatmulOperandRole::RHS;
+  case TensorEncoding::MATMUL_F32F32F32_RESULT:
+  case TensorEncoding::MATMUL_I8I8I32_RESULT:
+    return MatmulOperandRole::RESULT;
+  default:
+    return std::nullopt;
   }
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.h b/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.h
index c6a2100..1eb914b 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/EncodingUtils.h
@@ -29,26 +29,25 @@
 
 // Constructs a MatmulType from separate operands element types, or returns
 // std::nullopt if no MatmulType enumeration value would match.
-std::optional<MatmulType> getMatmulType(Type lhsElementType,
-                                        Type rhsElementType,
-                                        Type resultElementType);
+std::optional<MatmulType>
+getMatmulType(Type lhsElementType, Type rhsElementType, Type resultElementType);
 
 // Helper to read the TensorEncoding from a TensorEncodingAttr on a TensorType.
 // Return std::nullopt if the TensorType does not have a TensorEncodingAttr.
-std::optional<IREE::LinalgExt::TensorEncoding> getEncoding(
-    RankedTensorType tensorType);
+std::optional<IREE::LinalgExt::TensorEncoding>
+getEncoding(RankedTensorType tensorType);
 
 // Reads a MatmulType from a TensorEncoding, or returns std::nullopt if no
 // MatmulType enumeration value would match.
-std::optional<MatmulType> getMatmulType(
-    IREE::LinalgExt::TensorEncoding encoding);
+std::optional<MatmulType>
+getMatmulType(IREE::LinalgExt::TensorEncoding encoding);
 
 // Reads a MatmulOperandRole from a TensorEncoding, or returns std::nullopt if
 // no MatmulOperandRole enumeration value would match.
-std::optional<MatmulOperandRole> getMatmulOperandRole(
-    IREE::LinalgExt::TensorEncoding encoding);
+std::optional<MatmulOperandRole>
+getMatmulOperandRole(IREE::LinalgExt::TensorEncoding encoding);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_SRC_IREE_COMPILER_CODEGEN_UTILS_ENCODINGUTILS_H_
+#endif // IREE_COMPILER_SRC_IREE_COMPILER_CODEGEN_UTILS_ENCODINGUTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp
index ac4051a..a6e7254 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp
@@ -30,9 +30,10 @@
 // GPU processor IDs and sizes
 //===----------------------------------------------------------------------===//
 
-llvm::SmallVector<mlir::linalg::ProcInfo, 2> getGPUThreadIdsAndCounts(
-    mlir::OpBuilder &builder, mlir::Location loc, unsigned numDims,
-    llvm::ArrayRef<int64_t> workgroupSize) {
+llvm::SmallVector<mlir::linalg::ProcInfo, 2>
+getGPUThreadIdsAndCounts(mlir::OpBuilder &builder, mlir::Location loc,
+                         unsigned numDims,
+                         llvm::ArrayRef<int64_t> workgroupSize) {
   assert(numDims <= kNumGPUDims);
   llvm::SmallVector<mlir::linalg::ProcInfo, 2> procInfo(numDims);
   std::array<gpu::Dimension, kNumGPUDims> dimAttr{
@@ -48,9 +49,10 @@
   return procInfo;
 }
 
-llvm::SmallVector<mlir::linalg::ProcInfo, 2> getSubgroupIdsAndCounts(
-    mlir::OpBuilder &builder, mlir::Location loc, unsigned warpSize,
-    unsigned numDims, llvm::ArrayRef<int64_t> numSubgroups) {
+llvm::SmallVector<mlir::linalg::ProcInfo, 2>
+getSubgroupIdsAndCounts(mlir::OpBuilder &builder, mlir::Location loc,
+                        unsigned warpSize, unsigned numDims,
+                        llvm::ArrayRef<int64_t> numSubgroups) {
   assert(numDims <= kNumGPUDims);
   llvm::SmallVector<mlir::linalg::ProcInfo, 2> procInfo(numDims);
   std::array<gpu::Dimension, kNumGPUDims> dimAttr{
@@ -98,30 +100,35 @@
   // Verify that each dimension of the shape can be distributed on the
   // threads
   // For zero dim tensor, consider it's too small to access using all threads.
-  if (shape.size() == 0) return false;
+  if (shape.size() == 0)
+    return false;
   int64_t threadsAvailable = threadCount;
   for (const auto &[index, dim] : llvm::enumerate(llvm::reverse(shape))) {
     int64_t numElementPerThread = index == 0 ? vectorSize : 1;
     int64_t numThreads = dim / numElementPerThread;
-    if (numThreads == 0) return false;
+    if (numThreads == 0)
+      return false;
     if (numThreads > threadsAvailable) {
       // If there are no enough remaining threads to distribute the current
       // dimension, try to use all remaining threads. But we still need to make
       // sure all work can be distributed to these threads evenly.
-      if (numThreads % threadsAvailable != 0) return false;
+      if (numThreads % threadsAvailable != 0)
+        return false;
       numThreads = threadsAvailable;
     }
-    if (threadsAvailable % numThreads != 0) return false;
+    if (threadsAvailable % numThreads != 0)
+      return false;
     threadsAvailable = threadsAvailable / numThreads;
-    if (threadsAvailable == 1) break;
+    if (threadsAvailable == 1)
+      break;
   }
   return threadsAvailable == 1;
 }
 
 /// Pick an unrolling order that will allow tensorcore operation to reuse LHS
 /// register. This is needed to get good performance on sm_80 target.
-std::optional<SmallVector<int64_t>> gpuMmaUnrollOrder(
-    vector::ContractionOp contract) {
+std::optional<SmallVector<int64_t>>
+gpuMmaUnrollOrder(vector::ContractionOp contract) {
   SmallVector<int64_t> order;
   // First make reduction the outer dimensions.
   for (auto [index, iter] : llvm::enumerate(contract.getIteratorTypes())) {
@@ -160,14 +167,16 @@
   OpBuilder::InsertionGuard guard(builder);
 
   func::FuncOp funcOp = subview->getParentOfType<func::FuncOp>();
-  if (!funcOp) return std::nullopt;
+  if (!funcOp)
+    return std::nullopt;
 
   // The subview size bounds are expected to be constant; they specify the shape
   // of the allocation.
   SmallVector<int64_t, 2> shape;
   for (Value bound : sizeBounds) {
     APInt value;
-    if (!matchPattern(bound, m_ConstantInt(&value))) return std::nullopt;
+    if (!matchPattern(bound, m_ConstantInt(&value)))
+      return std::nullopt;
     shape.push_back(value.getSExtValue());
   }
 
@@ -200,8 +209,10 @@
     }
 
     auto fillOp = dyn_cast<linalg::FillOp>(prevOp);
-    if (!fillOp) break;
-    if (fillOp.output() != copyOp.getSource()) break;
+    if (!fillOp)
+      break;
+    if (fillOp.output() != copyOp.getSource())
+      break;
     // Move the fillOp and change the destination to the copy destination.
     fillOp->moveBefore(copyOp);
     fillOp.getOutputsMutable().assign(copyOp.getTarget());
@@ -240,8 +251,9 @@
 
 /// Propagate the shared memory copy into the consumer op if it's a fully
 /// parallel linalg.generic.
-static bool propagateCopySourceIntoConsumerGeneric(
-    memref::CopyOp copyOp, SmallVector<Operation *> &toDelete) {
+static bool
+propagateCopySourceIntoConsumerGeneric(memref::CopyOp copyOp,
+                                       SmallVector<Operation *> &toDelete) {
   // Look for a generic Op reading the copyOp target.
   Operation *nextOp = copyOp->getNextNode();
   while (nextOp) {
@@ -254,7 +266,8 @@
         !consumer.getMatchingIndexingMap(consumer.getDpsInitOperand(0))
              .isIdentity())
       break;
-    if (*consumer.getOutputs().begin() != copyOp.getTarget()) break;
+    if (*consumer.getOutputs().begin() != copyOp.getTarget())
+      break;
     insertInputValueIntoGeneric(copyOp.getSource(), consumer);
     toDelete.push_back(consumer);
     return true;
@@ -274,7 +287,8 @@
         toDelete.push_back(copyOp.getOperation());
     }
   });
-  for (Operation *op : toDelete) op->erase();
+  for (Operation *op : toDelete)
+    op->erase();
 }
 
 void insertBarriersAroundSharedMemoryCopy(func::FuncOp funcOp) {
@@ -391,35 +405,35 @@
                                           vector::CombiningKind combiningKind,
                                           Type type) {
   switch (combiningKind) {
-    case vector::CombiningKind::ADD:
-      return builder.getZeroAttr(type);
-    case vector::CombiningKind::MUL: {
-      if (type.isIntOrIndex()) {
-        return builder.getIntegerAttr(type, 1);
-      }
-      return builder.getFloatAttr(type, 1);
-    }
-    case vector::CombiningKind::MINUI:
-    case vector::CombiningKind::MINSI:
-      return builder.getIntegerAttr(type, std::numeric_limits<int64_t>::max());
-    case vector::CombiningKind::MAXUI:
-    case vector::CombiningKind::MAXSI:
-      return builder.getIntegerAttr(type, std::numeric_limits<int64_t>::min());
-    case vector::CombiningKind::AND:
+  case vector::CombiningKind::ADD:
+    return builder.getZeroAttr(type);
+  case vector::CombiningKind::MUL: {
+    if (type.isIntOrIndex()) {
       return builder.getIntegerAttr(type, 1);
-    case vector::CombiningKind::OR:
-    case vector::CombiningKind::XOR:
-      return builder.getZeroAttr(type);
-    case vector::CombiningKind::MINF: {
-      auto posInfApFloat = APFloat::getInf(
-          llvm::cast<FloatType>(type).getFloatSemantics(), /*Negative=*/false);
-      return builder.getFloatAttr(type, posInfApFloat);
     }
-    case vector::CombiningKind::MAXF: {
-      auto negInfApFloat = APFloat::getInf(
-          llvm::cast<FloatType>(type).getFloatSemantics(), /*Negative=*/true);
-      return builder.getFloatAttr(type, negInfApFloat);
-    }
+    return builder.getFloatAttr(type, 1);
+  }
+  case vector::CombiningKind::MINUI:
+  case vector::CombiningKind::MINSI:
+    return builder.getIntegerAttr(type, std::numeric_limits<int64_t>::max());
+  case vector::CombiningKind::MAXUI:
+  case vector::CombiningKind::MAXSI:
+    return builder.getIntegerAttr(type, std::numeric_limits<int64_t>::min());
+  case vector::CombiningKind::AND:
+    return builder.getIntegerAttr(type, 1);
+  case vector::CombiningKind::OR:
+  case vector::CombiningKind::XOR:
+    return builder.getZeroAttr(type);
+  case vector::CombiningKind::MINF: {
+    auto posInfApFloat = APFloat::getInf(
+        llvm::cast<FloatType>(type).getFloatSemantics(), /*Negative=*/false);
+    return builder.getFloatAttr(type, posInfApFloat);
+  }
+  case vector::CombiningKind::MAXF: {
+    auto negInfApFloat = APFloat::getInf(
+        llvm::cast<FloatType>(type).getFloatSemantics(), /*Negative=*/true);
+    return builder.getFloatAttr(type, negInfApFloat);
+  }
   }
   return TypedAttr();
 }
@@ -580,9 +594,11 @@
     VectorType sliceType;
     for (Operation *users : op->getUsers()) {
       auto extract = dyn_cast<vector::ExtractStridedSliceOp>(users);
-      if (!extract) return std::nullopt;
+      if (!extract)
+        return std::nullopt;
       auto vecType = llvm::cast<VectorType>(extract.getResult().getType());
-      if (sliceType && sliceType != vecType) return std::nullopt;
+      if (sliceType && sliceType != vecType)
+        return std::nullopt;
       sliceType = vecType;
     }
     return llvm::to_vector(sliceType.getShape());
@@ -591,7 +607,8 @@
     if (auto vecType = llvm::dyn_cast<VectorType>(op->getResultTypes()[0])) {
       // TODO: The condition for unrolling elementwise should be restricted
       // only to operations that need unrolling (connected to the contract).
-      if (vecType.getRank() < 2) return std::nullopt;
+      if (vecType.getRank() < 2)
+        return std::nullopt;
 
       // First check whether there is a slice to infer the shape from. This is
       // required for cases where the accumulator type differs from the input
@@ -600,12 +617,15 @@
       VectorType sliceType;
       for (Operation *users : op->getUsers()) {
         auto extract = dyn_cast<vector::ExtractStridedSliceOp>(users);
-        if (!extract) return std::nullopt;
+        if (!extract)
+          return std::nullopt;
         auto vecType = llvm::cast<VectorType>(extract.getResult().getType());
-        if (sliceType && sliceType != vecType) return std::nullopt;
+        if (sliceType && sliceType != vecType)
+          return std::nullopt;
         sliceType = vecType;
       }
-      if (sliceType) return llvm::to_vector(sliceType.getShape());
+      if (sliceType)
+        return llvm::to_vector(sliceType.getShape());
 
       // Else unroll for trailing elementwise.
       SmallVector<int64_t> nativeSize(vecType.getRank() - 2, 1);
@@ -621,34 +641,42 @@
 // getMmaNativeVectorSize
 //===----------------------------------------------------------------------===//
 /// Returns vector::ContractionOp operand's index where the result is used.
-static std::optional<int> getVectorContractOpOperandId(
-    vector::ContractionOp contractOp, OpResult result) {
-  if (contractOp.getLhs() == result) return 0;
-  if (contractOp.getRhs() == result) return 1;
-  if (contractOp.getAcc() == result) return 2;
+static std::optional<int>
+getVectorContractOpOperandId(vector::ContractionOp contractOp,
+                             OpResult result) {
+  if (contractOp.getLhs() == result)
+    return 0;
+  if (contractOp.getRhs() == result)
+    return 1;
+  if (contractOp.getAcc() == result)
+    return 2;
   return std::nullopt;
 }
 
 /// Returns vector::ContractionOp operand's index  where the
 /// vector::TransferReadOp is consumed either consumed directly or via
 /// vector::ExtractStridedSliceOp.
-static std::optional<int> getVectorContractOpOperandIdForVectorReadOp(
-    Operation *op) {
+static std::optional<int>
+getVectorContractOpOperandIdForVectorReadOp(Operation *op) {
   vector::ContractionOp contractOp;
 
   // Check if the vector::TransferReadOp is consumed directly by
   // vector::ContractionOp.
-  if (op->use_empty()) return std::nullopt;
+  if (op->use_empty())
+    return std::nullopt;
   Operation *firstLevelUser = *((op->getUsers()).begin());
-  if (!firstLevelUser) return std::nullopt;
+  if (!firstLevelUser)
+    return std::nullopt;
   if (auto contractOp = dyn_cast<vector::ContractionOp>(firstLevelUser))
     return getVectorContractOpOperandId(contractOp, op->getResult(0));
 
   // Check if the vector::TransferReadOp is consumed indirectly by
   // vector::ContractionOp. Only check until the second level of use-def chain.
-  if (firstLevelUser->use_empty()) return std::nullopt;
+  if (firstLevelUser->use_empty())
+    return std::nullopt;
   Operation *secondLevelUser = *((firstLevelUser->getUsers()).begin());
-  if (!secondLevelUser) return std::nullopt;
+  if (!secondLevelUser)
+    return std::nullopt;
   if (auto contractOp = dyn_cast<vector::ContractionOp>(secondLevelUser))
     return getVectorContractOpOperandId(contractOp,
                                         firstLevelUser->getResult(0));
@@ -794,9 +822,11 @@
         VectorType sliceType;
         for (Operation *users : op->getUsers()) {
           auto extract = dyn_cast<vector::ExtractStridedSliceOp>(users);
-          if (!extract) return std::nullopt;
+          if (!extract)
+            return std::nullopt;
           auto vecType = llvm::cast<VectorType>(extract.getResult().getType());
-          if (sliceType && sliceType != vecType) return std::nullopt;
+          if (sliceType && sliceType != vecType)
+            return std::nullopt;
           sliceType = vecType;
         }
         LLVM_DEBUG({
@@ -837,5 +867,5 @@
   return false;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h
index 221ef46..ff0172b 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h
@@ -20,9 +20,9 @@
 // GPU processor IDs and sizes
 //===----------------------------------------------------------------------===//
 
-llvm::SmallVector<linalg::ProcInfo, 2> getGPUThreadIdsAndCounts(
-    OpBuilder &builder, Location loc, unsigned numDims,
-    llvm::ArrayRef<int64_t> workgroupSize);
+llvm::SmallVector<linalg::ProcInfo, 2>
+getGPUThreadIdsAndCounts(OpBuilder &builder, Location loc, unsigned numDims,
+                         llvm::ArrayRef<int64_t> workgroupSize);
 
 /// Computes subgroup ID and returns in (X, Y, Z) order.
 ///
@@ -31,9 +31,9 @@
 /// warp is full and we pick a workgroup size so that `workgroupSize.x %
 /// warpSize == 0`. This is why we can have warpId = { threadId.x / warpSize,
 /// threadId.y, threadId.z }.
-llvm::SmallVector<linalg::ProcInfo, 2> getSubgroupIdsAndCounts(
-    OpBuilder &builder, Location loc, unsigned warpSize, unsigned numDims,
-    llvm::ArrayRef<int64_t> numSubgroups);
+llvm::SmallVector<linalg::ProcInfo, 2>
+getSubgroupIdsAndCounts(OpBuilder &builder, Location loc, unsigned warpSize,
+                        unsigned numDims, llvm::ArrayRef<int64_t> numSubgroups);
 
 /// Returns the workgroup size associated to the funcOp entry point.
 std::array<int64_t, 3> getWorkgroupSize(func::FuncOp funcOp);
@@ -50,8 +50,8 @@
 
 /// Pick an unrolling order that will allow tensorcore operation to reuse LHS
 /// register. This is needed to get good performance on sm_80 target.
-std::optional<SmallVector<int64_t>> gpuMmaUnrollOrder(
-    vector::ContractionOp contract);
+std::optional<SmallVector<int64_t>>
+gpuMmaUnrollOrder(vector::ContractionOp contract);
 
 //===----------------------------------------------------------------------===//
 // GPU workgroup memory
@@ -109,7 +109,7 @@
 /// using shared memory when CodeGen towards the GPU.
 bool sharedMemTransposeFilter(AffineMap indexMap);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_UTILS_GPUUTILS_H_
+#endif // IREE_COMPILER_CODEGEN_UTILS_GPUUTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.cpp b/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.cpp
index 04025d2..2adcf61 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.cpp
@@ -73,8 +73,8 @@
 // TODO(dcaballe):
 //   * Consider transpose + reductions.
 //   * Consider input and output transposes.
-static SmallVector<OpOperand *> computeTransposeInfo(
-    LinalgOp linalgOp, TransposeMapFilter transposeMapFilter) {
+static SmallVector<OpOperand *>
+computeTransposeInfo(LinalgOp linalgOp, TransposeMapFilter transposeMapFilter) {
   SmallVector<OpOperand *> transposeOperands;
 
   // Reductions are not supported.
@@ -125,5 +125,5 @@
   dynamicTrait = computeDynamicInfo(linalgOp);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.h b/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.h
index 3463353..1e57e49 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/LinalgOpInfo.h
@@ -23,7 +23,7 @@
 using TransposeMapFilter = std::function<bool(AffineMap map)>;
 
 class LinalgOpInfo {
- public:
+public:
   LinalgOpInfo(linalg::LinalgOp linalgOp);
   LinalgOpInfo(linalg::LinalgOp linalgOp,
                TransposeMapFilter transposeMapFilter);
@@ -36,7 +36,7 @@
     return transposeOperands;
   }
 
- private:
+private:
   void computeInfo(linalg::LinalgOp);
 
   TransposeMapFilter transposeMapFilter;
@@ -46,7 +46,7 @@
   SmallVector<OpOperand *> transposeOperands;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_COMMON_LINALGOPINFO_H_
+#endif // IREE_COMPILER_CODEGEN_COMMON_LINALGOPINFO_H_
diff --git a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp
index 81f3892..824d8c2 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp
@@ -13,8 +13,8 @@
 namespace mlir {
 namespace iree_compiler {
 
-SetVector<IREE::HAL::ExecutableTargetAttr> gatherExecutableTargets(
-    ArrayRef<IREE::HAL::ExecutableOp> executableOps) {
+SetVector<IREE::HAL::ExecutableTargetAttr>
+gatherExecutableTargets(ArrayRef<IREE::HAL::ExecutableOp> executableOps) {
   SetVector<IREE::HAL::ExecutableTargetAttr> result;
   for (auto executableOp : executableOps) {
     auto variantOps =
@@ -28,10 +28,10 @@
 
 // Renames |op| within |moduleOp| with a new name that is unique within both
 // |moduleOp| and |optionalSymbolTable| (if one is provided).
-static void renameWithDisambiguatedName(
-    Operation *op, Operation *moduleOp,
-    DenseMap<StringRef, Operation *> &targetSymbolMap,
-    SymbolTable *optionalSymbolTable) {
+static void
+renameWithDisambiguatedName(Operation *op, Operation *moduleOp,
+                            DenseMap<StringRef, Operation *> &targetSymbolMap,
+                            SymbolTable *optionalSymbolTable) {
   StringRef originalName = SymbolTable::getSymbolName(op).getValue();
 
   // Iteratively try suffixes until we find one that isn't used.
@@ -63,9 +63,9 @@
 //
 // Fails if a public symbol in |sourceModuleOp| conflicts with another public
 // symbol tracked in |targetSymbolMap|.
-static LogicalResult mergeModuleInto(
-    Operation *sourceModuleOp, Operation *targetModuleOp,
-    DenseMap<StringRef, Operation *> &targetSymbolMap) {
+static LogicalResult
+mergeModuleInto(Operation *sourceModuleOp, Operation *targetModuleOp,
+                DenseMap<StringRef, Operation *> &targetSymbolMap) {
   auto &sourceBlock = sourceModuleOp->getRegion(0).front();
   auto &targetBlock = targetModuleOp->getRegion(0).front();
   SymbolTable sourceSymbolTable(sourceModuleOp);
@@ -73,7 +73,8 @@
       llvm::map_to_vector<8>(sourceBlock, [&](Operation &op) { return &op; });
 
   for (auto &sourceOp : allOps) {
-    if (sourceOp->hasTrait<OpTrait::IsTerminator>()) continue;
+    if (sourceOp->hasTrait<OpTrait::IsTerminator>())
+      continue;
     if (auto symbolOp = dyn_cast<SymbolOpInterface>(sourceOp)) {
       auto symbolName = symbolOp.getName();
 
@@ -146,16 +147,19 @@
 // `@new_executable::@old_export` and an export update would then not match the
 // new/old mismatched ref. This means we have to do three walks over the entire
 // module in order to do the replacements; not great.
-static void replaceEntryPointUses(
-    mlir::ModuleOp moduleOp, const SymbolReplacements &symbolReplacements) {
+static void
+replaceEntryPointUses(mlir::ModuleOp moduleOp,
+                      const SymbolReplacements &symbolReplacements) {
   auto replaceSymbolRefs = [](Operation *rootOp,
                               const DenseMap<Attribute, Attribute> &map) {
     auto allUses = SymbolTable::getSymbolUses(rootOp);
-    if (!allUses) return;
+    if (!allUses)
+      return;
     for (auto use : *allUses) {
       auto oldAttr = use.getSymbolRef();
       auto newAttr = map.lookup(oldAttr);
-      if (!newAttr) continue;
+      if (!newAttr)
+        continue;
       auto newDict = use.getUser()->getAttrDictionary().replace(
           [&](Attribute attr) -> std::pair<Attribute, WalkResult> {
             if (attr == oldAttr) {
@@ -212,7 +216,8 @@
       // TODO(benvanik): allow for grouping when multi-versioning is supported?
       // We could, for example, link all aarch64 variants together and then
       // use function multi-versioning to let LLVM insert runtime switches.
-      if (variantOp.getTarget() != linkedTargetOp.getTarget()) continue;
+      if (variantOp.getTarget() != linkedTargetOp.getTarget())
+        continue;
 
       // Add any required object files to the set we will link in the target.
       if (auto objectsAttr = variantOp.getObjectsAttr()) {
@@ -293,5 +298,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.h b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.h
index e082544..b28179b 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.h
@@ -15,8 +15,8 @@
 namespace iree_compiler {
 
 // Returns a uniqued set of all targets in |executableOps|.
-SetVector<IREE::HAL::ExecutableTargetAttr> gatherExecutableTargets(
-    ArrayRef<IREE::HAL::ExecutableOp> executableOps);
+SetVector<IREE::HAL::ExecutableTargetAttr>
+gatherExecutableTargets(ArrayRef<IREE::HAL::ExecutableOp> executableOps);
 
 // Links all executables for the current target found in |moduleOp| into
 // |linkedExecutableOp|. Functions will be cloned into |linkedModuleOp|.
@@ -28,7 +28,7 @@
     std::function<Operation *(mlir::ModuleOp moduleOp)> getInnerModuleFn,
     OpBuilder &builder);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_UTILS_LINKINGUTILS_H_
+#endif // IREE_COMPILER_CODEGEN_UTILS_LINKINGUTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.cpp
index e62239e..68300ad 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.cpp
@@ -49,7 +49,8 @@
 StringRef getMarkerOrNull(Operation *op) {
   StringAttr attr = op->getAttrOfType<StringAttr>(
       IREE::LinalgExt::LinalgTransforms::kLinalgTransformMarker);
-  if (!attr) return "";
+  if (!attr)
+    return "";
   return attr.getValue();
 }
 
@@ -67,5 +68,5 @@
               StringAttr::get(op->getContext(), marker));
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.h b/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.h
index 2057ede..3684c8c 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/MarkerUtils.h
@@ -59,7 +59,7 @@
 /// Sets a given marker on an operation.
 void setMarker(Operation *, StringRef);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_CODEGENUTILS_MARKERUTILS_H_
+#endif // IREE_COMPILER_CODEGEN_CODEGENUTILS_MARKERUTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
index 1eb2e28..81c35c7 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
@@ -34,7 +34,8 @@
 
 FailureOr<IREE::HAL::ExecutableExportOp> getEntryPoint(func::FuncOp funcOp) {
   auto variantOp = funcOp->getParentOfType<IREE::HAL::ExecutableVariantOp>();
-  if (!variantOp) return failure();
+  if (!variantOp)
+    return failure();
 
   for (auto op : variantOp.getOps<IREE::HAL::ExecutableExportOp>()) {
     if (op.getSymName() == funcOp.getName()) {
@@ -44,8 +45,8 @@
   return failure();
 }
 
-FailureOr<IREE::HAL::ExecutableVariantOp> getExecutableVariantOp(
-    Operation *op) {
+FailureOr<IREE::HAL::ExecutableVariantOp>
+getExecutableVariantOp(Operation *op) {
   if (auto result = dyn_cast<IREE::HAL::ExecutableVariantOp>(op)) {
     return result;
   }
@@ -59,8 +60,8 @@
   return func.isPublic() && succeeded(getEntryPoint(func));
 }
 
-llvm::StringMap<IREE::HAL::ExecutableExportOp> getAllEntryPoints(
-    ModuleOp module) {
+llvm::StringMap<IREE::HAL::ExecutableExportOp>
+getAllEntryPoints(ModuleOp module) {
   auto variantOp = module->getParentOfType<IREE::HAL::ExecutableVariantOp>();
   llvm::StringMap<IREE::HAL::ExecutableExportOp> exportOps;
   for (auto op : variantOp.getOps<IREE::HAL::ExecutableExportOp>()) {
@@ -69,40 +70,53 @@
   return exportOps;
 }
 
-std::optional<StringAttr> getConfigStringAttr(
-    IREE::HAL::ExecutableTargetAttr targetAttr, StringRef stringAttr) {
-  if (!targetAttr) return std::nullopt;
+std::optional<StringAttr>
+getConfigStringAttr(IREE::HAL::ExecutableTargetAttr targetAttr,
+                    StringRef stringAttr) {
+  if (!targetAttr)
+    return std::nullopt;
   auto config = targetAttr.getConfiguration();
-  if (!config) return std::nullopt;
+  if (!config)
+    return std::nullopt;
   auto attr = config.getAs<StringAttr>(stringAttr);
-  if (!attr) return std::nullopt;
+  if (!attr)
+    return std::nullopt;
   return attr;
 }
 
-std::optional<IntegerAttr> getConfigIntegerAttr(
-    IREE::HAL::ExecutableTargetAttr targetAttr, StringRef integerAttr) {
-  if (!targetAttr) return std::nullopt;
+std::optional<IntegerAttr>
+getConfigIntegerAttr(IREE::HAL::ExecutableTargetAttr targetAttr,
+                     StringRef integerAttr) {
+  if (!targetAttr)
+    return std::nullopt;
   auto config = targetAttr.getConfiguration();
-  if (!config) return std::nullopt;
+  if (!config)
+    return std::nullopt;
   auto attr = config.getAs<IntegerAttr>(integerAttr);
-  if (!attr) return std::nullopt;
+  if (!attr)
+    return std::nullopt;
   return attr;
 }
 
-std::optional<BoolAttr> getConfigBoolAttr(
-    IREE::HAL::ExecutableTargetAttr targetAttr, StringRef integerAttr) {
-  if (!targetAttr) return std::nullopt;
+std::optional<BoolAttr>
+getConfigBoolAttr(IREE::HAL::ExecutableTargetAttr targetAttr,
+                  StringRef integerAttr) {
+  if (!targetAttr)
+    return std::nullopt;
   auto config = targetAttr.getConfiguration();
-  if (!config) return std::nullopt;
+  if (!config)
+    return std::nullopt;
   auto attr = config.getAs<BoolAttr>(integerAttr);
-  if (!attr) return std::nullopt;
+  if (!attr)
+    return std::nullopt;
   return attr;
 }
 
-std::optional<llvm::Triple> getTargetTriple(
-    IREE::HAL::ExecutableTargetAttr targetAttr) {
+std::optional<llvm::Triple>
+getTargetTriple(IREE::HAL::ExecutableTargetAttr targetAttr) {
   auto triple = getConfigStringAttr(targetAttr, "target_triple");
-  if (!triple) return std::nullopt;
+  if (!triple)
+    return std::nullopt;
   return llvm::Triple(triple.value().str());
 }
 
@@ -139,7 +153,8 @@
 
 bool isReadOnly(Value v) {
   Operation *definingOp = v.getDefiningOp();
-  if (!definingOp) return false;
+  if (!definingOp)
+    return false;
   return TypeSwitch<Operation *, bool>(definingOp)
       .Case<arith::ConstantOp>(
           [&](arith::ConstantOp constantOp) { return true; })
@@ -164,7 +179,8 @@
 template <typename T>
 static AffineExpr getAffineExprOfType(ArrayRef<AffineExpr> exprs) {
   for (auto expr : exprs) {
-    if (expr.isa<T>()) return expr;
+    if (expr.isa<T>())
+      return expr;
   }
   return nullptr;
 }
@@ -195,8 +211,9 @@
   }
   return nullptr;
 }
-static SmallVector<Value> getValuesForDimsOrSymbols(
-    affine::AffineApplyOp applyOp, ArrayRef<AffineExpr> exprs) {
+static SmallVector<Value>
+getValuesForDimsOrSymbols(affine::AffineApplyOp applyOp,
+                          ArrayRef<AffineExpr> exprs) {
   SmallVector<Value> vals;
   for (auto expr : exprs) {
     vals.push_back(getValueForDimOrSymbol(applyOp, expr));
@@ -215,7 +232,8 @@
 }
 template <typename T1, typename T2, typename... T3>
 static std::optional<unsigned> getDimension(Operation *op) {
-  if (!op) return std::nullopt;
+  if (!op)
+    return std::nullopt;
   if (auto dimension = getDimension<T1>(op)) {
     return dimension;
   }
@@ -228,11 +246,13 @@
 /// returns the dimension.  If `refDimension` is passed checks if the dimension
 /// matches the given value.
 template <typename... T>
-static std::optional<unsigned> checkDimensions(
-    ArrayRef<Value> vals, std::optional<unsigned> refDimension = std::nullopt) {
+static std::optional<unsigned>
+checkDimensions(ArrayRef<Value> vals,
+                std::optional<unsigned> refDimension = std::nullopt) {
   for (auto v : vals) {
     auto currDimension = getDimension<T...>(v.getDefiningOp());
-    if (!currDimension) return std::nullopt;
+    if (!currDimension)
+      return std::nullopt;
     if (refDimension) {
       if (refDimension.value() != currDimension.value()) {
         return std::nullopt;
@@ -250,7 +270,7 @@
 /// hal.interface.workgroup.id or hal.interface.workgroup.size.
 class LowerBoundExprVisitor
     : public AffineExprVisitor<LowerBoundExprVisitor, LogicalResult> {
- public:
+public:
   LowerBoundExprVisitor(affine::AffineApplyOp applyOp,
                         LoopTilingAndDistributionInfo &loopInfo)
       : applyOp(applyOp), loopInfo(loopInfo) {}
@@ -326,7 +346,7 @@
     return success();
   }
 
- private:
+private:
   affine::AffineApplyOp applyOp;
   LoopTilingAndDistributionInfo &loopInfo;
 };
@@ -337,7 +357,7 @@
 /// operation.
 class StepExprVisitor
     : public AffineExprVisitor<StepExprVisitor, LogicalResult> {
- public:
+public:
   StepExprVisitor(affine::AffineApplyOp applyOp,
                   LoopTilingAndDistributionInfo &loopInfo)
       : applyOp(applyOp), loopInfo(loopInfo) {}
@@ -425,7 +445,7 @@
     return success();
   }
 
- private:
+private:
   LogicalResult processSentinel(AffineExpr e,
                                 SmallVectorImpl<AffineExpr> &sentinels) {
     if (isaAffineExprOfType<AffineDimExpr, AffineSymbolExpr>(e)) {
@@ -445,7 +465,7 @@
   affine::AffineApplyOp applyOp;
   LoopTilingAndDistributionInfo &loopInfo;
 };
-}  // namespace
+} // namespace
 
 template <typename OpTy>
 static std::optional<unsigned> getInterfaceWorkgroupOpDim(Value value) {
@@ -468,8 +488,8 @@
 ///     affine_map<(d0)[s0, s1] -> (d0 * s0 * s1)>(%step)[%id, %size]
 ///   scf.for %iv = %offset to %ub step %new_step { ... }
 /// ```
-std::optional<LoopTilingAndDistributionInfo> isTiledAndDistributedLoop(
-    scf::ForOp forOp) {
+std::optional<LoopTilingAndDistributionInfo>
+isTiledAndDistributedLoop(scf::ForOp forOp) {
   LoopTilingAndDistributionInfo loopInfo;
   loopInfo.loop = forOp;
   loopInfo.untiledUpperBound = getAsOpFoldResult(forOp.getUpperBound());
@@ -492,7 +512,8 @@
       countDim = ifx.getDimIndex();
     }
 
-    if (!idDim || !countDim) return std::nullopt;
+    if (!idDim || !countDim)
+      return std::nullopt;
 
     Builder b(forOp.getContext());
     loopInfo.untiledLowerBound = b.getIndexAttr(0);
@@ -528,8 +549,8 @@
   return computeOps;
 }
 
-SmallVector<LoopTilingAndDistributionInfo> getTiledAndDistributedLoopInfo(
-    func::FuncOp funcOp) {
+SmallVector<LoopTilingAndDistributionInfo>
+getTiledAndDistributedLoopInfo(func::FuncOp funcOp) {
   SmallVector<LoopTilingAndDistributionInfo> info;
   funcOp.walk([&](scf::ForOp forOp) {
     if (auto tiledLoopInfo = isTiledAndDistributedLoop(forOp)) {
@@ -578,12 +599,13 @@
     const SmallVector<int64_t> &tileSizes,
     linalg::DistributionMethod distributionMethod,
     int32_t maxWorkgroupParallelDims) {
-  return {[&tileSizes, distributionMethod, maxWorkgroupParallelDims](
-              OpBuilder &builder, Location loc,
-              ArrayRef<Range> parallelLoopRanges) {
+  return {[&tileSizes, distributionMethod,
+           maxWorkgroupParallelDims](OpBuilder &builder, Location loc,
+                                     ArrayRef<Range> parallelLoopRanges) {
     SmallVector<int64_t> nonZeroTileSizes;
     for (int64_t size : tileSizes) {
-      if (size != 0) nonZeroTileSizes.push_back(size);
+      if (size != 0)
+        nonZeroTileSizes.push_back(size);
     }
     auto numParallelDims = parallelLoopRanges.size();
 
@@ -660,8 +682,9 @@
 
 /// Replaces a `use` with the `replacement` for cases where a simple substition
 /// might lead to verification errors.
-static std::optional<SmallVector<Value>> replaceNonTrivialUse(
-    RewriterBase &rewriter, Location loc, OpOperand &use, Value replacement) {
+static std::optional<SmallVector<Value>>
+replaceNonTrivialUse(RewriterBase &rewriter, Location loc, OpOperand &use,
+                     Value replacement) {
   Operation *user = use.getOwner();
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(user);
@@ -834,10 +857,12 @@
 SmallVector<int64_t> getStaticNumWorkgroups(func::FuncOp funcOp) {
   SmallVector<int64_t> result;
   FailureOr<IREE::HAL::ExecutableExportOp> exportOp = getEntryPoint(funcOp);
-  if (failed(exportOp)) return result;
+  if (failed(exportOp))
+    return result;
 
   Block *body = exportOp->getWorkgroupCountBody();
-  if (!body) return result;
+  if (!body)
+    return result;
 
   auto returnOp = cast<IREE::HAL::ReturnOp>(body->getTerminator());
   assert(returnOp.getNumOperands() == 3);
@@ -854,5 +879,5 @@
   return result;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.h b/compiler/src/iree/compiler/Codegen/Utils/Utils.h
index f14d980..ee84b34 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/Utils.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.h
@@ -34,8 +34,8 @@
 bool isEntryPoint(func::FuncOp func);
 
 /// Returns a map from function symbol name to corresponding entry point op.
-llvm::StringMap<IREE::HAL::ExecutableExportOp> getAllEntryPoints(
-    ModuleOp module);
+llvm::StringMap<IREE::HAL::ExecutableExportOp>
+getAllEntryPoints(ModuleOp module);
 
 /// Returns the entry point op for the `funcOp`. Returns `nullptr` on failure.
 FailureOr<IREE::HAL::ExecutableExportOp> getEntryPoint(func::FuncOp funcOp);
@@ -46,22 +46,25 @@
 
 /// Returns the StringAttr with the name `stringAttr` in the `targetAttr`, if
 /// found.
-std::optional<StringAttr> getConfigStringAttr(
-    IREE::HAL::ExecutableTargetAttr targetAttr, StringRef stringAttr);
+std::optional<StringAttr>
+getConfigStringAttr(IREE::HAL::ExecutableTargetAttr targetAttr,
+                    StringRef stringAttr);
 
 /// Returns the IntegerAttr with the name `integerAttr` in the `targetAttr`, if
 /// found.
-std::optional<IntegerAttr> getConfigIntegerAttr(
-    IREE::HAL::ExecutableTargetAttr targetAttr, StringRef integerAttr);
+std::optional<IntegerAttr>
+getConfigIntegerAttr(IREE::HAL::ExecutableTargetAttr targetAttr,
+                     StringRef integerAttr);
 
 /// Returns the BoolAttr with the name `integerAttr` in the `targetAttr`, if
 /// found.
-std::optional<BoolAttr> getConfigBoolAttr(
-    IREE::HAL::ExecutableTargetAttr targetAttr, StringRef integerAttr);
+std::optional<BoolAttr>
+getConfigBoolAttr(IREE::HAL::ExecutableTargetAttr targetAttr,
+                  StringRef integerAttr);
 
 /// Returns the LLVM Target triple associated with the `targetAttr`, if set.
-std::optional<llvm::Triple> getTargetTriple(
-    IREE::HAL::ExecutableTargetAttr targetAttr);
+std::optional<llvm::Triple>
+getTargetTriple(IREE::HAL::ExecutableTargetAttr targetAttr);
 
 /// Returns the target architecture name, in IREE_ARCH convention, from the
 /// given target triple.
@@ -147,12 +150,12 @@
 
 /// If the given `forOp` is a tiled and distributed loop, returns its tiling and
 /// distribution information.
-std::optional<LoopTilingAndDistributionInfo> isTiledAndDistributedLoop(
-    scf::ForOp forOp);
+std::optional<LoopTilingAndDistributionInfo>
+isTiledAndDistributedLoop(scf::ForOp forOp);
 
 /// Collects information about loops matching tiled+distribute pattern.
-SmallVector<LoopTilingAndDistributionInfo> getTiledAndDistributedLoopInfo(
-    func::FuncOp funcOp);
+SmallVector<LoopTilingAndDistributionInfo>
+getTiledAndDistributedLoopInfo(func::FuncOp funcOp);
 
 Operation *createLinalgCopyOp(OpBuilder &b, Location loc, Value from, Value to,
                               ArrayRef<NamedAttribute> attributes = {});
@@ -186,7 +189,7 @@
 void sinkOpsInCFG(const SmallVector<Operation *> &allocs,
                   DominanceInfo &dominators);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_UTILS_UTILS_H_
+#endif // IREE_COMPILER_CODEGEN_UTILS_UTILS_H_
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/EncodingInfo.h b/compiler/src/iree/compiler/Codegen/VMVX/EncodingInfo.h
index 7b2da6f..1a52720 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/EncodingInfo.h
+++ b/compiler/src/iree/compiler/Codegen/VMVX/EncodingInfo.h
@@ -13,7 +13,7 @@
 namespace iree_compiler {
 // A placeholder for chooseDynamicEncodingInfoVMVXMicrokernels. See
 // Common/EncodingInfo.h for more details.
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_VMVX_ENCODINGINFO_H_
+#endif // IREE_COMPILER_CODEGEN_VMVX_ENCODINGINFO_H_
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp b/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp
index 6924815..2be06ad 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp
@@ -99,7 +99,7 @@
   }
 
   ArrayRef<int64_t> sizes = type.getShape();
-  assert(rank >= 2);  // Ensured by above early return.
+  assert(rank >= 2); // Ensured by above early return.
   if (strides[rank - 1] != 1) {
     return false;
   }
@@ -145,7 +145,7 @@
   /// with element-based addressing.
   Value castToLinear(Location loc, OpBuilder &builder) { return baseBuffer; }
 
- private:
+private:
   // The base !util.buffer
   Value baseBuffer;
   friend class StridedBufferAnalysis;
@@ -160,7 +160,7 @@
 /// SubviewOps to some memref with an identity layout (i.e. not offsets/strides
 /// applied).
 class StridedBufferAnalysis {
- public:
+public:
   StridedBufferAnalysis(Value buffer) : buffer(buffer) {}
 
   // Whether analysis was successful.
@@ -180,7 +180,8 @@
 
   StridedBufferDescriptor &getDesc(OpBuilder &builder) {
     assert(isValid() && "invalid StridedBufferAnalysis");
-    if (desc) return *desc;
+    if (desc)
+      return *desc;
 
     OpBuilder::InsertionGuard guard(builder);
     builder.setInsertionPointAfterValue(buffer);
@@ -208,7 +209,7 @@
     return *desc;
   }
 
- private:
+private:
   Value buffer;
   std::optional<StridedBufferDescriptor> desc;
 };
@@ -243,8 +244,7 @@
 
   BinaryEmitter(Descriptor operand0, Descriptor operand1, Descriptor result,
                 OpSelection selection)
-      : operands(std::make_pair(operand0, operand1)),
-        result(result),
+      : operands(std::make_pair(operand0, operand1)), result(result),
         selection(selection) {}
 
   bool isProjectedPermutation() {
@@ -261,7 +261,8 @@
   LogicalResult initialize(Location loc, PatternRewriter &rewriter) {
     if (!isProjectedPermutation())
       return rewriter.notifyMatchFailure(loc, "not projected permutation");
-    if (maxRank() > 2) return rewriter.notifyMatchFailure(loc, "rank > 2");
+    if (maxRank() > 2)
+      return rewriter.notifyMatchFailure(loc, "rank > 2");
     if (!operands.first.bufferAnal.isValid() ||
         !operands.second.bufferAnal.isValid() || !result.bufferAnal.isValid()) {
       return rewriter.notifyMatchFailure(loc,
@@ -308,26 +309,26 @@
     leftPadToRank(loc, params.sizes, 2, 1, rewriter);
 
     switch (selection.opType) {
-      case OpType::GenericBinary: {
-        rewriter.create<IREE::VMVX::BinaryOp>(
-            loc, rewriter.getStringAttr(selection.opcode),
-            // LHS
-            params.in0Buffer, operands.first.bufferDesc->offset,
-            params.in0Strides,
-            // RHS
-            params.in1Buffer, operands.second.bufferDesc->offset,
-            params.in1Strides,
-            // OUT
-            params.outBuffer, result.bufferDesc->offset, params.outStrides,
-            // Sizes
-            params.sizes,
-            // Attributes
-            operands.first.bufferDesc->getElementTypeAttr());
+    case OpType::GenericBinary: {
+      rewriter.create<IREE::VMVX::BinaryOp>(
+          loc, rewriter.getStringAttr(selection.opcode),
+          // LHS
+          params.in0Buffer, operands.first.bufferDesc->offset,
+          params.in0Strides,
+          // RHS
+          params.in1Buffer, operands.second.bufferDesc->offset,
+          params.in1Strides,
+          // OUT
+          params.outBuffer, result.bufferDesc->offset, params.outStrides,
+          // Sizes
+          params.sizes,
+          // Attributes
+          operands.first.bufferDesc->getElementTypeAttr());
 
-        break;
-      }
-      default:
-        assert(false && "unhandled OpType");
+      break;
+    }
+    default:
+      assert(false && "unhandled OpType");
     }
   }
 };
@@ -373,7 +374,8 @@
   LogicalResult initialize(Location loc, PatternRewriter &rewriter) {
     if (!isProjectedPermutation())
       return rewriter.notifyMatchFailure(loc, "not projected permutation");
-    if (maxRank() > 2) return rewriter.notifyMatchFailure(loc, "rank > 2");
+    if (maxRank() > 2)
+      return rewriter.notifyMatchFailure(loc, "rank > 2");
     if (!operand.bufferAnal.isValid() || !result.bufferAnal.isValid()) {
       return rewriter.notifyMatchFailure(loc,
                                          "could not compute buffer descriptor");
@@ -410,22 +412,22 @@
     leftPadToRank(loc, params.sizes, 2, 1, rewriter);
 
     switch (selection.opType) {
-      case OpType::GenericUnary: {
-        rewriter.create<IREE::VMVX::UnaryOp>(
-            loc, rewriter.getStringAttr(selection.opcode),
-            // IN
-            params.inBuffer, operand.bufferDesc->offset, params.inStrides,
-            // OUT
-            params.outBuffer, result.bufferDesc->offset, params.outStrides,
-            // Sizes
-            params.sizes,
-            // Attributes
-            operand.bufferDesc->getElementTypeAttr());
+    case OpType::GenericUnary: {
+      rewriter.create<IREE::VMVX::UnaryOp>(
+          loc, rewriter.getStringAttr(selection.opcode),
+          // IN
+          params.inBuffer, operand.bufferDesc->offset, params.inStrides,
+          // OUT
+          params.outBuffer, result.bufferDesc->offset, params.outStrides,
+          // Sizes
+          params.sizes,
+          // Attributes
+          operand.bufferDesc->getElementTypeAttr());
 
-        break;
-      }
-      default:
-        assert(false && "unhandled OpType");
+      break;
+    }
+    default:
+      assert(false && "unhandled OpType");
     }
   }
 };
@@ -465,7 +467,8 @@
   LogicalResult initialize(Location loc, PatternRewriter &rewriter) {
     if (!isProjectedPermutation())
       return rewriter.notifyMatchFailure(loc, "not projected permutation");
-    if (maxRank() > 2) return rewriter.notifyMatchFailure(loc, "rank > 2");
+    if (maxRank() > 2)
+      return rewriter.notifyMatchFailure(loc, "rank > 2");
 
     // Initialize buffer descriptors.
     for (auto &copy : copies) {
@@ -529,9 +532,11 @@
                                 PatternRewriter &rewriter) const override {
     auto &children = op.getBlock()->getOperations();
     // Only match two children (op + yield).
-    if (children.size() != 2) return failure();
+    if (children.size() != 2)
+      return failure();
     // Only match parallel loops.
-    if (op.getNumParallelLoops() != op.getNumLoops()) return failure();
+    if (op.getNumParallelLoops() != op.getNumLoops())
+      return failure();
 
     // Match:
     //   %0 = someop %arg2, %arg3
@@ -546,7 +551,8 @@
         llvm::dyn_cast<BlockArgument>(binaryOp->getOperands()[0]);
     BlockArgument operandScalar1 =
         llvm::dyn_cast<BlockArgument>(binaryOp->getOperands()[1]);
-    if (!operandScalar0 || !operandScalar1) return failure();
+    if (!operandScalar0 || !operandScalar1)
+      return failure();
 
     // Construct the emitter and start lowering.
     // Note that the operands may map to an out if the aliasing is safe,
@@ -594,7 +600,8 @@
     // Select the op to lower to and configure the emitter.
     // Emit from the iree_ukernel_x32b_opcode_t table.
     Type resultType = binaryOp->getResult(0).getType();
-    if (!resultType.isIntOrFloat()) return failure();
+    if (!resultType.isIntOrFloat())
+      return failure();
     std::optional<BinaryEmitter> emitter =
         TypeSwitch<Operation *, std::optional<BinaryEmitter>>(binaryOp)
             .Case([&](arith::AddFOp op) -> std::optional<BinaryEmitter> {
@@ -687,7 +694,8 @@
     if (!emitter) {
       return rewriter.notifyMatchFailure(op, "unrecognized binary op");
     }
-    if (failed(emitter->initialize(op.getLoc(), rewriter))) return failure();
+    if (failed(emitter->initialize(op.getLoc(), rewriter)))
+      return failure();
 
     emitter->emit(op.getLoc(), rewriter);
     rewriter.eraseOp(op);
@@ -704,9 +712,11 @@
                                 PatternRewriter &rewriter) const override {
     auto &children = op.getBlock()->getOperations();
     // Only match two children (op + yield).
-    if (children.size() != 2) return failure();
+    if (children.size() != 2)
+      return failure();
     // Only match parallel loops.
-    if (op.getNumParallelLoops() != op.getNumLoops()) return failure();
+    if (op.getNumParallelLoops() != op.getNumLoops())
+      return failure();
 
     // Match:
     //   %0 = someop %arg2
@@ -719,7 +729,8 @@
     }
     BlockArgument operandScalar0 =
         llvm::dyn_cast<BlockArgument>(unaryOp->getOperands()[0]);
-    if (!operandScalar0) return failure();
+    if (!operandScalar0)
+      return failure();
 
     // Construct the emitter and start lowering.
     // Note that the operands may map to an out if the aliasing is safe,
@@ -747,7 +758,8 @@
     // Select the op to lower to and configure the emitter.
     // Emit from the iree_ukernel_x32b_opcode_t table.
     Type resultType = unaryOp->getResult(0).getType();
-    if (!resultType.isIntOrFloat()) return failure();
+    if (!resultType.isIntOrFloat())
+      return failure();
     std::optional<UnaryEmitter> emitter =
         TypeSwitch<Operation *, std::optional<UnaryEmitter>>(unaryOp)
             .Case([&](math::AbsFOp op) -> std::optional<UnaryEmitter> {
@@ -805,7 +817,8 @@
     if (!emitter) {
       return rewriter.notifyMatchFailure(op, "unrecognized unary op");
     }
-    if (failed(emitter->initialize(op.getLoc(), rewriter))) return failure();
+    if (failed(emitter->initialize(op.getLoc(), rewriter)))
+      return failure();
 
     emitter->emit(op.getLoc(), rewriter);
     rewriter.eraseOp(op);
@@ -822,9 +835,11 @@
                                 PatternRewriter &rewriter) const override {
     auto &children = op.getBlock()->getOperations();
     // Only match one child (yield).
-    if (children.size() != 1) return failure();
+    if (children.size() != 1)
+      return failure();
     // Only match parallel loops.
-    if (op.getNumParallelLoops() != op.getNumLoops()) return failure();
+    if (op.getNumParallelLoops() != op.getNumLoops())
+      return failure();
 
     // Presumed to be a yield terminator: configure the emitter.
     CopyEmitter emitter;
@@ -845,7 +860,8 @@
       }
     }
 
-    if (failed(emitter.initialize(op.getLoc(), rewriter))) return failure();
+    if (failed(emitter.initialize(op.getLoc(), rewriter)))
+      return failure();
     emitter.emit(op.getLoc(), rewriter);
     rewriter.eraseOp(op);
     return success();
@@ -903,7 +919,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 class VMVXLowerLinalgMicrokernelsPass
     : public VMVXLowerLinalgMicrokernelsBase<VMVXLowerLinalgMicrokernelsPass> {
@@ -941,5 +957,5 @@
   return std::make_unique<VMVXLowerLinalgMicrokernelsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/Passes.cpp b/compiler/src/iree/compiler/Codegen/VMVX/Passes.cpp
index 5e21cf6..0abfedb 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/Passes.cpp
@@ -29,5 +29,5 @@
           createVMVXAssignConstantOrdinalsPass());
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/VMVXAssignConstantOrdinals.cpp b/compiler/src/iree/compiler/Codegen/VMVX/VMVXAssignConstantOrdinals.cpp
index 2bf812f..f3f6702 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/VMVXAssignConstantOrdinals.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/VMVXAssignConstantOrdinals.cpp
@@ -22,11 +22,13 @@
 
     // Ignore non-VMVX variants.
     // TODO(benvanik): a way to nest this in the pipeline via dynamic passes.
-    if (variantOp.getTarget().getBackend().getValue() != "vmvx") return;
+    if (variantOp.getTarget().getBackend().getValue() != "vmvx")
+      return;
 
     // Get a constant key -> ordinal mapping.
     auto keyOrdinals = variantOp.gatherConstantOrdinals();
-    if (keyOrdinals.empty()) return;
+    if (keyOrdinals.empty())
+      return;
 
     // Update placeholders to hold the concrete ordinal values.
     // Eventually the VM global folding passes will inline them.
@@ -36,7 +38,8 @@
                moduleOp.getOps<IREE::VM::GlobalI32Op>())) {
         auto keyAttr = globalOp->getAttr(
             IREE::HAL::ExecutableConstantBlockOp::getKeyAttrName());
-        if (!keyAttr) continue;
+        if (!keyAttr)
+          continue;
         auto it = keyOrdinals.find(keyAttr);
         if (it == keyOrdinals.end()) {
           globalOp.emitOpError()
@@ -53,12 +56,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<IREE::HAL::ExecutableVariantOp>>
 createVMVXAssignConstantOrdinalsPass() {
   return std::make_unique<VMVXAssignConstantOrdinalsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/VMVXLinkExecutables.cpp b/compiler/src/iree/compiler/Codegen/VMVX/VMVXLinkExecutables.cpp
index ca58568..f4403a2 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/VMVXLinkExecutables.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/VMVXLinkExecutables.cpp
@@ -26,7 +26,8 @@
 
     auto sourceExecutableOps =
         llvm::to_vector<8>(moduleOp.getOps<IREE::HAL::ExecutableOp>());
-    if (sourceExecutableOps.size() <= 1) return;
+    if (sourceExecutableOps.size() <= 1)
+      return;
 
     // Guess a module name, if needed, to make the output files readable.
     auto moduleName = guessModuleName(moduleOp, "vmvx_module");
@@ -70,11 +71,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createVMVXLinkExecutablesPass() {
   return std::make_unique<VMVXLinkExecutablesPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/VMVXMaterializeEncodingPass.cpp b/compiler/src/iree/compiler/Codegen/VMVX/VMVXMaterializeEncodingPass.cpp
index e2f052d..0a1bd91 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/VMVXMaterializeEncodingPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/VMVXMaterializeEncodingPass.cpp
@@ -45,8 +45,8 @@
   return chooseMatmulTileParamsGeneric();
 }
 
-static MaterializeEncodingValueFn getMaterializeEncodingValueFn(
-    IREE::HAL::ExecutableTargetAttr targetAttr) {
+static MaterializeEncodingValueFn
+getMaterializeEncodingValueFn(IREE::HAL::ExecutableTargetAttr targetAttr) {
   if (hasMicrokernels(targetAttr)) {
     return chooseDynamicEncodingInfoVMVXMicrokernels;
   }
@@ -64,7 +64,7 @@
   void runOnOperation() override;
 };
 
-}  // namespace
+} // namespace
 
 void VMVXMaterializeEncodingPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -75,7 +75,8 @@
       [targetAttr](
           RankedTensorType tensorType) -> FailureOr<MaterializeEncodingInfo> {
         std::optional<TensorEncoding> encoding = getEncoding(tensorType);
-        if (!encoding) return failure();
+        if (!encoding)
+          return failure();
 
         auto matmulType = getMatmulType(*encoding);
         auto matmulOperandRole = getMatmulOperandRole(*encoding);
@@ -119,5 +120,5 @@
   return std::make_unique<VMVXMaterializeEncodingPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/VMVXPasses.h b/compiler/src/iree/compiler/Codegen/VMVX/VMVXPasses.h
index ea0bf87..ac3a5f5 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/VMVXPasses.h
+++ b/compiler/src/iree/compiler/Codegen/VMVX/VMVXPasses.h
@@ -44,7 +44,7 @@
 
 /// Populates passes needed to link HAL executables across VMVX targets.
 void buildVMVXLinkingPassPipeline(OpPassManager &passManager);
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_VMVX_PASSES_H_
+#endif // IREE_COMPILER_CODEGEN_VMVX_PASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/WGSL/WGSLPasses.h b/compiler/src/iree/compiler/Codegen/WGSL/WGSLPasses.h
index 8811360..c1511c4 100644
--- a/compiler/src/iree/compiler/Codegen/WGSL/WGSLPasses.h
+++ b/compiler/src/iree/compiler/Codegen/WGSL/WGSLPasses.h
@@ -23,7 +23,7 @@
 std::unique_ptr<OperationPass<func::FuncOp>>
 createWGSLReplacePushConstantsPass();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_WGSL_PASSES_H_
+#endif // IREE_COMPILER_CODEGEN_WGSL_PASSES_H_
diff --git a/compiler/src/iree/compiler/Codegen/WGSL/WGSLReplacePushConstants.cpp b/compiler/src/iree/compiler/Codegen/WGSL/WGSLReplacePushConstants.cpp
index 6b38e22..864d3c8 100644
--- a/compiler/src/iree/compiler/Codegen/WGSL/WGSLReplacePushConstants.cpp
+++ b/compiler/src/iree/compiler/Codegen/WGSL/WGSLReplacePushConstants.cpp
@@ -98,7 +98,8 @@
     auto loc = funcOp.getLoc();
     auto constantLoadOps =
         llvm::to_vector(funcOp.getOps<IREE::HAL::InterfaceConstantLoadOp>());
-    if (constantLoadOps.empty()) return;
+    if (constantLoadOps.empty())
+      return;
 
     OpBuilder builder(funcOp);
     builder.setInsertionPointToStart(&funcOp.getBlocks().front());
@@ -172,12 +173,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createWGSLReplacePushConstantsPass() {
   return std::make_unique<WGSLReplacePushConstantsPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp b/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp
index ac4d41d..1d6d7a8 100644
--- a/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp
+++ b/compiler/src/iree/compiler/ConstEval/JitGlobals.cpp
@@ -28,10 +28,9 @@
 namespace {
 
 struct ProgramExtractor {
- public:
+public:
   ProgramExtractor(Operation *sourceModuleOp, Operation *targetModuleOp)
-      : sourceSymbolTable(sourceModuleOp),
-        targetSymbolTable(targetModuleOp),
+      : sourceSymbolTable(sourceModuleOp), targetSymbolTable(targetModuleOp),
         builder(OpBuilder::atBlockEnd(&targetModuleOp->getRegion(0).front())) {}
 
   // Creates an accessor function to load the given global value.
@@ -71,7 +70,8 @@
       iterWorklist.swap(symbolImportWorklist);
 
       for (StringAttr symbolRef : iterWorklist) {
-        if (targetSymbolTable.lookup(symbolRef)) continue;
+        if (targetSymbolTable.lookup(symbolRef))
+          continue;
 
         Operation *sourceOp = sourceSymbolTable.lookup(symbolRef);
         if (!sourceOp) {
@@ -109,7 +109,7 @@
     // TODO: Scan for functions, etc.
   }
 
- private:
+private:
   SymbolTable sourceSymbolTable;
   SymbolTable targetSymbolTable;
   OpBuilder builder;
@@ -137,7 +137,7 @@
     // Invoke IREE compilation flow.
     options->executableOptions.targets.push_back("vmvx");
     options->targetOptions.f32Extension = true;
-    options->targetOptions.f64Extension = false;  // not yet implemented
+    options->targetOptions.f64Extension = false; // not yet implemented
 
     // Disable constant evaluation for our Jit compilation pipeline.
     // It would make no sense to recursively do constant evaluation, and since
@@ -184,8 +184,10 @@
     SmallVector<std::pair<StringAttr, StringAttr>> uninitializedGlobals;
     for (Operation &childOp : *innerModule.getBody()) {
       auto globalOp = llvm::dyn_cast<IREE::Util::GlobalOp>(childOp);
-      if (!globalOp) continue;
-      if (globalOp.getInitialValueAttr()) continue;
+      if (!globalOp)
+        continue;
+      if (globalOp.getInitialValueAttr())
+        continue;
 
       // Only generate an accessor for types our runtime bridge knows how to
       // handle.
@@ -257,12 +259,12 @@
   OpPassManager compilePipeline;
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createJitGlobalsPass() {
   return std::make_unique<JitGlobalsPass>();
 }
 
-}  // namespace ConstEval
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ConstEval
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/ConstEval/PassDetail.h b/compiler/src/iree/compiler/ConstEval/PassDetail.h
index e3b8d5d..fc34b68 100644
--- a/compiler/src/iree/compiler/ConstEval/PassDetail.h
+++ b/compiler/src/iree/compiler/ConstEval/PassDetail.h
@@ -17,8 +17,8 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/ConstEval/Passes.h.inc"
 
-}  // namespace ConstEval
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ConstEval
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CONSTEVAL_PASSDETAIL_H_
+#endif // IREE_COMPILER_CONSTEVAL_PASSDETAIL_H_
diff --git a/compiler/src/iree/compiler/ConstEval/Passes.cpp b/compiler/src/iree/compiler/ConstEval/Passes.cpp
index 37e1dd7..af731cd 100644
--- a/compiler/src/iree/compiler/ConstEval/Passes.cpp
+++ b/compiler/src/iree/compiler/ConstEval/Passes.cpp
@@ -12,14 +12,14 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/ConstEval/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/ConstEval/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerConstEvalPasses() {
   // Generated.
   registerPasses();
 }
 
-}  // namespace ConstEval
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ConstEval
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/ConstEval/Passes.h b/compiler/src/iree/compiler/ConstEval/Passes.h
index 5330ca7..e867da2 100644
--- a/compiler/src/iree/compiler/ConstEval/Passes.h
+++ b/compiler/src/iree/compiler/ConstEval/Passes.h
@@ -21,8 +21,8 @@
 
 void registerConstEvalPasses();
 
-}  // namespace ConstEval
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ConstEval
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CONSTEVAL_PASSES_H_
+#endif // IREE_COMPILER_CONSTEVAL_PASSES_H_
diff --git a/compiler/src/iree/compiler/ConstEval/Runtime.cpp b/compiler/src/iree/compiler/ConstEval/Runtime.cpp
index 356082d..8260cdf 100644
--- a/compiler/src/iree/compiler/ConstEval/Runtime.cpp
+++ b/compiler/src/iree/compiler/ConstEval/Runtime.cpp
@@ -64,7 +64,7 @@
   // sizeof(bool) == 1, we just bit-cast. Otherwise, we go through a temporary.
   if (elementType.isInteger(1)) {
     if (sizeof(bool) == 1) {
-      ArrayRef<bool> boolArray(reinterpret_cast<bool*>(rawBuffer.data()),
+      ArrayRef<bool> boolArray(reinterpret_cast<bool *>(rawBuffer.data()),
                                rawBuffer.size());
       return DenseElementsAttr::get(tensorType, boolArray);
     } else {
@@ -81,7 +81,7 @@
   return {};
 }
 
-}  // namespace
+} // namespace
 
 CompiledBinary::CompiledBinary() = default;
 
@@ -141,7 +141,7 @@
                                                    StringRef name) {
   Attribute result;
   if (failed(invokeNullary(
-          loc, name, [&](iree_vm_list_t* outputs) -> LogicalResult {
+          loc, name, [&](iree_vm_list_t *outputs) -> LogicalResult {
             if (iree_vm_list_size(outputs) != 1) {
               return emitError(loc) << "expected 1 result for func " << name
                                     << " got " << iree_vm_list_size(outputs);
@@ -185,35 +185,36 @@
   return false;
 }
 
-Attribute CompiledBinary::convertVariantToAttribute(
-    Location loc, iree_vm_variant_t& variant) {
+Attribute
+CompiledBinary::convertVariantToAttribute(Location loc,
+                                          iree_vm_variant_t &variant) {
   auto context = loc.getContext();
   Builder builder(context);
   if (iree_vm_variant_is_value(variant)) {
     switch (iree_vm_type_def_as_value(variant.type)) {
-      case IREE_VM_VALUE_TYPE_I8:
-        return builder.getI8IntegerAttr(variant.i8);
-      case IREE_VM_VALUE_TYPE_I16:
-        return builder.getI16IntegerAttr(variant.i16);
-      case IREE_VM_VALUE_TYPE_I32:
-        return builder.getI32IntegerAttr(variant.i32);
-      case IREE_VM_VALUE_TYPE_I64:
-        return builder.getI64IntegerAttr(variant.i64);
-      case IREE_VM_VALUE_TYPE_F32:
-        return builder.getF32FloatAttr(variant.f32);
-      case IREE_VM_VALUE_TYPE_F64:
-        return builder.getF64FloatAttr(variant.f64);
-      default:
-        emitError(loc) << "unrecognized evaluated value type: "
-                       << static_cast<int>(
-                              iree_vm_type_def_as_value(variant.type));
-        return {};
+    case IREE_VM_VALUE_TYPE_I8:
+      return builder.getI8IntegerAttr(variant.i8);
+    case IREE_VM_VALUE_TYPE_I16:
+      return builder.getI16IntegerAttr(variant.i16);
+    case IREE_VM_VALUE_TYPE_I32:
+      return builder.getI32IntegerAttr(variant.i32);
+    case IREE_VM_VALUE_TYPE_I64:
+      return builder.getI64IntegerAttr(variant.i64);
+    case IREE_VM_VALUE_TYPE_F32:
+      return builder.getF32FloatAttr(variant.f32);
+    case IREE_VM_VALUE_TYPE_F64:
+      return builder.getF64FloatAttr(variant.f64);
+    default:
+      emitError(loc) << "unrecognized evaluated value type: "
+                     << static_cast<int>(
+                            iree_vm_type_def_as_value(variant.type));
+      return {};
     }
   }
 
   if (iree_vm_variant_is_ref(variant)) {
     if (iree_hal_buffer_view_isa(variant.ref)) {
-      iree_hal_buffer_view_t* bufferView =
+      iree_hal_buffer_view_t *bufferView =
           iree_hal_buffer_view_deref(variant.ref);
 
       // Get the shape.
@@ -227,12 +228,13 @@
       iree_hal_element_type_t halElementType =
           iree_hal_buffer_view_element_type(bufferView);
       Type elementType = mapElementType(loc, halElementType);
-      if (!elementType) return {};
+      if (!elementType)
+        return {};
 
       auto tensorType = RankedTensorType::get(shape, elementType);
 
       auto length = iree_hal_buffer_view_byte_length(bufferView);
-      iree_hal_buffer_t* buffer = iree_hal_buffer_view_buffer(bufferView);
+      iree_hal_buffer_t *buffer = iree_hal_buffer_view_buffer(bufferView);
 
       // Map the memory and construct.
       // TODO(benvanik): fallback to alloc + iree_hal_device_transfer_range if
@@ -243,7 +245,7 @@
           buffer, IREE_HAL_MAPPING_MODE_SCOPED, IREE_HAL_MEMORY_ACCESS_READ,
           /*byte_offset=*/0, length, &mapping));
       MutableArrayRef<char> rawBufferArray(
-          reinterpret_cast<char*>(mapping.contents.data),
+          reinterpret_cast<char *>(mapping.contents.data),
           mapping.contents.data_length);
       auto convertedAttr =
           createAttributeFromRawData(loc, tensorType, rawBufferArray);
@@ -262,11 +264,11 @@
   return {};
 }
 
-void CompiledBinary::initialize(void* data, size_t length) {
-  Runtime& runtime = Runtime::getInstance();
+void CompiledBinary::initialize(void *data, size_t length) {
+  Runtime &runtime = Runtime::getInstance();
 
   // Create driver and device.
-  iree_hal_driver_t* driver = nullptr;
+  iree_hal_driver_t *driver = nullptr;
   IREE_CHECK_OK(iree_hal_driver_registry_try_create(
       runtime.registry, iree_make_cstring_view("local-task"),
       iree_allocator_system(), &driver));
@@ -285,7 +287,7 @@
       iree_allocator_null(), iree_allocator_system(), &main_module));
 
   // Context.
-  std::array<iree_vm_module_t*, 2> modules = {
+  std::array<iree_vm_module_t *, 2> modules = {
       hal_module.get(),
       main_module.get(),
   };
@@ -296,8 +298,8 @@
 
 InMemoryCompiledBinary::~InMemoryCompiledBinary() { deinitialize(); }
 
-LogicalResult InMemoryCompiledBinary::translateFromModule(
-    mlir::ModuleOp moduleOp) {
+LogicalResult
+InMemoryCompiledBinary::translateFromModule(mlir::ModuleOp moduleOp) {
   llvm::raw_string_ostream os(binary);
   iree_compiler::IREE::VM::TargetOptions vmOptions;
   iree_compiler::IREE::VM::BytecodeTargetOptions bytecodeOptions;
@@ -324,11 +326,11 @@
   iree_hal_driver_registry_free(registry);
 }
 
-Runtime& Runtime::getInstance() {
+Runtime &Runtime::getInstance() {
   static Runtime instance;
   return instance;
 }
 
-}  // namespace ConstEval
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ConstEval
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/ConstEval/Runtime.h b/compiler/src/iree/compiler/ConstEval/Runtime.h
index 6b4a3db..6078da0 100644
--- a/compiler/src/iree/compiler/ConstEval/Runtime.h
+++ b/compiler/src/iree/compiler/ConstEval/Runtime.h
@@ -24,8 +24,8 @@
 
 // Abstract base class for a compiled binary.
 class CompiledBinary {
- public:
-  using ResultsCallback = std::function<LogicalResult(iree_vm_list_t* outputs)>;
+public:
+  using ResultsCallback = std::function<LogicalResult(iree_vm_list_t *outputs)>;
   virtual ~CompiledBinary();
 
   // Invokes a nullary function.
@@ -39,14 +39,14 @@
   // Whether the given type is supported in *AsAttribute methods.
   static bool isSupportedResultType(Type type);
 
- protected:
+protected:
   CompiledBinary();
-  void initialize(void* data, size_t length);
+  void initialize(void *data, size_t length);
   // The base class does not clean up initialized state. This must be done
   // explicitly by subclasses, ensuring that any backing images remain valid
   // through the call to deinitialize().
   void deinitialize();
-  Attribute convertVariantToAttribute(Location loc, iree_vm_variant_t& variant);
+  Attribute convertVariantToAttribute(Location loc, iree_vm_variant_t &variant);
 
   iree::vm::ref<iree_hal_device_t> device;
   iree::vm::ref<iree_vm_module_t> hal_module;
@@ -56,30 +56,30 @@
 
 // An in-memory compiled binary and accessors for working with it.
 class InMemoryCompiledBinary : public CompiledBinary {
- public:
+public:
   LogicalResult translateFromModule(mlir::ModuleOp moduleOp);
   ~InMemoryCompiledBinary() override;
 
- private:
+private:
   std::string binary;
 };
 
 // Simple wrapper around IREE runtime library sufficient for loading and
 // executing simple programs.
 class Runtime {
- public:
-  static Runtime& getInstance();
+public:
+  static Runtime &getInstance();
 
-  iree_hal_driver_registry_t* registry = nullptr;
+  iree_hal_driver_registry_t *registry = nullptr;
   iree::vm::ref<iree_vm_instance_t> instance;
 
- private:
+private:
   Runtime();
   ~Runtime();
 };
 
-}  // namespace ConstEval
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace ConstEval
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CONSTEVAL_RUNTIME_H_
+#endif // IREE_COMPILER_CONSTEVAL_RUNTIME_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.cpp
index 75bb5c6..c71a90b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.cpp
@@ -204,7 +204,8 @@
     SmallVector<Value> outputDynamicShapes;
     for (auto [resultShape, outputShp] : llvm::zip_equal(
              reshapeOp.getResultType().getShape(), outputShape[0])) {
-      if (resultShape != ShapedType::kDynamic) continue;
+      if (resultShape != ShapedType::kDynamic)
+        continue;
       outputDynamicShapes.push_back(getValueOrCreateConstantIndexOp(
           rewriter, reshapeOp.getLoc(), outputShp));
     }
@@ -238,7 +239,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateTensorToFlowConversionPatterns(MLIRContext *context,
                                             RewritePatternSet &patterns) {
@@ -251,7 +252,7 @@
               ConvertTensorReshapePattern<tensor::ExpandShapeOp>>(context);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.h b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.h
index 0ba9fa9..d6f1808 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Patterns.h
@@ -19,9 +19,9 @@
 void populateTensorToFlowConversionPatterns(MLIRContext *context,
                                             RewritePatternSet &patterns);
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_CONVERSION_TENSORTOFLOW_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_CONVERSION_TENSORTOFLOW_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.cpp b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.cpp
index b3a4556..b7bbca3 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.cpp
@@ -18,8 +18,8 @@
 namespace Flow {
 
 /// Gets the list of non-static values from a list of `OpFoldResult`.
-static SmallVector<Value> getDynamicValues(
-    ArrayRef<OpFoldResult> valueOrAttrList) {
+static SmallVector<Value>
+getDynamicValues(ArrayRef<OpFoldResult> valueOrAttrList) {
   SmallVector<Value> dynamicDims;
   for (auto valueOrAttr : valueOrAttrList) {
     if (auto value = valueOrAttr.dyn_cast<Value>()) {
@@ -30,8 +30,8 @@
 }
 
 /// Get shape of the tensor given the sizes as a list of `OpFoldResult`.
-static SmallVector<int64_t> getShapeFromSizes(
-    ArrayRef<OpFoldResult> valueOrAttrList) {
+static SmallVector<int64_t>
+getShapeFromSizes(ArrayRef<OpFoldResult> valueOrAttrList) {
   return llvm::map_to_vector(
       valueOrAttrList, [&](OpFoldResult valueOrAttr) -> int64_t {
         if (auto attr = valueOrAttr.dyn_cast<Attribute>()) {
@@ -68,17 +68,21 @@
     int64_t staticSize = getVal(sizes[dim - 1], ShapedType::kDynamic);
     int64_t staticStride = getVal(strides[dim - 1], ShapedType::kDynamic);
 
-    if (staticStride != 1) return false;
+    if (staticStride != 1)
+      return false;
     // The offsets and sizes dont have to be static for all dimensions. When
     // `fullSlices` is true, the offset and sizes can be dynamic. But many
     // cases, the dynamic offset/size value is obtained by computing from
     // another tensor which lives on the device. To avoid host-round tripping
     // enforce that offset/size is also static.
-    if (staticSize == ShapedType::kDynamic) return false;
-    if (staticOffset == ShapedType::kDynamic) return false;
+    if (staticSize == ShapedType::kDynamic)
+      return false;
+    if (staticOffset == ShapedType::kDynamic)
+      return false;
 
     if (fullSlices == false) {
-      if (staticSize != 1) return false;
+      if (staticSize != 1)
+        return false;
     } else {
       if (!(staticOffset == 0 && staticSize != ShapedType::kDynamic &&
             baseShape[dim - 1] != ShapedType::kDynamic &&
@@ -90,8 +94,9 @@
   return true;
 }
 
-LogicalResult convertInsertSliceOpToFlowUpdateOp(
-    RewriterBase &rewriter, tensor::InsertSliceOp insertOp) {
+LogicalResult
+convertInsertSliceOpToFlowUpdateOp(RewriterBase &rewriter,
+                                   tensor::InsertSliceOp insertOp) {
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(insertOp);
 
@@ -133,8 +138,9 @@
   return success();
 }
 
-LogicalResult convertExtractSliceOpToFlowSliceOp(
-    RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp) {
+LogicalResult
+convertExtractSliceOpToFlowSliceOp(RewriterBase &rewriter,
+                                   tensor::ExtractSliceOp sliceOp) {
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(sliceOp);
 
@@ -180,7 +186,7 @@
   return success();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.h b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.h
index 49b9baf..dc8327b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/Utils.h
@@ -19,23 +19,25 @@
 namespace tensor {
 class ExtractSliceOp;
 class InsertSliceOp;
-}  // namespace tensor
+} // namespace tensor
 
 namespace iree_compiler {
 namespace IREE {
 namespace Flow {
 
 /// Rewrite the given InsertSliceOp into a Flow::TensorUpdateOp.
-LogicalResult convertInsertSliceOpToFlowUpdateOp(
-    RewriterBase &rewriter, tensor::InsertSliceOp insertOp);
+LogicalResult
+convertInsertSliceOpToFlowUpdateOp(RewriterBase &rewriter,
+                                   tensor::InsertSliceOp insertOp);
 
 /// Rewrite the given ExtractSliceOp into a Flow::TensorSliceOp.
-LogicalResult convertExtractSliceOpToFlowSliceOp(
-    RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp);
+LogicalResult
+convertExtractSliceOpToFlowSliceOp(RewriterBase &rewriter,
+                                   tensor::ExtractSliceOp sliceOp);
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_CONVERSION_TENSORTOFLOW_UTILS_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_CONVERSION_TENSORTOFLOW_UTILS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.cpp
index 03bb15a..d15d2a8 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.cpp
@@ -58,7 +58,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 FlowDialect::FlowDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<FlowDialect>()) {
@@ -83,7 +83,7 @@
   return nullptr;
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.h b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.h
index 636678b..814c218 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowDialect.h
@@ -17,7 +17,7 @@
 namespace Flow {
 
 class FlowDialect : public Dialect {
- public:
+public:
   explicit FlowDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "flow"; }
 
@@ -35,14 +35,14 @@
            op->getDialect()->getNamespace() == getDialectNamespace();
   }
 
- private:
+private:
   void registerAttributes();
   void registerTypes();
 };
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_IR_FLOWDIALECT_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_IR_FLOWDIALECT_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
index 99d9b93..3f6ada4 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -49,7 +49,8 @@
   using OpRewritePattern<Op>::OpRewritePattern;
   LogicalResult matchAndRewrite(Op op,
                                 PatternRewriter &rewriter) const override {
-    if (!op.use_empty()) return failure();
+    if (!op.use_empty())
+      return failure();
     rewriter.eraseOp(op);
     return success();
   }
@@ -58,13 +59,15 @@
 // Returns true if |value| is definitely empty at runtime.
 static bool isTensorZeroElements(Value value) {
   auto type = llvm::dyn_cast<ShapedType>(value.getType());
-  if (!type) return false;
+  if (!type)
+    return false;
   // Any static dimension being zero is definitely empty.
   for (int64_t i = 0; i < type.getRank(); ++i) {
     int64_t dim = type.getDimSize(i);
-    if (dim == 0) return true;
+    if (dim == 0)
+      return true;
   }
-  return false;  // may still be dynamically empty
+  return false; // may still be dynamically empty
 }
 
 // Returns true if |value| is definitely empty at runtime.
@@ -87,7 +90,8 @@
   LogicalResult matchAndRewrite(Op op,
                                 PatternRewriter &rewriter) const override {
     auto operand = op->getOperand(OperandIdx);
-    if (!isTensorOperandZeroElements(operand)) return failure();
+    if (!isTensorOperandZeroElements(operand))
+      return failure();
     auto result = op->getResult(ResultIdx);
     auto dynamicDims = op.getResultDynamicDims(result.getResultNumber());
     rewriter.replaceOpWithNewOp<IREE::Flow::TensorEmptyOp>(op, result.getType(),
@@ -102,7 +106,8 @@
   LogicalResult matchAndRewrite(Op op,
                                 PatternRewriter &rewriter) const override {
     auto result = op->getResult(ResultIdx);
-    if (!isTensorResultZeroElements(result)) return failure();
+    if (!isTensorResultZeroElements(result))
+      return failure();
     auto dynamicDims = op.getResultDynamicDims(result.getResultNumber());
     rewriter.replaceOpWithNewOp<IREE::Flow::TensorEmptyOp>(op, result.getType(),
                                                            dynamicDims);
@@ -117,7 +122,8 @@
                                 PatternRewriter &rewriter) const override {
     auto operand = op->getOperand(OperandIdx);
     auto emptyOp = dyn_cast_or_null<TensorEmptyOp>(operand.getDefiningOp());
-    if (!emptyOp) return failure();
+    if (!emptyOp)
+      return failure();
     auto result = op->getResult(ResultIdx);
     auto dynamicDims = op.getResultDynamicDims(result.getResultNumber());
     rewriter.replaceOpWithNewOp<IREE::Flow::TensorEmptyOp>(op, result.getType(),
@@ -131,7 +137,8 @@
 // Example: tensor<?x0x1xf32> -> tensor<0x0x1xf32>
 static Type makeZeroElementsStaticTensorType(Type type) {
   auto tensorType = llvm::cast<RankedTensorType>(type);
-  if (tensorType.hasStaticShape()) return type;
+  if (tensorType.hasStaticShape())
+    return type;
   SmallVector<int64_t> dims;
   dims.resize(tensorType.getRank());
   for (int64_t i = 0; i < tensorType.getRank(); ++i) {
@@ -149,7 +156,8 @@
                                                   Type newType,
                                                   ValueRange oldDims,
                                                   PatternRewriter &rewriter) {
-  if (oldType == newType) return llvm::to_vector(oldDims);
+  if (oldType == newType)
+    return llvm::to_vector(oldDims);
 
   // Build an expanded list of all the dims - constants will be nullptr.
   // This lets us map back the new types without worrying about whether some
@@ -219,7 +227,8 @@
     // will drop it.
     bool didReplaceAny = false;
     for (auto result : op.getResults()) {
-      if (result.use_empty()) continue;
+      if (result.use_empty())
+        continue;
       if (isTensorResultZeroElements(result)) {
         auto dynamicDims = op.getResultDynamicDims(result.getResultNumber());
         auto emptyOp = rewriter.create<IREE::Flow::TensorEmptyOp>(
@@ -462,7 +471,8 @@
                                MutableOperandRange mutableDimValues) {
   auto dynamicDimsOr = IREE::Util::findDynamicDims(tensorValue, op->getBlock(),
                                                    Block::iterator(op));
-  if (!dynamicDimsOr.has_value()) return false;
+  if (!dynamicDimsOr.has_value())
+    return false;
   auto dynamicDims = dynamicDimsOr.value();
   bool anyChanged = false;
   OperandRange oldValueRange = mutableDimValues;
@@ -525,11 +535,11 @@
 /// `flow.dispatch.tensor.store`) is also passed in. The type of the slice to
 /// use in the canonicalized op is returned.
 template <typename OpTy>
-static FailureOr<RankedTensorType> canonicalizeSubViewParts(
-    OpTy op, RankedTensorType sliceType,
-    SmallVector<OpFoldResult> &mixedOffsets,
-    SmallVector<OpFoldResult> &mixedSizes,
-    SmallVector<OpFoldResult> &mixedStrides) {
+static FailureOr<RankedTensorType>
+canonicalizeSubViewParts(OpTy op, RankedTensorType sliceType,
+                         SmallVector<OpFoldResult> &mixedOffsets,
+                         SmallVector<OpFoldResult> &mixedSizes,
+                         SmallVector<OpFoldResult> &mixedStrides) {
   // If there are no constant operands then we return early before the more
   // expensive work below.
   if (llvm::none_of(op.offsets(),
@@ -562,7 +572,8 @@
   llvm::SmallVector<int64_t> newShape;
   llvm::SmallBitVector droppedDims = op.getDroppedDims();
   for (auto size : llvm::enumerate(mixedSizes)) {
-    if (droppedDims.test(size.index())) continue;
+    if (droppedDims.test(size.index()))
+      continue;
     std::optional<int64_t> staticSize = getConstantIntValue(size.value());
     newShape.push_back(staticSize ? staticSize.value() : ShapedType::kDynamic);
   }
@@ -582,7 +593,8 @@
     RankedTensorType resultType = loadOp.getType();
     auto newResultType = canonicalizeSubViewParts(
         loadOp, resultType, mixedOffsets, mixedSizes, mixedStrides);
-    if (failed(newResultType)) return failure();
+    if (failed(newResultType))
+      return failure();
 
     // We need to resolve the new inferred type with the specified type.
     Location loc = loadOp.getLoc();
@@ -598,7 +610,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void DispatchTensorLoadOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -645,7 +657,8 @@
   LogicalResult matchAndRewrite(DispatchTensorStoreOp storeOp,
                                 PatternRewriter &rewriter) const override {
     auto parentOp = storeOp.getValue().getDefiningOp<tensor::CastOp>();
-    if (!parentOp || !tensor::canFoldIntoConsumerOp(parentOp)) return failure();
+    if (!parentOp || !tensor::canFoldIntoConsumerOp(parentOp))
+      return failure();
 
     rewriter.replaceOpWithNewOp<DispatchTensorStoreOp>(
         storeOp, parentOp.getSource(), storeOp.getTarget(),
@@ -665,7 +678,8 @@
     RankedTensorType valueType = storeOp.getValueType();
     auto newValueType = canonicalizeSubViewParts(
         storeOp, valueType, mixedOffsets, mixedSizes, mixedStrides);
-    if (failed(newValueType)) return failure();
+    if (failed(newValueType))
+      return failure();
 
     Value value = storeOp.getValue();
     Location loc = storeOp.getLoc();
@@ -679,7 +693,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void DispatchTensorStoreOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -779,7 +793,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TensorConstantOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                    MLIRContext *context) {
@@ -834,7 +848,8 @@
                                 PatternRewriter &rewriter) const override {
     auto sourceOp = dyn_cast_or_null<TensorReshapeOp>(
         reshapeOp.getSource().getDefiningOp());
-    if (!sourceOp) return failure();
+    if (!sourceOp)
+      return failure();
 
     // We want the same result value/shape but to source from the ancestor. We
     // need to pull any dynamic dims from that as we don't care about the
@@ -856,7 +871,8 @@
     auto sourceOp =
         dyn_cast_or_null<TensorSplatOp>(loadOp.getSource().getDefiningOp());
 
-    if (!sourceOp) return failure();
+    if (!sourceOp)
+      return failure();
 
     rewriter.replaceOp(loadOp, sourceOp.getValue());
     return success();
@@ -868,11 +884,13 @@
 
   LogicalResult matchAndRewrite(TensorSplatOp splatOp,
                                 PatternRewriter &rewriter) const override {
-    if (!splatOp.getResult().hasOneUse()) return failure();
+    if (!splatOp.getResult().hasOneUse())
+      return failure();
 
     auto reshapeOp = dyn_cast_or_null<TensorReshapeOp>(
         splatOp.getResult().use_begin()->getOwner());
-    if (!reshapeOp) return failure();
+    if (!reshapeOp)
+      return failure();
 
     rewriter.replaceOpWithNewOp<TensorSplatOp>(
         reshapeOp, reshapeOp.getResult().getType(), splatOp.getValue(),
@@ -918,7 +936,8 @@
     }
     unsigned dimOffset = 0;
     for (unsigned i = 0; i < idx; ++i) {
-      if (shapedType.isDynamicDim(i)) ++dimOffset;
+      if (shapedType.isDynamicDim(i))
+        ++dimOffset;
     }
     rewriter.replaceOp(op, dynamicDims.value()[dimOffset]);
 
@@ -926,7 +945,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TensorReshapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -969,7 +988,8 @@
 
 OpFoldResult TensorStoreOp::fold(FoldAdaptor operands) {
   auto value = operands.getValue();
-  if (!value) return {};
+  if (!value)
+    return {};
   if (auto target =
           llvm::dyn_cast_if_present<ElementsAttr>(operands.getTarget())) {
     // Store into the constant target tensor.
@@ -1194,7 +1214,8 @@
                                 PatternRewriter &rewriter) const override {
     auto targetCastOp = updateOp.getTarget().getDefiningOp<tensor::CastOp>();
     auto updateCastOp = updateOp.getUpdate().getDefiningOp<tensor::CastOp>();
-    if (!targetCastOp && !updateCastOp) return failure();
+    if (!targetCastOp && !updateCastOp)
+      return failure();
     Value target = (targetCastOp ? cast<Value>(targetCastOp.getSource())
                                  : cast<Value>(updateOp.getTarget()));
     Value update = (updateCastOp ? cast<Value>(updateCastOp.getSource())
@@ -1221,13 +1242,14 @@
   LogicalResult matchAndRewrite(TensorUpdateOp op,
                                 PatternRewriter &rewriter) const override {
     auto operand = op.getUpdate();
-    if (!isTensorOperandZeroElements(operand)) return failure();
+    if (!isTensorOperandZeroElements(operand))
+      return failure();
     rewriter.replaceOp(op, op.getTarget());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void TensorUpdateOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
@@ -1248,7 +1270,7 @@
   results.insert<ElideUnusedOp<ChannelSplitOp>>(context);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
index b98d8ab..6404e6c 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
@@ -56,9 +56,9 @@
 }
 
 // Verifies that a dispatch |op|'s |workload| matches that of the |exportOp|.
-static LogicalResult verifyDispatchWorkload(
-    Operation *op, IREE::Flow::ExecutableExportOp exportOp,
-    ValueRange workload) {
+static LogicalResult
+verifyDispatchWorkload(Operation *op, IREE::Flow::ExecutableExportOp exportOp,
+                       ValueRange workload) {
   // If the target has a workgroup count computation function we can verify that
   // the workload here matches what is expected.
   if (!exportOp.getWorkgroupCount().empty()) {
@@ -104,8 +104,9 @@
 }
 
 // Gets the dropped dimensions for `flow.dispatch.tensor.load/store`.
-static llvm::SmallBitVector getDroppedDimsImpl(
-    RankedTensorType slicedObjectType, ArrayRef<OpFoldResult> mixedSizes) {
+static llvm::SmallBitVector
+getDroppedDimsImpl(RankedTensorType slicedObjectType,
+                   ArrayRef<OpFoldResult> mixedSizes) {
   ArrayRef<int64_t> resultShape = slicedObjectType.getShape();
   llvm::SmallBitVector droppedDims(mixedSizes.size());
   if (slicedObjectType.getRank() == mixedSizes.size()) {
@@ -224,11 +225,13 @@
 static void printWorkgroupCountRegionWithoutKeyword(OpAsmPrinter &p,
                                                     Operation *op,
                                                     Region &body) {
-  if (body.empty()) return;
+  if (body.empty())
+    return;
   p << "(";
   auto args = body.getArguments();
   for (unsigned i = 0; i < args.size(); ++i) {
-    if (i > 0) p << ", ";
+    if (i > 0)
+      p << ", ";
     p.printRegionArgument(args[i]);
   }
   p << ")";
@@ -244,14 +247,15 @@
 static ParseResult parseWorkgroupCountRegion(OpAsmParser &parser,
                                              Region &body) {
   if (failed(parser.parseOptionalKeyword("workgroups"))) {
-    return success();  // Omitted.
+    return success(); // Omitted.
   }
   return parseWorkgroupCountRegionWithoutKeyword(parser, body);
 }
 
 static void printWorkgroupCountRegion(OpAsmPrinter &p, Operation *op,
                                       Region &body) {
-  if (body.empty()) return;
+  if (body.empty())
+    return;
   p << "workgroups";
   printWorkgroupCountRegionWithoutKeyword(p, op, body);
 }
@@ -259,14 +263,15 @@
 static ParseResult parseDispatchWorkgroupsCountRegion(OpAsmParser &parser,
                                                       Region &body) {
   if (failed(parser.parseOptionalKeyword("count"))) {
-    return success();  // Omitted.
+    return success(); // Omitted.
   }
   return parseWorkgroupCountRegionWithoutKeyword(parser, body);
 }
 
 static void printDispatchWorkgroupsCountRegion(OpAsmPrinter &p, Operation *op,
                                                Region &body) {
-  if (body.empty()) return;
+  if (body.empty())
+    return;
   p << " count";
   printWorkgroupCountRegionWithoutKeyword(p, op, body);
 }
@@ -286,7 +291,8 @@
 
   // Verify terminator.
   auto returnOp = dyn_cast<Flow::ReturnOp>(getBody().front().getTerminator());
-  if (!returnOp) return emitOpError() << "expected 'flow.return' terminator";
+  if (!returnOp)
+    return emitOpError() << "expected 'flow.return' terminator";
   for (const auto [resultType, returnType] :
        llvm::zip_equal(getResultTypes(), returnOp->getOperandTypes()))
     if (resultType != returnType)
@@ -307,22 +313,28 @@
   SmallVector<OpAsmParser::UnresolvedOperand> allOperands;
   std::unique_ptr<Region> bodyRegion = std::make_unique<Region>();
   std::unique_ptr<Region> workloadCountRegion = std::make_unique<Region>();
-  if (parser.parseOptionalAttrDict(result.attributes)) return failure();
+  if (parser.parseOptionalAttrDict(result.attributes))
+    return failure();
   SmallVector<OpAsmParser::UnresolvedOperand> workloadOperands;
   SMLoc workloadOperandsLoc;
   (void)workloadOperandsLoc;
   if (succeeded(parser.parseOptionalLSquare())) {
     workloadOperandsLoc = parser.getCurrentLocation();
-    if (parser.parseOperandList(workloadOperands)) return failure();
-    if (parser.parseRSquare()) return failure();
+    if (parser.parseOperandList(workloadOperands))
+      return failure();
+    if (parser.parseRSquare())
+      return failure();
   }
   if (succeeded(parser.parseOptionalArrow())) {
     ParseResult typeListResult =
         parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() {
-          if (parser.parseType(resultTypes.emplace_back())) return failure();
+          if (parser.parseType(resultTypes.emplace_back()))
+            return failure();
           auto shapedType = llvm::dyn_cast<ShapedType>(resultTypes.back());
-          if (!shapedType) return success();
-          if (shapedType.hasStaticShape()) return success();
+          if (!shapedType)
+            return success();
+          if (shapedType.hasStaticShape())
+            return success();
           SmallVector<OpAsmParser::UnresolvedOperand> dynamicDims;
           if (parser.parseOperandList(dynamicDims,
                                       shapedType.getNumDynamicDims(),
@@ -331,9 +343,11 @@
           allOperands.append(dynamicDims.begin(), dynamicDims.end());
           return success();
         });
-    if (typeListResult) return failure();
+    if (typeListResult)
+      return failure();
   }
-  if (parser.parseRegion(*bodyRegion)) return failure();
+  if (parser.parseRegion(*bodyRegion))
+    return failure();
   ensureTerminator(*bodyRegion, parser.getBuilder(), result.location);
 
   if (parseDispatchWorkgroupsCountRegion(parser, *workloadCountRegion)) {
@@ -381,7 +395,8 @@
         resultDimCounter += shapedType.getNumDynamicDims();
       }
     }
-    if (it.index() < getNumResults() - 1) p << ", ";
+    if (it.index() < getNumResults() - 1)
+      p << ", ";
   }
   p << ") ";
 
@@ -437,7 +452,8 @@
          "expected that all dynamic dims were processed");
 
   // Nothing to do if all results are used.
-  if (unusedResults.empty()) return false;
+  if (unusedResults.empty())
+    return false;
 
   // Create new region and move over the body.
   auto newRegionOp = rewriter.create<Flow::DispatchRegionOp>(
@@ -565,9 +581,9 @@
   return;
 }
 
-RankedTensorType DispatchTensorLoadOp::inferResultType(
-    IREE::Flow::DispatchTensorType sourceType,
-    ArrayRef<OpFoldResult> mixedSizes) {
+RankedTensorType
+DispatchTensorLoadOp::inferResultType(IREE::Flow::DispatchTensorType sourceType,
+                                      ArrayRef<OpFoldResult> mixedSizes) {
   auto shape =
       llvm::map_to_vector(mixedSizes, [&](OpFoldResult valueOrAttr) -> int64_t {
         if (auto attr = valueOrAttr.dyn_cast<Attribute>()) {
@@ -664,13 +680,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
 
-::std::optional<unsigned> DispatchTensorLoadOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+DispatchTensorLoadOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> DispatchTensorLoadOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 bool DispatchTensorLoadOp::isLoadOfWholeSource() {
@@ -904,10 +920,12 @@
     return success();
   };
   for (auto type : getOperandTypes()) {
-    if (failed(verifyIOType(type))) return failure();
+    if (failed(verifyIOType(type)))
+      return failure();
   }
   for (auto type : getResultTypes()) {
-    if (failed(verifyIOType(type))) return failure();
+    if (failed(verifyIOType(type)))
+      return failure();
   }
 
   // Workgroup count region is optional.
@@ -996,14 +1014,16 @@
             hasWrites = true;
           });
     }
-    if (hasReads && !hasWrites) tensorAccess = TensorAccess::ReadOnly;
-    if (!hasReads && hasWrites) tensorAccess = TensorAccess::WriteOnly;
+    if (hasReads && !hasWrites)
+      tensorAccess = TensorAccess::ReadOnly;
+    if (!hasReads && hasWrites)
+      tensorAccess = TensorAccess::WriteOnly;
   }
   return tensorAccess;
 }
 
-IREE::Util::ValueAccess DispatchWorkgroupsOp::getOperandAccess(
-    unsigned operandIndex) {
+IREE::Util::ValueAccess
+DispatchWorkgroupsOp::getOperandAccess(unsigned operandIndex) {
   BlockArgument arg = getWorkgroupBody().front().getArgument(operandIndex);
   if (auto tensorType = llvm::dyn_cast<DispatchTensorType>(arg.getType())) {
     auto tensorAccess = refineTensorAccess(arg, tensorType);
@@ -1020,8 +1040,8 @@
   }
 }
 
-IREE::Util::ValueAccess DispatchWorkgroupsOp::getResultAccess(
-    unsigned resultIndex) {
+IREE::Util::ValueAccess
+DispatchWorkgroupsOp::getResultAccess(unsigned resultIndex) {
   unsigned startIndex = getBody()->getNumArguments() - getNumResults();
   BlockArgument arg =
       getWorkgroupBody().front().getArgument(startIndex + resultIndex);
@@ -1099,18 +1119,20 @@
   // For dropped results, erase all the store-op uses. It is a pre-requisite
   // that the result can be dropped only if it is written within the dispatch
   // region op.
-  unsigned baseResultIndex = getArguments().size();  // old index
+  unsigned baseResultIndex = getArguments().size(); // old index
   auto erasedArguments = llvm::to_vector(excludedOperandIndices);
   for (unsigned i = baseResultIndex, e = newBody.getNumArguments(); i != e;
        ++i) {
-    if (!is_contained(excludedResultIndices, i - baseResultIndex)) continue;
+    if (!is_contained(excludedResultIndices, i - baseResultIndex))
+      continue;
     auto arg = newBody.front().getArgument(i);
     eraseArgUseTree(arg, rewriter);
     erasedArguments.push_back(i);
   }
   auto &block = newBody.front();
   BitVector eraseIndices(block.getNumArguments());
-  for (auto i : erasedArguments) eraseIndices.set(i);
+  for (auto i : erasedArguments)
+    eraseIndices.set(i);
   block.eraseArguments(eraseIndices);
 
   return newOp;
@@ -1123,7 +1145,8 @@
 
 SmallVector<int64_t> DispatchWorkgroupsOp::getTiedOperandsAsIntegerList() {
   ArrayAttr attr = getTiedOperandsAttr();
-  if (!attr) return {};
+  if (!attr)
+    return {};
   return llvm::map_to_vector(attr, [](Attribute intAttr) {
     return llvm::cast<IntegerAttr>(intAttr).getInt();
   });
@@ -1266,7 +1289,7 @@
 }
 
 std::pair<unsigned, unsigned> DispatchOp::getTiedOperandsIndexAndLength() {
-  return getODSOperandIndexAndLength(1);  // $operands
+  return getODSOperandIndexAndLength(1); // $operands
 }
 
 LogicalResult DispatchOp::verify() {
@@ -1379,7 +1402,7 @@
 }
 
 std::pair<unsigned, unsigned> CallOp::getTiedOperandsIndexAndLength() {
-  return getODSOperandIndexAndLength(0);  // $arguments
+  return getODSOperandIndexAndLength(0); // $arguments
 }
 
 LogicalResult CallOp::verify() {
@@ -1465,13 +1488,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
 
-::std::optional<unsigned> TensorReshapeOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+TensorReshapeOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> TensorReshapeOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 //===----------------------------------------------------------------------===//
@@ -1591,13 +1614,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> TensorUpdateOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+TensorUpdateOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> TensorUpdateOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 //===----------------------------------------------------------------------===//
@@ -1616,7 +1639,8 @@
     auto dispatchTensorLoadOp =
         extractSliceOp.getSource()
             .getDefiningOp<IREE::Flow::DispatchTensorLoadOp>();
-    if (!dispatchTensorLoadOp) return failure();
+    if (!dispatchTensorLoadOp)
+      return failure();
 
     SmallVector<OpFoldResult> offsets, sizes, strides;
     // `tensor.extract_slice` (i.e. the producer) folds **into**
@@ -1644,12 +1668,13 @@
     : OpRewritePattern<IREE::Flow::DispatchTensorStoreOp> {
   using OpRewritePattern<IREE::Flow::DispatchTensorStoreOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::Flow::DispatchTensorStoreOp dispatchTensorStoreOp,
-      PatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::DispatchTensorStoreOp dispatchTensorStoreOp,
+                  PatternRewriter &rewriter) const override {
     auto insertSliceOp =
         dispatchTensorStoreOp.getValue().getDefiningOp<tensor::InsertSliceOp>();
-    if (!insertSliceOp) return failure();
+    if (!insertSliceOp)
+      return failure();
 
     // Check that the `dest` of the `tensor.insert_slice` and target of the
     // `flow.dispatch.tensor.store` are the same interface binding.
@@ -1736,13 +1761,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> CollectiveAllGatherOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+CollectiveAllGatherOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> CollectiveAllGatherOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void CollectiveAllGatherOp::build(OpBuilder &builder, OperationState &state,
@@ -1762,13 +1787,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> CollectiveAllReduceOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+CollectiveAllReduceOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> CollectiveAllReduceOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void CollectiveAllReduceOp::build(OpBuilder &builder, OperationState &state,
@@ -1789,13 +1814,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-std::optional<unsigned> CollectiveAllToAllOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+std::optional<unsigned>
+CollectiveAllToAllOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> CollectiveAllToAllOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void CollectiveAllToAllOp::build(OpBuilder &builder, OperationState &state,
@@ -1816,13 +1841,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> CollectiveReduceScatterOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+CollectiveReduceScatterOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> CollectiveReduceScatterOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void CollectiveReduceScatterOp::build(OpBuilder &builder, OperationState &state,
@@ -1844,13 +1869,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-std::optional<unsigned> CollectiveSendRecvOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+std::optional<unsigned>
+CollectiveSendRecvOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> CollectiveSendRecvOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void CollectiveSendRecvOp::build(OpBuilder &builder, OperationState &state,
@@ -1864,14 +1889,14 @@
         recv, builder.getIndexArrayAttr({0}));
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // TableGen definitions (intentionally last)
 //===----------------------------------------------------------------------===//
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowOps.cpp.inc" // IWYU pragma: keep
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.h b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.h
index fea7852..2085658 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.h
@@ -50,12 +50,12 @@
 bool dropUnusedDispatchRegionResults(RewriterBase &rewriter,
                                      Flow::DispatchRegionOp regionOp);
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Flow/IR/FlowOps.h.inc" // IWYU pragma: export
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_IR_FLOWOPS_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_IR_FLOWOPS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.cpp
index 1ed6019..4c86bce 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.cpp
@@ -12,10 +12,10 @@
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Flow/IR/FlowAttrs.cpp.inc"  // IWYU pragma: keep
-#include "iree/compiler/Dialect/Flow/IR/FlowEnums.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowAttrs.cpp.inc" // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowEnums.cpp.inc" // IWYU pragma: keep
 #define GET_TYPEDEF_CLASSES
-#include "iree/compiler/Dialect/Flow/IR/FlowTypes.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowTypes.cpp.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -67,7 +67,8 @@
   assert(hasStaticShape() && "cannot get element count of dynamic shaped type");
   auto shape = getShape();
   int64_t num = 1;
-  for (auto dim : shape) num *= dim;
+  for (auto dim : shape)
+    num *= dim;
   return num;
 }
 
@@ -117,9 +118,9 @@
   return hasStaticShape() && getShape() == shape;
 }
 
-LogicalResult DispatchTensorType::verify(
-    function_ref<InFlightDiagnostic()> emitError, uint32_t access,
-    Type boundType) {
+LogicalResult
+DispatchTensorType::verify(function_ref<InFlightDiagnostic()> emitError,
+                           uint32_t access, Type boundType) {
   if (!boundType.isIntOrFloat() && !llvm::isa<RankedTensorType>(boundType)) {
     return emitError() << "unhandled bounded type in dispatch. Must by int, "
                           "float or ranked tensor type";
@@ -146,17 +147,17 @@
 
 static void printShapedType(DispatchTensorType &type, AsmPrinter &p) {
   switch (type.getAccess()) {
-    case TensorAccess::ReadOnly:
-      p << "readonly";
-      break;
-    case TensorAccess::ReadWrite:
-      p << "readwrite";
-      break;
-    case TensorAccess::WriteOnly:
-      p << "writeonly";
-      break;
-    default:
-      assert(false && "unhandled access");
+  case TensorAccess::ReadOnly:
+    p << "readonly";
+    break;
+  case TensorAccess::ReadWrite:
+    p << "readwrite";
+    break;
+  case TensorAccess::WriteOnly:
+    p << "writeonly";
+    break;
+  default:
+    assert(false && "unhandled access");
   }
   p << ":" << type.getBoundType();
 }
@@ -176,13 +177,13 @@
 // Dialect registration
 //===----------------------------------------------------------------------===//
 
-#include "iree/compiler/Dialect/Flow/IR/FlowOpInterfaces.cpp.inc"  // IWYU pragma: keep
-#include "iree/compiler/Dialect/Flow/IR/FlowTypeInterfaces.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowOpInterfaces.cpp.inc" // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowTypeInterfaces.cpp.inc" // IWYU pragma: keep
 
 void FlowDialect::registerAttributes() {
   addAttributes<
 #define GET_ATTRDEF_LIST
-#include "iree/compiler/Dialect/Flow/IR/FlowAttrs.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowAttrs.cpp.inc" // IWYU pragma: keep
       >();
 }
 
@@ -190,7 +191,7 @@
   addTypes<DispatchTensorType>();
   addTypes<
 #define GET_TYPEDEF_LIST
-#include "iree/compiler/Dialect/Flow/IR/FlowTypes.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowTypes.cpp.inc" // IWYU pragma: keep
       >();
 }
 
@@ -203,7 +204,8 @@
   Type type;
   OptionalParseResult parseResult =
       generatedTypeParser(parser, &mnemonic, type);
-  if (parseResult.has_value()) return type;
+  if (parseResult.has_value())
+    return type;
   if (mnemonic == "dispatch.tensor") {
     return DispatchTensorType::parse(parser);
   }
@@ -220,7 +222,7 @@
   }
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.h b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.h
index 2ff1ab4..d7455e7 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowTypes.h
@@ -21,7 +21,7 @@
 #include "mlir/Support/LLVM.h"
 
 // clang-format off: must be included after all LLVM/MLIR headers.
-#include "iree/compiler/Dialect/Flow/IR/FlowEnums.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Flow/IR/FlowEnums.h.inc" // IWYU pragma: export
 // clang-format on
 
 namespace mlir {
@@ -29,8 +29,8 @@
 namespace IREE {
 namespace Flow {
 
-#include "iree/compiler/Dialect/Flow/IR/FlowOpInterfaces.h.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/Flow/IR/FlowTypeInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Flow/IR/FlowOpInterfaces.h.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/Flow/IR/FlowTypeInterfaces.h.inc" // IWYU pragma: export
 
 //===----------------------------------------------------------------------===//
 // Object types
@@ -38,7 +38,7 @@
 
 namespace detail {
 struct DispatchTensorTypeStorage;
-}  // namespace detail
+} // namespace detail
 
 enum class TensorAccess : uint32_t {
   ReadOnly,
@@ -51,7 +51,7 @@
 class DispatchTensorType
     : public Type::TypeBase<DispatchTensorType, Type,
                             detail::DispatchTensorTypeStorage> {
- public:
+public:
   using ImplType = detail::DispatchTensorTypeStorage;
 
   using Base::Base;
@@ -161,18 +161,18 @@
   Type boundType;
 };
 
-}  // namespace detail
+} // namespace detail
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Flow/IR/FlowAttrs.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowAttrs.h.inc" // IWYU pragma: keep
 #define GET_TYPEDEF_CLASSES
-#include "iree/compiler/Dialect/Flow/IR/FlowTypes.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/IR/FlowTypes.h.inc" // IWYU pragma: keep
 // clang-format on
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_IR_FLOWTYPES_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_IR_FLOWTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp b/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp
index 4c4e088..897dfbd 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp
@@ -51,9 +51,10 @@
 /// Populate the workgroup_count region of `dispatchOp`.
 /// For now, this only supports constant index ops and empty workload operands.
 /// Assumes the Flow::DispatchWorkgroupsOp is built with an empty region.
-static LogicalResult populateWorkgroupCountComputingRegion(
-    PatternRewriter &rewriter, scf::ForallOp forallOp,
-    Flow::DispatchWorkgroupsOp dispatchOp) {
+static LogicalResult
+populateWorkgroupCountComputingRegion(PatternRewriter &rewriter,
+                                      scf::ForallOp forallOp,
+                                      Flow::DispatchWorkgroupsOp dispatchOp) {
   Location loc = forallOp.getLoc();
   OpBuilder::InsertionGuard g(rewriter);
   Region &r = dispatchOp.getWorkgroupCount();
@@ -66,7 +67,8 @@
   // TODO: Iteratively pull operations that are only consuming IndexType.
   for (Value v : forallOp.getUpperBound(rewriter)) {
     auto op = dyn_cast_or_null<arith::ConstantIndexOp>(v.getDefiningOp());
-    if (!op) return failure();
+    if (!op)
+      return failure();
     results.push_back(
         cast<arith::ConstantIndexOp>(rewriter.clone(*op)).getResult());
   }
@@ -135,10 +137,12 @@
         source = forallOp.getTiedOpOperand(sourceBbArg)->get();
 
     auto it = llvm::find(tensorOperands, source);
-    if (it == tensorOperands.end()) return;
+    if (it == tensorOperands.end())
+      return;
     int64_t index = std::distance(tensorOperands.begin(), it);
     Value sourceFlow = tensorToFlowBvm.lookupOrNull(source);
-    if (!sourceFlow) return;
+    if (!sourceFlow)
+      return;
 
     Location loc = extractSliceOp.getLoc();
     OpBuilder::InsertionGuard g(rewriter);
@@ -167,14 +171,16 @@
   // worklist.
   llvm::SetVector<Operation *> worklist;
   for (Value v : valuesDefinedAbove)
-    if (Operation *op = v.getDefiningOp()) worklist.insert(op);
+    if (Operation *op = v.getDefiningOp())
+      worklist.insert(op);
   llvm::SmallVector<Operation *> opsToClone;
   llvm::DenseSet<Operation *> visited;
 
   // Process all ops in the worklist.
   while (!worklist.empty()) {
     Operation *op = worklist.pop_back_val();
-    if (visited.contains(op)) continue;
+    if (visited.contains(op))
+      continue;
     visited.insert(op);
 
     // Do not clone ops that are not clonable.
@@ -188,14 +194,16 @@
                      .getDest()
                      .getDefiningOp() == op;
         });
-    if (isDestination) continue;
+    if (isDestination)
+      continue;
 
     opsToClone.push_back(op);
 
     // Add all operands to the worklist.
     for (Value operand : op->getOperands()) {
       Operation *operandOp = operand.getDefiningOp();
-      if (!operandOp) continue;
+      if (!operandOp)
+        continue;
       worklist.insert(operandOp);
     }
   }
@@ -207,7 +215,8 @@
     Operation *cloned = rewriter.clone(*op);
     SmallVector<OpOperand *> uses;
     for (OpOperand &use : op->getUses())
-      if (forallOp->isProperAncestor(use.getOwner())) uses.push_back(&use);
+      if (forallOp->isProperAncestor(use.getOwner()))
+        uses.push_back(&use);
     for (OpOperand *use : uses) {
       unsigned resultNum = llvm::cast<OpResult>(use->get()).getResultNumber();
       rewriter.updateRootInPlace(
@@ -264,7 +273,8 @@
         llvm::cast<BlockArgument>(parallelInsertOp.getDest());
     Value dest = forallOp.getTiedOpOperand(destBbArg)->get();
     bool inserted = resultTensorOperands.insert(dest);
-    if (!inserted) continue;
+    if (!inserted)
+      continue;
     auto dynamicDims =
         getIndicesOfDynamicDims(llvm::cast<ShapedType>(dest.getType()));
     for (int64_t dim : dynamicDims)
@@ -288,7 +298,8 @@
       nonTensorOperands.push_back(v);
       continue;
     }
-    if (resultTensorOperands.contains(v)) continue;
+    if (resultTensorOperands.contains(v))
+      continue;
     tensorOperands.push_back(v);
     for (int64_t dim : getIndicesOfDynamicDims(tensorType))
       tensorDynamicDims.push_back(rewriter.create<tensor::DimOp>(loc, v, dim));
@@ -297,7 +308,8 @@
   // tensor operands.)
   for (Value v : forallOp.getOutputs()) {
     auto tensorType = llvm::cast<RankedTensorType>(v.getType());
-    if (resultTensorOperands.contains(v)) continue;
+    if (resultTensorOperands.contains(v))
+      continue;
     tensorOperands.push_back(v);
     for (int64_t dim : getIndicesOfDynamicDims(tensorType))
       tensorDynamicDims.push_back(rewriter.create<tensor::DimOp>(loc, v, dim));
@@ -462,7 +474,8 @@
   SimplePatternRewriter patternRewriter(target->getContext());
   FailureOr<Flow::DispatchWorkgroupsOp> result =
       rewriteForeachThreadToFlowDispatchWorkgroups(target, patternRewriter);
-  if (failed(result)) return emitDefaultDefiniteFailure(target);
+  if (failed(result))
+    return emitDefaultDefiniteFailure(target);
   results.push_back(*result);
   return DiagnosedSilenceableFailure::success();
 }
@@ -479,7 +492,8 @@
     transform::ApplyToEachResultList &results, transform::TransformState &) {
   FailureOr<Flow::DispatchWorkgroupsOp> result =
       rewriteFlowDispatchRegionToFlowDispatchWorkgroups(target, rewriter);
-  if (failed(result)) return emitDefaultDefiniteFailure(target);
+  if (failed(result))
+    return emitDefaultDefiniteFailure(target);
   results.push_back(*result);
   return DiagnosedSilenceableFailure::success();
 }
@@ -526,7 +540,8 @@
   for (Operation *target : orderedTargets) {
     FailureOr<Operation *> clonedTarget =
         clonePrecedingOpIntoDispatchRegion(rewriter, target, regionOp);
-    if (failed(clonedTarget)) return emitDefaultDefiniteFailure(target);
+    if (failed(clonedTarget))
+      return emitDefaultDefiniteFailure(target);
     clonedTargets.push_back(*clonedTarget);
   }
 
@@ -576,7 +591,8 @@
   for (Operation *target : orderedTargets) {
     auto newRegionOp =
         movePrecedingOpsIntoDispatchRegion(rewriter, target, regionOp);
-    if (failed(newRegionOp)) return emitDefaultDefiniteFailure(target);
+    if (failed(newRegionOp))
+      return emitDefaultDefiniteFailure(target);
     regionOp = *newRegionOp;
   }
 
@@ -618,9 +634,9 @@
 // }
 // %0 = "some_op"(%r) : (tensor<?xf32>) -> (tensor<?xf32>)
 // %2 = "yet_another_use"(%0) : (tensor<?xf32>) -> (tensor<?xf32>)
-static FailureOr<Operation *> cloneSucceedingOpIntoDispatchRegion(
-    RewriterBase &rewriter, Operation *target,
-    Flow::DispatchRegionOp regionOp) {
+static FailureOr<Operation *>
+cloneSucceedingOpIntoDispatchRegion(RewriterBase &rewriter, Operation *target,
+                                    Flow::DispatchRegionOp regionOp) {
   if (!regionOp->isBeforeInBlock(target)) {
     target->emitError() << "expected that region op comes first";
     return failure();
@@ -630,7 +646,8 @@
 
   // Gather all uses of `target`.
   SmallVector<OpOperand *> usesOutsideOfRegion;
-  for (OpOperand &use : target->getUses()) usesOutsideOfRegion.push_back(&use);
+  for (OpOperand &use : target->getUses())
+    usesOutsideOfRegion.push_back(&use);
 
   // Clone op into dispatch region.
   auto returnOp = cast<Flow::ReturnOp>(body.getTerminator());
@@ -678,9 +695,9 @@
 //   flow.return %1, %0 : tensor<?xf32>, tensor<?xf32>
 // }
 // %2 = "yet_another_use"(%r#1) : (tensor<?xf32>) -> (tensor<?xf32>)
-static FailureOr<Flow::DispatchRegionOp> moveSucceedingOpIntoDispatchRegion(
-    RewriterBase &rewriter, Operation *target,
-    Flow::DispatchRegionOp regionOp) {
+static FailureOr<Flow::DispatchRegionOp>
+moveSucceedingOpIntoDispatchRegion(RewriterBase &rewriter, Operation *target,
+                                   Flow::DispatchRegionOp regionOp) {
   if (!regionOp->isBeforeInBlock(target)) {
     target->emitError() << "expected that region op comes first";
     return failure();
@@ -690,7 +707,8 @@
 
   // Gather all uses of `target`.
   SmallVector<OpOperand *> usesOutsideOfRegion;
-  for (OpOperand &use : target->getUses()) usesOutsideOfRegion.push_back(&use);
+  for (OpOperand &use : target->getUses())
+    usesOutsideOfRegion.push_back(&use);
 
   // Compute dynamic result dims.
   SmallVector<SmallVector<Value>> dynamicDims;
@@ -725,7 +743,8 @@
   for (const auto &it : llvm::enumerate(target->getResults())) {
     auto newRegionOp = appendDispatchRegionResults(
         rewriter, regionOp, it.value(), dynamicDims[it.index()]);
-    if (failed(newRegionOp)) return failure();
+    if (failed(newRegionOp))
+      return failure();
     regionOp = *newRegionOp;
   }
 
@@ -829,7 +848,8 @@
     transform::ApplyToEachResultList &results,
     transform::TransformState &state) {
   auto regionOp = Flow::wrapOpInDispatchRegion(rewriter, target);
-  if (failed(regionOp)) return emitDefaultDefiniteFailure(target);
+  if (failed(regionOp))
+    return emitDefaultDefiniteFailure(target);
 
   results.push_back(*regionOp);
   return DiagnosedSilenceableFailure::success();
diff --git a/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.h b/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.h
index f53f119..173fa41 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.h
@@ -16,17 +16,17 @@
 
 namespace scf {
 class ForallOp;
-}  // namespace scf
+} // namespace scf
 
 namespace iree_compiler {
 namespace IREE {
 namespace Flow {
 class DispatchWorkgroupsOp;
 class DispatchRegionOp;
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 #define GET_OP_CLASSES
 #include "iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensionsOps.h.inc"
@@ -43,12 +43,12 @@
 // Hook to register Flow transformations to the transform dialect.
 class FlowExtensions
     : public transform::TransformDialectExtension<FlowExtensions> {
- public:
+public:
   FlowExtensions();
 };
-}  // namespace transform_dialect
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace transform_dialect
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_CODEGEN_FLOW_TRANSFORMEXTENSIONS_FLOWEXTENSIONS_H_
+#endif // IREE_COMPILER_CODEGEN_FLOW_TRANSFORMEXTENSIONS_FLOWEXTENSIONS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CaptureDispatchDynamicDims.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CaptureDispatchDynamicDims.cpp
index 2b3d156..0c99de0 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CaptureDispatchDynamicDims.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CaptureDispatchDynamicDims.cpp
@@ -42,7 +42,8 @@
     outerToInnerMap[operand] = arg;
   }
   for (auto result : dispatchOp.getResults()) {
-    if (dispatchOp.getTiedResultOperand(result)) continue;  // ignored tied
+    if (dispatchOp.getTiedResultOperand(result))
+      continue; // ignored tied
     auto arg = entryBlock->getArgument(argIdx++);
     outerToInnerMap[result] = arg;
   }
@@ -53,13 +54,16 @@
   auto captureTensorDims = [&](Value externalValue, Value internalValue) {
     auto tensorType =
         llvm::dyn_cast<IREE::Flow::DispatchTensorType>(internalValue.getType());
-    if (!tensorType) return;
-    if (tensorType.hasStaticShape()) return;
+    if (!tensorType)
+      return;
+    if (tensorType.hasStaticShape())
+      return;
 
     // Find the dimensions in the parent.
     auto maybeDynamicDims = IREE::Util::findDynamicDims(
         externalValue, dispatchOp->getBlock(), Block::iterator(dispatchOp));
-    if (!maybeDynamicDims.has_value()) return;
+    if (!maybeDynamicDims.has_value())
+      return;
     // Convert to a vector -- we cannot use the ValueRange directly because
     // it might point into the operand list of this op, which we might mutate
     // in-place.
@@ -106,7 +110,8 @@
     captureTensorDims(operand, outerToInnerMap[operand]);
   }
   for (auto result : dispatchOp.getResults()) {
-    if (dispatchOp.getTiedResultOperand(result)) continue;  // ignore tied
+    if (dispatchOp.getTiedResultOperand(result))
+      continue; // ignore tied
     captureTensorDims(result, outerToInnerMap[result]);
   }
 }
@@ -124,13 +129,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createCaptureDispatchDynamicDimsPass() {
   return std::make_unique<CaptureDispatchDynamicDimsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupNumericNarrowing.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupNumericNarrowing.cpp
index 4b200e8..f507e05 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupNumericNarrowing.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupNumericNarrowing.cpp
@@ -25,13 +25,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createCleanupNumericNarrowingPass() {
   return std::make_unique<CleanupNumericNarrowingPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupTensorShapes.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupTensorShapes.cpp
index 6bb5693..57c9eba 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupTensorShapes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CleanupTensorShapes.cpp
@@ -34,17 +34,18 @@
             foundBadOps = true;
           }
         });
-    if (foundBadOps) return signalPassFailure();
+    if (foundBadOps)
+      return signalPassFailure();
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createCleanupTensorShapesPass() {
   return std::make_unique<CleanupTensorShapesPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CloneProducersIntoDispatchRegions.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CloneProducersIntoDispatchRegions.cpp
index e888509..a24baa5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CloneProducersIntoDispatchRegions.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CloneProducersIntoDispatchRegions.cpp
@@ -27,7 +27,7 @@
           CloneProducersIntoDispatchRegionPass> {
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void CloneProducersIntoDispatchRegionPass::runOnOperation() {
   FunctionOpInterface funcOp = getOperation();
@@ -44,7 +44,7 @@
   return std::make_unique<CloneProducersIntoDispatchRegionPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseDimensions.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseDimensions.cpp
index dad5dda..33aa2a3 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseDimensions.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseDimensions.cpp
@@ -45,18 +45,19 @@
       : CollapseDimensionsPass() {}
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 /// Searches the same sequence in all the affine maps and collapses these
 /// dimensions. It only applies these to "parallel" loops without mixing them
 /// with "reduction" types.
-static SmallVector<ReassociationIndices> getCollapsibleLoops(
-    linalg::GenericOp genericOp) {
+static SmallVector<ReassociationIndices>
+getCollapsibleLoops(linalg::GenericOp genericOp) {
   SmallVector<ReassociationIndices> contiguousLoops;
 
   SmallVector<unsigned> pDims;
   genericOp.getParallelDims(pDims);
-  if (pDims.size() < 2) return contiguousLoops;
+  if (pDims.size() < 2)
+    return contiguousLoops;
 
   llvm::SmallDenseSet<unsigned> pLoops(pDims.begin(), pDims.end());
 
@@ -69,7 +70,8 @@
           break;
         }
       }
-      if (!foundSeq) return false;
+      if (!foundSeq)
+        return false;
     }
     return true;
   };
@@ -86,15 +88,18 @@
       }
     }
     preExpr = nextExpr;
-    if (pLoops.count(pos)) range.push_back(pos);
+    if (pLoops.count(pos))
+      range.push_back(pos);
   }
-  if (range.size() > 1) contiguousLoops.push_back(range);
+  if (range.size() > 1)
+    contiguousLoops.push_back(range);
 
   LLVM_DEBUG({
     llvm::dbgs() << "Collapsing dimensions if possible: ";
     for (auto indices : contiguousLoops) {
       llvm::dbgs() << "[";
-      for (auto idx : indices) llvm::dbgs() << idx << ",";
+      for (auto idx : indices)
+        llvm::dbgs() << idx << ",";
       llvm::dbgs() << "]\t";
     }
     llvm::dbgs() << "\n";
@@ -104,9 +109,9 @@
 }
 
 /// Collapse possible dimension of the given linalg.generic
-static FailureOr<SmallVector<Value>> collapseLinalgGeneric(
-    IRRewriter &rewriter, linalg::GenericOp genericOp,
-    SmallVector<ReassociationIndices> &collapseIndices) {
+static FailureOr<SmallVector<Value>>
+collapseLinalgGeneric(IRRewriter &rewriter, linalg::GenericOp genericOp,
+                      SmallVector<ReassociationIndices> &collapseIndices) {
   rewriter.setInsertionPoint(genericOp->getParentOp());
   FailureOr<SmallVector<Value>> replacements =
       mlir::linalg::collapseGenericOpIterationDims(genericOp, collapseIndices,
@@ -124,7 +129,8 @@
   // TODO(guray) There is no mechanism to tell the collapsed indexes to
   // `tensor.expand_shape`. Once we have this support in MLIR, we can enable
   // dynamic tensor shapes.
-  if (genericOp.hasDynamicShape()) return false;
+  if (genericOp.hasDynamicShape())
+    return false;
 
   // TODO(guray) Currently we can only collapse when result of all the
   // AffineMaps are dimensions. Possible to collapse cases like
@@ -139,37 +145,43 @@
 
   // TODO(guray) Collapsing caused performance regression in a cpu
   // benchmark, so we disable it.
-  if (genericOp.hasIndexSemantics()) return false;
+  if (genericOp.hasIndexSemantics())
+    return false;
 
   return true;
 }
 
 /// Traverses all the the Ops in DispatchRegionOps and finds linalg.generic Op
 /// without any producers.
-static FailureOr<linalg::GenericOp> findRootGenericOp(
-    DispatchRegionOp regionOp) {
+static FailureOr<linalg::GenericOp>
+findRootGenericOp(DispatchRegionOp regionOp) {
   SmallVector<Operation *> computeOps;
   auto &ops = regionOp.getBody().front().getOperations();
   for (Operation &op : ops) {
-    if (isa<TilingInterface>(op)) computeOps.push_back(&op);
+    if (isa<TilingInterface>(op))
+      computeOps.push_back(&op);
   }
   // Looking for root without producer
-  if (computeOps.size() != 1 || ops.size() != 2) return failure();
+  if (computeOps.size() != 1 || ops.size() != 2)
+    return failure();
   auto genericOp = llvm::dyn_cast<linalg::GenericOp>(computeOps.front());
-  if (!genericOp) return failure();
+  if (!genericOp)
+    return failure();
   return genericOp;
 }
 
 /// Generate a new dispatch.region and workload according with the collapsed
 /// linalg Generic Op
-static LogicalResult generateNewDispatchRegion(
-    IRRewriter &rewriter, DispatchRegionOp regionOp,
-    SmallVector<Value> collapseResults, linalg::GenericOp newGenericOp) {
+static LogicalResult
+generateNewDispatchRegion(IRRewriter &rewriter, DispatchRegionOp regionOp,
+                          SmallVector<Value> collapseResults,
+                          linalg::GenericOp newGenericOp) {
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(regionOp->getParentOp());
 
   auto maybeRegionOp = Flow::wrapOpInDispatchRegion(rewriter, newGenericOp);
-  if (failed(maybeRegionOp)) return failure();
+  if (failed(maybeRegionOp))
+    return failure();
 
   // Replace old regionOp with the result of collapse
   rewriter.replaceOp(regionOp, collapseResults);
@@ -183,24 +195,30 @@
                                         DispatchRegionOp &regionOp) {
   // Step 1. Find the root linalg.generic Op with no producer
   std::optional<linalg::GenericOp> genericOp = findRootGenericOp(regionOp);
-  if (!genericOp.has_value()) return success();
+  if (!genericOp.has_value())
+    return success();
 
   // Step 2. Check whether it is possible to collapse
-  if (!isEligibleForCollapse(genericOp.value())) return success();
+  if (!isEligibleForCollapse(genericOp.value()))
+    return success();
   SmallVector<ReassociationIndices> collapseIndices;
   collapseIndices = getCollapsibleLoops(genericOp.value());
-  if (collapseIndices.empty()) return success();
+  if (collapseIndices.empty())
+    return success();
 
   // Step 3. Collapse dimensions
   auto maybeReplacements =
       collapseLinalgGeneric(rewriter, genericOp.value(), collapseIndices);
-  if (failed(maybeReplacements)) return failure();
+  if (failed(maybeReplacements))
+    return failure();
   auto expandshapeOp =
       maybeReplacements->front().getDefiningOp<tensor::ExpandShapeOp>();
-  if (!expandshapeOp) return failure();
+  if (!expandshapeOp)
+    return failure();
   auto newGenericOp =
       expandshapeOp.getOperand().getDefiningOp<linalg::GenericOp>();
-  if (!newGenericOp) return failure();
+  if (!newGenericOp)
+    return failure();
 
   // Step 4. Generate new dispatch region and replace old one users
   if (failed(generateNewDispatchRegion(rewriter, regionOp, *maybeReplacements,
@@ -240,7 +258,7 @@
   return std::make_unique<CollapseDimensionsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseReductionDims.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseReductionDims.cpp
index 7161888..6b8b115 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseReductionDims.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/CollapseReductionDims.cpp
@@ -20,7 +20,8 @@
 /// Check whether the given dimensions are contiguous in the result map.
 /// If non of the dimension are present in the map return true as well.
 static bool hasContiguousDims(AffineMap map, ArrayRef<unsigned> dims) {
-  if (!map.isProjectedPermutation()) return false;
+  if (!map.isProjectedPermutation())
+    return false;
   llvm::SmallDenseSet<unsigned> existingDims(dims.begin(), dims.end());
   for (unsigned i = 0, e = map.getNumResults(); i < e; i++) {
     if (map.getDimPosition(i) != dims[0]) {
@@ -41,15 +42,17 @@
   return true;
 }
 
-static SmallVector<ReassociationIndices> collapseDimensions(
-    linalg::GenericOp genericOp) {
+static SmallVector<ReassociationIndices>
+collapseDimensions(linalg::GenericOp genericOp) {
   SmallVector<ReassociationIndices> collapseIndices;
   SmallVector<unsigned> reductionDims;
   genericOp.getReductionDims(reductionDims);
-  if (reductionDims.size() < 2) return collapseIndices;
+  if (reductionDims.size() < 2)
+    return collapseIndices;
 
   for (AffineMap map : genericOp.getIndexingMapsArray()) {
-    if (!hasContiguousDims(map, reductionDims)) return collapseIndices;
+    if (!hasContiguousDims(map, reductionDims))
+      return collapseIndices;
   }
   ReassociationIndices indices;
   for (unsigned dim : reductionDims) {
@@ -74,13 +77,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createCollapseDimsPass() {
   return std::make_unique<CollapseDimsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Convert1X1FilterConv2DToMatmul.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Convert1X1FilterConv2DToMatmul.cpp
index c1ea7c6..a2eb556 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Convert1X1FilterConv2DToMatmul.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Convert1X1FilterConv2DToMatmul.cpp
@@ -22,7 +22,7 @@
 // Converts linalg.conv_2d_input_nhwc_filter_nhwc op to linalg.matmul
 template <typename Conv2DOpType>
 class Convert1x1FilterConvToMatmul : public OpRewritePattern<Conv2DOpType> {
- public:
+public:
   using OpRewritePattern<Conv2DOpType>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(Conv2DOpType convOp,
@@ -36,7 +36,8 @@
 
     const bool isNCHW = isa<linalg::Conv2DNchwFchwOp>(convOp);
     const bool isNHWC = isa<linalg::Conv2DNhwcHwcfOp>(convOp);
-    if (!isNCHW & !isNHWC) return failure();
+    if (!isNCHW & !isNHWC)
+      return failure();
 
     if (!inputShapeType || !filterShapeType || !outputShapeType)
       return failure();
@@ -60,13 +61,15 @@
 
     // We cannot merge the width and height if they are both dynamic as we
     // cannot expand them back to their dynamic values.
-    if (isInputHWDynamic) return failure();
+    if (isInputHWDynamic)
+      return failure();
 
     if (filterShape[khIndex] != 1 || filterShape[kwIndex] != 1)
       return failure();
 
     // TODO(ataei): Support conversion to linalg.batch_matmul.
-    if (inputShape[0] != 1) return failure();
+    if (inputShape[0] != 1)
+      return failure();
 
     if (!llvm::all_of(convOp.getStrides(), [](APInt element) {
           return element.getSExtValue() == 1;
@@ -175,13 +178,13 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createConvert1X1FilterConv2DToMatmulPass() {
   return std::make_unique<Convert1X1FilterConv2DToMatmulPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp
index ee8b026..09af6f5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp
@@ -51,7 +51,8 @@
     SmallVector<Value> outputDynamicShapes;
     for (auto shape : llvm::zip_equal(reshapeOp.getResultType().getShape(),
                                       outputShape[0])) {
-      if (std::get<0>(shape) != ShapedType::kDynamic) continue;
+      if (std::get<0>(shape) != ShapedType::kDynamic)
+        continue;
       outputDynamicShapes.push_back(std::get<1>(shape));
     }
     rewriter.replaceOpWithNewOp<IREE::Flow::TensorReshapeOp>(
@@ -139,7 +140,7 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::func::FuncOp>>
 createConvertLinalgTensorOpsPass(bool runBeforeDispatchRegionFormation) {
@@ -147,7 +148,7 @@
       runBeforeDispatchRegionFormation);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.cpp
index db91501..601f640 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.cpp
@@ -40,7 +40,8 @@
   }
 
   for (auto dim : llvm::enumerate(tensorType.getShape())) {
-    if (dim.value() != ShapedType::kDynamic) continue;
+    if (dim.value() != ShapedType::kDynamic)
+      continue;
     argumentDims.push_back(
         b.createOrFold<tensor::DimOp>(loc, tensor, dim.index()));
   }
@@ -48,11 +49,13 @@
 
 /// Follow the reverse SSA use-def chain of the given value (always taking the
 /// tied operand) and return the first value outside of `regionOp`.
-static std::optional<Value> findFirstTiedValueOutsideOfRegionOp(
-    Flow::DispatchRegionOp regionOp, Value value) {
+static std::optional<Value>
+findFirstTiedValueOutsideOfRegionOp(Flow::DispatchRegionOp regionOp,
+                                    Value value) {
   // Check if `v` is defined outside of `regionOp`.
   auto isOutside = [&](Value v) {
-    if (llvm::isa<OpResult>(v)) return !regionOp->isAncestor(v.getDefiningOp());
+    if (llvm::isa<OpResult>(v))
+      return !regionOp->isAncestor(v.getDefiningOp());
     assert(v.isa<BlockArgument>() && "expected bbArg");
     // DispatchRegionOp does not have block arguments.
     return true;
@@ -72,7 +75,7 @@
   return value;
 }
 
-}  // namespace
+} // namespace
 
 /// Rewrite the DispatchRegionOp into a DispatchWorkgroupsOp. The
 /// DispatchRegionOp is not isolated from above and may capture any SSA value
@@ -83,7 +86,8 @@
     Flow::DispatchRegionOp regionOp, RewriterBase &rewriter) {
   // Only ops with a single block are supported.
   Region &region = regionOp.getBody();
-  if (!region.hasOneBlock()) return failure();
+  if (!region.hasOneBlock())
+    return failure();
   Block &body = region.front();
   auto terminator = cast<Flow::ReturnOp>(body.getTerminator());
   unsigned numResults = terminator->getNumOperands();
@@ -105,7 +109,8 @@
   SmallVector<Value> argumentDims;
   for (Value tensor : argumentsSet) {
     auto tensorType = llvm::dyn_cast<RankedTensorType>(tensor.getType());
-    if (!tensorType) continue;
+    if (!tensorType)
+      continue;
     appendDynamicDims(rewriter, loc, argumentDims, tensor);
   }
 
@@ -116,11 +121,13 @@
   for (const auto &it : llvm::enumerate(terminator->getOperands())) {
     auto tiedArgument =
         findFirstTiedValueOutsideOfRegionOp(regionOp, it.value());
-    if (!tiedArgument.has_value()) continue;
+    if (!tiedArgument.has_value())
+      continue;
     assert(argumentsSet.contains(*tiedArgument) &&
            "expected that tiedArgument is already an argument");
     // Do not tie an argument to multiple results.
-    if (tiedArgumentsSet.contains(*tiedArgument)) continue;
+    if (tiedArgumentsSet.contains(*tiedArgument))
+      continue;
     tiedArgumentsSet.insert(*tiedArgument);
     tiedArguments[it.index()] = std::distance(
         argumentsSet.begin(), llvm::find(argumentsSet, *tiedArgument));
@@ -166,7 +173,8 @@
   rewriter.setInsertionPointToStart(&newBody);
   for (const auto &it : llvm::enumerate(arguments)) {
     auto tensorType = llvm::dyn_cast<RankedTensorType>(it.value().getType());
-    if (!tensorType) continue;
+    if (!tensorType)
+      continue;
     auto inputBbArg = workgroupsOp.getInputBlockArgument(it.index());
     auto dims =
         Util::findVariadicDynamicDims(it.index(), arguments, argumentDims);
@@ -205,7 +213,7 @@
     auto tensorType = it.value().getType().cast<RankedTensorType>();
     assert(dims.size() == tensorType.getNumDynamicDims() &&
            "mismatching number of dynamic dims");
-#endif  // NDEBUG
+#endif // NDEBUG
     SmallVector<Value> bbArgDims =
         llvm::map_to_vector(dims, [&](Value v) { return bvm.lookup(v); });
     rewriter.create<IREE::Flow::DispatchTensorStoreOp>(loc, it.value(),
@@ -241,13 +249,13 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createConvertRegionToWorkgroupsPass() {
   return std::make_unique<ConvertRegionToWorkgroupsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.h b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.h
index d6586dd..5fe3e82 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertRegionToWorkgroups.h
@@ -44,9 +44,9 @@
 rewriteFlowDispatchRegionToFlowDispatchWorkgroups(DispatchRegionOp regionOp,
                                                   RewriterBase &rewriter);
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_CONVERTREGIONTOWORKGROUPS_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_CONVERTREGIONTOWORKGROUPS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertToFlow.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertToFlow.cpp
index 17d0e71..572ec0c 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertToFlow.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ConvertToFlow.cpp
@@ -40,7 +40,7 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> Flow::createConvertToFlowPass() {
   return std::make_unique<ConvertToFlowPass>();
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DeduplicateExecutables.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DeduplicateExecutables.cpp
index 3fc5b8a..24b8f4a 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DeduplicateExecutables.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DeduplicateExecutables.cpp
@@ -27,7 +27,8 @@
   auto lhsIt = lhs.begin();
   auto rhsIt = rhs.begin();
   while (lhsIt != lhs.end() && rhsIt != rhs.end()) {
-    if (!pred(*lhsIt++, *rhsIt++)) return false;
+    if (!pred(*lhsIt++, *rhsIt++))
+      return false;
   }
   if ((lhsIt == lhs.end()) != (rhsIt == rhs.end())) {
     // Block count mismatch. We do this here so that we avoid the O(n) scan
@@ -79,13 +80,14 @@
             }
             for (auto [lhsArg, rhsArg] : llvm::zip_equal(
                      lhsBlock.getArguments(), rhsBlock.getArguments())) {
-              if (lhsArg.getType() != rhsArg.getType()) return false;
+              if (lhsArg.getType() != rhsArg.getType())
+                return false;
               mapping.map(lhsArg, rhsArg);
             }
             mapping.map(&lhsBlock, &rhsBlock);
             return true;
           })) {
-    return false;  // block mismatch
+    return false; // block mismatch
   }
 
   // Walk the blocks again now that we have a populated mapping.
@@ -101,11 +103,13 @@
     llvm::ReversePostOrderTraversal<Block *> traversal(&b);
     rhsBlocks.insert(traversal.begin(), traversal.end());
   }
-  if (lhsBlocks.size() != rhsBlocks.size()) return false;
+  if (lhsBlocks.size() != rhsBlocks.size())
+    return false;
   for (auto [lhsBlock, rhsBlock] : llvm::zip_equal(lhsBlocks, rhsBlocks)) {
     auto &lhsOperations = lhsBlock->getOperations();
     auto &rhsOperations = rhsBlock->getOperations();
-    if (lhsOperations.size() != rhsOperations.size()) return false;
+    if (lhsOperations.size() != rhsOperations.size())
+      return false;
     for (auto [lhsOp, rhsOp] : llvm::zip_equal(lhsOperations, rhsOperations)) {
       if (!isStructurallyEquivalentTo(lhsOp, rhsOp, mapping)) {
         return false;
@@ -120,11 +124,16 @@
 static bool isStructurallyEquivalentTo(Operation &lhs, Operation &rhs,
                                        IRMapping &parentMapping) {
   // Check operation metadata for early-exit opportunities.
-  if (lhs.getName() != rhs.getName()) return false;
-  if (lhs.getNumOperands() != rhs.getNumOperands()) return false;
-  if (lhs.getNumResults() != rhs.getNumResults()) return false;
-  if (lhs.getNumRegions() != rhs.getNumRegions()) return false;
-  if (lhs.getNumSuccessors() != rhs.getNumSuccessors()) return false;
+  if (lhs.getName() != rhs.getName())
+    return false;
+  if (lhs.getNumOperands() != rhs.getNumOperands())
+    return false;
+  if (lhs.getNumResults() != rhs.getNumResults())
+    return false;
+  if (lhs.getNumRegions() != rhs.getNumRegions())
+    return false;
+  if (lhs.getNumSuccessors() != rhs.getNumSuccessors())
+    return false;
 
   // TODO(#3996): symbol mapping; for now allow them to differ unconditionally.
   if (!compare_ranges(
@@ -143,7 +152,8 @@
   // in the mapping already from the parent region to do the lhs->rhs mapping.
   for (auto [lhsSuccessor, rhsSuccessor] :
        llvm::zip_equal(lhs.getSuccessors(), rhs.getSuccessors())) {
-    if (rhsSuccessor != parentMapping.lookup(lhsSuccessor)) return false;
+    if (rhsSuccessor != parentMapping.lookup(lhsSuccessor))
+      return false;
   }
 
   // Ensure result types match first and add to the block and value mapping.
@@ -152,7 +162,8 @@
   // exit prior to the full traversal.
   for (auto [lhsValue, rhsValue] :
        llvm::zip_equal(lhs.getResults(), rhs.getResults())) {
-    if (lhsValue.getType() != rhsValue.getType()) return false;
+    if (lhsValue.getType() != rhsValue.getType())
+      return false;
     parentMapping.map(lhsValue, rhsValue);
   }
 
@@ -160,8 +171,10 @@
   // these values they should already be defined in the mapping.
   for (auto [lhsValue, rhsValue] :
        llvm::zip_equal(lhs.getOperands(), rhs.getOperands())) {
-    if (lhsValue.getType() != rhsValue.getType()) return false;
-    if (rhsValue != parentMapping.lookup(lhsValue)) return false;
+    if (lhsValue.getType() != rhsValue.getType())
+      return false;
+    if (rhsValue != parentMapping.lookup(lhsValue))
+      return false;
   }
 
   // Recurse into regions.
@@ -197,11 +210,11 @@
   }
 }
 
-}  // namespace
+} // namespace
 
 class DeduplicateExecutablesPass
     : public DeduplicateExecutablesBase<DeduplicateExecutablesPass> {
- public:
+public:
   explicit DeduplicateExecutablesPass() {}
   DeduplicateExecutablesPass(const DeduplicateExecutablesPass &pass) {}
 
@@ -283,7 +296,7 @@
     }
   }
 
- private:
+private:
   Statistic totalExecutables{
       this, "total executable(s)",
       "Number of flow.executable ops before deduplication"};
@@ -300,7 +313,7 @@
   return std::make_unique<DeduplicateExecutablesPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DetachElementwiseFromNamedOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DetachElementwiseFromNamedOps.cpp
index 0ad287e..f615e51 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DetachElementwiseFromNamedOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DetachElementwiseFromNamedOps.cpp
@@ -42,7 +42,8 @@
         !isa<linalg::ConvolutionOpInterface>(*linalgOp)) {
       return failure();
     }
-    if (!linalgOp.hasTensorSemantics()) return failure();
+    if (!linalgOp.hasTensorSemantics())
+      return failure();
 
     // Nothing to do if the output tensor operand is already a fill op.
     OpOperandVector outputOperands;
@@ -51,7 +52,8 @@
     }
     // Right now all the cases we see have one output. This can be relaxed once
     // we see multiple output ops.
-    if (outputOperands.size() != 1) return failure();
+    if (outputOperands.size() != 1)
+      return failure();
     Value outputOperand = outputOperands.front()->get();
 
     auto outsDefiningOp = outputOperand.getDefiningOp<linalg::LinalgOp>();
@@ -60,7 +62,8 @@
       return failure();
     }
     auto outputType = llvm::cast<RankedTensorType>(outputOperand.getType());
-    if (!outputType.getElementType().isIntOrFloat()) return failure();
+    if (!outputType.getElementType().isIntOrFloat())
+      return failure();
     auto elementType = outputType.getElementType();
 
     Location loc = linalgOp.getLoc();
@@ -84,7 +87,8 @@
         linalgOp.getMatchingIndexingMap(outputOperands.front()));
     // Only support identity map for output access for now; this is the case for
     // all existing contraction/convolution ops.
-    if (!outputMap.isIdentity()) return failure();
+    if (!outputMap.isIdentity())
+      return failure();
     SmallVector<AffineMap> maps(3, outputMap);
 
     SmallVector<utils::IteratorType> iterators;
@@ -92,7 +96,8 @@
     for (int i = 0, e = outputMap.getNumResults(); i < e; ++i) {
       int pos = outputMap.getResult(i).cast<AffineDimExpr>().getPosition();
       auto attr = linalgOp.getIteratorTypesArray()[pos];
-      if (!linalg::isParallelIterator(attr)) return failure();
+      if (!linalg::isParallelIterator(attr))
+        return failure();
       iterators.push_back(attr);
     }
 
@@ -141,14 +146,17 @@
          llvm::enumerate(dpsInterfaceOp.getDpsInitOperands())) {
       auto constOp =
           outOperand.value()->get().template getDefiningOp<arith::ConstantOp>();
-      if (!constOp) continue;
+      if (!constOp)
+        continue;
 
       auto resultType =
           llvm::dyn_cast<RankedTensorType>(constOp.getResult().getType());
-      if (!resultType || !resultType.getElementType().isIntOrFloat()) continue;
+      if (!resultType || !resultType.getElementType().isIntOrFloat())
+        continue;
 
       auto attr = llvm::dyn_cast<DenseElementsAttr>(constOp.getValue());
-      if (!attr || !attr.isSplat()) continue;
+      if (!attr || !attr.isSplat())
+        continue;
 
       Location loc = constOp.getLoc();
       Type elementType = resultType.getElementType();
@@ -199,13 +207,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createDetachElementwiseFromNamedOpsPass() {
   return std::make_unique<DetachElementwiseFromNamedOpsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DispatchWithTransformDialect.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DispatchWithTransformDialect.cpp
index 4acc8e3..ee3d3d4 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DispatchWithTransformDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DispatchWithTransformDialect.cpp
@@ -49,7 +49,7 @@
     // clang-format on
   }
 
- public:
+public:
   DispatchWithTransformDialect(StringRef transformFileName,
                                StringRef debugPayloadRootTag = StringRef(),
                                StringRef debugTransformRootTag = StringRef()) {
@@ -64,7 +64,7 @@
     this->debugTransformRootTag = pass.debugTransformRootTag;
   }
 
- private:
+private:
   Statistic numDispatches{this, "number of dispatches",
                           "Number of Flow dispatches created"};
 };
@@ -77,7 +77,7 @@
       transformFileName, debugPayloadRootTag, debugTransformRootTag);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DumpDispatchGraph.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DumpDispatchGraph.cpp
index 2cfb4b8..b741e57 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/DumpDispatchGraph.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/DumpDispatchGraph.cpp
@@ -45,7 +45,8 @@
 static const StringRef kShapeEllipse = "ellipse";
 
 static StringRef getShape(Operation *op) {
-  if (isa<DispatchOp>(op)) return kShapeBox;
+  if (isa<DispatchOp>(op))
+    return kShapeBox;
 
   return kShapeEllipse;
 }
@@ -72,32 +73,32 @@
   return strFromOs([&](raw_ostream &os) {
     for (unsigned char c : str) {
       switch (c) {
-        case '\\':
-          os << '\\' << '\\';
+      case '\\':
+        os << '\\' << '\\';
+        break;
+      case '\t':
+        os << '\\' << 't';
+        break;
+      case '\n':
+        os << '\\' << 'n';
+        break;
+      case '"':
+        os << '\\' << '"';
+        break;
+      case '\r': // translate "carriage return" as "\l"
+        os << '\\' << 'l';
+        break;
+      default:
+        if (llvm::isPrint(c)) {
+          os << c;
           break;
-        case '\t':
-          os << '\\' << 't';
-          break;
-        case '\n':
-          os << '\\' << 'n';
-          break;
-        case '"':
-          os << '\\' << '"';
-          break;
-        case '\r':  // translate "carriage return" as "\l"
-          os << '\\' << 'l';
-          break;
-        default:
-          if (llvm::isPrint(c)) {
-            os << c;
-            break;
-          }
+        }
 
-          // Always use a full 3-character octal escape.
-          os << '\\';
-          os << char('0' + ((c >> 6) & 7));
-          os << char('0' + ((c >> 3) & 7));
-          os << char('0' + ((c >> 0) & 7));
+        // Always use a full 3-character octal escape.
+        os << '\\';
+        os << char('0' + ((c >> 6) & 7));
+        os << char('0' + ((c >> 3) & 7));
+        os << char('0' + ((c >> 0) & 7));
       }
     }
   });
@@ -118,7 +119,7 @@
 /// cluster with `lhead` and `ltail` attributes. Therefore, when creating a new
 /// cluster, an invisible "anchor" node is created.
 struct Node {
- public:
+public:
   Node(int id = 0, std::optional<int> clusterId = std::nullopt)
       : id(id), clusterId(clusterId) {}
 
@@ -131,21 +132,24 @@
 /// about the Graphviz DOT language.
 class DumpDispatchGraphPass
     : public DumpDispatchGraphBase<DumpDispatchGraphPass> {
- public:
+public:
   DumpDispatchGraphPass(raw_ostream &os) : os(os) {}
   DumpDispatchGraphPass(const DumpDispatchGraphPass &o)
       : DumpDispatchGraphPass(o.os.getOStream()) {}
 
   void runOnOperation() override {
     auto modOp = dyn_cast<ModuleOp>(getOperation());
-    if (!modOp) return;
+    if (!modOp)
+      return;
 
     auto funcOps = modOp.getOps<func::FuncOp>();
 
-    if (funcOps.empty()) return;
+    if (funcOps.empty())
+      return;
 
     emitGraph([&]() {
-      for (auto funcOp : funcOps) processOperation(funcOp);
+      for (auto funcOp : funcOps)
+        processOperation(funcOp);
       emitAllEdgeStmts();
     });
   }
@@ -157,11 +161,12 @@
     emitGraph([&]() { processRegion(region); });
   }
 
- private:
+private:
   /// Emit all edges. This function should be called after all nodes have been
   /// emitted.
   void emitAllEdgeStmts() {
-    for (const std::string &edge : edges) os << edge << ";\n";
+    for (const std::string &edge : edges)
+      os << edge << ";\n";
     edges.clear();
   }
 
@@ -335,9 +340,11 @@
   }
 
   void annotateOperation(raw_ostream &os, Operation *op, AsmState &state) {
-    if (isa<arith::ConstantOp>(op)) return;
+    if (isa<arith::ConstantOp>(op))
+      return;
 
-    if (isa<func::ReturnOp>(op)) return;
+    if (isa<func::ReturnOp>(op))
+      return;
 
     if (auto load = dyn_cast<DispatchTensorLoadOp>(op)) {
       printDispatchTensorLoad(os, load, state);
@@ -373,7 +380,8 @@
     auto entryPoint = dispatchOp.getEntryPoint();
     auto executableOp = cast<ExecutableOp>(SymbolTable::lookupNearestSymbolFrom(
         dispatchOp, entryPoint.getRootReference()));
-    if (!executableOp) return;
+    if (!executableOp)
+      return;
 
     auto calleeNameAttr = entryPoint.getLeafReference();
     auto innerModule = executableOp.getInnerModule();
@@ -381,7 +389,8 @@
     auto funcIt = llvm::find_if(funcOps, [&](func::FuncOp op) {
       return op.getNameAttr() == calleeNameAttr;
     });
-    if (funcIt == funcOps.end()) return;
+    if (funcIt == funcOps.end())
+      return;
 
     auto callee = *funcIt;
 
@@ -447,7 +456,7 @@
           if (rootName == leafName) {
             os << leafName;
           } else {
-            os << entryPoint;  // print the full name
+            os << entryPoint; // print the full name
           }
 
           // print entry function args
@@ -499,7 +508,8 @@
 
   bool isScalarConstantOp(Operation *op) {
     if (auto constOp = dyn_cast<mlir::arith::ConstantOp>(op))
-      if (constOp.getResult().getType().isIntOrIndexOrFloat()) return true;
+      if (constOp.getResult().getType().isIntOrIndexOrFloat())
+        return true;
 
     return false;
   }
@@ -521,7 +531,8 @@
       // Emit cluster for op with regions.
       node = emitClusterStmt(
           [&]() {
-            for (Region &region : op->getRegions()) processRegion(region);
+            for (Region &region : op->getRegions())
+              processRegion(region);
           },
           getLabel(op));
     } else {
@@ -543,19 +554,22 @@
       }
     }
 
-    for (Value result : op->getResults()) valueToNode[result] = node;
+    for (Value result : op->getResults())
+      valueToNode[result] = node;
 
     return node;
   }
 
   /// Process a region.
   void processRegion(Region &region) {
-    for (Block &block : region.getBlocks()) processBlock(block);
+    for (Block &block : region.getBlocks())
+      processBlock(block);
   }
 
   /// Truncate long strings.
   std::string truncateString(std::string str) {
-    if (str.length() <= maxLabelLen) return str;
+    if (str.length() <= maxLabelLen)
+      return str;
     return str.substr(0, maxLabelLen) + "...";
   }
 
@@ -570,13 +584,13 @@
   int counter = 0;
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createDumpDispatchGraphPass(raw_ostream &os) {
   return std::make_unique<DumpDispatchGraphPass>(os);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/EraseUnusedLinalgOperands.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/EraseUnusedLinalgOperands.cpp
index 43dd5ce..55ad1da 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/EraseUnusedLinalgOperands.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/EraseUnusedLinalgOperands.cpp
@@ -30,7 +30,7 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 Flow::createEraseUnusedLinalgOperands() {
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExpandTensorShapes.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExpandTensorShapes.cpp
index aa3f9fa..2a92331 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExpandTensorShapes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExpandTensorShapes.cpp
@@ -113,7 +113,8 @@
 // Expands tensors in the given |types| list to (tensor, dynamic dims...).
 // This could be changed to some iterator magic to avoid the alloc.
 static SmallVector<Type> expandTypes(TypeRange types) {
-  if (types.empty()) return {};
+  if (types.empty())
+    return {};
   auto indexType = IndexType::get(types.front().getContext());
   SmallVector<Type> newTypes;
   newTypes.reserve(types.size() * 2);
@@ -191,19 +192,22 @@
 // given |region|. All branches, ops, and nested regions will be processed.
 static void expandRegion(Region &region, ExpandedGlobalMap &globalMap,
                          IndexSet &indexSet, TensorDimMap tensorDimMap) {
-  if (region.empty()) return;
+  if (region.empty())
+    return;
 
   // Update all block arguments.
   auto indexType = IndexType::get(region.getContext());
   for (auto &block : region.getBlocks()) {
-    if (!llvm::any_of(block.getArgumentTypes(), isDynamicTensor)) continue;
+    if (!llvm::any_of(block.getArgumentTypes(), isDynamicTensor))
+      continue;
 
     // Insert and build a list of expanded (tensor, dynamic dims...) tuples.
     SmallVector<ExpandedValue> expansions;
     for (int i = block.getNumArguments() - 1; i >= 0; --i) {
       auto arg = block.getArgument(i);
       auto tensorType = llvm::dyn_cast<RankedTensorType>(arg.getType());
-      if (!tensorType || tensorType.hasStaticShape()) continue;
+      if (!tensorType || tensorType.hasStaticShape())
+        continue;
       ExpandedValue expandedValue;
       expandedValue.tensor = arg;
       for (unsigned j = 0; j < tensorType.getNumDynamicDims(); ++j) {
@@ -255,7 +259,8 @@
 static void expandGlobalLoadOp(IREE::Util::GlobalLoadOp op,
                                ExpandedGlobalMap &globalMap, IndexSet &indexSet,
                                TensorDimMap &tensorDimMap) {
-  if (!usesDynamicTensors(op)) return;
+  if (!usesDynamicTensors(op))
+    return;
   OpBuilder builder(op);
   builder.setInsertionPointAfter(op);
   auto indexType = builder.getIndexType();
@@ -290,7 +295,8 @@
                                 ExpandedGlobalMap &globalMap,
                                 IndexSet &indexSet,
                                 TensorDimMap &tensorDimMap) {
-  if (!usesDynamicTensors(op)) return;
+  if (!usesDynamicTensors(op))
+    return;
   OpBuilder builder(op);
   builder.setInsertionPointAfter(op);
   auto expandedValue = consumeExpandedValue(op.getLoc(), op.getValue(),
@@ -344,7 +350,8 @@
 //  %2 = flow.tensor.tie_shape %r : tensor<?xf32>{%rd}
 static void expandCallOp(mlir::func::CallOp op, IndexSet &indexSet,
                          TensorDimMap &tensorDimMap) {
-  if (!usesDynamicTensors(op)) return;
+  if (!usesDynamicTensors(op))
+    return;
 
   // Build the new call op with expanded operands and results.
   OpBuilder builder(op);
@@ -392,7 +399,8 @@
 //  return %0, %d
 static void expandReturnOp(mlir::func::ReturnOp op, IndexSet &indexSet,
                            TensorDimMap &tensorDimMap) {
-  if (!usesDynamicTensors(op)) return;
+  if (!usesDynamicTensors(op))
+    return;
   OpBuilder builder(op);
   auto operands = expandOperands(op.getLoc(), op.getOperands(), tensorDimMap,
                                  indexSet, builder);
@@ -422,7 +430,8 @@
 
 static void expandCondBranchOp(mlir::cf::CondBranchOp op, IndexSet &indexSet,
                                TensorDimMap &tensorDimMap) {
-  if (!usesDynamicTensors(op)) return;
+  if (!usesDynamicTensors(op))
+    return;
   OpBuilder builder(op);
   builder.create<mlir::cf::CondBranchOp>(
       op.getLoc(), op.getCondition(), op.getTrueDest(),
@@ -446,7 +455,8 @@
 //   %4 = flow.tensor.tie_shape %2 : tensor<?xf32>{%3}
 static void expandSelectOp(mlir::arith::SelectOp op, IndexSet &indexSet,
                            TensorDimMap &tensorDimMap) {
-  if (!usesDynamicTensors(op)) return;
+  if (!usesDynamicTensors(op))
+    return;
   OpBuilder builder(op);
 
   auto trueValue = consumeExpandedValue(op.getLoc(), op.getTrueValue(),
@@ -512,7 +522,7 @@
 // elision/deduplication/etc left until cleanup.
 class ExpandTensorShapesPass
     : public ExpandTensorShapesBase<ExpandTensorShapesPass> {
- public:
+public:
   ExpandTensorShapesPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -546,13 +556,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createExpandTensorShapesPass() {
   return std::make_unique<ExpandTensorShapesPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
index 5815947..2eef00c 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
@@ -32,9 +32,10 @@
 // Example:
 //  util.global @some_fn_arg0 = 4 : i32
 //  util.global @some_fn_arg0 = dense<4> : tensor<4xi32>
-static IREE::Util::GlobalOp createPrimitiveDefaultGlobalOp(
-    std::string name, Location loc, Type type, SymbolTable& symbolTable,
-    OpBuilder& moduleBuilder) {
+static IREE::Util::GlobalOp
+createPrimitiveDefaultGlobalOp(std::string name, Location loc, Type type,
+                               SymbolTable &symbolTable,
+                               OpBuilder &moduleBuilder) {
   // Get a zero-initialized constant attribute for the type, if supported.
   auto initialValue = moduleBuilder.getZeroAttr(type);
   if (!initialValue) {
@@ -54,9 +55,10 @@
 
 // Creates a util.global of the given |globalType| and initializes a buffer or
 // buffer view as a zeroed |tensorType|.
-static IREE::Util::GlobalOp createBufferLikeGlobalOp(
-    std::string name, Location loc, Type globalType, TensorType tensorType,
-    SymbolTable& symbolTable, OpBuilder& moduleBuilder) {
+static IREE::Util::GlobalOp
+createBufferLikeGlobalOp(std::string name, Location loc, Type globalType,
+                         TensorType tensorType, SymbolTable &symbolTable,
+                         OpBuilder &moduleBuilder) {
   // Create !hal.buffer global for the storage buffer or buffer view.
   auto globalOp = moduleBuilder.create<IREE::Util::GlobalOp>(
       loc, name,
@@ -99,14 +101,15 @@
 // ->
 //  util.global @some_fn_arg0 : !hal.buffer_view
 //  util.initializer { ... }
-static IREE::Util::GlobalOp createImportBufferViewGlobalOp(
-    std::string name, BlockArgument arg, SymbolTable& symbolTable,
-    OpBuilder& moduleBuilder, Explorer& explorer) {
+static IREE::Util::GlobalOp
+createImportBufferViewGlobalOp(std::string name, BlockArgument arg,
+                               SymbolTable &symbolTable,
+                               OpBuilder &moduleBuilder, Explorer &explorer) {
   auto loc = arg.getLoc();
 
   // Find a hal.tensor.import user.
   IREE::HAL::TensorImportOp importOp;
-  if (explorer.walkTransitiveUsers(arg, [&](Operation* op) -> WalkResult {
+  if (explorer.walkTransitiveUsers(arg, [&](Operation *op) -> WalkResult {
         importOp = dyn_cast<IREE::HAL::TensorImportOp>(op);
         return importOp ? WalkResult::interrupt() : WalkResult::advance();
       }) == TraversalResult::INCOMPLETE) {
@@ -140,14 +143,14 @@
 //  util.initializer { ... }
 static IREE::Util::GlobalOp createExportBufferGlobalOp(std::string name,
                                                        BlockArgument arg,
-                                                       SymbolTable& symbolTable,
-                                                       OpBuilder& moduleBuilder,
-                                                       Explorer& explorer) {
+                                                       SymbolTable &symbolTable,
+                                                       OpBuilder &moduleBuilder,
+                                                       Explorer &explorer) {
   auto loc = arg.getLoc();
 
   // Find a hal.tensor.export user.
   IREE::HAL::TensorExportOp exportOp;
-  if (explorer.walkTransitiveUsers(arg, [&](Operation* op) -> WalkResult {
+  if (explorer.walkTransitiveUsers(arg, [&](Operation *op) -> WalkResult {
         exportOp = dyn_cast<IREE::HAL::TensorExportOp>(op);
         return exportOp ? WalkResult::interrupt() : WalkResult::advance();
       }) == TraversalResult::INCOMPLETE) {
@@ -170,11 +173,11 @@
                                   symbolTable, moduleBuilder);
 }
 
-static IREE::Util::GlobalOp createDummyInput(const std::string& namePrefix,
+static IREE::Util::GlobalOp createDummyInput(const std::string &namePrefix,
                                              BlockArgument arg,
-                                             SymbolTable& symbolTable,
-                                             OpBuilder& moduleBuilder,
-                                             Explorer& explorer) {
+                                             SymbolTable &symbolTable,
+                                             OpBuilder &moduleBuilder,
+                                             Explorer &explorer) {
   std::string name = namePrefix + "_arg" + std::to_string(arg.getArgNumber());
   return TypeSwitch<Type, IREE::Util::GlobalOp>(arg.getType())
       .Case([&](IREE::HAL::BufferViewType type) {
@@ -191,9 +194,10 @@
       });
 }
 
-static LogicalResult createEntryPointBenchmarkFunc(
-    mlir::ModuleOp moduleOp, mlir::func::FuncOp entryFuncOp,
-    Explorer& explorer) {
+static LogicalResult
+createEntryPointBenchmarkFunc(mlir::ModuleOp moduleOp,
+                              mlir::func::FuncOp entryFuncOp,
+                              Explorer &explorer) {
   auto symbolTable = explorer.getSymbolTables().getSymbolTable(moduleOp);
   OpBuilder moduleBuilder(moduleOp.getContext());
   moduleBuilder.setInsertionPointAfter(entryFuncOp);
@@ -207,7 +211,8 @@
   for (auto arg : entryFuncOp.getArguments()) {
     auto dummyVar =
         createDummyInput(funcName, arg, symbolTable, moduleBuilder, explorer);
-    if (!dummyVar) return failure();
+    if (!dummyVar)
+      return failure();
     dummyInputVariableOps.push_back(dummyVar);
   }
 
@@ -223,7 +228,7 @@
   };
   funcOp->setAttr("iree.reflection",
                   moduleBuilder.getDictionaryAttr(reflectionAttrs));
-  Block* block = funcOp.addEntryBlock();
+  Block *block = funcOp.addEntryBlock();
 
   // Call the existing function with dummy arguments.
   auto blockBuilder = OpBuilder::atBlockBegin(block);
@@ -255,8 +260,8 @@
 // The input are provided using util.globals.
 class ExportBenchmarkFuncsPass
     : public ExportBenchmarkFuncsBase<ExportBenchmarkFuncsPass> {
- public:
-  void getDependentDialects(DialectRegistry& registry) const override {
+public:
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<arith::ArithDialect, IREE::Flow::FlowDialect,
                     IREE::HAL::HALDialect, IREE::Util::UtilDialect>();
   }
@@ -291,7 +296,7 @@
   return std::make_unique<ExportBenchmarkFuncsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.cpp
index ef439d8..aa0a30b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.cpp
@@ -56,17 +56,20 @@
 }
 SmallVector<tensor::DimOp> TensorDimTrackingRewriter::getTensorDimOps() {
   SmallVector<tensor::DimOp> result;
-  for (Operation *op : dimOps) result.push_back(cast<tensor::DimOp>(op));
+  for (Operation *op : dimOps)
+    result.push_back(cast<tensor::DimOp>(op));
   return result;
 }
 void TensorDimTrackingRewriter::notifyOperationRemoved(Operation *op) {
   IRRewriter::Listener::notifyOperationRemoved(op);
-  if (isa<tensor::DimOp>(op)) dimOps.erase(op);
+  if (isa<tensor::DimOp>(op))
+    dimOps.erase(op);
 }
 
 void TensorDimTrackingRewriter::notifyOperationInserted(Operation *op) {
   IRRewriter::Listener::notifyOperationInserted(op);
-  if (isa<tensor::DimOp>(op)) dimOps.insert(op);
+  if (isa<tensor::DimOp>(op))
+    dimOps.insert(op);
 }
 
 namespace iree_compiler {
@@ -78,11 +81,13 @@
   for (tensor::DimOp dimOp : dimOps) {
     // Only DimOps with static indices are supported.
     std::optional<int64_t> idx = dimOp.getConstantIndex();
-    if (!idx.has_value()) continue;
+    if (!idx.has_value())
+      continue;
     // Only DimOps with ranked tensors are supported.
     auto tensorType =
         llvm::dyn_cast<RankedTensorType>(dimOp.getSource().getType());
-    if (!tensorType) continue;
+    if (!tensorType)
+      continue;
 
     if (!tensorType.isDynamicDim(*idx)) {
       // Rewrite static dimension with constant.
@@ -100,7 +105,8 @@
       return failure();
     unsigned ctr = 0;
     for (int64_t i = 0; i < *dimOp.getConstantIndex(); ++i)
-      if (tensorType.isDynamicDim(i)) ++ctr;
+      if (tensorType.isDynamicDim(i))
+        ++ctr;
     rewriter.replaceOp(dimOp, dynamicDims[ctr]);
   }
 
@@ -210,7 +216,8 @@
 }
 static RankedTensorType getSourceTypeOfPackLikeOp(Operation *op) {
   Value source = getSourceOfPackLikeOp(op);
-  if (!source) return nullptr;
+  if (!source)
+    return nullptr;
   return llvm::cast<RankedTensorType>(source.getType());
 }
 
@@ -254,7 +261,8 @@
       interfaceOp.getLoopIteratorTypes();
   llvm::SmallBitVector parallelLoops(loopIteratorTypes.size());
   for (auto iteratorType : llvm::enumerate(loopIteratorTypes)) {
-    if (iteratorType.value() != utils::IteratorType::parallel) break;
+    if (iteratorType.value() != utils::IteratorType::parallel)
+      break;
     parallelLoops.set(iteratorType.index());
   }
   return parallelLoops;
@@ -264,8 +272,10 @@
 /// drop the result exprs that are constant zeros, the `map` will become an
 /// identity.
 static bool isIdentityMapWithZeros(AffineMap map) {
-  if (map.getNumSymbols() != 0) return false;
-  if (map.isEmpty()) return false;
+  if (map.getNumSymbols() != 0)
+    return false;
+  if (map.isEmpty())
+    return false;
   unsigned dimsSeen = 0;
   for (auto result : map.getResults()) {
     bool isValidExpr = TypeSwitch<AffineExpr, bool>(result)
@@ -279,14 +289,15 @@
                              return constExpr.getValue() == 0;
                            })
                            .Default([](AffineExpr) { return false; });
-    if (!isValidExpr) return false;
+    if (!isValidExpr)
+      return false;
   }
   return dimsSeen == map.getNumDims();
 }
 
-static bool matchIteratorTypes(
-    const llvm::SmallBitVector &rootOuterParallelLoop,
-    const llvm::SmallBitVector &candidateOuterParallelLoop) {
+static bool
+matchIteratorTypes(const llvm::SmallBitVector &rootOuterParallelLoop,
+                   const llvm::SmallBitVector &candidateOuterParallelLoop) {
   // If the candidate is not all parallel, then its loop configuration should be
   // the same as the root.
   if (candidateOuterParallelLoop.size() != candidateOuterParallelLoop.count()) {
@@ -298,7 +309,8 @@
   for (int pos : llvm::seq<int>(0, rootOuterParallelLoop.size())) {
     // If we reach the end of the outer loops of the root, break out of the
     // loop.
-    if (!rootOuterParallelLoop.test(pos)) break;
+    if (!rootOuterParallelLoop.test(pos))
+      break;
     // If the root loop is parallel, the candidate loop should also be parallel.
     if (pos >= candidateOuterParallelLoop.size() ||
         !candidateOuterParallelLoop.test(pos))
@@ -313,7 +325,8 @@
     OpOperand &operand, const llvm::SmallBitVector &rootOuterParallelLoops) {
   auto producer = operand.get().getDefiningOp<linalg::LinalgOp>();
   auto consumer = dyn_cast<linalg::LinalgOp>(operand.getOwner());
-  if (!producer || !consumer) return false;
+  if (!producer || !consumer)
+    return false;
 
   llvm::SmallBitVector producerParallelLoops =
       getOuterParallelLoops(cast<TilingInterface>(producer.getOperation()));
@@ -350,8 +363,9 @@
 }
 
 /// For all uses of an operation, finds the use that dominates all other uses.
-static std::optional<OpOperand *> getFusableUse(
-    Operation *op, DominanceInfo const &dominanceInfo, bool fuseMultiUse) {
+static std::optional<OpOperand *>
+getFusableUse(Operation *op, DominanceInfo const &dominanceInfo,
+              bool fuseMultiUse) {
   if (!fuseMultiUse && llvm::count_if(op->getUses(), [](OpOperand &use) {
                          return !isa<tensor::DimOp>(use.getOwner());
                        }) != 1) {
@@ -361,14 +375,16 @@
   // Collect non-dim users.
   SmallVector<Operation *> nonDimUsers;
   for (Operation *user : op->getUsers()) {
-    if (isa<tensor::DimOp>(user)) continue;
+    if (isa<tensor::DimOp>(user))
+      continue;
     nonDimUsers.push_back(user);
   }
 
   // Find the use in a non-dim user that dominates all other non-dim users.
   for (auto &use : op->getUses()) {
     Operation *user = use.getOwner();
-    if (isa<tensor::DimOp>(user)) continue;
+    if (isa<tensor::DimOp>(user))
+      continue;
     if (llvm::all_of(nonDimUsers, [&](Operation *c) {
           return dominanceInfo.dominates(user, c);
         })) {
@@ -384,7 +400,8 @@
   // Collect all the uses from producer to consumer.
   SmallVector<OpOperand *> allUses;
   for (OpOperand &producerUse : producer->getUses()) {
-    if (producerUse.getOwner() != consumer) continue;
+    if (producerUse.getOwner() != consumer)
+      continue;
     allUses.push_back(&producerUse);
   }
 
@@ -409,7 +426,8 @@
 
   // Check that the owner is a `generic` op.
   auto genericOp = dyn_cast<linalg::GenericOp>(inOperand->getOwner());
-  if (!genericOp) return false;
+  if (!genericOp)
+    return false;
 
   // All loops to be parallel.
   if (genericOp.getNumLoops() != genericOp.getNumParallelLoops()) {
@@ -417,11 +435,13 @@
   }
 
   /// The input operand cannot be an init operand already.
-  if (genericOp.isDpsInit(inOperand)) return false;
+  if (genericOp.isDpsInit(inOperand))
+    return false;
 
   // If the init operand value is used it cannot be reused for the input
   // operand.
-  if (genericOp.payloadUsesValueFromOperand(initOperand)) return false;
+  if (genericOp.payloadUsesValueFromOperand(initOperand))
+    return false;
 
   // Indexing map used to access the input and init have to match.
   if (genericOp.getMatchingIndexingMap(inOperand) !=
@@ -431,16 +451,18 @@
 
   // Types have to match for the input operand to reuse the buffer from the init
   // operand
-  if (inOperand->get().getType() != initOperand->get().getType()) return false;
+  if (inOperand->get().getType() != initOperand->get().getType())
+    return false;
 
   return true;
 }
 
 /// Returns true if this is a fusable use, while fusing a root with its
 /// consumer.
-static bool isFusableWithConsumer(
-    OpOperand &fusedOperand, const llvm::SmallBitVector &rootOuterParallelLoops,
-    FormDispatchRegionsOptions const &options) {
+static bool
+isFusableWithConsumer(OpOperand &fusedOperand,
+                      const llvm::SmallBitVector &rootOuterParallelLoops,
+                      FormDispatchRegionsOptions const &options) {
   Operation *producer = fusedOperand.get().getDefiningOp();
   Operation *consumer = fusedOperand.getOwner();
 
@@ -485,7 +507,8 @@
 
   auto producerLinalgOp = dyn_cast<linalg::LinalgOp>(producer);
   auto consumerLinalgOp = dyn_cast<linalg::LinalgOp>(consumer);
-  if (!producerLinalgOp || !consumerLinalgOp) return false;
+  if (!producerLinalgOp || !consumerLinalgOp)
+    return false;
 
   // Check that the consumer is all parallel.
   if (consumerLinalgOp.getNumLoops() !=
@@ -501,7 +524,8 @@
   // result of the dispatch. To avoid a stack allocation we have to ensure that
   // all operations can bufferize without needing additional memory.
   for (OpOperand *inputOperand : consumerLinalgOp.getDpsInputOperands()) {
-    if (inputOperand->get().getDefiningOp() != producer) continue;
+    if (inputOperand->get().getDefiningOp() != producer)
+      continue;
     if (isa<linalg::ConvolutionOpInterface>(producer) &&
         !llvm::any_of(
             consumerLinalgOp.getDpsInitOperands(), [&](OpOperand *initOperand) {
@@ -550,7 +574,8 @@
 
       std::optional<OpOperand *> fusableUse = getFusableUse(
           currRoot, dominanceInfo, /*fuseMultiUse=*/options.fuseMultiUse);
-      if (!fusableUse) continue;
+      if (!fusableUse)
+        continue;
 
       // Analyse the use to see if it is fusable.
       Operation *consumerOp = fusableUse.value()->getOwner();
@@ -569,9 +594,10 @@
 }
 
 /// Method to check if the consumer of a use can be fused with its producer.
-static bool isFusableWithProducer(
-    OpOperand &operand, const llvm::SmallBitVector &rootOuterParallelLoops,
-    FormDispatchRegionsOptions const &options) {
+static bool
+isFusableWithProducer(OpOperand &operand,
+                      const llvm::SmallBitVector &rootOuterParallelLoops,
+                      FormDispatchRegionsOptions const &options) {
   Operation *producer = operand.get().getDefiningOp();
   Operation *consumer = operand.getOwner();
 
@@ -634,7 +660,8 @@
     Operation *candidate = worklist.pop_back_val();
     for (OpOperand &operand : candidate->getOpOperands()) {
       Operation *producer = operand.get().getDefiningOp();
-      if (!producer) continue;
+      if (!producer)
+        continue;
       if (isClonableIntoDispatchOp(producer) ||
           hasFusionGroupsAttribute(producer) || hasRootOpAttribute(producer)) {
         continue;
@@ -642,7 +669,8 @@
 
       std::optional<OpOperand *> fusableUse = getFusableUse(
           producer, dominanceInfo, /*fuseMultiUse=*/options.fuseMultiUse);
-      if (!fusableUse || fusableUse.value()->getOwner() != candidate) continue;
+      if (!fusableUse || fusableUse.value()->getOwner() != candidate)
+        continue;
 
       if (!isFusableWithProducer(operand, rootOuterParallelLoops, options)) {
         continue;
@@ -662,9 +690,10 @@
 /// be marked to fuse with multiple root operations (i.e. replicated). For now a
 /// very simple heuristic is used below, but the mechanism should be general
 /// enough to capture any heuristic.
-static unsigned decideFusableLinalgOps(
-    FunctionOpInterface funcOp, DominanceInfo const &dominanceInfo,
-    FormDispatchRegionsOptions const &options) {
+static unsigned
+decideFusableLinalgOps(FunctionOpInterface funcOp,
+                       DominanceInfo const &dominanceInfo,
+                       FormDispatchRegionsOptions const &options) {
   unsigned numRootOps = 0;
   MLIRContext *context = funcOp->getContext();
   OpBuilder builder(context);
@@ -677,7 +706,8 @@
     SmallVector<Operation *> roots;
     for (Operation &op : llvm::reverse(block)) {
       // Start with a root operation and fuse its producers.
-      if (hasFusionGroupsAttribute(&op) || !isRootOp(&op)) continue;
+      if (hasFusionGroupsAttribute(&op) || !isRootOp(&op))
+        continue;
       unsigned newGroup = numRootOps++;
       setRootAttribute(context, &op, newGroup);
 
@@ -694,7 +724,8 @@
     SmallVector<Operation *> roots;
     for (Operation &op : llvm::reverse(block)) {
       // If it is part of a fusion group or root op, ignore it.
-      if (hasFusionGroupsAttribute(&op) || hasRootOpAttribute(&op)) continue;
+      if (hasFusionGroupsAttribute(&op) || hasRootOpAttribute(&op))
+        continue;
       // Only look for Linalg ops here. Avoid moving `linalg.fill` that aren't
       // fused with anything else into their own dispatches since it is better
       // to convert them to splats.
@@ -722,10 +753,11 @@
 //===----------------------------------------------------------------------===//
 
 /// Create Flow::DispatchGroupsOps based on a fusion heuristic.
-static LogicalResult createFusionGroups(
-    TensorDimTrackingRewriter &rewriter, FunctionOpInterface funcOp,
-    DominanceInfo const &dominanceInfo,
-    FormDispatchRegionsOptions const &options) {
+static LogicalResult
+createFusionGroups(TensorDimTrackingRewriter &rewriter,
+                   FunctionOpInterface funcOp,
+                   DominanceInfo const &dominanceInfo,
+                   FormDispatchRegionsOptions const &options) {
   // Step 1: Decide fusion groups (heuristic). This marks rootOps with an
   // attribute
   unsigned numRoots = decideFusableLinalgOps(funcOp, dominanceInfo, options);
@@ -767,7 +799,8 @@
     // Create fusion group.
     Flow::DispatchRegionOp regionOp;
     auto maybeRegionOp = Flow::wrapOpInDispatchRegion(rewriter, it.value());
-    if (failed(maybeRegionOp)) return failure();
+    if (failed(maybeRegionOp))
+      return failure();
     regionOp = *maybeRegionOp;
 
     // Sort producers topologically. All producers must be in the same block
@@ -789,7 +822,8 @@
 
       auto newRegionOp =
           movePrecedingOpsIntoDispatchRegion(rewriter, producer, regionOp);
-      if (failed(newRegionOp)) return failure();
+      if (failed(newRegionOp))
+        return failure();
       regionOp = *newRegionOp;
     }
     // Simplify tensor::DimOps.
@@ -843,7 +877,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 /// Create dispatch.region Ops based on a fusion heuristic.
 void FormDispatchRegionsPass::runOnOperation() {
@@ -863,7 +897,7 @@
 createFormDispatchRegionsPass(FormDispatchRegionsOptions options) {
   return std::make_unique<FormDispatchRegionsPass>(options);
 }
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.h b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.h
index b8d917e..1d78ec6 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchRegions.h
@@ -15,17 +15,17 @@
 class Operation;
 /// A rewriter that keeps track of all tensor::DimOps.
 class TensorDimTrackingRewriter : public IRRewriter, IRRewriter::Listener {
- public:
+public:
   /// Create a new rewriter: Scan the given op for tensor::DimOps.
   TensorDimTrackingRewriter(Operation *op);
   /// Return all tracked tensor::DimOps.
   SmallVector<tensor::DimOp> getTensorDimOps();
 
- protected:
+protected:
   void notifyOperationRemoved(Operation *op) override;
   void notifyOperationInserted(Operation *op) override;
 
- private:
+private:
   SmallPtrSet<Operation *, 16> dimOps;
 };
 
@@ -46,9 +46,9 @@
 ///   value.
 LogicalResult simplifyDimOps(RewriterBase &rewriter,
                              const SmallVector<tensor::DimOp> &dimOps);
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_FORMDISPATCHREGIONS_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_FORMDISPATCHREGIONS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchWorkgroups.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchWorkgroups.cpp
index 1b92091..5a6396f 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchWorkgroups.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormDispatchWorkgroups.cpp
@@ -55,7 +55,8 @@
   for (auto regionOp : regionOps) {
     auto maybeWorkgroupOp =
         rewriteFlowDispatchRegionToFlowDispatchWorkgroups(regionOp, rewriter);
-    if (failed(maybeWorkgroupOp)) return failure();
+    if (failed(maybeWorkgroupOp))
+      return failure();
     result.push_back(*maybeWorkgroupOp);
   }
   return result;
@@ -70,8 +71,8 @@
 
 /// Wrap a single op in a DispatchWorkgroupsOp. When generateWorkloadRegion is
 /// true, `workload_count` region is generated for dispatch.region
-static FailureOr<Flow::DispatchWorkgroupsOp> wrapInWorkgroupsOp(
-    mlir::TensorDimTrackingRewriter &rewriter, Operation *op) {
+static FailureOr<Flow::DispatchWorkgroupsOp>
+wrapInWorkgroupsOp(mlir::TensorDimTrackingRewriter &rewriter, Operation *op) {
   // Simplify tensor::DimOps.
   SmallVector<tensor::DimOp> dimOps = rewriter.getTensorDimOps();
   if (failed(iree_compiler::IREE::Flow::simplifyDimOps(
@@ -80,22 +81,26 @@
 
   // Wrap operation.
   auto regionOp = Flow::wrapOpInDispatchRegion(rewriter, op);
-  if (failed(regionOp)) return failure();
-  if (failed(cloneProducersToRegion(rewriter, *regionOp))) return failure();
+  if (failed(regionOp))
+    return failure();
+  if (failed(cloneProducersToRegion(rewriter, *regionOp)))
+    return failure();
   auto workgroupsOp = Flow::rewriteFlowDispatchRegionToFlowDispatchWorkgroups(
       *regionOp, rewriter);
-  if (failed(workgroupsOp)) return failure();
+  if (failed(workgroupsOp))
+    return failure();
   return *workgroupsOp;
 }
 
 /// Wrap all given ops in a DispatchWorkgroupsOp.
-static FailureOr<SmallVector<Flow::DispatchWorkgroupsOp>> wrapInWorkgroupsOp(
-    mlir::TensorDimTrackingRewriter &rewriter,
-    SmallVector<Operation *> rootOps) {
+static FailureOr<SmallVector<Flow::DispatchWorkgroupsOp>>
+wrapInWorkgroupsOp(mlir::TensorDimTrackingRewriter &rewriter,
+                   SmallVector<Operation *> rootOps) {
   SmallVector<Flow::DispatchWorkgroupsOp> result;
   for (Operation *rootOp : rootOps) {
     auto workgroupsOp = wrapInWorkgroupsOp(rewriter, rootOp);
-    if (failed(workgroupsOp)) return failure();
+    if (failed(workgroupsOp))
+      return failure();
     result.push_back(*workgroupsOp);
   }
   return result;
@@ -104,14 +109,15 @@
 /// Wrap all ops of the given types that are direct children of the given op
 /// in DispatchWorkgroupsOps.
 template <typename... OpTys>
-static FailureOr<SmallVector<Flow::DispatchWorkgroupsOp>> wrapInWorkgroupsOp(
-    mlir::TensorDimTrackingRewriter &rewriter, Operation *op) {
+static FailureOr<SmallVector<Flow::DispatchWorkgroupsOp>>
+wrapInWorkgroupsOp(mlir::TensorDimTrackingRewriter &rewriter, Operation *op) {
   // Find ops of type OpTys.
   SmallVector<Operation *> rootOps;
   for (Region &r : op->getRegions())
     for (Block &b : r.getBlocks())
       for (Operation &op : b)
-        if (isa<OpTys...>(&op)) rootOps.push_back(&op);
+        if (isa<OpTys...>(&op))
+          rootOps.push_back(&op);
 
   // Wrap ops in DispatchWorkgroupsOps.
   return wrapInWorkgroupsOp(rewriter, rootOps);
@@ -119,13 +125,15 @@
 
 /// Rewrite top-level InsertSliceOps to FlowUpdateOps or wrap them in a
 /// dispatch region.
-LogicalResult convertInsertSliceOps(
-    mlir::TensorDimTrackingRewriter &rewriter, mlir::FunctionOpInterface funcOp,
-    SmallVector<Flow::DispatchWorkgroupsOp> &workgroupsOps) {
+LogicalResult
+convertInsertSliceOps(mlir::TensorDimTrackingRewriter &rewriter,
+                      mlir::FunctionOpInterface funcOp,
+                      SmallVector<Flow::DispatchWorkgroupsOp> &workgroupsOps) {
   // Find eligible InsertSliceOps.
   SmallVector<tensor::InsertSliceOp> insertSliceOps;
   funcOp.walk([&](tensor::InsertSliceOp op) {
-    if (!isInDispatchRegion(op)) insertSliceOps.push_back(op);
+    if (!isInDispatchRegion(op))
+      insertSliceOps.push_back(op);
   });
 
   // Rewrite InsertSliceOps to FlowUpdateOps.
@@ -139,7 +147,8 @@
   // Create a DispatchWorkgroupsOp for every remaining InsertSliceOp.
   FailureOr<SmallVector<Flow::DispatchWorkgroupsOp>> newWorkgroupsOps =
       wrapInWorkgroupsOp(rewriter, remainingInsertSliceOps);
-  if (failed(newWorkgroupsOps)) return failure();
+  if (failed(newWorkgroupsOps))
+    return failure();
   workgroupsOps.append(newWorkgroupsOps->begin(), newWorkgroupsOps->end());
 
   return success();
@@ -147,13 +156,15 @@
 
 /// Rewrite top-level ExtractSliceOps to FlowSliceOps or wrap them in a
 /// dispatch region.
-LogicalResult convertExtractSliceOps(
-    mlir::TensorDimTrackingRewriter &rewriter, mlir::FunctionOpInterface funcOp,
-    SmallVector<Flow::DispatchWorkgroupsOp> &workgroupsOps) {
+LogicalResult
+convertExtractSliceOps(mlir::TensorDimTrackingRewriter &rewriter,
+                       mlir::FunctionOpInterface funcOp,
+                       SmallVector<Flow::DispatchWorkgroupsOp> &workgroupsOps) {
   // Find eligible ExtractSliceOps.
   SmallVector<tensor::ExtractSliceOp> extractSliceOps;
   funcOp.walk([&](tensor::ExtractSliceOp op) {
-    if (!isInDispatchRegion(op)) extractSliceOps.push_back(op);
+    if (!isInDispatchRegion(op))
+      extractSliceOps.push_back(op);
   });
 
   // Rewrite ExtractSliceOps to FlowSliceOps.
@@ -167,7 +178,8 @@
   // Create a DispatchWorkgroupsOp for every remaining ExtractSliceOp.
   FailureOr<SmallVector<Flow::DispatchWorkgroupsOp>> newWorkgroupsOps =
       wrapInWorkgroupsOp(rewriter, remainingExtractSliceOps);
-  if (failed(newWorkgroupsOps)) return failure();
+  if (failed(newWorkgroupsOps))
+    return failure();
   workgroupsOps.append(newWorkgroupsOps->begin(), newWorkgroupsOps->end());
 
   return success();
@@ -183,8 +195,9 @@
 /// - To correlate back to the captured workload,
 /// `flow.dispatch.workload.ordinal`
 ///   to map the captured operand to the position in the workload list.
-static void createDefaultWorkgroupCountRegion(
-    RewriterBase &rewriter, Flow::DispatchWorkgroupsOp workgroupsOp) {
+static void
+createDefaultWorkgroupCountRegion(RewriterBase &rewriter,
+                                  Flow::DispatchWorkgroupsOp workgroupsOp) {
   Region &workgroupCountBody = workgroupsOp.getWorkgroupCount();
   if (!workgroupCountBody.empty()) {
     // Preserve pre-existing workgroup count region.
@@ -197,7 +210,8 @@
   SmallVector<Location> workloadLocs;
   for (auto argument : workgroupsOp.getArguments()) {
     Type argumentType = argument.getType();
-    if (!llvm::isa<IndexType>(argumentType)) continue;
+    if (!llvm::isa<IndexType>(argumentType))
+      continue;
     workload.push_back(argument);
     workloadTypes.push_back(argumentType);
     workloadLocs.push_back(argument.getLoc());
@@ -225,7 +239,8 @@
     rewriter.setInsertionPointToStart(workgroupsOp.getBody());
     int ordinalNumber = 0;
     for (auto [index, operand] : llvm::enumerate(workgroupsOp.getArguments())) {
-      if (!llvm::isa<IndexType>(operand.getType())) continue;
+      if (!llvm::isa<IndexType>(operand.getType()))
+        continue;
       BlockArgument arg = workgroupsOp.getInputBlockArgument(index);
       auto ordinalOp = rewriter.create<Flow::DispatchWorkloadOrdinalOp>(
           loc, arg, rewriter.getIndexAttr(ordinalNumber++));
@@ -250,7 +265,7 @@
       : FormDispatchWorkgroupsPass(pass.generateWorkloadRegion) {}
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void FormDispatchWorkgroupsPass::runOnOperation() {
   mlir::FunctionOpInterface funcOp = getOperation();
@@ -262,7 +277,8 @@
   // Step 1: Create a DispatchWorkgroupsOp for every DispatchRegionOp.
   auto maybeWorkgroupsOps =
       createDispatchWorkgroups(rewriter, funcOp, dominanceInfo);
-  if (failed(maybeWorkgroupsOps)) return signalPassFailure();
+  if (failed(maybeWorkgroupsOps))
+    return signalPassFailure();
   SmallVector<Flow::DispatchWorkgroupsOp> workgroupsOps = *maybeWorkgroupsOps;
 
   LLVM_DEBUG({
@@ -313,9 +329,8 @@
         foldExtractInsertSliceOps, context);
     if (failed(applyPatternsAndFoldGreedily(
             funcOp, std::move(foldExtractInsertSliceOps)))) {
-      funcOp->emitOpError(
-          "failed to insert/extract_slice with "
-          "flow.dispatch.tensor.load/store");
+      funcOp->emitOpError("failed to insert/extract_slice with "
+                          "flow.dispatch.tensor.load/store");
       return signalPassFailure();
     }
   }
@@ -344,7 +359,7 @@
   return std::make_unique<FormDispatchWorkgroupsPass>(generateWorkloadRegion);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp
index eef91c0..6182b23 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp
@@ -38,7 +38,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 /// Return true if type represents a value less than `n` elements.
 static bool isScalarOrTensorOfLinearSizeN(int n, Type type) {
@@ -138,8 +138,9 @@
 }
 
 // Form dispatch regions from slice of the operation.
-static FailureOr<DispatchRegionOp> formDispatchRegionFromSlice(
-    RewriterBase &rewriter, Operation *rootOp, ArrayRef<Operation *> slice) {
+static FailureOr<DispatchRegionOp>
+formDispatchRegionFromSlice(RewriterBase &rewriter, Operation *rootOp,
+                            ArrayRef<Operation *> slice) {
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(rootOp);
   FailureOr<DispatchRegionOp> dispatchRegionOp =
@@ -276,7 +277,7 @@
   return std::make_unique<FormScalarDispatchesPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
index 68da996..0cc0152 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
@@ -57,9 +57,11 @@
       // This can be generalized if needed
       unsigned numUsesOfOp = 0;
       for (OpOperand &operand : sourceOp->getOpOperands()) {
-        if (operand.get().getDefiningOp() == op) numUsesOfOp++;
+        if (operand.get().getDefiningOp() == op)
+          numUsesOfOp++;
       }
-      if (numUsesOfOp != 1) return std::nullopt;
+      if (numUsesOfOp != 1)
+        return std::nullopt;
       return &source;
     }
   }
@@ -70,13 +72,16 @@
 static bool areFusableOps(MLIRContext *context, OpOperand *fusedOperand) {
   Operation *producerOp = fusedOperand->get().getDefiningOp();
   Operation *consumerOp = fusedOperand->getOwner();
-  if (!producerOp) return false;
+  if (!producerOp)
+    return false;
 
   // Check for i1 return types, if so aggressively fuse to avoid `i1` buffers.
   if (llvm::all_of(producerOp->getResultTypes(), [](Type t) {
-        if (t.isInteger(1)) return true;
+        if (t.isInteger(1))
+          return true;
         if (auto shapedType = llvm::dyn_cast<ShapedType>(t)) {
-          if (shapedType.getElementType().isInteger(1)) return true;
+          if (shapedType.getElementType().isInteger(1))
+            return true;
         }
         return false;
       })) {
@@ -94,7 +99,8 @@
 
   // If the generic op is "just" copy, then fuse always.
   Block &body = producerOp->getRegion(0).front();
-  if (std::begin(body)->hasTrait<OpTrait::IsTerminator>()) return true;
+  if (std::begin(body)->hasTrait<OpTrait::IsTerminator>())
+    return true;
   if (llvm::all_of(body.getArguments(),
                    [](BlockArgument arg) { return arg.use_empty(); })) {
     // THe operands arent used, its just an `linalg.index` op.
@@ -102,7 +108,8 @@
   }
 
   // If producer does not have a single user, dont fuse.
-  if (!producerOp->hasOneUse()) return false;
+  if (!producerOp->hasOneUse())
+    return false;
 
   // If the producer has a single use (this op), only fuse if
   // - 1) The consumer op is all parallel loops. The parallelism of the consumer
@@ -139,15 +146,18 @@
                                 PatternRewriter &rewriter) const override {
     auto consumerMarker =
         consumerOp->getAttrOfType<IntegerAttr>(getConsumerAttributeName());
-    if (!consumerMarker) return failure();
+    if (!consumerMarker)
+      return failure();
 
     auto fusedOperandIt =
         llvm::find_if(consumerOp->getOpOperands(), [&](OpOperand &operand) {
           Operation *operandProducer = operand.get().getDefiningOp();
-          if (!operandProducer) return false;
+          if (!operandProducer)
+            return false;
           auto producerMarker = operandProducer->getAttrOfType<IntegerAttr>(
               getProducerAttributeName());
-          if (!producerMarker) return false;
+          if (!producerMarker)
+            return false;
           return consumerMarker.getValue() == producerMarker.getValue();
         });
     assert(fusedOperandIt != consumerOp->getOpOperands().end() &&
@@ -198,8 +208,10 @@
 
     std::optional<OpOperand *> fusableUse =
         getFusableUse(genericOp, dominanceInfo);
-    if (!fusableUse) return;
-    if (!linalg::areElementwiseOpsFusable(fusableUse.value())) return;
+    if (!fusableUse)
+      return;
+    if (!linalg::areElementwiseOpsFusable(fusableUse.value()))
+      return;
 
     auto consumer = dyn_cast<linalg::GenericOp>(fusableUse.value()->getOwner());
     auto isParallelIteratorType = [](Attribute attr) {
@@ -261,7 +273,8 @@
       linalg::ControlFusionFn fuseElementwiseOpsControlFn =
           [&](OpOperand *fusedOperand) {
             Operation *producer = fusedOperand->get().getDefiningOp();
-            if (!producer) return false;
+            if (!producer)
+              return false;
             Operation *consumer = fusedOperand->getOwner();
 
             // Limit the number of operands. We have hard limit (32) of bindings
@@ -276,7 +289,8 @@
             operands.insert(std::next(consumer->operand_begin(),
                                       fusedOperand->getOperandNumber() + 1),
                             consumer->operand_end());
-            if (operands.size() >= kIreeMaxOperandCount) return false;
+            if (operands.size() >= kIreeMaxOperandCount)
+              return false;
 
             return areFusableOps(context, fusedOperand);
           };
@@ -354,7 +368,8 @@
             }
 
             auto reshapeOp = dyn_cast<tensor::ExpandShapeOp>(producer);
-            if (!reshapeOp) return true;
+            if (!reshapeOp)
+              return true;
 
             return reshapeOp.getSrc().getDefiningOp<linalg::LinalgOp>() !=
                    nullptr;
@@ -407,13 +422,14 @@
           funcOp->emitError("failed to fuse multi-use producers");
           return signalPassFailure();
         }
-        if (numOfFusableCandidates.value() == 0) break;
+        if (numOfFusableCandidates.value() == 0)
+          break;
       }
     }
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
 createFusionOfTensorOpsPass(bool fuseMultiUse,
@@ -422,7 +438,7 @@
                                                  multiUseFusionIteration);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InferNumericNarrowing.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InferNumericNarrowing.cpp
index 04138a0..309d1a6 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InferNumericNarrowing.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InferNumericNarrowing.cpp
@@ -130,13 +130,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createInferNumericNarrowingPass() {
   return std::make_unique<InferNumericNarrowingPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp
index 231c46b..b86d0fb 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp
@@ -109,13 +109,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createInitializeEmptyTensorsPass(bool zeroFill) {
   return std::make_unique<InitializeEmptyTensorsPass>(zeroFill);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InjectDispatchTracing.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InjectDispatchTracing.cpp
index ec993b9..22c9c79 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InjectDispatchTracing.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InjectDispatchTracing.cpp
@@ -20,17 +20,18 @@
 namespace IREE {
 namespace Flow {
 
-static SmallVector<Value> filterTensorValues(ValueRange&& range) {
+static SmallVector<Value> filterTensorValues(ValueRange &&range) {
   SmallVector<Value> result;
   for (auto value : range) {
-    if (llvm::isa<TensorType>(value.getType())) result.push_back(value);
+    if (llvm::isa<TensorType>(value.getType()))
+      result.push_back(value);
   }
   return result;
 }
 
 class InjectDispatchTracingPass
     : public InjectDispatchTracingBase<InjectDispatchTracingPass> {
- public:
+public:
   InjectDispatchTracingPass() = default;
 
   void runOnOperation() override {
@@ -65,7 +66,7 @@
   return std::make_unique<InjectDispatchTracingPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp
index b1f0ec9..ae7a7b5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp
@@ -28,24 +28,28 @@
 static SmallVector<Value> filterNonTensorValues(ValueRange &&range) {
   SmallVector<Value> result;
   for (auto value : range) {
-    if (llvm::isa<TensorType>(value.getType())) result.push_back(value);
+    if (llvm::isa<TensorType>(value.getType()))
+      result.push_back(value);
   }
   return result;
 }
 
 // Attempts to interpret a pass arg as @<function_name>:<ordinal>, else returns
 // a negative ordinal indicating no match.
-static std::tuple<std::string, int> getOrdinalFromDebugTarget(
-    std::string marker) {
-  if (marker.empty() || marker[0] != '@') return std::make_tuple("", -1);
+static std::tuple<std::string, int>
+getOrdinalFromDebugTarget(std::string marker) {
+  if (marker.empty() || marker[0] != '@')
+    return std::make_tuple("", -1);
 
   SmallVector<StringRef, 2> parts;
   auto cropped = marker.substr(1);
   llvm::SplitString(llvm::StringRef(cropped), parts, ":");
-  if (parts.size() != 2) return std::make_tuple("", -1);
+  if (parts.size() != 2)
+    return std::make_tuple("", -1);
 
   int ordinal;
-  if (parts[1].getAsInteger(10, ordinal)) return std::make_tuple("", -1);
+  if (parts[1].getAsInteger(10, ordinal))
+    return std::make_tuple("", -1);
 
   return std::make_tuple(parts[0].str(), ordinal);
 }
@@ -72,10 +76,12 @@
 static LogicalResult replaceReturnWithOpResults(mlir::ModuleOp moduleOp,
                                                 mlir::func::FuncOp funcOp,
                                                 Operation *op) {
-  if (!funcOp->isProperAncestor(op)) return failure();
+  if (!funcOp->isProperAncestor(op))
+    return failure();
 
   // TODO: Handle nested function calls.
-  if (!SymbolTable::symbolKnownUseEmpty(funcOp, moduleOp)) return failure();
+  if (!SymbolTable::symbolKnownUseEmpty(funcOp, moduleOp))
+    return failure();
 
   // TODO: Handle (nested) control flow.
   auto funcBlock = op->getBlock();
@@ -144,13 +150,16 @@
 
       // Only look for dispatches in upstream func ops.
       auto funcOp = llvm::dyn_cast<mlir::func::FuncOp>(operation);
-      if (!funcOp) continue;
+      if (!funcOp)
+        continue;
 
       std::string fName = funcOp.getName().str();
       int localBreakOrdinal = -1;
-      if (fName == breakFname) localBreakOrdinal = breakOrdinal;
+      if (fName == breakFname)
+        localBreakOrdinal = breakOrdinal;
       int localTraceOrdinal = -1;
-      if (fName == traceFname) localTraceOrdinal = traceOrdinal;
+      if (fName == traceFname)
+        localTraceOrdinal = traceOrdinal;
 
       auto &bodyRegion = op.getFunctionBody();
       auto dispatchOps = llvm::to_vector<8>(bodyRegion.getOps<DispatchOp>());
@@ -200,10 +209,12 @@
   void runOnOperation() override {
     // Setup regex for matching symbol names.
     llvm::Regex traceMatcher;
-    if (!traceDebugTarget.empty()) traceMatcher = llvm::Regex(traceDebugTarget);
+    if (!traceDebugTarget.empty())
+      traceMatcher = llvm::Regex(traceDebugTarget);
 
     llvm::Regex breakMatcher;
-    if (!breakDebugTarget.empty()) breakMatcher = llvm::Regex(breakDebugTarget);
+    if (!breakDebugTarget.empty())
+      breakMatcher = llvm::Regex(breakDebugTarget);
 
     for (auto it :
          llvm::enumerate(getOperation().getOps<FunctionOpInterface>())) {
@@ -255,7 +266,7 @@
                                                          traceDebugTarget);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp
index b07a756..a229b4a 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp
@@ -33,15 +33,18 @@
     SmallVector<unsigned> interchange;
     bool needInterchange = false;
     unsigned numParallelLoop = genericOp.getNumParallelLoops();
-    if (numParallelLoop == 0) return failure();
+    if (numParallelLoop == 0)
+      return failure();
     for (auto iter : llvm::enumerate(genericOp.getIteratorTypesArray())) {
       if (linalg::isParallelIterator(iter.value())) {
         interchange.push_back(iter.index());
-        if (iter.index() >= numParallelLoop) needInterchange = true;
+        if (iter.index() >= numParallelLoop)
+          needInterchange = true;
       }
     }
     // If all the parallel loops are outter loops skip the pattern.
-    if (!needInterchange) return failure();
+    if (!needInterchange)
+      return failure();
     for (auto iter : llvm::enumerate(genericOp.getIteratorTypesArray())) {
       if (linalg::isReductionIterator(iter.value())) {
         interchange.push_back(iter.index());
@@ -67,13 +70,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createInterchangeGenericOpsPass() {
   return std::make_unique<InterchangeGenericOpsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeTransposeGenericOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeTransposeGenericOps.cpp
index 0228fd8..a9d3dce 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeTransposeGenericOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InterchangeTransposeGenericOps.cpp
@@ -41,7 +41,8 @@
 
     for (auto operand : genericOp.getDpsInputOperands()) {
       auto producer = operand->get().getDefiningOp<linalg::LinalgOp>();
-      if (!producer) continue;
+      if (!producer)
+        continue;
 
       // check if the generic op has a non-identity map for the operand.
       auto indexingMap = genericOp.getMatchingIndexingMap(operand);
@@ -50,18 +51,21 @@
         return rewriter.notifyMatchFailure(genericOp, "already normalized");
       }
       // The map must be a permutation. If not, then look for other operand.
-      if (!indexingMap.isPermutation()) continue;
+      if (!indexingMap.isPermutation())
+        continue;
 
-      if (!mapForInterchange) mapForInterchange = indexingMap;
+      if (!mapForInterchange)
+        mapForInterchange = indexingMap;
     }
 
     if (!mapForInterchange) {
       return rewriter.notifyMatchFailure(genericOp, "no eligible operands");
     }
     // Make the input indexing maps identity by interchanging.
-    auto interchange = llvm::map_to_vector(
-        mapForInterchange->getResults(),
-        [](AffineExpr e) { return e.cast<AffineDimExpr>().getPosition(); });
+    auto interchange =
+        llvm::map_to_vector(mapForInterchange->getResults(), [](AffineExpr e) {
+          return e.cast<AffineDimExpr>().getPosition();
+        });
 
     return interchangeGenericOp(rewriter, genericOp, interchange);
   }
@@ -84,13 +88,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createInterchangeTransposeGenericOpsPass() {
   return std::make_unique<InterchangeTransposeGenericOpsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OptimizeNumerics.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OptimizeNumerics.cpp
index 4cbcbb6..b4b3ccf 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OptimizeNumerics.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OptimizeNumerics.cpp
@@ -23,7 +23,8 @@
 
 int getNextPotBitWidth(int bitWidth, int minBitWidth = 8) {
   for (int i = minBitWidth;; i *= 2) {
-    if (i >= bitWidth) return i;
+    if (i >= bitWidth)
+      return i;
   }
 }
 
@@ -115,7 +116,8 @@
   LogicalResult matchAndRewrite(IREE::Util::NumericCastOpInterface castOp,
                                 PatternRewriter &rewriter) const override {
     auto emptyOp = castOp.getInput().getDefiningOp<tensor::EmptyOp>();
-    if (!emptyOp) return failure();
+    if (!emptyOp)
+      return failure();
     Type resultType = castOp.getCasted().getType();
 
     rewriter.replaceOpWithNewOp<tensor::EmptyOp>(castOp, resultType,
@@ -133,7 +135,8 @@
                                 PatternRewriter &rewriter) const override {
     auto loc = castOp.getLoc();
     auto fillOp = castOp.getInput().getDefiningOp<linalg::FillOp>();
-    if (!fillOp) return failure();
+    if (!fillOp)
+      return failure();
     Type toElementType = getElementTypeOrSelf(castOp.getCastedType());
 
     Value fillInput = fillOp.value();
@@ -280,13 +283,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createOptimizeNumericsPass() {
   return std::make_unique<OptimizeNumericsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
index 11b8252..fd2e16d 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
@@ -35,7 +35,8 @@
 static int64_t costOfDomain(ArrayRef<int64_t> domain) {
   int64_t product = 1;
   for (int64_t size : domain) {
-    if (size == mlir::ShapedType::kDynamic) return INT64_MAX;
+    if (size == mlir::ShapedType::kDynamic)
+      return INT64_MAX;
     product *= size;
   }
   return product;
@@ -57,7 +58,8 @@
   auto resultTypes = llvm::to_vector(op->getResultTypes());
   for (Type t : llvm::concat<Type>(operandTypes, resultTypes)) {
     auto tensorType = llvm::dyn_cast<TensorType>(t);
-    if (!tensorType) continue;
+    if (!tensorType)
+      continue;
     if (!main) {
       main = tensorType;
     } else if (costOfDomain(tensorType.getShape()) >
@@ -148,13 +150,15 @@
 static std::string getOpNameWithoutDialectName(Operation *op) {
   auto opName =
       op->getName().getStringRef().drop_until([](char c) { return c == '.'; });
-  if (opName.starts_with(".")) opName = opName.drop_front();
+  if (opName.starts_with("."))
+    opName = opName.drop_front();
   return opName.str();
 }
 
 static std::string summarizeLinalgOp(linalg::LinalgOp op) {
   auto opName = op->getName().getStringRef();
-  if (!opName.consume_front("linalg.")) return "";
+  if (!opName.consume_front("linalg."))
+    return "";
   std::string opLoopRanges = loopRangesToString(op.getStaticLoopRanges());
   std::string opTypes = opLoopRanges.empty() ? "" : getLinalgDataTypes(op);
   return opName.str() + (opLoopRanges.empty() ? "" : "_" + opLoopRanges) +
@@ -163,7 +167,8 @@
 
 static std::string summarizeLinalgExtOp(Operation *op) {
   auto opName = op->getName().getStringRef();
-  if (!opName.consume_front("iree_linalg_ext.")) return "";
+  if (!opName.consume_front("iree_linalg_ext."))
+    return "";
   std::string suffix = "";
   if (TensorType mainTensor = getMainTensorForLinalgExtOp(op)) {
     llvm::raw_string_ostream sstream(suffix);
@@ -178,8 +183,8 @@
 
 // Summarizes the contents of a dispatch into a short string.
 // This uses heuristics to aid developer debugging.
-static std::string summarizeDispatchWorkgroupsOp(
-    DispatchWorkgroupsOp regionOp) {
+static std::string
+summarizeDispatchWorkgroupsOp(DispatchWorkgroupsOp regionOp) {
   // The goal here is to build a relatively concise description that gives
   // enough information to developers to see roughly what sort of computation a
   // dispatch region performs. Multiple approaches are valid here, depending on
@@ -200,7 +205,8 @@
     TypeSwitch<Operation *>(op)
         .Case<linalg::LinalgOp>([&](auto op) {
           int64_t estimatedCost = estimateLinalgOpCost(op);
-          if (estimatedCost < bestEstimatedCost) return;
+          if (estimatedCost < bestEstimatedCost)
+            return;
           bestEstimatedCost = estimatedCost;
           bestOp = op;
           LLVM_DEBUG(llvm::dbgs() << "// new best op: '" << bestOp->getName()
@@ -211,7 +217,8 @@
               // SetEncoding/UnsetEncoding is the bestOp only if there are no
               // other operations.
               int64_t estimatedCost = kMinEstimatedCost + 1;
-              if (estimatedCost < bestEstimatedCost) return;
+              if (estimatedCost < bestEstimatedCost)
+                return;
               bestEstimatedCost = estimatedCost;
               bestOp = op;
               LLVM_DEBUG(llvm::dbgs()
@@ -220,7 +227,8 @@
             })
         .Case<IREE::LinalgExt::LinalgExtOp>([&](auto op) {
           int64_t estimatedCost = estimateLinalgExtOpCost(op);
-          if (estimatedCost < bestEstimatedCost) return;
+          if (estimatedCost < bestEstimatedCost)
+            return;
           bestEstimatedCost = estimatedCost;
           bestOp = op;
           LLVM_DEBUG(llvm::dbgs() << "// new best op: '" << bestOp->getName()
@@ -230,7 +238,8 @@
           // No cost estimation implemented, skip.
         });
   });
-  if (!bestOp) return "";
+  if (!bestOp)
+    return "";
 
   std::string bestSummary = "";
   TypeSwitch<Operation *>(bestOp)
@@ -326,9 +335,8 @@
 }
 
 // Converts a dispatch region body to a free-floating function.
-static mlir::func::FuncOp createWorkgroupFunc(Location loc,
-                                              StringRef functionName,
-                                              Region &region) {
+static mlir::func::FuncOp
+createWorkgroupFunc(Location loc, StringRef functionName, Region &region) {
   // Build function type matching the region signature.
   auto functionType = FunctionType::get(
       region.getContext(), region.getArgumentTypes(), /*results=*/{});
@@ -354,9 +362,10 @@
 
 // Outlines a dispatch region into a flow.executable and replaces the region op
 // with a dispatch to that outlined executable.
-static LogicalResult outlineDispatchWorkgroupsOp(
-    std::string executableOpName, std::string exportOpName,
-    DispatchWorkgroupsOp regionOp) {
+static LogicalResult
+outlineDispatchWorkgroupsOp(std::string executableOpName,
+                            std::string exportOpName,
+                            DispatchWorkgroupsOp regionOp) {
   // Convert the region to a free-floating function.
   auto workgroupFuncOp = createWorkgroupFunc(regionOp.getLoc(), exportOpName,
                                              regionOp.getWorkgroupBody());
@@ -389,11 +398,11 @@
   return convertToDispatchOp(regionOp, executableOp, exportOp);
 }
 
-}  // namespace
+} // namespace
 
 class OutlineDispatchRegionsPass
     : public OutlineDispatchRegionsBase<OutlineDispatchRegionsPass> {
- public:
+public:
   OutlineDispatchRegionsPass() = default;
 
   void runOnOperation() override {
@@ -448,7 +457,7 @@
   return std::make_unique<OutlineDispatchRegionsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/PassDetail.h b/compiler/src/iree/compiler/Dialect/Flow/Transforms/PassDetail.h
index 9a8cb80..8b01d33 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/PassDetail.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/PassDetail.h
@@ -17,11 +17,11 @@
 namespace Flow {
 
 #define GEN_PASS_CLASSES
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Flow/Transforms/Passes.h.inc" // IWYU pragma: keep
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_PASS_DETAIL_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
index 16b5d2c..544758a 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
@@ -96,26 +96,29 @@
     llvm::cl::desc("Enable fusing tensor.pad ops into Linalg consumer ops."),
     llvm::cl::init(false));
 
-static llvm::cl::opt<bool> clEnableFuseMultiUse(
-    "iree-flow-fuse-multi-use", llvm::cl::desc("Fuse multi-use ops."),
-    llvm::cl::init(false));
+static llvm::cl::opt<bool>
+    clEnableFuseMultiUse("iree-flow-fuse-multi-use",
+                         llvm::cl::desc("Fuse multi-use ops."),
+                         llvm::cl::init(false));
 
 static llvm::cl::opt<bool> clDispatchGenerateWorkloadRegion(
     "iree-flow-dispatch-generate-workload-region",
     llvm::cl::desc("Generate the workload region."), llvm::cl::init(true));
 
-static llvm::cl::opt<bool> clEnableDataTiling(
-    "iree-flow-enable-data-tiling", llvm::cl::desc("Enable data tiling path."),
-    llvm::cl::init(false));
+static llvm::cl::opt<bool>
+    clEnableDataTiling("iree-flow-enable-data-tiling",
+                       llvm::cl::desc("Enable data tiling path."),
+                       llvm::cl::init(false));
 
 static llvm::cl::opt<bool> clNormalizeInputIndexingMap(
     "iree-flow-normalize-input-indexing-map",
     llvm::cl::desc("Enable normalizing input indexing map to identity."),
     llvm::cl::init(false));
 
-static llvm::cl::opt<bool> clDumpDispatchGraph(
-    "iree-flow-dump-dispatch-graph",
-    llvm::cl::desc("Dump a dot graph for dispatches."), llvm::cl::init(false));
+static llvm::cl::opt<bool>
+    clDumpDispatchGraph("iree-flow-dump-dispatch-graph",
+                        llvm::cl::desc("Dump a dot graph for dispatches."),
+                        llvm::cl::init(false));
 
 static llvm::cl::opt<std::string> clDumpDispatchGraphOutputFile(
     "iree-flow-dump-dispatch-graph-output-file",
@@ -184,7 +187,7 @@
       IREE::Util::createFixedPointIteratorPass(std::move(pipeline)));
 }
 
-}  // namespace
+} // namespace
 
 void buildFlowTransformPassPipeline(OpPassManager &passManager,
                                     const TransformOptions &transformOptions) {
@@ -407,8 +410,8 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/Dialect/Flow/Transforms/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/Dialect/Flow/Transforms/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerFlowPasses() {
   // Generated.
@@ -418,7 +421,7 @@
   registerFlowTransformPassPipeline();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.h b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.h
index bc0a000..e3008ac 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.h
@@ -84,8 +84,8 @@
 
 // Pass to convert a tensor.pad operation into a linalg.fill +
 // tensor.insert_slice.
-std::unique_ptr<Pass> createTensorPadToTensorInsertSlicePass(
-    bool skipSingleLinalgOpUses = false);
+std::unique_ptr<Pass>
+createTensorPadToTensorInsertSlicePass(bool skipSingleLinalgOpUses = false);
 
 // Create a pass to detach elementwise ops from named Linalg ops.
 std::unique_ptr<Pass> createDetachElementwiseFromNamedOpsPass();
@@ -249,8 +249,8 @@
 createStripAndSplatConstantVariablesPass();
 
 /// Creates a pass to dump a graph for dispatches
-std::unique_ptr<Pass> createDumpDispatchGraphPass(
-    raw_ostream &os = llvm::errs());
+std::unique_ptr<Pass>
+createDumpDispatchGraphPass(raw_ostream &os = llvm::errs());
 
 //===----------------------------------------------------------------------===//
 // Register all Passes
@@ -258,9 +258,9 @@
 
 void registerFlowPasses();
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/PromoteTensorLoads.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/PromoteTensorLoads.cpp
index d78483a..bbaf35a 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/PromoteTensorLoads.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/PromoteTensorLoads.cpp
@@ -28,9 +28,9 @@
 struct ExtractElementOpLowering
     : public OpConversionPattern<tensor::ExtractOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      tensor::ExtractOp op, ArrayRef<Value> args,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(tensor::ExtractOp op, ArrayRef<Value> args,
+                  ConversionPatternRewriter &rewriter) const override {
     // tensor<i1> is not valid to load, it needs to be converted to i8 or
     // something else instead.
     auto tensorType = op.tensor().getType().cast<TensorType>();
@@ -61,11 +61,11 @@
   patterns.insert<ExtractElementOpLowering>(context);
 }
 
-}  // namespace
+} // namespace
 
 class PromoteTensorLoadsPass
     : public PromoteTensorLoadsBase<PromoteTensorLoadsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<FlowDialect, func::FuncDialect, mlir::arith::ArithDialect,
                     mlir::math::MathDialect, tensor::TensorDialect>();
@@ -95,7 +95,7 @@
   return std::make_unique<PromoteTensorLoadsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RaiseSpecialOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RaiseSpecialOps.cpp
index 785a31f..3390c04 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RaiseSpecialOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RaiseSpecialOps.cpp
@@ -57,13 +57,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createRaiseSpecialOps() {
   return std::make_unique<RaiseSpecialOpsPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp
index c6225e3..6c2cb2e 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp
@@ -70,9 +70,9 @@
   });
 }
 
-static SmallVector<Range> getLoopRangesImpl(
-    ReifyRankedShapedTypeOpInterface shapedOp, Location loc,
-    OpBuilder &builder) {
+static SmallVector<Range>
+getLoopRangesImpl(ReifyRankedShapedTypeOpInterface shapedOp, Location loc,
+                  OpBuilder &builder) {
   Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
   Value one = builder.create<arith::ConstantIndexOp>(loc, 1);
   ReifiedRankedShapedTypeDims resultDims;
@@ -178,7 +178,8 @@
     RewriterBase &rewriter, Flow::DispatchRegionOp &regionOp,
     TypeRange workloadTypes, ArrayRef<Location> workloadLocs) {
   Region &countRegion = regionOp.getWorkgroupCount();
-  if (!countRegion.empty()) return;
+  if (!countRegion.empty())
+    return;
   Block *body = rewriter.createBlock(&countRegion, countRegion.begin(),
                                      workloadTypes, workloadLocs);
   auto args = body->getArguments();
@@ -194,7 +195,8 @@
 /// dynamic dimension.
 static bool hasDynamicShape(Type t) {
   auto shapedType = llvm::dyn_cast<ShapedType>(t);
-  if (!shapedType) return false;
+  if (!shapedType)
+    return false;
   return !shapedType.hasStaticShape();
 }
 
@@ -204,7 +206,8 @@
   OpBuilder::InsertionGuard guard(b);
 
   // Case 1: No dynamic result dims.
-  if (!hasDynamicShape(value.getType())) return success();
+  if (!hasDynamicShape(value.getType()))
+    return success();
 
   // There is at least one dynamic dimension, continue...
   ShapedType shapedType = llvm::cast<ShapedType>(value.getType());
@@ -252,7 +255,8 @@
   auto reifyShapeOp = dyn_cast<ReifyRankedShapedTypeOpInterface>(op);
   if (reifyShapeOp) {
     ReifiedRankedShapedTypeDims dims;
-    if (failed(reifyShapeOp.reifyResultShapes(b, dims))) return failure();
+    if (failed(reifyShapeOp.reifyResultShapes(b, dims)))
+      return failure();
     for (int64_t i = 0; i < shapedType.getRank(); ++i)
       if (shapedType.isDynamicDim(i))
         dynamicDims.push_back(dims[opResult.getResultNumber()][i].get<Value>());
@@ -293,7 +297,7 @@
     auto tensorType = result.getType().cast<RankedTensorType>();
     assert(tensorType.getNumDynamicDims() == dynamicDims[index].size() &&
            "incorrect number of dynamicDims provided");
-#endif  // NDEBUG
+#endif // NDEBUG
     resultTypes.push_back(result.getType());
     regionDynamicDims.append(dynamicDims[index]);
     returnedValues.push_back(result);
@@ -337,9 +341,10 @@
 
 // Clone a `target` op that is preceding the given dispatch region op into the
 // dispatch region.
-FailureOr<Operation *> Flow::clonePrecedingOpIntoDispatchRegion(
-    RewriterBase &rewriter, Operation *target,
-    Flow::DispatchRegionOp regionOp) {
+FailureOr<Operation *>
+Flow::clonePrecedingOpIntoDispatchRegion(RewriterBase &rewriter,
+                                         Operation *target,
+                                         Flow::DispatchRegionOp regionOp) {
   Block &body = regionOp.getBody().front();
 
   // Gather all uses of `target`.
@@ -367,9 +372,10 @@
 
 // Move a `target` op that is preceding the given dispatch region op into the
 // dispatch region.
-FailureOr<Flow::DispatchRegionOp> Flow::movePrecedingOpsIntoDispatchRegion(
-    RewriterBase &rewriter, ArrayRef<Operation *> targets,
-    Flow::DispatchRegionOp regionOp) {
+FailureOr<Flow::DispatchRegionOp>
+Flow::movePrecedingOpsIntoDispatchRegion(RewriterBase &rewriter,
+                                         ArrayRef<Operation *> targets,
+                                         Flow::DispatchRegionOp regionOp) {
   // Values replaced by moving the `targets` into the dispatch region.
   SmallVector<Value> replacedValues;
 
@@ -436,8 +442,8 @@
   return newRegionOp.value();
 }
 
-FailureOr<Flow::DispatchRegionOp> Flow::wrapOpInDispatchRegion(
-    RewriterBase &rewriter, Operation *op) {
+FailureOr<Flow::DispatchRegionOp>
+Flow::wrapOpInDispatchRegion(RewriterBase &rewriter, Operation *op) {
   OpBuilder::InsertionGuard g(rewriter);
 
   SmallVector<Value> workload;
@@ -483,7 +489,8 @@
     return true;
   }
   if (isa<arith::ConstantOp>(op) || isa<complex::ConstantOp>(op)) {
-    if (clInlineConstantByteLength == 0) return false;
+    if (clInlineConstantByteLength == 0)
+      return false;
     Attribute constantValueAttr;
     if (!matchPattern(op->getResult(0), m_Constant(&constantValueAttr))) {
       return false;
@@ -528,24 +535,27 @@
     Operation *owner = ownerWorkgroupsOp ? ownerWorkgroupsOp : ownerRegionOp;
 
     // Ignore uses outside of dispatch workgroups op.
-    if (owner != dispatchOp) continue;
+    if (owner != dispatchOp)
+      continue;
 
     // Cannot fuse producer of `dest` with `tensor.insert_slice`.
     if (auto insertSliceUser = dyn_cast<tensor::InsertSliceOp>(user)) {
-      if (insertSliceUser.getDest() == v) return true;
+      if (insertSliceUser.getDest() == v)
+        return true;
     }
   }
   return false;
 }
 
 /// Collect all ops that should be cloned into the given dispatch region op.
-static SmallVector<Operation *> getCloneableOps(
-    Flow::DispatchRegionOp regionOp) {
+static SmallVector<Operation *>
+getCloneableOps(Flow::DispatchRegionOp regionOp) {
   // Find values that are used inside of the dispatch region but defined outside
   // of the dispatch region.
   llvm::SetVector<Value> valuesDefinedAbove;
   mlir::getUsedValuesDefinedAbove(regionOp.getBody(), valuesDefinedAbove);
-  if (valuesDefinedAbove.empty()) return {};
+  if (valuesDefinedAbove.empty())
+    return {};
 
   // Traverse the defining ops of these values (and the ops on their reverse
   // SSA use-def chain).
@@ -556,7 +566,8 @@
   while (!worklist.empty()) {
     Value outsideValue = worklist.pop_back_val();
     // Skip values that were already visited.
-    if (visited.count(outsideValue)) continue;
+    if (visited.count(outsideValue))
+      continue;
     visited.insert(outsideValue);
 
     Operation *definingOp = outsideValue.getDefiningOp();
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.h b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.h
index 9bf2768..de6c108 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.h
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.h
@@ -56,8 +56,9 @@
 /// %2 = "yet_another_use"(%0) : (tensor<?xf32>) -> (tensor<?xf32>)
 ///
 /// Returns the cloned target op.
-FailureOr<Operation *> clonePrecedingOpIntoDispatchRegion(
-    RewriterBase &rewriter, Operation *target, Flow::DispatchRegionOp regionOp);
+FailureOr<Operation *>
+clonePrecedingOpIntoDispatchRegion(RewriterBase &rewriter, Operation *target,
+                                   Flow::DispatchRegionOp regionOp);
 
 /// Move a `target` op that is preceding the given dispatch region op into the
 /// dispatch region.
@@ -74,9 +75,10 @@
 ///   flow.return %1 : tensor<?xf32>
 /// }
 /// %2 = "yet_another_use"(%0) : (tensor<?xf32>) -> (tensor<?xf32>)
-FailureOr<Flow::DispatchRegionOp> movePrecedingOpsIntoDispatchRegion(
-    RewriterBase &rewriter, ArrayRef<Operation *> targets,
-    Flow::DispatchRegionOp regionOp);
+FailureOr<Flow::DispatchRegionOp>
+movePrecedingOpsIntoDispatchRegion(RewriterBase &rewriter,
+                                   ArrayRef<Operation *> targets,
+                                   Flow::DispatchRegionOp regionOp);
 
 /// Wrap the given op in a new dispatch region op.
 FailureOr<Flow::DispatchRegionOp> wrapOpInDispatchRegion(RewriterBase &rewriter,
@@ -94,9 +96,9 @@
 LogicalResult cloneProducersToRegion(RewriterBase &rewriter,
                                      Flow::DispatchRegionOp regionOp);
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_REGIONOPUTILS_H_
+#endif // IREE_COMPILER_DIALECT_FLOW_TRANSFORMS_REGIONOPUTILS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/SetEncoding.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/SetEncoding.cpp
index e72e849..413e55b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/SetEncoding.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/SetEncoding.cpp
@@ -56,17 +56,19 @@
             return cast<TypedAttr>(builder.getIntegerAttr(intType, 0));
           })
           .Default([](Type type) { return nullptr; });
-  if (!zeroVal) return failure();
+  if (!zeroVal)
+    return failure();
   return builder.create<arith::ConstantOp>(loc, elementType, zeroVal)
       .getResult();
 }
 
 /// Pads `value` to `padding` if needed. If no padding is specified,
 /// return `value` itself.
-static FailureOr<Value> padIfNeeded(
-    OpBuilder &builder, Location loc, Value value,
-    std::optional<int64_t> padding = std::nullopt) {
-  if (!padding) return value;
+static FailureOr<Value>
+padIfNeeded(OpBuilder &builder, Location loc, Value value,
+            std::optional<int64_t> padding = std::nullopt) {
+  if (!padding)
+    return value;
 
   OpFoldResult paddingOfr = builder.getIndexAttr(padding.value());
   FailureOr<SmallVector<OpFoldResult>> shape =
@@ -114,12 +116,13 @@
 struct SetMatmulEncoding : public OpRewritePattern<linalg::MatmulOp> {
   SetMatmulEncoding(MLIRContext *context, int64_t padding,
                     PatternBenefit benefit = 1)
-      : OpRewritePattern<linalg::MatmulOp>(context, benefit),
-        padding(padding) {}
+      : OpRewritePattern<linalg::MatmulOp>(context, benefit), padding(padding) {
+  }
 
   LogicalResult matchAndRewrite(linalg::MatmulOp matmulOp,
                                 PatternRewriter &rewriter) const override {
-    if (!matmulOp.hasTensorSemantics()) return failure();
+    if (!matmulOp.hasTensorSemantics())
+      return failure();
     auto inputs = matmulOp.getDpsInputOperands();
     auto outputs = matmulOp.getDpsInitOperands();
     auto hasEncoding = [](OpOperand *operand) -> bool {
@@ -229,7 +232,7 @@
     return success();
   }
 
- private:
+private:
   int64_t padding;
 };
 
@@ -242,7 +245,8 @@
   LogicalResult matchAndRewrite(IREE::LinalgExt::SetEncodingOp encodingOp,
                                 PatternRewriter &rewriter) const override {
     auto fillOp = encodingOp.getSource().getDefiningOp<linalg::FillOp>();
-    if (!fillOp) return failure();
+    if (!fillOp)
+      return failure();
 
     // Create a new fill op, with outs being defined by a new `tensor.empty` op.
     RankedTensorType encodingType = encodingOp.getResultType();
@@ -265,7 +269,7 @@
 
   void runOnOperation() override;
 };
-}  // namespace
+} // namespace
 
 void SetEncodingPass::runOnOperation() {
   MLIRContext *context = &getContext();
@@ -286,7 +290,7 @@
   return std::make_unique<SetEncodingPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/SplitReduction.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/SplitReduction.cpp
index 205a7e2..831d95b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/SplitReduction.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/SplitReduction.cpp
@@ -25,9 +25,9 @@
 namespace Flow {
 
 // TODO(thomasraoux): Move to attributes.
-static llvm::cl::opt<int64_t> splitReductionRatio(
-    "iree-flow-split-matmul-reduction", llvm::cl::desc("split ratio"),
-    llvm::cl::init(1));
+static llvm::cl::opt<int64_t>
+    splitReductionRatio("iree-flow-split-matmul-reduction",
+                        llvm::cl::desc("split ratio"), llvm::cl::init(1));
 
 static llvm::cl::list<int64_t> topkSplitReductionRatio(
     "iree-flow-topk-split-reduction",
@@ -44,8 +44,8 @@
                        LinalgExt::LinalgTransformationFilter f,
                        PatternBenefit benefit = 1)
       : OpInterfaceRewritePattern<linalg::LinalgOp>(context, benefit),
-        controlSplitReductionFn(controlSplitReductionFn),
-        filter(std::move(f)) {}
+        controlSplitReductionFn(controlSplitReductionFn), filter(std::move(f)) {
+  }
 
   LogicalResult matchAndRewrite(linalg::LinalgOp op,
                                 PatternRewriter &rewriter) const override {
@@ -63,7 +63,8 @@
 
     FailureOr<linalg::LinalgOp> result = LinalgExt::splitReduction(
         rewriter, op, controlSplitReductionFn, filter);
-    if (failed(result)) return failure();
+    if (failed(result))
+      return failure();
     // If any attributes needs to be propagated set it.
     for (std::pair<StringAttr, Attribute> &attrib : attributes) {
       result.value()->setAttr(attrib.first, attrib.second);
@@ -71,7 +72,7 @@
     return result;
   }
 
- private:
+private:
   linalg::ControlSplitReductionFn controlSplitReductionFn;
   LinalgExt::LinalgTransformationFilter filter;
 };
@@ -136,13 +137,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createSplitReductionPass() {
   return std::make_unique<SplitReductionPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripAndSplatConstantVariables.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripAndSplatConstantVariables.cpp
index 80d65f5..381475a 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripAndSplatConstantVariables.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripAndSplatConstantVariables.cpp
@@ -26,7 +26,7 @@
 class StripAndSplatConstantVariablesPass
     : public StripAndSplatConstantVariablesBase<
           StripAndSplatConstantVariablesPass> {
- public:
+public:
   StripAndSplatConstantVariablesPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -63,10 +63,12 @@
     moduleOp.walk([&](Operation *op) {
       if (auto globalOp = dyn_cast<Util::GlobalOp>(op)) {
         // Only strip constant variables.
-        if (globalOp.getIsMutable()) return;
+        if (globalOp.getIsMutable())
+          return;
 
         // Only strip tensor type constants (to replace with dense<>).
-        if (!llvm::isa<TensorType>(globalOp.getType())) return;
+        if (!llvm::isa<TensorType>(globalOp.getType()))
+          return;
 
         auto tensorType = llvm::cast<TensorType>(globalOp.getType());
         TypedAttr newValue = getSplatAttr(tensorType);
@@ -79,7 +81,8 @@
         newOp->setAttr("noinline", UnitAttr::get(builder.getContext()));
         globalOp.erase();
       } else if (auto cstOp = dyn_cast<arith::ConstantOp>(op)) {
-        if (!llvm::isa<TensorType>(cstOp.getType())) return;
+        if (!llvm::isa<TensorType>(cstOp.getType()))
+          return;
 
         auto tensorType = llvm::cast<TensorType>(cstOp.getType());
         TypedAttr newValue = getSplatAttr(tensorType);
@@ -98,7 +101,7 @@
   return std::make_unique<StripAndSplatConstantVariablesPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripSignedness.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripSignedness.cpp
index 3baebe2..7b450d2 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripSignedness.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/StripSignedness.cpp
@@ -20,13 +20,13 @@
 namespace {
 
 class StripSignednessPass : public StripSignednessBase<StripSignednessPass> {
- public:
+public:
   explicit StripSignednessPass() {}
   void runOnOperation() override;
 };
 
 class IntegerTypeConverter : public TypeConverter {
- public:
+public:
   static Type convertType(Type type) {
     if (auto iType = llvm::dyn_cast<IntegerType>(type)) {
       if (!iType.isSignless()) {
@@ -51,12 +51,12 @@
 // conversion patterns that used the original Quant types to be updated to
 // the non-quant variants.
 class GenericTypeConvert : public ConversionPattern {
- public:
-  GenericTypeConvert(MLIRContext* context, TypeConverter& converter)
+public:
+  GenericTypeConvert(MLIRContext *context, TypeConverter &converter)
       : ConversionPattern(converter, MatchAnyOpTypeTag(), 0, context) {}
-  LogicalResult matchAndRewrite(
-      Operation* op, ArrayRef<Value> operands,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     llvm::SmallVector<Type> newResults;
     if (isa<FunctionOpInterface>(op)) {
       return failure();
@@ -65,15 +65,15 @@
     (void)getTypeConverter()->convertTypes(op->getResultTypes(), newResults);
     OperationState state(op->getLoc(), op->getName().getStringRef(), operands,
                          newResults, op->getAttrs(), op->getSuccessors());
-    for (Region& r : op->getRegions()) {
-      Region* newRegion = state.addRegion();
+    for (Region &r : op->getRegions()) {
+      Region *newRegion = state.addRegion();
       rewriter.inlineRegionBefore(r, *newRegion, newRegion->begin());
       TypeConverter::SignatureConversion result(newRegion->getNumArguments());
       (void)getTypeConverter()->convertSignatureArgs(
           newRegion->getArgumentTypes(), result);
       rewriter.applySignatureConversion(newRegion, result);
     }
-    Operation* newOp = rewriter.create(state);
+    Operation *newOp = rewriter.create(state);
     rewriter.replaceOp(op, newOp->getResults());
     return success();
   }
@@ -93,25 +93,29 @@
   ConversionTarget target(getContext());
 
   // Operations are legal if they don't contain any illegal type.
-  target.markUnknownOpDynamicallyLegal([](Operation* op) {
+  target.markUnknownOpDynamicallyLegal([](Operation *op) {
     if (auto funcOp = dyn_cast<FunctionOpInterface>(op)) {
       for (Type type : funcOp.getArgumentTypes()) {
-        if (isIllegalType(type)) return false;
+        if (isIllegalType(type))
+          return false;
       }
       for (Type type : funcOp.getResultTypes()) {
-        if (isIllegalType(type)) return false;
+        if (isIllegalType(type))
+          return false;
       }
     }
     for (Type type : op->getResultTypes()) {
-      if (type && isIllegalType(type)) return false;
+      if (type && isIllegalType(type))
+        return false;
     }
     for (Type type : op->getOperandTypes()) {
-      if (type && isIllegalType(type)) return false;
+      if (type && isIllegalType(type))
+        return false;
     }
     return true;
   });
 
-  auto* ctx = &getContext();
+  auto *ctx = &getContext();
 
   RewritePatternSet patterns(&getContext());
   patterns.insert<GenericTypeConvert>(ctx, converter);
@@ -124,14 +128,14 @@
   }
 }
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
 createStripSignednessPass() {
   return std::make_unique<StripSignednessPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/TensorPadToTensorInsertSlice.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/TensorPadToTensorInsertSlice.cpp
index b1ba127..147dcd6 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/TensorPadToTensorInsertSlice.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/TensorPadToTensorInsertSlice.cpp
@@ -44,7 +44,8 @@
     // scalar that is not one of the arguments of the linalg operation.
     Region &region = padTensorOp.getRegion();
     Block &block = region.front();
-    if (!llvm::hasSingleElement(block)) return failure();
+    if (!llvm::hasSingleElement(block))
+      return failure();
     auto yieldOp = cast<tensor::YieldOp>(block.getTerminator());
     Value yieldVal = yieldOp.getValue();
     if (llvm::any_of(block.getArguments(),
@@ -114,7 +115,7 @@
     return success();
   }
 
- private:
+private:
   // Option to skip the pattern when tensor.pad op has one use and is used by
   // a Linalg op.
   bool skipSingleLinalgOpUses = false;
@@ -152,19 +153,19 @@
     }
   }
 
- private:
+private:
   bool skipSingleLinalgOpUses;
 };
 
-}  // namespace
+} // namespace
 
-std::unique_ptr<Pass> createTensorPadToTensorInsertSlicePass(
-    bool skipSingleLinalgOpUses) {
+std::unique_ptr<Pass>
+createTensorPadToTensorInsertSlicePass(bool skipSingleLinalgOpUses) {
   return std::make_unique<TensorPadToTensorInsertSlicePass>(
       skipSingleLinalgOpUses);
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/VerifyInputLegality.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/VerifyInputLegality.cpp
index 07429ad..b7601c3 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/VerifyInputLegality.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/VerifyInputLegality.cpp
@@ -37,13 +37,13 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createVerifyInputLegalityPass() {
   return std::make_unique<VerifyInputLegalityPass>();
 }
 
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Flow
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp
index f8c8562..7ee3242 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp
@@ -40,8 +40,8 @@
 }
 
 // Finds all dispatches within |rootOp| and groups them by executable export.
-static BindingLayoutAnalysis::ExportDispatchMap findAllDispatchSites(
-    Operation *rootOp) {
+static BindingLayoutAnalysis::ExportDispatchMap
+findAllDispatchSites(Operation *rootOp) {
   SymbolTable symbolTable(rootOp);
   BindingLayoutAnalysis::ExportDispatchMap dispatchMap;
   rootOp->walk([&](IREE::Stream::CmdDispatchOp dispatchOp) {
@@ -55,9 +55,9 @@
 }
 
 // Derives an pipeline layout from all of the dispatches to |exportOp|.
-static PipelineLayout deriveExportLayout(
-    IREE::Stream::ExecutableExportOp exportOp,
-    SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
+static PipelineLayout
+deriveExportLayout(IREE::Stream::ExecutableExportOp exportOp,
+                   SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
   auto funcOp = exportOp.lookupFunctionRef();
   assert(funcOp && "export target not found");
 
@@ -138,8 +138,9 @@
   return pipelineLayout;
 }
 
-static BindingLayoutAnalysis::ExportLayoutMap deriveExportLayouts(
-    Operation *rootOp, BindingLayoutAnalysis::ExportDispatchMap dispatchMap) {
+static BindingLayoutAnalysis::ExportLayoutMap
+deriveExportLayouts(Operation *rootOp,
+                    BindingLayoutAnalysis::ExportDispatchMap dispatchMap) {
   BindingLayoutAnalysis::ExportLayoutMap layoutMap;
   rootOp->walk([&](IREE::Stream::ExecutableExportOp exportOp) {
     auto &dispatchOps = dispatchMap[exportOp];
@@ -157,7 +158,8 @@
 BindingLayoutAnalysis::getExportDispatches(
     IREE::Stream::ExecutableExportOp exportOp) const {
   auto it = exportDispatches.find(exportOp);
-  if (it == exportDispatches.end()) return {};  // no dispatches
+  if (it == exportDispatches.end())
+    return {}; // no dispatches
   return it->second;
 }
 
@@ -168,7 +170,7 @@
   return it->second;
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h
index 0847f76..c6daeae 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h
@@ -58,7 +58,7 @@
 //
 // NOTE: erasing dispatch ops will invalidate this analysis.
 class BindingLayoutAnalysis {
- public:
+public:
   using ExportDispatchMap =
       DenseMap<Operation *, SmallVector<IREE::Stream::CmdDispatchOp>>;
   using ExportLayoutMap = DenseMap<Operation *, PipelineLayout>;
@@ -66,14 +66,14 @@
   explicit BindingLayoutAnalysis(Operation *rootOp);
 
   // Returns all of the dispatches to the given executable export.
-  SmallVector<IREE::Stream::CmdDispatchOp> getExportDispatches(
-      IREE::Stream::ExecutableExportOp exportOp) const;
+  SmallVector<IREE::Stream::CmdDispatchOp>
+  getExportDispatches(IREE::Stream::ExecutableExportOp exportOp) const;
 
   // Returns a layout used for the given executable export op.
-  const PipelineLayout &getPipelineLayout(
-      IREE::Stream::ExecutableExportOp exportOp) const;
+  const PipelineLayout &
+  getPipelineLayout(IREE::Stream::ExecutableExportOp exportOp) const;
 
- private:
+private:
   // All dispatches to a particular executable IREE::Stream::ExecutableExportOp.
   ExportDispatchMap exportDispatches;
   // Pipeline layout for each IREE::Stream::ExecutableExportOp.
@@ -81,9 +81,9 @@
   ExportLayoutMap exportLayouts;
 };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_ANALYSIS_BINDINGLAYOUT_
+#endif // IREE_COMPILER_DIALECT_HAL_ANALYSIS_BINDINGLAYOUT_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionDialectInterface.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionDialectInterface.h
index ffb7dff..5824341 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionDialectInterface.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionDialectInterface.h
@@ -24,7 +24,7 @@
 // same dialect.
 class HALConversionDialectInterface
     : public DialectInterface::Base<HALConversionDialectInterface> {
- public:
+public:
   HALConversionDialectInterface(Dialect *dialect) : Base(dialect) {}
 
   // Populates |patterns| with rewrites that convert from a higher-level
@@ -43,7 +43,7 @@
   }
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_CONVERSIONDIALECTINTERFACE_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_CONVERSIONDIALECTINTERFACE_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.cpp
index 6458e99..f1a1f74 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.cpp
@@ -76,5 +76,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.h
index d9ab289..b41dd1f 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/ConversionTarget.h
@@ -20,14 +20,15 @@
 // A conversion target for the HAL dialect that ensures that tensor types are
 // fully removed. Conversions targeting the HAL dialect should always use this.
 class HALConversionTarget : public ConversionTarget {
- public:
+public:
   HALConversionTarget(MLIRContext *context, TypeConverter &typeConverter);
 
   // Attempts to rewrite an op that may use tensor values into an op using HAL
   // buffers. See HALOpConversion for more information.
-  static LogicalResult applyDefaultBufferRewrite(
-      Operation *srcOp, ValueRange operands, StringRef dstOpName,
-      TypeConverter &typeConverter, ConversionPatternRewriter &rewriter);
+  static LogicalResult
+  applyDefaultBufferRewrite(Operation *srcOp, ValueRange operands,
+                            StringRef dstOpName, TypeConverter &typeConverter,
+                            ConversionPatternRewriter &rewriter);
 };
 
 // HAL tensor-to-buffer conversion utility.
@@ -48,23 +49,23 @@
 //   my.buffer_op(%arg0_view : !hal.buffer_view)
 template <typename SRC, typename DST>
 class HALOpConversion : public OpConversionPattern<SRC> {
- public:
+public:
   HALOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern<SRC>(context), typeConverter(typeConverter) {}
 
-  LogicalResult matchAndRewrite(
-      SRC srcOp, typename SRC::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SRC srcOp, typename SRC::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     return HALConversionTarget::applyDefaultBufferRewrite(
         srcOp, adaptor.getOperands(), DST::getOperationName(), typeConverter,
         rewriter);
   }
 
- protected:
+protected:
   TypeConverter &typeConverter;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_CONVERSIONTARGET_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_CONVERSIONTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.cpp
index 495cdaa..fad2d6e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.cpp
@@ -19,9 +19,10 @@
 struct ConvertExecutableCalculateWorkgroupsOp
     : public OpConversionPattern<IREE::HAL::ExecutableCalculateWorkgroupsOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::ExecutableCalculateWorkgroupsOp calculateOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::ExecutableCalculateWorkgroupsOp calculateOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto exportOp =
         SymbolTable::lookupNearestSymbolFrom<IREE::HAL::ExecutableExportOp>(
             calculateOp, calculateOp.getEntryPoint());
@@ -37,7 +38,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateHALToHALPatterns(MLIRContext *context,
                               ConversionTarget &conversionTarget,
@@ -48,5 +49,5 @@
                                                           context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.h
index 4233e55..f8d16be 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/Patterns.h
@@ -21,7 +21,7 @@
                               TypeConverter &typeConverter,
                               RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_HALTOHAL_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_HALTOHAL_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertAllocatorOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertAllocatorOps.cpp
index 2cdcec2..2b2ba05 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertAllocatorOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertAllocatorOps.cpp
@@ -15,7 +15,7 @@
 
 class AllocatorAllocateInitializedOpConversion
     : public OpConversionPattern<IREE::HAL::AllocatorAllocateInitializedOp> {
- public:
+public:
   AllocatorAllocateInitializedOpConversion(TypeConverter &typeConverter,
                                            MLIRContext *context,
                                            SymbolTable &importSymbols)
@@ -25,9 +25,10 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::AllocatorAllocateInitializedOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::AllocatorAllocateInitializedOp op,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto callOp = rewriter.replaceOpWithNewOp<IREE::VM::CallOp>(
         op, importOp.getName(),
         ArrayRef<Type>{
@@ -49,13 +50,13 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
 class AllocatorTryMapOpConversion
     : public OpConversionPattern<IREE::HAL::AllocatorTryMapOp> {
- public:
+public:
   AllocatorTryMapOpConversion(TypeConverter &typeConverter,
                               MLIRContext *context, SymbolTable &importSymbols)
       : OpConversionPattern(typeConverter, context) {
@@ -64,9 +65,9 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::AllocatorTryMapOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::AllocatorTryMapOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto callOp = rewriter.create<IREE::VM::CallOp>(
         op.getLoc(), importOp.getName(),
         ArrayRef<Type>{
@@ -93,11 +94,11 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
-}  // namespace
+} // namespace
 
 void populateHALAllocatorToVMPatterns(MLIRContext *context,
                                       SymbolTable &importSymbols,
@@ -111,5 +112,5 @@
                                                importSymbols);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferOps.cpp
index 0e70a4e..13d3eb7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferOps.cpp
@@ -15,7 +15,7 @@
 
 class BufferLoadOpConversion
     : public OpConversionPattern<IREE::HAL::BufferLoadOp> {
- public:
+public:
   BufferLoadOpConversion(MLIRContext *context, SymbolTable &importSymbols,
                          TypeConverter &typeConverter, StringRef importName)
       : OpConversionPattern(typeConverter, context) {
@@ -23,9 +23,9 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferLoadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferLoadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto importType = importOp.getFunctionType();
 
     auto originalType = op.getResult().getType();
@@ -91,13 +91,13 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
 class BufferStoreOpConversion
     : public OpConversionPattern<IREE::HAL::BufferStoreOp> {
- public:
+public:
   BufferStoreOpConversion(MLIRContext *context, SymbolTable &importSymbols,
                           TypeConverter &typeConverter, StringRef importName)
       : OpConversionPattern(context) {
@@ -105,9 +105,9 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferStoreOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferStoreOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto importType = importOp.getFunctionType();
 
     auto elementType = op.getValue().getType();
@@ -171,7 +171,7 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
@@ -191,5 +191,5 @@
                                            typeConverter, "hal.buffer.store");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp
index 209d7c5..2b15536 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp
@@ -33,5 +33,5 @@
       context, importSymbols, typeConverter, "hal.buffer_view.trace");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertChannelOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertChannelOps.cpp
index 8523092..6667100 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertChannelOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertChannelOps.cpp
@@ -23,5 +23,5 @@
       context, importSymbols, typeConverter, "hal.channel.rank_and_count");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertCommandBufferOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertCommandBufferOps.cpp
index d4590e1..45ec186 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertCommandBufferOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertCommandBufferOps.cpp
@@ -21,7 +21,7 @@
 // operand set) but we need it for the fixed call signature.
 class CommandBufferCreateOpConversion
     : public OpConversionPattern<IREE::HAL::CommandBufferCreateOp> {
- public:
+public:
   CommandBufferCreateOpConversion(MLIRContext *context,
                                   SymbolTable &importSymbols,
                                   TypeConverter &typeConverter,
@@ -31,9 +31,9 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::CommandBufferCreateOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::CommandBufferCreateOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto importType = importOp.getFunctionType();
 
     SmallVector<Value, 8> callOperands = {
@@ -41,12 +41,14 @@
     };
     auto modesValue = detail::rewriteAttrToOperands(
         op.getLoc(), adaptor.getModesAttr(), rewriter.getI32Type(), rewriter);
-    if (!modesValue.has_value()) return failure();
+    if (!modesValue.has_value())
+      return failure();
     callOperands.append(modesValue.value());
     auto categoriesValue = detail::rewriteAttrToOperands(
         op.getLoc(), adaptor.getCommandCategoriesAttr(), rewriter.getI32Type(),
         rewriter);
-    if (!categoriesValue.has_value()) return failure();
+    if (!categoriesValue.has_value())
+      return failure();
     callOperands.append(categoriesValue.value());
     if (adaptor.getBindingCapacity()) {
       callOperands.push_back(castToImportType(adaptor.getBindingCapacity(),
@@ -64,13 +66,13 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
 class CommandBufferFillBufferOpConversion
     : public OpConversionPattern<IREE::HAL::CommandBufferFillBufferOp> {
- public:
+public:
   CommandBufferFillBufferOpConversion(MLIRContext *context,
                                       SymbolTable &importSymbols,
                                       TypeConverter &typeConverter,
@@ -80,9 +82,9 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::CommandBufferFillBufferOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::CommandBufferFillBufferOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto importType = importOp.getFunctionType();
 
     SmallVector<Value, 8> callOperands = {
@@ -120,13 +122,13 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
 class CommandBufferCollectiveOpConversion
     : public OpConversionPattern<IREE::HAL::CommandBufferCollectiveOp> {
- public:
+public:
   CommandBufferCollectiveOpConversion(MLIRContext *context,
                                       SymbolTable &importSymbols,
                                       TypeConverter &typeConverter,
@@ -136,9 +138,9 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::CommandBufferCollectiveOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::CommandBufferCollectiveOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto importType = importOp.getFunctionType();
 
     Value nullBuffer;
@@ -212,13 +214,13 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
 class CommandBufferPushDescriptorSetOpConversion
     : public OpConversionPattern<IREE::HAL::CommandBufferPushDescriptorSetOp> {
- public:
+public:
   CommandBufferPushDescriptorSetOpConversion(MLIRContext *context,
                                              SymbolTable &importSymbols,
                                              TypeConverter &typeConverter,
@@ -228,9 +230,10 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::CommandBufferPushDescriptorSetOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::CommandBufferPushDescriptorSetOp op,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto importType = importOp.getFunctionType();
 
     // Memoize zeros/nulls ala IndexSet.
@@ -297,11 +300,11 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
-}  // namespace
+} // namespace
 
 void populateHALCommandBufferToVMPatterns(MLIRContext *context,
                                           SymbolTable &importSymbols,
@@ -344,5 +347,5 @@
           "hal.command_buffer.dispatch.indirect");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertDeviceOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertDeviceOps.cpp
index d3ec3fc..321143c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertDeviceOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertDeviceOps.cpp
@@ -16,18 +16,20 @@
 // i64 with a truncation on the result.
 class DeviceQueryIntCastOpConversion
     : public OpConversionPattern<IREE::HAL::DeviceQueryOp> {
- public:
+public:
   DeviceQueryIntCastOpConversion(MLIRContext *context,
                                  TypeConverter &typeConverter)
       : OpConversionPattern(typeConverter, context) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::DeviceQueryOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::DeviceQueryOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // We only deal with in-dialect conversions to i32 in this pattern.
     auto targetType = op.getValue().getType();
-    if (targetType.isInteger(64)) return failure();
-    if (!targetType.isIntOrIndex()) return failure();
+    if (targetType.isInteger(64))
+      return failure();
+    if (!targetType.isIntOrIndex())
+      return failure();
 
     // Query as I64.
     // Note that due to type conversion we need to handle the default logic
@@ -78,7 +80,7 @@
 
 class DeviceQueryI64OpConversion
     : public OpConversionPattern<IREE::HAL::DeviceQueryOp> {
- public:
+public:
   DeviceQueryI64OpConversion(MLIRContext *context, SymbolTable &importSymbols,
                              TypeConverter &typeConverter, StringRef importName)
       : OpConversionPattern(typeConverter, context) {
@@ -86,13 +88,15 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::DeviceQueryOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!op.getValue().getType().isInteger(64)) return failure();
+  LogicalResult
+  matchAndRewrite(IREE::HAL::DeviceQueryOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!op.getValue().getType().isInteger(64))
+      return failure();
     auto results =
         rewriteToCall(op, adaptor, importOp, *getTypeConverter(), rewriter);
-    if (!results.has_value()) return failure();
+    if (!results.has_value())
+      return failure();
     auto ok = results->front();
     auto value = results->back();
     if (op.getDefaultValue().has_value()) {
@@ -106,7 +110,7 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
@@ -131,5 +135,5 @@
       context, importSymbols, typeConverter, "hal.device.queue.flush");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExecutableOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExecutableOps.cpp
index 8034afd..7b15929 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExecutableOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExecutableOps.cpp
@@ -58,7 +58,8 @@
   // extra IR for the indices. We should batch them up and append in one go.
   for (auto constantValue : llvm::enumerate(constantValues)) {
     // Buffer is zero-initialized so we can skip zero values.
-    if (mlir::matchPattern(constantValue.value(), m_Zero())) continue;
+    if (mlir::matchPattern(constantValue.value(), m_Zero()))
+      continue;
     auto constantLoc = constantValue.value().getLoc();
     builder.create<IREE::VM::BufferStoreI32Op>(
         constantLoc, constantBuffer,
@@ -70,8 +71,9 @@
   return constantBuffer;
 }
 
-IREE::VM::RodataOp createExecutableBinaryRodata(
-    IREE::HAL::ExecutableBinaryOp binaryOp, OpBuilder &builder) {
+IREE::VM::RodataOp
+createExecutableBinaryRodata(IREE::HAL::ExecutableBinaryOp binaryOp,
+                             OpBuilder &builder) {
   auto executableOp =
       binaryOp.getOperation()->getParentOfType<IREE::HAL::ExecutableOp>();
   auto insertPoint = builder.saveInsertionPoint();
@@ -99,12 +101,12 @@
 
 class RemoveExecutableOpConversion
     : public OpConversionPattern<IREE::HAL::ExecutableOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::ExecutableOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::ExecutableOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
@@ -112,7 +114,7 @@
 
 class ExecutableCreateOpConversion
     : public OpConversionPattern<IREE::HAL::ExecutableCreateOp> {
- public:
+public:
   ExecutableCreateOpConversion(MLIRContext *context, SymbolTable &importSymbols,
                                TypeConverter &typeConverter,
                                StringRef importName)
@@ -121,9 +123,9 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      IREE::HAL::ExecutableCreateOp createOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::ExecutableCreateOp createOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Materialize vm.rodata for the binary.
     auto executableBinaryOp =
         SymbolTable::lookupNearestSymbolFrom<IREE::HAL::ExecutableBinaryOp>(
@@ -166,11 +168,11 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
-}  // namespace
+} // namespace
 
 void populateHALExecutableToVMPatterns(MLIRContext *context,
                                        SymbolTable &importSymbols,
@@ -190,5 +192,5 @@
       context, importSymbols, typeConverter, "hal.pipeline_layout.create");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp
index 93b3f14..2490b67 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertExperimentalOps.cpp
@@ -19,5 +19,5 @@
       context, importSymbols, typeConverter, "hal.ex.shared_device");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertFenceOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertFenceOps.cpp
index e9fe827..8915264 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertFenceOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertFenceOps.cpp
@@ -29,5 +29,5 @@
       context, importSymbols, typeConverter, "hal.fence.await");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.cpp
index aabb8a4..f635d79 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.cpp
@@ -92,7 +92,7 @@
 // A pass converting the IREE HAL dialect into the IREE VM dialect.
 class ConvertHALToVMPass
     : public PassWrapper<ConvertHALToVMPass, OperationPass<ModuleOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertHALToVMPass)
 
   explicit ConvertHALToVMPass(IREE::VM::TargetOptions targetOptions)
@@ -109,7 +109,8 @@
   }
 
   void runOnOperation() override {
-    if (getOperation().getBody()->empty()) return;
+    if (getOperation().getBody()->empty())
+      return;
     auto *context = &getContext();
 
     VMConversionTarget conversionTarget(context);
@@ -139,14 +140,14 @@
     }
   }
 
- private:
+private:
   IREE::VM::TargetOptions targetOptions_;
 };
 
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<ModuleOp>> createConvertHALToVMPass(
-    IREE::VM::TargetOptions targetOptions) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertHALToVMPass(IREE::VM::TargetOptions targetOptions) {
   return std::make_unique<ConvertHALToVMPass>(targetOptions);
 }
 
@@ -155,5 +156,5 @@
   return std::make_unique<ConvertHALToVMPass>(options);
 });
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.h
index 84cb1f0..ade6480 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/Patterns.h
@@ -25,10 +25,11 @@
                                  OpBuilder &builder);
 
 // Creates a vm.rodata containing the contents of a hal.executable.binary.
-IREE::VM::RodataOp createExecutableBinaryRodata(
-    IREE::HAL::ExecutableBinaryOp binaryOp, OpBuilder &builder);
+IREE::VM::RodataOp
+createExecutableBinaryRodata(IREE::HAL::ExecutableBinaryOp binaryOp,
+                             OpBuilder &builder);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_HALTOVM_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_HALTOVM_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/Passes.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/Passes.h
index 92734e2..e823f61 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/Passes.h
@@ -13,14 +13,14 @@
 namespace mlir {
 namespace iree_compiler {
 
-std::unique_ptr<OperationPass<ModuleOp>> createConvertHALToVMPass(
-    IREE::VM::TargetOptions targetOptions);
+std::unique_ptr<OperationPass<ModuleOp>>
+createConvertHALToVMPass(IREE::VM::TargetOptions targetOptions);
 
 inline void registerHALConversionPasses() {
   createConvertHALToVMPass(IREE::VM::TargetOptions::FromFlags::get());
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSIONS_PASSES_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSIONS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/ConvertShapeOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/ConvertShapeOps.cpp
index 1f976b1..59dd14d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/ConvertShapeOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/ConvertShapeOps.cpp
@@ -19,9 +19,9 @@
 
 struct BufferViewDimPattern : public OpConversionPattern<tensor::DimOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      tensor::DimOp dimOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(tensor::DimOp dimOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!llvm::isa<IREE::HAL::BufferViewType>(adaptor.getSource().getType())) {
       return failure();
     }
@@ -36,9 +36,9 @@
 
 struct BufferViewRankPattern : public OpConversionPattern<tensor::RankOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      tensor::RankOp rankOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(tensor::RankOp rankOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!llvm::isa<IREE::HAL::BufferViewType>(adaptor.getTensor().getType())) {
       return failure();
     }
@@ -48,7 +48,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStandardShapeToHALPatterns(MLIRContext *context,
                                         ConversionTarget &conversionTarget,
@@ -61,5 +61,5 @@
   patterns.insert<BufferViewDimPattern, BufferViewRankPattern>(context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.cpp
index 6704eb2..7d0a447 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.cpp
@@ -30,5 +30,5 @@
                                      typeConverter);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.h
index 70b9724..ecbded9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/Patterns.h
@@ -19,7 +19,7 @@
                                    TypeConverter &typeConverter,
                                    RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_STANDARDTOHAL_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_STANDARDTOHAL_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
index 868d310..9e087cb 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
@@ -48,8 +48,8 @@
   return builder.create<arith::ConstantIntOp>(op->getLoc(), -1, 64);
 }
 
-static std::tuple<Value, Value> lookupDeviceAndQueueAffinityFor(
-    Operation *op, OpBuilder &builder) {
+static std::tuple<Value, Value>
+lookupDeviceAndQueueAffinityFor(Operation *op, OpBuilder &builder) {
   // NOTE: we have this combined method so that we can reuse any expensive
   // lookups we need to do. Today we aren't duplicating the lookups and don't
   // bother.
@@ -74,7 +74,8 @@
 // Returns the |timepointFence| or a util.null.
 static Value getOrCreateWaitFence(Location loc, Value timepointFence,
                                   OpBuilder &builder) {
-  if (timepointFence) return timepointFence;
+  if (timepointFence)
+    return timepointFence;
   return builder.create<IREE::Util::NullOp>(
       loc, builder.getType<IREE::HAL::FenceType>());
 }
@@ -85,12 +86,14 @@
 static Value consumeBoundFence(Value timepoint,
                                ConversionPatternRewriter &rewriter) {
   // Must only have one use. We can't consume a fence multiple times.
-  if (!timepoint.hasOneUse()) return nullptr;  // >1 use
+  if (!timepoint.hasOneUse())
+    return nullptr; // >1 use
 
   // The use must be an export to a fence.
   auto chainOp = dyn_cast<IREE::Stream::TimepointChainExternalOp>(
       *timepoint.getUsers().begin());
-  if (!chainOp) return nullptr;  // non-export use
+  if (!chainOp)
+    return nullptr; // non-export use
   assert(!chainOp.getExternalValues().empty());
   auto fence = chainOp.getExternalValues().front();
   if (!fence || !llvm::isa<IREE::HAL::FenceType>(fence.getType()))
@@ -104,13 +107,13 @@
   // that the functions could consume it but inlining is pretty aggressive now.
   if (!IREE::Util::isValueUsableForOp(fence, rewriter.getBlock(),
                                       rewriter.getInsertionPoint())) {
-    return nullptr;  // unusable
+    return nullptr; // unusable
   }
 
   // Consume the op by erasing it.
   rewriter.eraseOp(chainOp);
 
-  return fence;  // usable
+  return fence; // usable
 }
 
 // Returns the a new fence for |timepoint| or an existing fence if one was
@@ -128,7 +131,8 @@
   // Check to see if the timepoint is associated with a fence. In common cases
   // when along ABI boundaries we can usually find an association.
   auto fence = consumeBoundFence(timepoint, rewriter);
-  if (fence) return fence;
+  if (fence)
+    return fence;
 
   // Create a new fence.
   return rewriter.create<IREE::HAL::FenceCreateOp>(
@@ -137,8 +141,8 @@
 }
 
 // Scans all of the stream.cmd.* ops in the region to derive a command category.
-static IREE::HAL::CommandCategoryBitfield deriveCommandCategories(
-    Region &region) {
+static IREE::HAL::CommandCategoryBitfield
+deriveCommandCategories(Region &region) {
   auto bits = IREE::HAL::CommandCategoryBitfield::None;
   for (auto &block : region) {
     for (auto &op : block) {
@@ -165,49 +169,50 @@
 // This will fail if the resource type is not directly mappable to HAL bits.
 // The bits set here are those that must be set for the buffer to be used as the
 // buffer within the program with its defined resource lifetime.
-static LogicalResult deriveRequiredResourceBufferBits(
-    Location loc, IREE::Stream::ResourceType resourceType,
-    IREE::HAL::MemoryTypeBitfield &memoryTypes,
-    IREE::HAL::BufferUsageBitfield &bufferUsage) {
+static LogicalResult
+deriveRequiredResourceBufferBits(Location loc,
+                                 IREE::Stream::ResourceType resourceType,
+                                 IREE::HAL::MemoryTypeBitfield &memoryTypes,
+                                 IREE::HAL::BufferUsageBitfield &bufferUsage) {
   memoryTypes = IREE::HAL::MemoryTypeBitfield::None;
   bufferUsage = IREE::HAL::BufferUsageBitfield::None;
   switch (resourceType.getLifetime()) {
-    default:
-      return mlir::emitError(loc)
-             << "unsupported resource lifetime: "
-             << IREE::Stream::stringifyLifetime(resourceType.getLifetime());
-    case IREE::Stream::Lifetime::Constant:
-      // Device local; copies required to get into external resources.
-      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
-      bufferUsage =
-          bufferUsage | IREE::HAL::BufferUsageBitfield::SharingImmutable;
-      break;
-    case IREE::Stream::Lifetime::Variable:
-      // Device local; copies required to get into external resources.
-      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
-      break;
-    case IREE::Stream::Lifetime::External:
-      // We only require device-visible for external buffers (as we don't today
-      // do anything else with them on the host). They may be mappable for user
-      // convenience. Ideally they would have been placed in device-local memory
-      // but so long as they are device visible the program will execute
-      // correctly.
-      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceVisible;
-      break;
-    case IREE::Stream::Lifetime::Staging:
-      // Host local; copies required to get into device resources.
-      // We could vary this based on staging usage (upload/download) by
-      // making it device-local|host-visible, but host-local means we have
-      // a better chance of mapping it during uploads.
-      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::HostLocal |
-                    IREE::HAL::MemoryTypeBitfield::DeviceVisible;
-      bufferUsage = bufferUsage | IREE::HAL::BufferUsageBitfield::Transfer |
-                    IREE::HAL::BufferUsageBitfield::Mapping;
-      break;
-    case IREE::Stream::Lifetime::Transient:
-      // Device local; copies required to get into external resources.
-      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
-      break;
+  default:
+    return mlir::emitError(loc)
+           << "unsupported resource lifetime: "
+           << IREE::Stream::stringifyLifetime(resourceType.getLifetime());
+  case IREE::Stream::Lifetime::Constant:
+    // Device local; copies required to get into external resources.
+    memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
+    bufferUsage =
+        bufferUsage | IREE::HAL::BufferUsageBitfield::SharingImmutable;
+    break;
+  case IREE::Stream::Lifetime::Variable:
+    // Device local; copies required to get into external resources.
+    memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
+    break;
+  case IREE::Stream::Lifetime::External:
+    // We only require device-visible for external buffers (as we don't today
+    // do anything else with them on the host). They may be mappable for user
+    // convenience. Ideally they would have been placed in device-local memory
+    // but so long as they are device visible the program will execute
+    // correctly.
+    memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceVisible;
+    break;
+  case IREE::Stream::Lifetime::Staging:
+    // Host local; copies required to get into device resources.
+    // We could vary this based on staging usage (upload/download) by
+    // making it device-local|host-visible, but host-local means we have
+    // a better chance of mapping it during uploads.
+    memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::HostLocal |
+                  IREE::HAL::MemoryTypeBitfield::DeviceVisible;
+    bufferUsage = bufferUsage | IREE::HAL::BufferUsageBitfield::Transfer |
+                  IREE::HAL::BufferUsageBitfield::Mapping;
+    break;
+  case IREE::Stream::Lifetime::Transient:
+    // Device local; copies required to get into external resources.
+    memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
+    break;
   }
 
   // TODO(benvanik): refine usage based on analysis.
@@ -222,10 +227,11 @@
 // The bits set here represent the superset of required and allowed bits and
 // are useful for providing buffers back to users via the ABI that may need to
 // be used for more than just what the internal program requires.
-static LogicalResult deriveAllowedResourceBufferBits(
-    Location loc, IREE::Stream::ResourceType resourceType,
-    IREE::HAL::MemoryTypeBitfield &memoryTypes,
-    IREE::HAL::BufferUsageBitfield &bufferUsage) {
+static LogicalResult
+deriveAllowedResourceBufferBits(Location loc,
+                                IREE::Stream::ResourceType resourceType,
+                                IREE::HAL::MemoryTypeBitfield &memoryTypes,
+                                IREE::HAL::BufferUsageBitfield &bufferUsage) {
   memoryTypes = IREE::HAL::MemoryTypeBitfield::None;
   bufferUsage = IREE::HAL::BufferUsageBitfield::None;
   if (failed(deriveRequiredResourceBufferBits(loc, resourceType, memoryTypes,
@@ -233,27 +239,27 @@
     return failure();
   }
   switch (resourceType.getLifetime()) {
-    default:
-      break;
-    case IREE::Stream::Lifetime::External:
-      // #yolo; these come from/go to outside the program.
-      // Today we assume they are device-local|host-visible just for
-      // practical purposes but that does not have to be true. We really
-      // want this to be something we analyze and handle on the edges
-      // (transferring devices/etc if needed).
-      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal |
-                    IREE::HAL::MemoryTypeBitfield::HostVisible;
-      // NOTE: we may not map it but users may after they get them back.
-      // Another reason we should annotate this - having a buffer be
-      // mappable is potentially expensive (may get a 2nd copy in memory!).
-      bufferUsage = bufferUsage | IREE::HAL::BufferUsageBitfield::Mapping;
-      break;
+  default:
+    break;
+  case IREE::Stream::Lifetime::External:
+    // #yolo; these come from/go to outside the program.
+    // Today we assume they are device-local|host-visible just for
+    // practical purposes but that does not have to be true. We really
+    // want this to be something we analyze and handle on the edges
+    // (transferring devices/etc if needed).
+    memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal |
+                  IREE::HAL::MemoryTypeBitfield::HostVisible;
+    // NOTE: we may not map it but users may after they get them back.
+    // Another reason we should annotate this - having a buffer be
+    // mappable is potentially expensive (may get a 2nd copy in memory!).
+    bufferUsage = bufferUsage | IREE::HAL::BufferUsageBitfield::Mapping;
+    break;
   }
   return success();
 }
 
 class StreamConversionMapping {
- public:
+public:
   // Maps the stream dialect |executeOp| to the hal dialect |commandBuffer|
   // value used during recording. Patterns can use this to find the SSA value
   // they need to make hal.command_buffer.* ops.
@@ -279,7 +285,7 @@
     return it->second;
   }
 
- private:
+private:
   // Ops within stream.cmd.execute ops -> !hal.command_buffer.
   DenseMap<Operation *, Value> commandBuffers;
 };
@@ -298,9 +304,9 @@
 struct ResourceAllocOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceAllocOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceAllocOp allocOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceAllocOp allocOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto allocator = lookupAllocatorFor(allocOp, rewriter);
     auto bufferType = rewriter.getType<IREE::HAL::BufferType>();
 
@@ -331,9 +337,9 @@
 struct ResourceAllocaOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceAllocaOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceAllocaOp allocaOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceAllocaOp allocaOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = allocaOp.getLoc();
     auto [device, queueAffinity] =
         lookupDeviceAndQueueAffinityFor(allocaOp, rewriter);
@@ -370,9 +376,10 @@
 struct ResourceDeallocaOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceDeallocaOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceDeallocaOp deallocaOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceDeallocaOp deallocaOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = deallocaOp.getLoc();
     auto [device, queueAffinity] =
         lookupDeviceAndQueueAffinityFor(deallocaOp, rewriter);
@@ -396,9 +403,9 @@
 struct ResourceSizeOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceSizeOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceSizeOp sizeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceSizeOp sizeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::BufferLengthOp>(
         sizeOp, rewriter.getIndexType(), adaptor.getOperand());
     return success();
@@ -408,9 +415,9 @@
 struct ResourceMapOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceMapOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceMapOp mapOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceMapOp mapOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto allocator = lookupAllocatorFor(mapOp, rewriter);
     auto bufferType = rewriter.getType<IREE::HAL::BufferType>();
 
@@ -432,9 +439,9 @@
 struct ResourceTryMapOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceTryMapOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceTryMapOp tryMapOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceTryMapOp tryMapOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto allocator = lookupAllocatorFor(tryMapOp, rewriter);
     auto resourceType =
         llvm::cast<IREE::Stream::ResourceType>(tryMapOp.getResult().getType());
@@ -443,26 +450,26 @@
     auto memoryTypes = IREE::HAL::MemoryTypeBitfield::None;
     auto bufferUsage = IREE::HAL::BufferUsageBitfield::None;
     switch (resourceType.getLifetime()) {
-      default:
-        return tryMapOp.emitOpError()
-               << "unsupported resource lifetime: "
-               << IREE::Stream::stringifyLifetime(resourceType.getLifetime());
-      case IREE::Stream::Lifetime::Constant:
-        // Device local; copies required to get into external resources.
-        memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
-        bufferUsage =
-            bufferUsage | IREE::HAL::BufferUsageBitfield::SharingImmutable;
-        break;
-      case IREE::Stream::Lifetime::Staging:
-        // Host local; copies required to get into device resources.
-        // We could vary this based on staging usage (upload/download) by
-        // making it device-local|host-visible, but host-local means we have
-        // a better chance of mapping it during uploads.
-        memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::HostLocal |
-                      IREE::HAL::MemoryTypeBitfield::DeviceVisible;
-        bufferUsage = bufferUsage | IREE::HAL::BufferUsageBitfield::Transfer |
-                      IREE::HAL::BufferUsageBitfield::Mapping;
-        break;
+    default:
+      return tryMapOp.emitOpError()
+             << "unsupported resource lifetime: "
+             << IREE::Stream::stringifyLifetime(resourceType.getLifetime());
+    case IREE::Stream::Lifetime::Constant:
+      // Device local; copies required to get into external resources.
+      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::DeviceLocal;
+      bufferUsage =
+          bufferUsage | IREE::HAL::BufferUsageBitfield::SharingImmutable;
+      break;
+    case IREE::Stream::Lifetime::Staging:
+      // Host local; copies required to get into device resources.
+      // We could vary this based on staging usage (upload/download) by
+      // making it device-local|host-visible, but host-local means we have
+      // a better chance of mapping it during uploads.
+      memoryTypes = memoryTypes | IREE::HAL::MemoryTypeBitfield::HostLocal |
+                    IREE::HAL::MemoryTypeBitfield::DeviceVisible;
+      bufferUsage = bufferUsage | IREE::HAL::BufferUsageBitfield::Transfer |
+                    IREE::HAL::BufferUsageBitfield::Mapping;
+      break;
     }
 
     // TODO(benvanik): refine usage based on analysis.
@@ -480,9 +487,9 @@
 struct ResourceLoadOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceLoadOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceLoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceLoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loadType =
         getTypeConverter()->convertType(loadOp.getResult().getType());
     rewriter.replaceOpWithNewOp<IREE::HAL::BufferLoadOp>(
@@ -494,9 +501,9 @@
 struct ResourceStoreOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceStoreOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceStoreOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceStoreOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::BufferStoreOp>(
         storeOp, adaptor.getValue(), adaptor.getTarget(),
         adaptor.getTargetOffset());
@@ -507,9 +514,9 @@
 struct ResourceSubviewOpPattern
     : public StreamConversionPattern<IREE::Stream::ResourceSubviewOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceSubviewOp subviewOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceSubviewOp subviewOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto bufferType = rewriter.getType<IREE::HAL::BufferType>();
     // NOTE: this aliases! We assume at this point all useful alias analysis
     // has been performed and it's fine to lose the tie information here.
@@ -525,10 +532,11 @@
 // buffer must have compatibility with our target device allocator and the
 // buffer must have at least the minimum expected size (additional padding is
 // ok).
-static LogicalResult buildStorageAssertions(
-    Location loc, Value buffer, StringAttr message, Value allocator,
-    Value minimumLength, IREE::Stream::ResourceType resourceType,
-    OpBuilder &builder) {
+static LogicalResult
+buildStorageAssertions(Location loc, Value buffer, StringAttr message,
+                       Value allocator, Value minimumLength,
+                       IREE::Stream::ResourceType resourceType,
+                       OpBuilder &builder) {
   auto memoryTypes = IREE::HAL::MemoryTypeBitfield::None;
   auto bufferUsage = IREE::HAL::BufferUsageBitfield::None;
   if (failed(deriveRequiredResourceBufferBits(loc, resourceType, memoryTypes,
@@ -550,9 +558,9 @@
 struct TensorImportBufferOpPattern
     : public StreamConversionPattern<IREE::Stream::TensorImportOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!llvm::isa<IREE::HAL::BufferType>(importOp.getSource().getType())) {
       return failure();
     }
@@ -581,9 +589,9 @@
 struct TensorImportBufferViewOpPattern
     : public StreamConversionPattern<IREE::Stream::TensorImportOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto sourceType = importOp.getSource().getType();
     if (!llvm::isa<IREE::HAL::BufferViewType>(sourceType) &&
         !llvm::isa<TensorType>(sourceType)) {
@@ -617,9 +625,9 @@
 struct TensorExportBufferOpPattern
     : public StreamConversionPattern<IREE::Stream::TensorExportOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!llvm::isa<IREE::HAL::BufferType>(exportOp.getResult().getType())) {
       return failure();
     }
@@ -631,9 +639,9 @@
 struct TensorExportBufferViewOpPattern
     : public StreamConversionPattern<IREE::Stream::TensorExportOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto targetType = exportOp.getResult().getType();
     if (!llvm::isa<IREE::HAL::BufferViewType>(targetType) &&
         !llvm::isa<TensorType>(targetType)) {
@@ -677,9 +685,9 @@
 struct TensorTraceOpPattern
     : public StreamConversionPattern<IREE::Stream::TensorTraceOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorTraceOp traceOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorTraceOp traceOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::BufferViewTraceOp>(
         traceOp, traceOp.getKeyAttr(), adaptor.getOperands());
     return success();
@@ -689,9 +697,9 @@
 struct CmdFlushOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdFlushOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdFlushOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdFlushOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(benvanik): HAL command buffer op for flush.
     rewriter.eraseOp(op);
     return success();
@@ -701,9 +709,9 @@
 struct CmdInvalidateOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdInvalidateOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdInvalidateOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdInvalidateOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(benvanik): HAL command buffer op for invalidate.
     rewriter.eraseOp(op);
     return success();
@@ -713,9 +721,9 @@
 struct CmdDiscardOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdDiscardOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdDiscardOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdDiscardOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(benvanik): HAL command buffer op for discard.
     rewriter.eraseOp(op);
     return success();
@@ -725,9 +733,9 @@
 struct CmdFillOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdFillOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdFillOp fillOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdFillOp fillOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto commandBuffer = mapping->lookupCommandBufferFor(fillOp);
     rewriter.replaceOpWithNewOp<IREE::HAL::CommandBufferFillBufferOp>(
         fillOp, commandBuffer, adaptor.getTarget(), adaptor.getTargetOffset(),
@@ -739,9 +747,9 @@
 struct CmdCopyOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdCopyOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdCopyOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdCopyOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto commandBuffer = mapping->lookupCommandBufferFor(op);
     rewriter.replaceOpWithNewOp<IREE::HAL::CommandBufferCopyBufferOp>(
         op, commandBuffer, adaptor.getSource(), adaptor.getSourceOffset(),
@@ -751,12 +759,13 @@
 };
 
 // NOTE: this relies on the enums being the same today. Ew.
-static IREE::HAL::CollectiveAttr convertCollectiveAttr(
-    IREE::Stream::CollectiveAttr sourceAttr) {
+static IREE::HAL::CollectiveAttr
+convertCollectiveAttr(IREE::Stream::CollectiveAttr sourceAttr) {
   auto convertReductionOp =
       [](std::optional<IREE::Stream::CollectiveReductionOp> op)
       -> std::optional<IREE::HAL::CollectiveReductionOp> {
-    if (!op.has_value()) return std::nullopt;
+    if (!op.has_value())
+      return std::nullopt;
     return static_cast<IREE::HAL::CollectiveReductionOp>(op.value());
   };
   return IREE::HAL::CollectiveAttr::get(
@@ -770,9 +779,9 @@
 struct CmdCollectiveOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdCollectiveOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdCollectiveOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdCollectiveOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto commandBuffer = mapping->lookupCommandBufferFor(op);
 
     Value sendBuffer;
@@ -782,27 +791,27 @@
     Value recvOffset;
     Value recvLength;
     switch (adaptor.getOp().getKind()) {
-      default:
-        assert(adaptor.getResources().size() == 2 && "should have verified");
-        sendBuffer = adaptor.getResources()[0];
-        sendOffset = adaptor.getResourceOffsets()[0];
-        sendLength = adaptor.getResourceLengths()[0];
-        recvBuffer = adaptor.getResources()[1];
-        recvOffset = adaptor.getResourceOffsets()[1];
-        recvLength = adaptor.getResourceLengths()[1];
-        break;
-      case IREE::Stream::CollectiveKind::Send:
-        assert(adaptor.getResources().size() == 1 && "should have verified");
-        sendBuffer = adaptor.getResources()[0];
-        sendOffset = adaptor.getResourceOffsets()[0];
-        sendLength = adaptor.getResourceLengths()[0];
-        break;
-      case IREE::Stream::CollectiveKind::Recv:
-        assert(adaptor.getResources().size() == 1 && "should have verified");
-        recvBuffer = adaptor.getResources()[0];
-        recvOffset = adaptor.getResourceOffsets()[0];
-        recvLength = adaptor.getResourceLengths()[0];
-        break;
+    default:
+      assert(adaptor.getResources().size() == 2 && "should have verified");
+      sendBuffer = adaptor.getResources()[0];
+      sendOffset = adaptor.getResourceOffsets()[0];
+      sendLength = adaptor.getResourceLengths()[0];
+      recvBuffer = adaptor.getResources()[1];
+      recvOffset = adaptor.getResourceOffsets()[1];
+      recvLength = adaptor.getResourceLengths()[1];
+      break;
+    case IREE::Stream::CollectiveKind::Send:
+      assert(adaptor.getResources().size() == 1 && "should have verified");
+      sendBuffer = adaptor.getResources()[0];
+      sendOffset = adaptor.getResourceOffsets()[0];
+      sendLength = adaptor.getResourceLengths()[0];
+      break;
+    case IREE::Stream::CollectiveKind::Recv:
+      assert(adaptor.getResources().size() == 1 && "should have verified");
+      recvBuffer = adaptor.getResources()[0];
+      recvOffset = adaptor.getResourceOffsets()[0];
+      recvLength = adaptor.getResourceLengths()[0];
+      break;
     }
 
     rewriter.replaceOpWithNewOp<IREE::HAL::CommandBufferCollectiveOp>(
@@ -815,8 +824,8 @@
 };
 
 // Returns a hal.device.switch match expression that selects the given export.
-static Attribute getExportConditionAttr(
-    IREE::HAL::ExecutableExportOp exportOp) {
+static Attribute
+getExportConditionAttr(IREE::HAL::ExecutableExportOp exportOp) {
   // TODO(benvanik): customizable selection logic. Today this just checks
   // whether the variant target is supported but we can also allow
   // specialization of entry points based on dispatch site parameters.
@@ -827,9 +836,9 @@
 struct CmdDispatchOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdDispatchOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdDispatchOp dispatchOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdDispatchOp dispatchOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = dispatchOp.getLoc();
     auto commandBuffer = mapping->lookupCommandBufferFor(dispatchOp);
 
@@ -891,7 +900,7 @@
     // TODO(#5322): symbolic push constant names on the hal.interface so we can
     // sparsely pack these.
     if (!adaptor.getUniformOperands().empty()) {
-      int pushConstantBase = 0;  // always 0 today
+      int pushConstantBase = 0; // always 0 today
       SmallVector<Value> pushConstants;
       for (auto operand : adaptor.getUniformOperands()) {
         assert(
@@ -922,7 +931,8 @@
       auto bindingAttr =
           llvm::cast<IREE::HAL::InterfaceBindingAttr>(bindingAttrs[i]);
       int64_t set = bindingAttr.getSet();
-      if (currentSet != -1 && currentSet != set) flushSet();
+      if (currentSet != -1 && currentSet != set)
+        flushSet();
       currentSet = set;
       IREE::HAL::DescriptorSetBindingValue binding;
       binding.ordinal =
@@ -932,20 +942,21 @@
       binding.byteLength = adaptor.getResourceLengths()[i];
       bindings.push_back(binding);
     }
-    if (currentSet != -1) flushSet();
+    if (currentSet != -1)
+      flushSet();
   }
 };
 
 struct CmdFuncOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdFuncOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdFuncOp funcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdFuncOp funcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Type> newArgTypes;
     SmallVector<DictionaryAttr> newArgAttrs;
     newArgTypes.push_back(rewriter.getType<IREE::HAL::CommandBufferType>());
-    newArgAttrs.push_back(rewriter.getDictionaryAttr({}));  // command buffer
+    newArgAttrs.push_back(rewriter.getDictionaryAttr({})); // command buffer
     funcOp.getAllArgAttrs(newArgAttrs);
     SmallVector<Type> newResultTypes;
     if (failed(getTypeConverter()->convertTypes(funcOp.getArgumentTypes(),
@@ -969,9 +980,9 @@
 struct CmdCallOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdCallOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdCallOp callOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdCallOp callOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto commandBuffer = mapping->lookupCommandBufferFor(callOp);
 
     // Always pass the command buffer as the first arg.
@@ -1027,9 +1038,11 @@
   // Note that we can't mutate the block while iterating it so we first grab
   // all the original ops.
   SmallVector<Operation *> serialOps;
-  for (auto &op : block) serialOps.push_back(&op);
+  for (auto &op : block)
+    serialOps.push_back(&op);
   for (auto *op : serialOps) {
-    if (op->hasTrait<OpTrait::IsTerminator>()) continue;
+    if (op->hasTrait<OpTrait::IsTerminator>())
+      continue;
     builder.setInsertionPointAfter(op);
     builder.create<IREE::HAL::CommandBufferExecutionBarrierOp>(
         loc, commandBuffer, sourceStage, targetStage, flags);
@@ -1039,9 +1052,9 @@
 struct CmdExecuteOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdExecuteOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdExecuteOp executeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdExecuteOp executeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = executeOp.getLoc();
     auto [device, queueAffinity] =
         lookupDeviceAndQueueAffinityFor(executeOp, rewriter);
@@ -1101,9 +1114,9 @@
 struct CmdSerialOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdSerialOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdSerialOp serialOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdSerialOp serialOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto commandBuffer = mapping->lookupCommandBufferFor(serialOp);
 
     // Run through the execution region and serialize execution by inserting
@@ -1122,9 +1135,9 @@
 struct CmdConcurrentOpPattern
     : public StreamConversionPattern<IREE::Stream::CmdConcurrentOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdConcurrentOp concurrentOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdConcurrentOp concurrentOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Inline the concurrent execution region.
     // TODO(benvanik): split barriers (event set/wait) when nesting.
     rewriter.inlineBlockBefore(&concurrentOp.getBody().front(), concurrentOp);
@@ -1136,9 +1149,10 @@
 struct TimepointImmediateOpPattern
     : public StreamConversionPattern<IREE::Stream::TimepointImmediateOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointImmediateOp immediateOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointImmediateOp immediateOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::Util::NullOp>(
         immediateOp, rewriter.getType<IREE::HAL::FenceType>());
     return success();
@@ -1148,9 +1162,9 @@
 struct TimepointImportOpPattern
     : public StreamConversionPattern<IREE::Stream::TimepointImportOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointImportOp importOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointImportOp importOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle imports from HAL semaphores _or_ fences.
     auto operands = adaptor.getOperands();
     if (operands.size() == 1 &&
@@ -1167,9 +1181,9 @@
 struct TimepointExportOpPattern
     : public StreamConversionPattern<IREE::Stream::TimepointExportOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointExportOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointExportOp exportOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle exports into HAL fences.
     if (exportOp.getNumResults() != 1 ||
         !llvm::isa<IREE::HAL::FenceType>(exportOp.getResult(0).getType())) {
@@ -1184,9 +1198,10 @@
 struct TimepointChainExternalOpPattern
     : public StreamConversionPattern<IREE::Stream::TimepointChainExternalOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointChainExternalOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointChainExternalOp exportOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle exports into HAL fences.
     auto externalValues = exportOp.getExternalValues();
     if (externalValues.size() != 1 ||
@@ -1207,9 +1222,9 @@
 struct TimepointJoinOpPattern
     : public StreamConversionPattern<IREE::Stream::TimepointJoinOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointJoinOp joinOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointJoinOp joinOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::FenceJoinOp>(
         joinOp, rewriter.getType<IREE::HAL::FenceType>(),
         adaptor.getAwaitTimepoints());
@@ -1220,9 +1235,9 @@
 struct TimepointBarrierOpPattern
     : public StreamConversionPattern<IREE::Stream::TimepointBarrierOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointBarrierOp barrierOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointBarrierOp barrierOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Replace with a signaled fence.
     // NOTE: this assumes that if this op still exists the input resource is
     // already available. If it isn't then timepoint propagation should have
@@ -1237,9 +1252,9 @@
 struct TimepointAwaitOpPattern
     : public StreamConversionPattern<IREE::Stream::TimepointAwaitOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointAwaitOp awaitOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointAwaitOp awaitOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = awaitOp.getLoc();
 
     // Perform the blocking wait.
@@ -1258,9 +1273,9 @@
 struct ChannelCreateOpPattern
     : public StreamConversionPattern<IREE::Stream::ChannelCreateOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ChannelCreateOp createOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ChannelCreateOp createOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto [device, queueAffinity] =
         lookupDeviceAndQueueAffinityFor(createOp, rewriter);
     Value neg1I32;
@@ -1310,9 +1325,9 @@
 struct ChannelSplitOpPattern
     : public StreamConversionPattern<IREE::Stream::ChannelSplitOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ChannelSplitOp splitOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ChannelSplitOp splitOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value color = rewriter.create<arith::IndexCastOp>(
         splitOp.getLoc(), rewriter.getI32Type(), adaptor.getColor());
     Value key = rewriter.create<arith::IndexCastOp>(
@@ -1328,9 +1343,9 @@
 struct ChannelRankOpPattern
     : public StreamConversionPattern<IREE::Stream::ChannelRankOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ChannelRankOp rankOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ChannelRankOp rankOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto newOp = rewriter.create<IREE::HAL::ChannelRankAndCountOp>(
         rankOp.getLoc(), rewriter.getI32Type(), rewriter.getI32Type(),
         adaptor.getChannel());
@@ -1344,9 +1359,9 @@
 struct ChannelCountOpPattern
     : public StreamConversionPattern<IREE::Stream::ChannelCountOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ChannelCountOp countOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ChannelCountOp countOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto newOp = rewriter.create<IREE::HAL::ChannelRankAndCountOp>(
         countOp.getLoc(), rewriter.getI32Type(), rewriter.getI32Type(),
         adaptor.getChannel());
@@ -1360,9 +1375,9 @@
 struct ElideYieldOpPattern
     : public StreamConversionPattern<IREE::Stream::YieldOp> {
   using StreamConversionPattern::StreamConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::YieldOp yieldOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::YieldOp yieldOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(yieldOp);
     return success();
   }
@@ -1373,11 +1388,12 @@
 struct GlobalTimepointConversionPattern
     : public OpConversionPattern<IREE::Util::GlobalOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto initialValue = op.getInitialValue();
-    if (!initialValue.has_value()) return failure();
+    if (!initialValue.has_value())
+      return failure();
     if (!llvm::isa<IREE::Stream::TimepointAttr>(*initialValue))
       return failure();
     rewriter.updateRootInPlace(op, [&]() { op.removeInitialValueAttr(); });
@@ -1385,7 +1401,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStreamToHALPatterns(MLIRContext *context,
                                  ConversionTarget &conversionTarget,
@@ -1441,5 +1457,5 @@
   patterns.insert<ElideYieldOpPattern>(mapping, typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.h
index b39554a..5c61396 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.h
@@ -19,7 +19,7 @@
                                  TypeConverter &typeConverter,
                                  RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_STREAMTOHAL_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_STREAMTOHAL_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.cpp
index a0a504b..d143875 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.cpp
@@ -85,5 +85,5 @@
   });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.h
index 4c57d9f..5ea5a1c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/TypeConverter.h
@@ -17,7 +17,7 @@
 namespace iree_compiler {
 
 class HALTypeConverter : public TypeConverter {
- public:
+public:
   explicit HALTypeConverter(
       ArrayRef<const HALConversionDialectInterface *> conversionInterfaces);
 
@@ -32,12 +32,12 @@
     return false;
   }
 
- private:
+private:
   // The set of dialect conversion interfaces we should query to convert types.
   std::vector<const HALConversionDialectInterface *> conversionInterfaces;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_TYPECONVERTER_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_TYPECONVERTER_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.cpp
index ef9600b..8ffc23d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.cpp
@@ -22,11 +22,12 @@
 struct GlobalConversionPattern
     : public OpConversionPattern<IREE::Util::GlobalOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto newType = getTypeConverter()->convertType(op.getType());
-    if (newType == op.getType()) return failure();
+    if (newType == op.getType())
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       // NOTE: the initial value may be invalid here! We rely on
       // dialect-specific conversions to handle it.
@@ -36,7 +37,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilToHALPatterns(MLIRContext *context,
                                ConversionTarget &conversionTarget,
@@ -62,5 +63,5 @@
                                               typeConverter, patterns);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.h b/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.h
index ecdf1ce..d10202c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/Patterns.h
@@ -21,7 +21,7 @@
                                TypeConverter &typeConverter,
                                RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_CONVERSION_UTILTOHAL_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_CONVERSION_UTILTOHAL_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
index b723253..7d76552 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
@@ -78,7 +78,7 @@
 };
 
 class HALToVMConversionInterface : public VMConversionDialectInterface {
- public:
+public:
   using VMConversionDialectInterface::VMConversionDialectInterface;
 
   OwningOpRef<mlir::ModuleOp> parseVMImportModule() const override {
@@ -88,10 +88,11 @@
         getDialect()->getContext());
   }
 
-  void populateVMConversionPatterns(
-      SymbolTable &importSymbols, RewritePatternSet &patterns,
-      ConversionTarget &conversionTarget,
-      TypeConverter &typeConverter) const override {
+  void
+  populateVMConversionPatterns(SymbolTable &importSymbols,
+                               RewritePatternSet &patterns,
+                               ConversionTarget &conversionTarget,
+                               TypeConverter &typeConverter) const override {
     populateHALToVMPatterns(getDialect()->getContext(), importSymbols, patterns,
                             typeConverter);
   }
@@ -121,7 +122,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 HALDialect::HALDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<HALDialect>()) {
@@ -154,7 +155,7 @@
   return nullptr;
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.h b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.h
index 2a9e824..33f59d3 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.h
@@ -16,7 +16,7 @@
 namespace HAL {
 
 class HALDialect : public Dialect {
- public:
+public:
   explicit HALDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "hal"; }
 
@@ -29,14 +29,14 @@
   Operation *materializeConstant(OpBuilder &builder, Attribute value, Type type,
                                  Location loc) override;
 
- private:
+private:
   void registerAttributes();
   void registerTypes();
 };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_IR_HALDIALECT_H_
+#endif // IREE_COMPILER_DIALECT_HAL_IR_HALDIALECT_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp
index 798779d..0ae6e58 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp
@@ -43,13 +43,14 @@
       : OpRewritePattern<Op>(context, /*benefit=*/1000) {}
   LogicalResult matchAndRewrite(Op op,
                                 PatternRewriter &rewriter) const override {
-    if (!op.use_empty()) return failure();
+    if (!op.use_empty())
+      return failure();
     rewriter.eraseOp(op);
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // hal.tensor.import/export
@@ -87,16 +88,18 @@
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(TensorBarrierOp op,
                                 PatternRewriter &rewriter) const override {
-    DenseMap<Value, unsigned> uniqueSources;  // source -> unique index
+    DenseMap<Value, unsigned> uniqueSources; // source -> unique index
     SmallVector<Value> orderedSources;
-    SmallVector<unsigned> resultMapping;  // old -> new result index
+    SmallVector<unsigned> resultMapping; // old -> new result index
     for (auto source : op.getSources()) {
       auto it =
           uniqueSources.insert(std::make_pair(source, orderedSources.size()));
-      if (it.second) orderedSources.push_back(source);
+      if (it.second)
+        orderedSources.push_back(source);
       resultMapping.push_back(it.first->second);
     }
-    if (orderedSources.size() == op.getSources().size()) return failure();
+    if (orderedSources.size() == op.getSources().size())
+      return failure();
     auto newOp = rewriter.create<TensorBarrierOp>(op.getLoc(), orderedSources,
                                                   op.getSignalFence());
     SmallVector<Value> newResults;
@@ -109,7 +112,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TensorBarrierOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -142,7 +145,8 @@
       needsUpdate = true;
     }
     rewriter.restoreInsertionPoint(ip);
-    if (!needsUpdate) return failure();
+    if (!needsUpdate)
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       op.getSourceBufferMutable().assign(newSourceBuffer);
       op.getSourceOffsetMutable().assign(newSourceOffset);
@@ -151,7 +155,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BufferViewCreateOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                      MLIRContext *context) {
@@ -176,7 +180,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BufferViewBufferOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                      MLIRContext *context) {
@@ -224,7 +228,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CommandBufferDeviceOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -254,7 +258,8 @@
       needsUpdate = true;
     }
     rewriter.restoreInsertionPoint(ip);
-    if (!needsUpdate) return failure();
+    if (!needsUpdate)
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       op.getTargetBufferMutable().assign(newTargetBuffer);
       op.getTargetOffsetMutable().assign(newTargetOffset);
@@ -263,7 +268,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CommandBufferFillBufferOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -303,7 +308,8 @@
       needsUpdate = true;
     }
     rewriter.restoreInsertionPoint(ip);
-    if (!needsUpdate) return failure();
+    if (!needsUpdate)
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       op.getSourceBufferMutable().assign(newSourceBuffer);
       op.getSourceOffsetMutable().assign(newSourceOffset);
@@ -314,7 +320,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CommandBufferCopyBufferOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -338,7 +344,8 @@
     auto bindingOffsets = llvm::to_vector(op.getBindingOffsets());
     for (size_t i = 0; i < bindingBuffers.size(); ++i) {
       auto *definingOp = bindingBuffers[i].getDefiningOp();
-      if (!definingOp) continue;
+      if (!definingOp)
+        continue;
       if (auto subspanOp = dyn_cast<BufferSubspanOp>(definingOp)) {
         needsUpdate = true;
         bindingBuffers[i] = subspanOp.getSourceBuffer();
@@ -347,7 +354,8 @@
       }
     }
     rewriter.restoreInsertionPoint(ip);
-    if (!needsUpdate) return failure();
+    if (!needsUpdate)
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       auto mutableBindingBuffers = op.getBindingBuffersMutable();
       mutableBindingBuffers.clear();
@@ -360,7 +368,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CommandBufferPushDescriptorSetOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -392,15 +400,18 @@
 // same basic block. We need an abstract interpreter to do much more as we'd
 // need to track conditionals/branching logic.
 static bool isOpAlwaysExecutedWith(Operation *before, Operation *after) {
-  if (before == after) return true;
-  if (before->getBlock() != after->getBlock()) return false;
+  if (before == after)
+    return true;
+  if (before->getBlock() != after->getBlock())
+    return false;
   return before->isBeforeInBlock(after);
 }
 
 // Returns true if |op| was hoisted before |insertBefore| without breaking
 // SSA invariants. Returns false if no IR modifications were made.
 static bool tryHoistOpBeforeUser(Operation *op, Operation *insertBefore) {
-  if (op == insertBefore) return false;
+  if (op == insertBefore)
+    return false;
 
   // Currently conservative - should be doing a domination check.
   if (op->getBlock() != insertBefore->getBlock()) {
@@ -437,7 +448,8 @@
   LogicalResult matchAndRewrite(DeviceQueueExecuteOp barrierOp,
                                 PatternRewriter &rewriter) const override {
     // Only looking for ops performing basic barriers.
-    if (!barrierOp.isBarrier()) return failure();
+    if (!barrierOp.isBarrier())
+      return failure();
 
     // Check for whether we know the wait fence is immediately resolved in the
     // local scope. A more involved data flow analysis would let us handle more
@@ -469,7 +481,8 @@
   LogicalResult matchAndRewrite(DeviceQueueExecuteOp barrierOp,
                                 PatternRewriter &rewriter) const override {
     // Only looking for ops performing basic barriers.
-    if (!barrierOp.isBarrier()) return failure();
+    if (!barrierOp.isBarrier())
+      return failure();
 
     // See if we can observe the original fence creation in the local scope.
     auto waitFence = barrierOp.getWaitFence();
@@ -529,7 +542,8 @@
   LogicalResult matchAndRewrite(DeviceQueueExecuteOp barrierOp,
                                 PatternRewriter &rewriter) const override {
     // Only looking for ops performing basic barriers.
-    if (!barrierOp.isBarrier()) return failure();
+    if (!barrierOp.isBarrier())
+      return failure();
 
     // We're looking at the wait fence on the barrier back up to the signal
     // operation on that fence.
@@ -624,7 +638,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void DeviceQueueExecuteOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -661,9 +675,11 @@
   // Get all of the return ops - if there's only one then the requirement is
   // already satisfied and we can exit early.
   auto returnOps = llvm::to_vector(region.getOps<IREE::HAL::ReturnOp>());
-  if (returnOps.size() <= 1) return;  // no-op
+  if (returnOps.size() <= 1)
+    return; // no-op
   SmallVector<Location> returnLocs;
-  for (auto returnOp : returnOps) returnLocs.push_back(returnOp.getLoc());
+  for (auto returnOp : returnOps)
+    returnLocs.push_back(returnOp.getLoc());
 
   // Create the new exit block with arguments matching 1:1 with results.
   auto anyReturnOp = returnOps.front();
@@ -708,7 +724,8 @@
     SmallVector<Location> resultLocs;
     for (auto blockOp : blockOps) {
       blockLocs.push_back(blockOp.getLoc());
-      if (blockOp.getNumArguments() > 0) anyRequireDevice = true;
+      if (blockOp.getNumArguments() > 0)
+        anyRequireDevice = true;
       llvm::append_range(resultTypes, blockOp.getResultTypes());
       llvm::append_range(resultKeys, blockOp.getKeys().getValue());
       llvm::append_range(
@@ -797,7 +814,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ExecutableVariantOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -812,7 +829,8 @@
        llvm::make_early_inc_range(blockOp.getOps<IREE::HAL::ReturnOp>())) {
     SmallVector<Value> operands;
     for (auto [i, operand] : llvm::enumerate(returnOp.getOperands())) {
-      if (preservedIndices.test(i)) operands.push_back(operand);
+      if (preservedIndices.test(i))
+        operands.push_back(operand);
     }
     returnOp.getOperandsMutable().assign(operands);
   }
@@ -824,9 +842,11 @@
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(ExecutableConstantBlockOp blockOp,
                                 PatternRewriter &rewriter) const override {
-    if (blockOp.getNumArguments() == 0) return failure();
+    if (blockOp.getNumArguments() == 0)
+      return failure();
     auto deviceArg = blockOp.getArgument(0);
-    if (!deviceArg.use_empty()) return failure();
+    if (!deviceArg.use_empty())
+      return failure();
     rewriter.updateRootInPlace(blockOp, [&]() {
       blockOp.eraseArgument(0);
       blockOp.setFunctionTypeAttr(TypeAttr::get(
@@ -876,7 +896,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ExecutableConstantBlockOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -898,7 +918,8 @@
 //===----------------------------------------------------------------------===//
 
 OpFoldResult FenceJoinOp::fold(FoldAdaptor operands) {
-  if (getFences().size() == 1) return getFences().front();
+  if (getFences().size() == 1)
+    return getFences().front();
   return {};
 }
 
@@ -909,7 +930,8 @@
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(FenceJoinOp op,
                                 PatternRewriter &rewriter) const override {
-    if (op.getNumOperands() != 0) return failure();
+    if (op.getNumOperands() != 0)
+      return failure();
     rewriter.replaceOpWithNewOp<IREE::Util::NullOp>(op,
                                                     op.getResult().getType());
     return success();
@@ -918,8 +940,8 @@
 
 // Produces a deduplicated and null-elided operand list.
 // Returns std::nullopt if nothing changed.
-static std::optional<std::vector<Value>> deduplicateFenceOperands(
-    ValueRange operands) {
+static std::optional<std::vector<Value>>
+deduplicateFenceOperands(ValueRange operands) {
   SetVector<Value> newOperands;
   for (auto operand : operands) {
     if (isa_and_nonnull<IREE::Util::NullOp>(operand.getDefiningOp())) {
@@ -930,7 +952,8 @@
     newOperands.insert(operand);
   }
 
-  if (newOperands.size() == operands.size()) return std::nullopt;
+  if (newOperands.size() == operands.size())
+    return std::nullopt;
   return newOperands.takeVector();
 }
 
@@ -940,14 +963,15 @@
   LogicalResult matchAndRewrite(FenceJoinOp op,
                                 PatternRewriter &rewriter) const override {
     auto newOperands = deduplicateFenceOperands(op.getFences());
-    if (!newOperands) return failure();
+    if (!newOperands)
+      return failure();
     rewriter.replaceOpWithNewOp<FenceJoinOp>(op, op.getResult().getType(),
                                              newOperands.value());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void FenceJoinOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
@@ -980,7 +1004,8 @@
     auto fence = signalOp.getFence();
     auto createOp =
         dyn_cast_or_null<IREE::HAL::FenceCreateOp>(fence.getDefiningOp());
-    if (!createOp) return failure();
+    if (!createOp)
+      return failure();
 
     // TODO(benvanik): broader analysis - likely in a dedicated fence elision
     // pass so we can do IPO. For now block-only.
@@ -1012,7 +1037,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void FenceSignalOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
@@ -1030,7 +1055,8 @@
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(FenceAwaitOp op,
                                 PatternRewriter &rewriter) const override {
-    if (!op.getFences().empty()) return failure();
+    if (!op.getFences().empty())
+      return failure();
     rewriter.replaceOpWithNewOp<arith::ConstantIntOp>(op, /*ok=*/0, 32);
     return success();
   }
@@ -1042,7 +1068,8 @@
   LogicalResult matchAndRewrite(FenceAwaitOp op,
                                 PatternRewriter &rewriter) const override {
     auto newOperands = deduplicateFenceOperands(op.getFences());
-    if (newOperands == std::nullopt) return failure();
+    if (newOperands == std::nullopt)
+      return failure();
     rewriter.replaceOpWithNewOp<FenceAwaitOp>(op, op.getStatus().getType(),
                                               op.getTimeoutMillis(),
                                               newOperands.value());
@@ -1050,7 +1077,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void FenceAwaitOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -1058,7 +1085,7 @@
   results.insert<DeduplicateFenceAwaitFences>(context);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp
index e62dcb2..0ccf60e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp
@@ -38,10 +38,12 @@
 static ParseResult parseDescriptorType(OpAsmParser &parser,
                                        DescriptorTypeAttr &dtAttr) {
   StringRef enumKeyword;
-  if (failed(parser.parseKeyword(&enumKeyword))) return failure();
+  if (failed(parser.parseKeyword(&enumKeyword)))
+    return failure();
   std::optional<DescriptorType> maybeEnum =
       symbolizeDescriptorType(enumKeyword);
-  if (!maybeEnum) return failure();
+  if (!maybeEnum)
+    return failure();
   dtAttr = DescriptorTypeAttr::get(parser.getContext(), *maybeEnum);
   return success();
 }
@@ -177,7 +179,8 @@
          "information is required");
   SmallVector<Value> dynamicDims;
   for (int64_t i = 0; i < shapedType.getRank(); ++i) {
-    if (!shapedType.isDynamicDim(i)) continue;
+    if (!shapedType.isDynamicDim(i))
+      continue;
     dynamicDims.push_back(builder.createOrFold<IREE::HAL::BufferViewDimOp>(
         result.location, builder.getIndexType(), source,
         builder.getIndexAttr(i)));
@@ -190,22 +193,24 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
 
-::std::optional<unsigned> TensorImportOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+TensorImportOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> TensorImportOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 static LogicalResult verifyTypeStorageCompatibility(Operation *op,
                                                     Type encodingType,
                                                     Type storageType) {
-  if (encodingType == storageType) return success();
+  if (encodingType == storageType)
+    return success();
   auto encodingShapedType = llvm::dyn_cast<ShapedType>(encodingType);
   auto storageShapedType = llvm::dyn_cast<ShapedType>(storageType);
-  if (!encodingShapedType || !storageShapedType) return success();
+  if (!encodingShapedType || !storageShapedType)
+    return success();
 
   if (IREE::Util::getRoundedElementByteWidth(
           encodingShapedType.getElementType()) !=
@@ -268,13 +273,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
 
-::std::optional<unsigned> TensorExportOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+TensorExportOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> TensorExportOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 LogicalResult TensorExportOp::verify() {
@@ -304,9 +309,9 @@
       getSources()[resultIndex]);
 }
 
-::std::optional<unsigned> TensorBarrierOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {resultIndex};  // sources[i]
+::std::optional<unsigned>
+TensorBarrierOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {resultIndex}; // sources[i]
 }
 
 SmallVector<int64_t> TensorBarrierOp::getTiedResultOperandIndices() {
@@ -757,7 +762,8 @@
   p.printOptionalAttrDictWithKeyword(
       op->getAttrs(),
       /*elidedAttrs=*/{"sym_name", "layout", "ordinal"});
-  if (getWorkgroupCount().empty()) return;
+  if (getWorkgroupCount().empty())
+    return;
   p << " ";
   p.printRegion(getWorkgroupCount());
 }
@@ -766,7 +772,8 @@
   ExecutableExportOp op = *this;
   Block *body = getWorkgroupCountBody();
   // When there is no body, nothing to verify.
-  if (!body) return success();
+  if (!body)
+    return success();
 
   if (!llvm::hasSingleElement(getWorkgroupCount())) {
     return op.emitOpError() << "expected a single region block";
@@ -806,9 +813,10 @@
 
 // Calculates the workgroup count (x, y, z) given the total N-dimensional
 // |workload| and specific |workgroupSize|.
-static std::array<Value, 3> calculateWorkloadWorkgroupCount(
-    Location loc, ValueRange workload,
-    const std::array<Value, 3> &workgroupSize, OpBuilder &builder) {
+static std::array<Value, 3>
+calculateWorkloadWorkgroupCount(Location loc, ValueRange workload,
+                                const std::array<Value, 3> &workgroupSize,
+                                OpBuilder &builder) {
   std::array<Value, 3> result;
 
   auto constantOne = builder.createOrFold<arith::ConstantIndexOp>(loc, 1);
@@ -853,9 +861,9 @@
   return result;
 }
 
-static std::array<Value, 3> calculateWorkgroupCountFromRegion(
-    Location loc, Block *body, Value device, ValueRange workload,
-    OpBuilder &builder) {
+static std::array<Value, 3>
+calculateWorkgroupCountFromRegion(Location loc, Block *body, Value device,
+                                  ValueRange workload, OpBuilder &builder) {
   // TODO(benvanik): replace with region inlining util.
   IRMapping bvm;
   bvm.map(body->getArgument(0), device);
@@ -951,7 +959,8 @@
     return failure();
   }
   SmallVector<Type> argTypes;
-  for (auto &arg : entryArgs) argTypes.push_back(arg.type);
+  for (auto &arg : entryArgs)
+    argTypes.push_back(arg.type);
   auto fnType = builder.getFunctionType(argTypes, resultTypes);
   result.addAttribute(getFunctionTypeAttrName(result.name),
                       TypeAttr::get(fnType));
@@ -960,17 +969,20 @@
   // There must be one key per result. Note that we support omitted parens when
   // only one result is present.
   SmallVector<Attribute> keyAttrs;
-  if (failed(parser.parseKeyword("as"))) return failure();
+  if (failed(parser.parseKeyword("as")))
+    return failure();
   if (resultTypes.size() == 1) {
     std::string key;
-    if (failed(parser.parseString(&key))) return failure();
+    if (failed(parser.parseString(&key)))
+      return failure();
     keyAttrs.push_back(builder.getStringAttr(key));
   } else {
     if (failed(parser.parseCommaSeparatedList(
             AsmParser::Delimiter::OptionalParen,
             [&]() {
               std::string key;
-              if (failed(parser.parseString(&key))) return failure();
+              if (failed(parser.parseString(&key)))
+                return failure();
               keyAttrs.push_back(builder.getStringAttr(key));
               return success();
             },
@@ -1017,10 +1029,12 @@
       p, cast<FunctionOpInterface>(op), argTypes, /*isVariadic=*/false,
       resultTypes);
   p << " as ";
-  if (resultTypes.size() != 1) p << '(';
+  if (resultTypes.size() != 1)
+    p << '(';
   llvm::interleaveComma(getKeys().getValue(), p,
                         [&](Attribute attr) { p << attr; });
-  if (resultTypes.size() != 1) p << ')';
+  if (resultTypes.size() != 1)
+    p << ')';
   mlir::function_interface_impl::printFunctionAttributes(
       p, op, {getFunctionTypeAttrName(), getKeysAttrName()});
   p << " ";
@@ -1151,17 +1165,20 @@
 
   // If the binding has no assigned alignment we fall back to natural alignment.
   auto baseAlignment = getBaseAlignment();
-  if (!baseAlignment) return naturalAlignment;
+  if (!baseAlignment)
+    return naturalAlignment;
 
   // If there's no offset specified then we can use the binding alignment
   // directly.
-  if (!getByteOffset()) return baseAlignment.value();
+  if (!getByteOffset())
+    return baseAlignment.value();
 
   // Try to get the alignment of the byte offset. If it's a constant then we can
   // find a common alignment between it and the base and otherwise we need to
   // try to infer the alignment from the IR - otherwise we fall back.
   auto offsetOrAlignment = lookupOffsetOrAlignment(getByteOffset());
-  if (!offsetOrAlignment.has_value()) return naturalAlignment;
+  if (!offsetOrAlignment.has_value())
+    return naturalAlignment;
 
   // Compute the common alignment between that of the binding base and that of
   // the byte offset.
@@ -1177,15 +1194,15 @@
     StringRef prefix, const APInt &dimension, Value result,
     function_ref<void(Value, StringRef)> setNameFn) {
   switch (dimension.getZExtValue()) {
-    case 0:
-      setNameFn(result, (prefix + "x").str());
-      return;
-    case 1:
-      setNameFn(result, (prefix + "y").str());
-      return;
-    case 2:
-      setNameFn(result, (prefix + "z").str());
-      return;
+  case 0:
+    setNameFn(result, (prefix + "x").str());
+    return;
+  case 1:
+    setNameFn(result, (prefix + "y").str());
+    return;
+  case 2:
+    setNameFn(result, (prefix + "z").str());
+    return;
   }
 }
 
@@ -1244,10 +1261,10 @@
   setNameFn(getStatus(), "status");
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // TableGen definitions (intentionally last)
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.h b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.h
index b417181..83d4c40 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.h
@@ -25,6 +25,6 @@
 #include "mlir/Interfaces/ViewLikeInterface.h"
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/HAL/IR/HALOps.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/HAL/IR/HALOps.h.inc" // IWYU pragma: export
 
-#endif  // IREE_COMPILER_DIALECT_HAL_IR_HALOPS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_IR_HALOPS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTraits.h b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTraits.h
index 3986087..48eb3b0 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTraits.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTraits.h
@@ -16,13 +16,13 @@
 
 template <typename ConcreteType>
 class DeviceQuery : public OpTrait::TraitBase<ConcreteType, DeviceQuery> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) { return success(); }
 };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace OpTrait
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace OpTrait
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_IR_HALTRAITS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_IR_HALTRAITS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp
index f2a27b0..4a15790 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp
@@ -23,8 +23,8 @@
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/HAL/IR/HALAttrs.cpp.inc"  // IWYU pragma: keep
-#include "iree/compiler/Dialect/HAL/IR/HALEnums.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/HAL/IR/HALAttrs.cpp.inc" // IWYU pragma: keep
+#include "iree/compiler/Dialect/HAL/IR/HALEnums.cpp.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -93,7 +93,7 @@
                                               int32_t bitCount) {
   return (static_cast<uint32_t>(numericalType) << 24) | bitCount;
 }
-}  // namespace
+} // namespace
 
 std::optional<int32_t> getElementTypeValue(Type type) {
   if (auto intType = llvm::dyn_cast_if_present<IntegerType>(type)) {
@@ -114,17 +114,17 @@
     return makeElementTypeValue(numericalType, intType.getWidth());
   } else if (auto floatType = llvm::dyn_cast_if_present<FloatType>(type)) {
     switch (APFloat::SemanticsToEnum(floatType.getFloatSemantics())) {
-      case APFloat::S_IEEEhalf:
-      case APFloat::S_IEEEsingle:
-      case APFloat::S_IEEEdouble:
-      case APFloat::S_IEEEquad:
-        return makeElementTypeValue(NumericalType::kFloatIEEE,
-                                    floatType.getWidth());
-      case APFloat::S_BFloat:
-        return makeElementTypeValue(NumericalType::kFloatBrain,
-                                    floatType.getWidth());
-      default:
-        return std::nullopt;
+    case APFloat::S_IEEEhalf:
+    case APFloat::S_IEEEsingle:
+    case APFloat::S_IEEEdouble:
+    case APFloat::S_IEEEquad:
+      return makeElementTypeValue(NumericalType::kFloatIEEE,
+                                  floatType.getWidth());
+    case APFloat::S_BFloat:
+      return makeElementTypeValue(NumericalType::kFloatBrain,
+                                  floatType.getWidth());
+    default:
+      return std::nullopt;
     }
   } else if (auto complexType = llvm::dyn_cast_if_present<ComplexType>(type)) {
     return makeElementTypeValue(
@@ -144,7 +144,7 @@
 // See the iree/hal/command_buffer.h iree_hal_collective_op_t for details.
 uint32_t CollectiveAttr::getEncodedValue() const {
   union {
-    uint32_t packed;  // packed value
+    uint32_t packed; // packed value
     struct {
       uint8_t kind;
       uint8_t reduction;
@@ -164,7 +164,8 @@
 //===----------------------------------------------------------------------===//
 
 llvm::MaybeAlign commonAlignment(llvm::MaybeAlign lhs, llvm::MaybeAlign rhs) {
-  if (!lhs.has_value() || !rhs.has_value()) return std::nullopt;
+  if (!lhs.has_value() || !rhs.has_value())
+    return std::nullopt;
   return llvm::MaybeAlign(
       llvm::MinAlign(lhs.value().value(), rhs.value().value()));
 }
@@ -179,7 +180,8 @@
   }
 
   auto op = value.getDefiningOp();
-  if (!op) return std::nullopt;
+  if (!op)
+    return std::nullopt;
   if (auto alignmentAttr = op->getAttrOfType<IntegerAttr>("stream.alignment")) {
     // The op has an alignment tagged on it we can use directly.
     return alignmentAttr.getValue().getZExtValue();
@@ -291,8 +293,8 @@
 }
 
 // static
-SmallVector<IREE::HAL::DeviceTargetAttr, 4> DeviceTargetAttr::lookup(
-    Operation *op) {
+SmallVector<IREE::HAL::DeviceTargetAttr, 4>
+DeviceTargetAttr::lookup(Operation *op) {
   auto attrId = mlir::StringAttr::get(op->getContext(), "hal.device.targets");
   while (op) {
     auto targetsAttr = op->getAttrOfType<ArrayAttr>(attrId);
@@ -305,32 +307,36 @@
     }
     op = op->getParentOp();
   }
-  return {};  // No devices found; let caller decide what to do.
+  return {}; // No devices found; let caller decide what to do.
 }
 
 // Returns a set of all configuration attributes from all device targets with
 // a configuration set. Targets with no configuration set are ignored.
 static SmallVector<DictionaryAttr> lookupOptionalConfigAttrs(Operation *op) {
   auto targetAttrs = IREE::HAL::DeviceTargetAttr::lookup(op);
-  if (targetAttrs.empty()) return {};
+  if (targetAttrs.empty())
+    return {};
   SmallVector<DictionaryAttr> configAttrs;
   for (auto targetAttr : targetAttrs) {
     auto configAttr = targetAttr.getConfiguration();
-    if (configAttr) configAttrs.push_back(configAttr);
+    if (configAttr)
+      configAttrs.push_back(configAttr);
   }
   return configAttrs;
 }
 
 // Returns a set of all configuration attributes from all device targets.
 // Returns nullopt if any target is missing a configuration attribute.
-static std::optional<SmallVector<DictionaryAttr>> lookupRequiredConfigAttrs(
-    Operation *op) {
+static std::optional<SmallVector<DictionaryAttr>>
+lookupRequiredConfigAttrs(Operation *op) {
   auto targetAttrs = IREE::HAL::DeviceTargetAttr::lookup(op);
-  if (targetAttrs.empty()) return std::nullopt;
+  if (targetAttrs.empty())
+    return std::nullopt;
   SmallVector<DictionaryAttr> configAttrs;
   for (auto targetAttr : targetAttrs) {
     auto configAttr = targetAttr.getConfiguration();
-    if (!configAttr) return std::nullopt;
+    if (!configAttr)
+      return std::nullopt;
     configAttrs.push_back(configAttr);
   }
   return configAttrs;
@@ -342,13 +348,16 @@
     std::function<typename AttrT::ValueType(typename AttrT::ValueType,
                                             typename AttrT::ValueType)>
         join) {
-  if (configAttrs.empty()) return std::nullopt;
+  if (configAttrs.empty())
+    return std::nullopt;
   auto firstValue = configAttrs.front().getAs<AttrT>(name);
-  if (!firstValue) return std::nullopt;
+  if (!firstValue)
+    return std::nullopt;
   auto result = firstValue.getValue();
   for (auto configAttr : configAttrs.drop_front(1)) {
     auto value = configAttr.getAs<AttrT>(name);
-    if (!value) return std::nullopt;
+    if (!value)
+      return std::nullopt;
     result = join(result, value.getValue());
   }
   return result;
@@ -361,13 +370,16 @@
                            StaticRange<typename AttrT::ValueType>,
                            StaticRange<typename AttrT::ValueType>)>
                            join) {
-  if (configAttrs.empty()) return std::nullopt;
+  if (configAttrs.empty())
+    return std::nullopt;
   auto firstValue = configAttrs.front().getAs<AttrT>(name);
-  if (!firstValue) return std::nullopt;
+  if (!firstValue)
+    return std::nullopt;
   StaticRange<typename AttrT::ValueType> result{firstValue.getValue()};
   for (auto configAttr : configAttrs.drop_front(1)) {
     auto value = configAttr.getAs<AttrT>(name);
-    if (!value) return std::nullopt;
+    if (!value)
+      return std::nullopt;
     result =
         join(result, StaticRange<typename AttrT::ValueType>{value.getValue()});
   }
@@ -377,9 +389,11 @@
 // static
 bool DeviceTargetAttr::lookupConfigAttrAny(Operation *op, StringRef name) {
   auto configAttrs = lookupOptionalConfigAttrs(op);
-  if (configAttrs.empty()) return false;
+  if (configAttrs.empty())
+    return false;
   for (auto configAttr : configAttrs) {
-    if (configAttr.get(name)) return true;
+    if (configAttr.get(name))
+      return true;
   }
   return false;
 }
@@ -387,9 +401,11 @@
 // static
 bool DeviceTargetAttr::lookupConfigAttrAll(Operation *op, StringRef name) {
   auto configAttrs = lookupRequiredConfigAttrs(op);
-  if (!configAttrs) return false;
+  if (!configAttrs)
+    return false;
   for (auto configAttr : *configAttrs) {
-    if (!configAttr.get(name)) return false;
+    if (!configAttr.get(name))
+      return false;
   }
   return true;
 }
@@ -398,7 +414,8 @@
 std::optional<bool> DeviceTargetAttr::lookupConfigAttrAnd(Operation *op,
                                                           StringRef name) {
   auto configAttrs = lookupRequiredConfigAttrs(op);
-  if (!configAttrs) return std::nullopt;
+  if (!configAttrs)
+    return std::nullopt;
   return joinConfigAttrs<BoolAttr>(
       configAttrs.value(), name, [](bool lhs, bool rhs) { return lhs && rhs; });
 }
@@ -407,16 +424,18 @@
 std::optional<bool> DeviceTargetAttr::lookupConfigAttrOr(Operation *op,
                                                          StringRef name) {
   auto configAttrs = lookupRequiredConfigAttrs(op);
-  if (!configAttrs) return std::nullopt;
+  if (!configAttrs)
+    return std::nullopt;
   return joinConfigAttrs<BoolAttr>(
       configAttrs.value(), name, [](bool lhs, bool rhs) { return lhs || rhs; });
 }
 
 // static
-std::optional<StaticRange<APInt>> DeviceTargetAttr::lookupConfigAttrRange(
-    Operation *op, StringRef name) {
+std::optional<StaticRange<APInt>>
+DeviceTargetAttr::lookupConfigAttrRange(Operation *op, StringRef name) {
   auto configAttrs = lookupRequiredConfigAttrs(op);
-  if (!configAttrs) return std::nullopt;
+  if (!configAttrs)
+    return std::nullopt;
   return joinConfigStaticRanges<IntegerAttr>(
       configAttrs.value(), name,
       [](StaticRange<APInt> lhs, StaticRange<APInt> rhs) {
@@ -428,8 +447,8 @@
 }
 
 // static
-SmallVector<ExecutableTargetAttr, 4> DeviceTargetAttr::lookupExecutableTargets(
-    Operation *op) {
+SmallVector<ExecutableTargetAttr, 4>
+DeviceTargetAttr::lookupExecutableTargets(Operation *op) {
   SmallVector<ExecutableTargetAttr, 4> resultAttrs;
   for (auto deviceTargetAttr : lookup(op)) {
     for (auto executableTargetAttr : deviceTargetAttr.getExecutableTargets()) {
@@ -515,16 +534,17 @@
   // This is the most common case for users manually specifying targets.
   auto genericConfigAttr = getConfiguration();
   auto specificConfigAttr = specificAttr.getConfiguration();
-  if (!genericConfigAttr || !specificConfigAttr) return true;
+  if (!genericConfigAttr || !specificConfigAttr)
+    return true;
 
   // Ensure all fields in specificConfigAttr either don't exist or match.
   for (auto expectedAttr : specificConfigAttr.getValue()) {
     auto actualValue = genericConfigAttr.getNamed(expectedAttr.getName());
     if (!actualValue) {
-      continue;  // ignore, not present in generic
+      continue; // ignore, not present in generic
     }
     if (actualValue->getValue() != expectedAttr.getValue()) {
-      return false;  // mismatch, both have values but they differ
+      return false; // mismatch, both have values but they differ
     }
   }
 
@@ -532,7 +552,7 @@
   // If missing then the generic is _more_ specific and can't match.
   for (auto actualAttr : genericConfigAttr.getValue()) {
     if (!specificConfigAttr.getNamed(actualAttr.getName())) {
-      return false;  // mismatch, present in generic but not specific
+      return false; // mismatch, present in generic but not specific
     }
   }
 
@@ -551,7 +571,8 @@
     }
     // Use an override if specified.
     auto attr = op->getAttrOfType<ExecutableTargetAttr>(attrId);
-    if (attr) return attr;
+    if (attr)
+      return attr;
     // Continue walk.
     op = op->getParentOp();
   }
@@ -596,7 +617,8 @@
 void ExecutableObjectAttr::filterObjects(
     ArrayAttr objectAttrs, ArrayRef<StringRef> extensions,
     SmallVectorImpl<ExecutableObjectAttr> &filteredAttrs) {
-  if (!objectAttrs) return;
+  if (!objectAttrs)
+    return;
   for (auto objectAttr :
        objectAttrs.getAsRange<IREE::HAL::ExecutableObjectAttr>()) {
     auto path = objectAttr.getPath();
@@ -610,17 +632,19 @@
 // Tries to find |filePath| on disk either at its absolute path or joined with
 // any of the specified |searchPaths| in order.
 // Returns the absolute file path when found or a failure if there are no hits.
-static FailureOr<std::string> findFileInPaths(
-    StringRef filePath, ArrayRef<std::string> searchPaths) {
+static FailureOr<std::string>
+findFileInPaths(StringRef filePath, ArrayRef<std::string> searchPaths) {
   // First try to see if it's an absolute path - we don't want to perform any
   // additional processing on top of that.
   if (llvm::sys::path::is_absolute(filePath)) {
-    if (llvm::sys::fs::exists(filePath)) return filePath.str();
+    if (llvm::sys::fs::exists(filePath))
+      return filePath.str();
     return failure();
   }
 
   // Try a relative lookup from the current working directory.
-  if (llvm::sys::fs::exists(filePath)) return filePath.str();
+  if (llvm::sys::fs::exists(filePath))
+    return filePath.str();
 
   // Search each path in turn for a file that exists.
   // It doesn't mean we can open it but we'll get a better error out of the
@@ -628,7 +652,8 @@
   for (auto searchPath : searchPaths) {
     SmallVector<char> tryPath{searchPath.begin(), searchPath.end()};
     llvm::sys::path::append(tryPath, filePath);
-    if (llvm::sys::fs::exists(Twine(tryPath))) return Twine(tryPath).str();
+    if (llvm::sys::fs::exists(Twine(tryPath)))
+      return Twine(tryPath).str();
   }
 
   // Not found in either the user-specified absolute path, cwd, or the search
@@ -644,7 +669,8 @@
 
 FailureOr<std::string> ExecutableObjectAttr::getAbsolutePath() {
   auto pathAttr = getPath();
-  if (!pathAttr) return failure();  // not a file reference
+  if (!pathAttr)
+    return failure(); // not a file reference
   return findFileInPaths(pathAttr.getValue(), clExecutableObjectSearchPath);
 }
 
@@ -668,7 +694,8 @@
       return std::nullopt;
     }
     auto file = llvm::MemoryBuffer::getFile(*filePath);
-    if (!file) return std::nullopt;
+    if (!file)
+      return std::nullopt;
     return std::string((*file)->getBuffer());
   }
   return std::nullopt;
@@ -706,7 +733,8 @@
   // `<{` target = [objects, ...], ... `}>`
   SmallVector<Attribute> targetAttrs;
   SmallVector<Attribute> objectsAttrs;
-  if (failed(p.parseLess())) return {};
+  if (failed(p.parseLess()))
+    return {};
   if (succeeded(p.parseLBrace()) && !succeeded(p.parseOptionalRBrace())) {
     do {
       Attribute targetAttr;
@@ -718,9 +746,11 @@
       targetAttrs.push_back(targetAttr);
       objectsAttrs.push_back(objectsAttr);
     } while (succeeded(p.parseOptionalComma()));
-    if (failed(p.parseRBrace())) return {};
+    if (failed(p.parseRBrace()))
+      return {};
   }
-  if (failed(p.parseGreater())) return {};
+  if (failed(p.parseGreater()))
+    return {};
   return get(p.getContext(), ArrayAttr::get(p.getContext(), targetAttrs),
              ArrayAttr::get(p.getContext(), objectsAttrs));
 }
@@ -749,7 +779,8 @@
       allObjectAttrs.append(objectsArrayAttr.begin(), objectsArrayAttr.end());
     }
   }
-  if (allObjectAttrs.empty()) return std::nullopt;
+  if (allObjectAttrs.empty())
+    return std::nullopt;
   return ArrayAttr::get(specificTargetAttr.getContext(), allObjectAttrs);
 }
 
@@ -761,7 +792,8 @@
 Attribute AffinityQueueAttr::parse(AsmParser &p, Type type) {
   int64_t mask = 0;
   // `<`
-  if (failed(p.parseLess())) return {};
+  if (failed(p.parseLess()))
+    return {};
   // `*` (any)
   if (succeeded(p.parseOptionalStar())) {
     mask = -1;
@@ -769,7 +801,8 @@
     // `[`queue_bit[, ...] `]`
     if (failed(p.parseCommaSeparatedList(AsmParser::Delimiter::Square, [&]() {
           int64_t i = 0;
-          if (failed(p.parseInteger(i))) return failure();
+          if (failed(p.parseInteger(i)))
+            return failure();
           mask |= 1ll << i;
           return success();
         }))) {
@@ -777,7 +810,8 @@
     }
   }
   // `>`
-  if (failed(p.parseGreater())) return {};
+  if (failed(p.parseGreater()))
+    return {};
   return get(p.getContext(), mask);
 }
 
@@ -791,7 +825,8 @@
     os << "[";
     for (int i = 0, j = 0; i < sizeof(mask) * 8; ++i) {
       if (mask & (1ll << i)) {
-        if (j++ > 0) os << ", ";
+        if (j++ > 0)
+          os << ", ";
         os << i;
       }
     }
@@ -802,21 +837,25 @@
 
 bool AffinityQueueAttr::isExecutableWith(
     IREE::Stream::AffinityAttr other) const {
-  if (!other) return true;
+  if (!other)
+    return true;
   // Only compatible with other queue affinities today. When we extend the
   // attributes to specify device targets we'd want to check here.
   auto otherQueueAttr = llvm::dyn_cast_if_present<AffinityQueueAttr>(other);
-  if (!otherQueueAttr) return false;
+  if (!otherQueueAttr)
+    return false;
   // If this affinity is a subset of the target affinity then it can execute
   // with it.
-  if ((getMask() & otherQueueAttr.getMask()) == getMask()) return true;
+  if ((getMask() & otherQueueAttr.getMask()) == getMask())
+    return true;
   // Otherwise not compatible.
   return false;
 }
 
-IREE::Stream::AffinityAttr AffinityQueueAttr::joinOR(
-    IREE::Stream::AffinityAttr other) const {
-  if (!other) return *this;
+IREE::Stream::AffinityAttr
+AffinityQueueAttr::joinOR(IREE::Stream::AffinityAttr other) const {
+  if (!other)
+    return *this;
   if (!IREE::Stream::AffinityAttr::canExecuteTogether(*this, other)) {
     return nullptr;
   }
@@ -825,9 +864,10 @@
                                 getMask() | otherQueueAttr.getMask());
 }
 
-IREE::Stream::AffinityAttr AffinityQueueAttr::joinAND(
-    IREE::Stream::AffinityAttr other) const {
-  if (!other) return *this;
+IREE::Stream::AffinityAttr
+AffinityQueueAttr::joinAND(IREE::Stream::AffinityAttr other) const {
+  if (!other)
+    return *this;
   if (!IREE::Stream::AffinityAttr::canExecuteTogether(*this, other)) {
     return nullptr;
   }
@@ -1057,7 +1097,7 @@
 
   addAttributes<
 #define GET_ATTRDEF_LIST
-#include "iree/compiler/Dialect/HAL/IR/HALAttrs.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/HAL/IR/HALAttrs.cpp.inc" // IWYU pragma: keep
       >();
 }
 
@@ -1078,7 +1118,8 @@
   Attribute genAttr;
   OptionalParseResult parseResult =
       generatedAttributeParser(parser, &mnemonic, type, genAttr);
-  if (parseResult.has_value()) return genAttr;
+  if (parseResult.has_value())
+    return genAttr;
   parser.emitError(parser.getNameLoc())
       << "unknown HAL attribute: " << mnemonic;
   return {};
@@ -1098,7 +1139,8 @@
 
 Type HALDialect::parseType(DialectAsmParser &parser) const {
   StringRef typeKind;
-  if (parser.parseKeyword(&typeKind)) return {};
+  if (parser.parseKeyword(&typeKind))
+    return {};
   auto type =
       llvm::StringSwitch<Type>(typeKind)
           .Case("allocator", AllocatorType::get(getContext()))
@@ -1155,7 +1197,7 @@
   }
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h
index 77172eb..ad7123a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h
@@ -27,7 +27,7 @@
 #include "mlir/Support/LLVM.h"
 
 // clang-format off: must be included after all LLVM/MLIR headers.
-#include "iree/compiler/Dialect/HAL/IR/HALEnums.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/HAL/IR/HALEnums.h.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -35,9 +35,9 @@
 namespace IREE {
 namespace HAL {
 
-#include "iree/compiler/Dialect/HAL/IR/HALAttrInterfaces.h.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/HAL/IR/HALOpInterfaces.h.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/HAL/IR/HALTypeInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/HAL/IR/HALAttrInterfaces.h.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/HAL/IR/HALOpInterfaces.h.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/HAL/IR/HALTypeInterfaces.h.inc" // IWYU pragma: export
 
 //===----------------------------------------------------------------------===//
 // Enum utilities
@@ -162,10 +162,10 @@
   StaticRange(T min, T max) : min(min), max(max) {}
 };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 // It's unfortunate this is required.
 namespace mlir {
@@ -173,20 +173,22 @@
 template <>
 struct FieldParser<
     std::optional<mlir::iree_compiler::IREE::HAL::CollectiveReductionOp>> {
-  static FailureOr<mlir::iree_compiler::IREE::HAL::CollectiveReductionOp> parse(
-      AsmParser &parser) {
+  static FailureOr<mlir::iree_compiler::IREE::HAL::CollectiveReductionOp>
+  parse(AsmParser &parser) {
     std::string value;
-    if (parser.parseKeywordOrString(&value)) return failure();
+    if (parser.parseKeywordOrString(&value))
+      return failure();
     auto result = mlir::iree_compiler::IREE::HAL::symbolizeEnum<
         mlir::iree_compiler::IREE::HAL::CollectiveReductionOp>(value);
-    if (!result.has_value()) return failure();
+    if (!result.has_value())
+      return failure();
     return result.value();
   }
 };
-static inline AsmPrinter &operator<<(
-    AsmPrinter &printer,
-    std::optional<mlir::iree_compiler::IREE::HAL::CollectiveReductionOp>
-        param) {
+static inline AsmPrinter &
+operator<<(AsmPrinter &printer,
+           std::optional<mlir::iree_compiler::IREE::HAL::CollectiveReductionOp>
+               param) {
   printer << (param.has_value()
                   ? mlir::iree_compiler::IREE::HAL::stringifyEnum(param.value())
                   : StringRef{""});
@@ -196,13 +198,15 @@
 template <>
 struct FieldParser<
     std::optional<mlir::iree_compiler::IREE::HAL::DescriptorFlags>> {
-  static FailureOr<mlir::iree_compiler::IREE::HAL::DescriptorFlags> parse(
-      AsmParser &parser) {
+  static FailureOr<mlir::iree_compiler::IREE::HAL::DescriptorFlags>
+  parse(AsmParser &parser) {
     std::string value;
-    if (parser.parseKeywordOrString(&value)) return failure();
+    if (parser.parseKeywordOrString(&value))
+      return failure();
     auto result = mlir::iree_compiler::IREE::HAL::symbolizeEnum<
         mlir::iree_compiler::IREE::HAL::DescriptorFlags>(value);
-    if (!result.has_value()) return failure();
+    if (!result.has_value())
+      return failure();
     return result.value();
   }
 };
@@ -215,17 +219,18 @@
   return printer;
 }
 
-static inline AsmPrinter &operator<<(
-    AsmPrinter &printer, mlir::iree_compiler::IREE::HAL::DescriptorType param) {
+static inline AsmPrinter &
+operator<<(AsmPrinter &printer,
+           mlir::iree_compiler::IREE::HAL::DescriptorType param) {
   printer << mlir::iree_compiler::IREE::HAL::stringifyEnum(param);
   return printer;
 }
 
-}  // namespace mlir
+} // namespace mlir
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/HAL/IR/HALAttrs.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/HAL/IR/HALAttrs.h.inc" // IWYU pragma: keep
 // clang-format on
 
-#endif  // IREE_COMPILER_DIALECT_HAL_IR_HALTYPES_H_
+#endif // IREE_COMPILER_DIALECT_HAL_IR_HALTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.cpp
index 7db9e0a..6c99432 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.cpp
@@ -20,13 +20,14 @@
 static const iree_file_toc_t *lookupDeviceFile(StringRef filename) {
   for (size_t i = 0; i < iree_builtins_libdevice_bitcode_size(); ++i) {
     const auto &file_toc = iree_builtins_libdevice_bitcode_create()[i];
-    if (filename == file_toc.name) return &file_toc;
+    if (filename == file_toc.name)
+      return &file_toc;
   }
   return nullptr;
 }
 
-static const iree_file_toc_t *lookupDeviceFile(
-    llvm::TargetMachine *targetMachine) {
+static const iree_file_toc_t *
+lookupDeviceFile(llvm::TargetMachine *targetMachine) {
   const auto &triple = targetMachine->getTargetTriple();
 
   // NOTE: other arch-specific checks go here.
@@ -52,8 +53,9 @@
   }
 }
 
-llvm::Expected<std::unique_ptr<llvm::Module>> loadDeviceBitcode(
-    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context) {
+llvm::Expected<std::unique_ptr<llvm::Module>>
+loadDeviceBitcode(llvm::TargetMachine *targetMachine,
+                  llvm::LLVMContext &context) {
   // Find a bitcode file for the current architecture.
   const auto *file = lookupDeviceFile(targetMachine);
   if (!file) {
@@ -65,7 +67,8 @@
   llvm::MemoryBufferRef bitcodeBufferRef(
       llvm::StringRef(file->data, file->size), file->name);
   auto bitcodeModuleValue = llvm::parseBitcodeFile(bitcodeBufferRef, context);
-  if (!bitcodeModuleValue) return bitcodeModuleValue;
+  if (!bitcodeModuleValue)
+    return bitcodeModuleValue;
   auto bitcodeModule = std::move(bitcodeModuleValue.get());
 
   // Clang adds its own per-function attributes that we need to strip so that
@@ -83,7 +86,8 @@
                                    uint32_t newValue) {
   // NOTE: the global will not be defined if it is not used in the module.
   auto *globalValue = module.getNamedGlobal(globalName);
-  if (!globalValue) return;
+  if (!globalValue)
+    return;
   globalValue->setLinkage(llvm::GlobalValue::PrivateLinkage);
   globalValue->setDSOLocal(true);
   globalValue->setConstant(true);
@@ -97,7 +101,7 @@
   overridePlatformGlobal(module, "libdevice_platform_example_flag", 0u);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.h
index f75a5c2..4b8337b 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Device.h
@@ -18,17 +18,18 @@
 namespace HAL {
 
 // Loads the libdevice bitcode file and specializes it for |targetMachine|.
-llvm::Expected<std::unique_ptr<llvm::Module>> loadDeviceBitcode(
-    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context);
+llvm::Expected<std::unique_ptr<llvm::Module>>
+loadDeviceBitcode(llvm::TargetMachine *targetMachine,
+                  llvm::LLVMContext &context);
 
 // Specializes |module| using |targetMachine|.
 void specializeDeviceModule(IREE::HAL::ExecutableVariantOp variantOp,
                             llvm::Module &module,
                             llvm::TargetMachine &targetMachine);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_BUILTINS_DEVICE_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_BUILTINS_DEVICE_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.cpp
index 3351501..b277eb4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.cpp
@@ -19,13 +19,14 @@
 static const iree_file_toc_t *lookupMuslFile(StringRef filename) {
   for (size_t i = 0; i < iree_builtins_libmusl_size(); ++i) {
     const auto &file_toc = iree_builtins_libmusl_create()[i];
-    if (filename == file_toc.name) return &file_toc;
+    if (filename == file_toc.name)
+      return &file_toc;
   }
   return nullptr;
 }
 
-static const iree_file_toc_t *lookupMuslFile(
-    llvm::TargetMachine *targetMachine) {
+static const iree_file_toc_t *
+lookupMuslFile(llvm::TargetMachine *targetMachine) {
   const auto &triple = targetMachine->getTargetTriple();
 
   // NOTE: other arch-specific checks go here.
@@ -41,8 +42,9 @@
   }
 }
 
-llvm::Expected<std::unique_ptr<llvm::Module>> loadMuslBitcode(
-    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context) {
+llvm::Expected<std::unique_ptr<llvm::Module>>
+loadMuslBitcode(llvm::TargetMachine *targetMachine,
+                llvm::LLVMContext &context) {
   // Find a bitcode file for the current architecture.
   const auto *file = lookupMuslFile(targetMachine);
   if (!file) {
@@ -56,7 +58,7 @@
   return llvm::parseBitcodeFile(bitcodeBufferRef, context);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.h
index b39c94d..b574cdb 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/Musl.h
@@ -15,12 +15,12 @@
 namespace IREE {
 namespace HAL {
 
-llvm::Expected<std::unique_ptr<llvm::Module>> loadMuslBitcode(
-    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context);
+llvm::Expected<std::unique_ptr<llvm::Module>>
+loadMuslBitcode(llvm::TargetMachine *targetMachine, llvm::LLVMContext &context);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_BUILTINS_MUSL_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_BUILTINS_MUSL_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.cpp
index e5f26bf..89f4807 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.cpp
@@ -17,24 +17,26 @@
 namespace IREE {
 namespace HAL {
 
-static std::unique_ptr<llvm::Module> loadUKernelBitcodeFile(
-    StringRef filename, llvm::LLVMContext& context) {
-  const iree_file_toc_t* file_start = iree_ukernel_bitcode_create();
-  const iree_file_toc_t* file_end = file_start + iree_ukernel_bitcode_size();
-  for (const iree_file_toc_t* file = file_start; file < file_end; ++file) {
+static std::unique_ptr<llvm::Module>
+loadUKernelBitcodeFile(StringRef filename, llvm::LLVMContext &context) {
+  const iree_file_toc_t *file_start = iree_ukernel_bitcode_create();
+  const iree_file_toc_t *file_end = file_start + iree_ukernel_bitcode_size();
+  for (const iree_file_toc_t *file = file_start; file < file_end; ++file) {
     if (filename == file->name) {
       llvm::MemoryBufferRef bitcodeBufferRef(
           llvm::StringRef(file->data, file->size), file->name);
       auto bitcodeFile = llvm::parseBitcodeFile(bitcodeBufferRef, context);
-      if (!bitcodeFile) return nullptr;
+      if (!bitcodeFile)
+        return nullptr;
       return std::move(*bitcodeFile);
     }
   }
   return nullptr;
 }
 
-std::unique_ptr<llvm::Module> loadUKernelBaseBitcode(
-    llvm::TargetMachine* targetMachine, llvm::LLVMContext& context) {
+std::unique_ptr<llvm::Module>
+loadUKernelBaseBitcode(llvm::TargetMachine *targetMachine,
+                       llvm::LLVMContext &context) {
   llvm::Triple triple = targetMachine->getTargetTriple();
   StringRef filename;
   if (triple.isArch64Bit()) {
@@ -51,7 +53,7 @@
   // Copied from Device.cpp - TODO: move this to a shared utility.
   // Clang adds its own per-function attributes that we need to strip so that
   // our current executable variant target is used instead.
-  for (auto& func : baseBitcode->functions()) {
+  for (auto &func : baseBitcode->functions()) {
     func.removeFnAttr("target-cpu");
     func.removeFnAttr("tune-cpu");
     func.removeFnAttr("target-features");
@@ -60,9 +62,10 @@
   return baseBitcode;
 }
 
-std::unique_ptr<llvm::Module> loadUKernelArchBitcode(
-    llvm::TargetMachine* targetMachine, llvm::LLVMContext& context) {
-  const char* archName =
+std::unique_ptr<llvm::Module>
+loadUKernelArchBitcode(llvm::TargetMachine *targetMachine,
+                       llvm::LLVMContext &context) {
+  const char *archName =
       getIreeArchNameForTargetTriple(targetMachine->getTargetTriple());
   char archBitcodeFilename[64];
   snprintf(archBitcodeFilename, sizeof archBitcodeFilename,
@@ -74,7 +77,7 @@
   return archBitcode;
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.h
index 8ef2606..6821df8 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/Builtins/UKernel.h
@@ -15,15 +15,17 @@
 namespace IREE {
 namespace HAL {
 
-std::unique_ptr<llvm::Module> loadUKernelBaseBitcode(
-    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context);
+std::unique_ptr<llvm::Module>
+loadUKernelBaseBitcode(llvm::TargetMachine *targetMachine,
+                       llvm::LLVMContext &context);
 
-std::unique_ptr<llvm::Module> loadUKernelArchBitcode(
-    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context);
+std::unique_ptr<llvm::Module>
+loadUKernelArchBitcode(llvm::TargetMachine *targetMachine,
+                       llvm::LLVMContext &context);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_BUILTINS_UKERNEL_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_BUILTINS_UKERNEL_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp
index 726cde8..10a5960 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.cpp
@@ -115,7 +115,7 @@
 
   // Matches iree_hal_system_executable_footer_t.
   struct Footer {
-    uint8_t magic[8];  // IREEDBG\0
+    uint8_t magic[8]; // IREEDBG\0
     uint32_t version;
     uint32_t flags;
     uint64_t libraryOffset;
@@ -146,15 +146,17 @@
 // drive everything.
 static bool hasMicrokernel(IREE::HAL::ExecutableVariantOp variantOp) {
   IREE::HAL::ExecutableTargetAttr targetAttr = variantOp.getTarget();
-  if (!targetAttr) return false;
+  if (!targetAttr)
+    return false;
   auto config = targetAttr.getConfiguration();
-  if (!config) return false;
+  if (!config)
+    return false;
   auto attr = config.getAs<BoolAttr>("ukernels");
   return attr && attr.getValue();
 }
 
 class LLVMCPUTargetBackend final : public TargetBackend {
- public:
+public:
   explicit LLVMCPUTargetBackend(LLVMTargetOptions options)
       : options_(std::move(options)) {
     initializeConfiguration(options_);
@@ -177,8 +179,8 @@
     // clang-format on
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -205,9 +207,11 @@
   LLVMTarget getVariantTarget(IREE::HAL::ExecutableVariantOp variantOp) {
     auto configAttr = variantOp.getTarget().getConfiguration();
     auto tryAttrLookup = [&](StringRef name, StringRef fallback) {
-      if (!configAttr) return fallback.str();
+      if (!configAttr)
+        return fallback.str();
       auto value = llvm::dyn_cast_if_present<StringAttr>(configAttr.get(name));
-      if (!value) return fallback.str();
+      if (!value)
+        return fallback.str();
       return value.str();
     };
     LLVMTarget target;
@@ -291,22 +295,22 @@
                                   LibraryBuilder::Version::LATEST);
 
     switch (options_.sanitizerKind) {
-      case SanitizerKind::kNone: {
-        libraryBuilder.setSanitizerKind(LibraryBuilder::SanitizerKind::NONE);
-        break;
+    case SanitizerKind::kNone: {
+      libraryBuilder.setSanitizerKind(LibraryBuilder::SanitizerKind::NONE);
+      break;
+    }
+    case SanitizerKind::kAddress: {
+      libraryBuilder.setSanitizerKind(LibraryBuilder::SanitizerKind::ADDRESS);
+      for (auto &function : llvmModule->getFunctionList()) {
+        function.addFnAttr(llvm::Attribute::SanitizeAddress);
       }
-      case SanitizerKind::kAddress: {
-        libraryBuilder.setSanitizerKind(LibraryBuilder::SanitizerKind::ADDRESS);
-        for (auto &function : llvmModule->getFunctionList()) {
-          function.addFnAttr(llvm::Attribute::SanitizeAddress);
-        }
-      } break;
-      case SanitizerKind::kThread: {
-        libraryBuilder.setSanitizerKind(LibraryBuilder::SanitizerKind::THREAD);
-        for (auto &function : llvmModule->getFunctionList()) {
-          function.addFnAttr(llvm::Attribute::SanitizeThread);
-        }
-      } break;
+    } break;
+    case SanitizerKind::kThread: {
+      libraryBuilder.setSanitizerKind(LibraryBuilder::SanitizerKind::THREAD);
+      for (auto &function : llvmModule->getFunctionList()) {
+        function.addFnAttr(llvm::Attribute::SanitizeThread);
+      }
+    } break;
     }
 
     // Declare dynamically imported functions.
@@ -499,7 +503,8 @@
     // Strip any compiler identifiers that may have snuck in. We let the linker
     // tag the module.
     auto *llvmIdent = llvmModule->getNamedMetadata("llvm.ident");
-    if (llvmIdent) llvmIdent->clearOperands();
+    if (llvmIdent)
+      llvmIdent->clearOperands();
 
     // Dump all linked bitcode prior to optimization.
     if (!options.dumpIntermediatesPath.empty()) {
@@ -685,25 +690,25 @@
       const char *mimeType = nullptr;
       const char *extension = "";
       switch (targetTriple.getObjectFormat()) {
-        case llvm::Triple::ObjectFormatType::COFF:
-          mimeType = "application/x-msdownload";
-          extension = ".dll";
-          break;
-        case llvm::Triple::ObjectFormatType::ELF:
-          mimeType = "application/x-elf";
-          extension = ".so";
-          break;
-        case llvm::Triple::ObjectFormatType::MachO:
-          mimeType = "application/x-dylib";
-          extension = ".dylib";
-          break;
-        case llvm::Triple::ObjectFormatType::Wasm:
-          mimeType = "application/wasm";
-          extension = ".wasm";
-          break;
-        default:
-          mimeType = "application/octet-stream";
-          break;
+      case llvm::Triple::ObjectFormatType::COFF:
+        mimeType = "application/x-msdownload";
+        extension = ".dll";
+        break;
+      case llvm::Triple::ObjectFormatType::ELF:
+        mimeType = "application/x-elf";
+        extension = ".so";
+        break;
+      case llvm::Triple::ObjectFormatType::MachO:
+        mimeType = "application/x-dylib";
+        extension = ".dylib";
+        break;
+      case llvm::Triple::ObjectFormatType::Wasm:
+        mimeType = "application/wasm";
+        extension = ".wasm";
+        break;
+      default:
+        mimeType = "application/octet-stream";
+        break;
       }
 
       // Load the linked system library and optionally tag on the debug
@@ -742,7 +747,7 @@
     return success();
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // This is where we would multiversion.
@@ -750,8 +755,8 @@
     return ArrayAttr::get(context, targetAttrs);
   }
 
-  IREE::HAL::ExecutableTargetAttr getExecutableTarget(
-      MLIRContext *context) const {
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context) const {
     std::string format;
     if (options_.linkStatic) {
       // Static libraries are just string references when serialized so we don't
@@ -767,21 +772,21 @@
         // System-specific shared library format.
         format += "system-";
         switch (targetTriple.getObjectFormat()) {
-          case llvm::Triple::ObjectFormatType::COFF:
-            format += "dll-";
-            break;
-          case llvm::Triple::ObjectFormatType::ELF:
-            format += "elf-";
-            break;
-          case llvm::Triple::ObjectFormatType::MachO:
-            format += "dylib-";
-            break;
-          case llvm::Triple::ObjectFormatType::Wasm:
-            format += "wasm-";
-            break;
-          default:
-            format += "unknown-";
-            break;
+        case llvm::Triple::ObjectFormatType::COFF:
+          format += "dll-";
+          break;
+        case llvm::Triple::ObjectFormatType::ELF:
+          format += "elf-";
+          break;
+        case llvm::Triple::ObjectFormatType::MachO:
+          format += "dylib-";
+          break;
+        case llvm::Triple::ObjectFormatType::Wasm:
+          format += "wasm-";
+          break;
+        default:
+          format += "unknown-";
+          break;
         }
       }
       format += getIreeArchNameForTargetTriple(targetTriple);
@@ -888,11 +893,11 @@
 // require build support, which is a pain to manage across platforms.
 //
 // See comments below.
-#define LLVM_INITIALIZE_GENERIC(TargetName) \
-  LLVMInitialize##TargetName##Target();     \
-  LLVMInitialize##TargetName##TargetMC();   \
-  LLVMInitialize##TargetName##TargetInfo(); \
-  LLVMInitialize##TargetName##AsmPrinter(); \
+#define LLVM_INITIALIZE_GENERIC(TargetName)                                    \
+  LLVMInitialize##TargetName##Target();                                        \
+  LLVMInitialize##TargetName##TargetMC();                                      \
+  LLVMInitialize##TargetName##TargetInfo();                                    \
+  LLVMInitialize##TargetName##AsmPrinter();                                    \
   LLVMInitialize##TargetName##AsmParser();
 
 // CPU targets that we care about and have hard-linked against are here.
@@ -902,7 +907,7 @@
 #define LLVM_INITIALIZE_TARGET_ARM() LLVM_INITIALIZE_GENERIC(ARM)
 #define LLVM_INITIALIZE_TARGET_RISCV() LLVM_INITIALIZE_GENERIC(RISCV)
 #define LLVM_INITIALIZE_TARGET_X86() LLVM_INITIALIZE_GENERIC(X86)
-#define LLVM_INITIALIZE_TARGET_WebAssembly() \
+#define LLVM_INITIALIZE_TARGET_WebAssembly()                                   \
   LLVM_INITIALIZE_GENERIC(WebAssembly)
 
 // We must no-op the name of each target we don't care about. This is annoying,
@@ -935,7 +940,7 @@
   static TargetBackendRegistration registration("llvm-cpu", backendFactory);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.h
index 2b3e7ee..84ded5b 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.h
@@ -19,9 +19,9 @@
 void registerLLVMCPUTargetBackends(
     std::function<LLVMTargetOptions()> queryOptions);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LLVMCPUTARGET_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LLVMCPUTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp
index caf09b8..1e398ed 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.cpp
@@ -27,12 +27,14 @@
 namespace IREE {
 namespace HAL {
 
-std::unique_ptr<llvm::TargetMachine> createTargetMachine(
-    const LLVMTarget &target, const LLVMTargetOptions &targetOptions) {
+std::unique_ptr<llvm::TargetMachine>
+createTargetMachine(const LLVMTarget &target,
+                    const LLVMTargetOptions &targetOptions) {
   std::string errorMessage;
   auto llvmTarget =
       llvm::TargetRegistry::lookupTarget(target.triple, errorMessage);
-  if (!llvmTarget) return nullptr;
+  if (!llvmTarget)
+    return nullptr;
   std::unique_ptr<llvm::TargetMachine> machine(llvmTarget->createTargetMachine(
       target.triple, target.cpu /* cpu e.g k8*/,
       target.cpuFeatures /* cpu features e.g avx512fma*/, targetOptions.options,
@@ -68,35 +70,34 @@
                                    cGSCCAnalysisManager, moduleAnalysisManager);
 
   switch (options.sanitizerKind) {
-    case SanitizerKind::kNone:
-      break;
-    case SanitizerKind::kAddress: {
-      passBuilder.registerOptimizerLastEPCallback(
-          [](llvm::ModulePassManager &modulePassManager,
-             llvm::OptimizationLevel Level) {
-            llvm::AddressSanitizerOptions opts;
-            // Can use Never or Always, just not the default Runtime, which
-            // introduces a reference to
-            // __asan_option_detect_stack_use_after_return, causing linker
-            // errors, and anyway we wouldn't really want bother to with a
-            // runtime switch for that.
-            opts.UseAfterReturn =
-                llvm::AsanDetectStackUseAfterReturnMode::Always;
-            bool moduleUseAfterScope = false;
-            bool useOdrIndicator = false;
-            modulePassManager.addPass(llvm::AddressSanitizerPass(
-                opts, moduleUseAfterScope, useOdrIndicator));
-          });
-    } break;
-    case SanitizerKind::kThread: {
-      passBuilder.registerOptimizerLastEPCallback(
-          [](llvm::ModulePassManager &modulePassManager,
-             llvm::OptimizationLevel Level) {
-            modulePassManager.addPass(llvm::ModuleThreadSanitizerPass());
-            modulePassManager.addPass(llvm::createModuleToFunctionPassAdaptor(
-                llvm::ThreadSanitizerPass()));
-          });
-    } break;
+  case SanitizerKind::kNone:
+    break;
+  case SanitizerKind::kAddress: {
+    passBuilder.registerOptimizerLastEPCallback(
+        [](llvm::ModulePassManager &modulePassManager,
+           llvm::OptimizationLevel Level) {
+          llvm::AddressSanitizerOptions opts;
+          // Can use Never or Always, just not the default Runtime, which
+          // introduces a reference to
+          // __asan_option_detect_stack_use_after_return, causing linker
+          // errors, and anyway we wouldn't really want bother to with a
+          // runtime switch for that.
+          opts.UseAfterReturn = llvm::AsanDetectStackUseAfterReturnMode::Always;
+          bool moduleUseAfterScope = false;
+          bool useOdrIndicator = false;
+          modulePassManager.addPass(llvm::AddressSanitizerPass(
+              opts, moduleUseAfterScope, useOdrIndicator));
+        });
+  } break;
+  case SanitizerKind::kThread: {
+    passBuilder.registerOptimizerLastEPCallback(
+        [](llvm::ModulePassManager &modulePassManager,
+           llvm::OptimizationLevel Level) {
+          modulePassManager.addPass(llvm::ModuleThreadSanitizerPass());
+          modulePassManager.addPass(llvm::createModuleToFunctionPassAdaptor(
+              llvm::ThreadSanitizerPass()));
+        });
+  } break;
   }
 
   if (options.optimizerOptLevel != llvm::OptimizationLevel::O0 ||
@@ -107,7 +108,8 @@
     modulePassManager.run(*module, moduleAnalysisManager);
   }
 
-  if (llvm::verifyModule(*module)) return failure();
+  if (llvm::verifyModule(*module))
+    return failure();
 
   return success();
 }
@@ -135,7 +137,7 @@
   return success();
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h
index 2fda41a..555cd92 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMIRPasses.h
@@ -20,8 +20,8 @@
 namespace HAL {
 
 // Creates target machine form target options.
-std::unique_ptr<llvm::TargetMachine> createTargetMachine(
-    const LLVMTarget &target, const LLVMTargetOptions &options);
+std::unique_ptr<llvm::TargetMachine>
+createTargetMachine(const LLVMTarget &target, const LLVMTargetOptions &options);
 
 // Creates and runs LLVMIR optimization passes defined in LLVMTargetOptions.
 LogicalResult runLLVMIRPasses(const LLVMTargetOptions &options,
@@ -34,9 +34,9 @@
                                    llvm::CodeGenFileType fileType,
                                    std::string *objData);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LLVMIRPASSES_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LLVMIRPASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp
index 5719be7..f21622c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.cpp
@@ -243,7 +243,7 @@
   return targetOptions;
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h
index 9bf986f..2ccd3be 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMTargetOptions.h
@@ -85,9 +85,9 @@
 // Returns LLVMTargetOptions struct intialized with the iree-llvmcpu-* flags.
 LLVMTargetOptions getLLVMTargetOptionsFromFlags();
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LLVMTARGETOPTIONS_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LLVMTARGETOPTIONS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.cpp
index 8c01d50..35863d4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.cpp
@@ -97,8 +97,8 @@
 // i32 (%struct.iree_hal_executable_environment_v0_t*,
 //      %struct.iree_hal_executable_dispatch_state_v0_t*,
 //      i8*)
-static llvm::FunctionType *makeDispatchFunctionType(
-    llvm::LLVMContext &context) {
+static llvm::FunctionType *
+makeDispatchFunctionType(llvm::LLVMContext &context) {
   auto *environmentType = makeEnvironmentType(context);
   auto *dispatchStateType = makeDispatchStateType(context);
   auto *workgroupStateType = makeWorkgroupStateType(context);
@@ -324,8 +324,8 @@
   return func;
 }
 
-llvm::Constant *LibraryBuilder::buildLibraryV0ImportTable(
-    std::string libraryName) {
+llvm::Constant *
+LibraryBuilder::buildLibraryV0ImportTable(std::string libraryName) {
   auto &context = module->getContext();
   auto *importTableType = makeImportTableType(context);
   auto *i8Type = llvm::IntegerType::getInt8Ty(context);
@@ -363,8 +363,8 @@
                        });
 }
 
-llvm::Constant *LibraryBuilder::buildLibraryV0ExportTable(
-    std::string libraryName) {
+llvm::Constant *
+LibraryBuilder::buildLibraryV0ExportTable(std::string libraryName) {
   auto &context = module->getContext();
   auto *exportTableType = makeExportTableType(context);
   auto *dispatchFunctionType = makeDispatchFunctionType(context);
@@ -511,8 +511,8 @@
                        });
 }
 
-llvm::Constant *LibraryBuilder::buildLibraryV0ConstantTable(
-    std::string libraryName) {
+llvm::Constant *
+LibraryBuilder::buildLibraryV0ConstantTable(std::string libraryName) {
   auto &context = module->getContext();
   auto *constantTableType = makeConstantTableType(context);
   auto *i32Type = llvm::IntegerType::getInt32Ty(context);
@@ -574,7 +574,7 @@
   return library;
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.h
index c3e40da..ef32fc8 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LibraryBuilder.h
@@ -33,7 +33,7 @@
 //  auto *queryFunc = builder.build("_query_library_foo");
 //  // call queryFunc, export it, etc
 class LibraryBuilder {
- public:
+public:
   // Builder mode setting.
   enum class Mode : uint32_t {
     NONE = 0u,
@@ -48,7 +48,7 @@
     // NOTE: until we hit v1 the versioning scheme here is not set in stone.
     // We may want to make this major release number, date codes (0x20220307),
     // or some semantic versioning we track in whatever spec we end up having.
-    V_0_3 = 0x0000'0003u,  // v0.3 - ~2022-08-08
+    V_0_3 = 0x0000'0003u, // v0.3 - ~2022-08-08
 
     // Pinned to the latest version.
     // Requires that the runtime be compiled with the same version.
@@ -130,7 +130,7 @@
   // unit, etc).
   llvm::Function *build(StringRef queryFuncName);
 
- private:
+private:
   // Builds and returns an iree_hal_executable_library_v0_t global constant.
   llvm::Constant *buildLibraryV0(std::string libraryName);
   llvm::Constant *buildLibraryV0ImportTable(std::string libraryName);
@@ -162,9 +162,9 @@
   size_t constantCount = 0;
 };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LIBRARYBUILDER_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LIBRARYBUILDER_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.cpp
index 230a542..510fcef 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.cpp
@@ -59,7 +59,8 @@
 }
 
 void Artifact::keep() const {
-  if (outputFile) outputFile->keep();
+  if (outputFile)
+    outputFile->keep();
 }
 
 std::optional<std::vector<int8_t>> Artifact::read() const {
@@ -126,19 +127,20 @@
     commandLine = ("set " + env + " && " + commandLine).str();
 #else
     commandLine = (env + " " + commandLine).str();
-#endif  // _WIN32
+#endif // _WIN32
   } else {
     commandLine = escapeCommandLineComponent(commandLine);
   }
   int exitCode = system(commandLine.c_str());
-  if (exitCode == 0) return success();
+  if (exitCode == 0)
+    return success();
   llvm::errs() << "Linking failed; escaped command line returned exit code "
                << exitCode << ":\n\n"
                << commandLine << "\n\n";
   return failure();
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.h
index 02afa4e..0b815fa 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/LinkerTool.h
@@ -78,11 +78,12 @@
 
 // Base type for linker tools that can turn object files into shared objects.
 class LinkerTool {
- public:
+public:
   // Gets an instance of a linker tool for the given target options. This may
   // be a completely different toolchain than that of the host.
-  static std::unique_ptr<LinkerTool> getForTarget(
-      const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
+  static std::unique_ptr<LinkerTool>
+  getForTarget(const llvm::Triple &targetTriple,
+               LLVMTargetOptions &targetOptions);
 
   explicit LinkerTool(llvm::Triple targetTriple,
                       LLVMTargetOptions targetOptions)
@@ -97,18 +98,19 @@
 
   // Configures a module prior to compilation with any additional
   // functions/exports it may need, such as shared object initializer functions.
-  virtual LogicalResult configureModule(
-      llvm::Module *llvmModule, ArrayRef<llvm::Function *> exportedFuncs) {
+  virtual LogicalResult
+  configureModule(llvm::Module *llvmModule,
+                  ArrayRef<llvm::Function *> exportedFuncs) {
     return success();
   }
 
   // Links the given object files into a dynamically loadable library.
   // The resulting library (and other associated artifacts) will be returned on
   // success.
-  virtual std::optional<Artifacts> linkDynamicLibrary(
-      StringRef libraryName, ArrayRef<Artifact> objectFiles) = 0;
+  virtual std::optional<Artifacts>
+  linkDynamicLibrary(StringRef libraryName, ArrayRef<Artifact> objectFiles) = 0;
 
- protected:
+protected:
   // Runs the given command line on the shell, logging failures.
   LogicalResult runLinkCommand(std::string commandLine, StringRef env = "");
 
@@ -116,9 +118,9 @@
   LLVMTargetOptions targetOptions;
 };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LINKERTOOL_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_LINKERTOOL_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.cpp
index bc6bd70..a85d41c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.cpp
@@ -72,9 +72,10 @@
      << "_\n";
 }
 
-static bool generateExecutableLibraryHeader(
-    const std::string &library_name, const std::string &query_function_name,
-    const std::string &header_file_path) {
+static bool
+generateExecutableLibraryHeader(const std::string &library_name,
+                                const std::string &query_function_name,
+                                const std::string &header_file_path) {
   std::error_code ec;
   llvm::raw_fd_ostream os(header_file_path, ec);
 
@@ -106,7 +107,7 @@
                                          header_file_path.c_str());
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.h
index accda19..8c027bf 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/StaticLibraryGenerator.h
@@ -24,9 +24,9 @@
                          const std::string &library_output_path,
                          const std::string &temp_object_path);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_STATICLIBRARYGENERATOR_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_LLVMCPU_STATICLIBRARYGENERATOR_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/AndroidLinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/AndroidLinkerTool.cpp
index 328a0dc..44cd142 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/AndroidLinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/AndroidLinkerTool.cpp
@@ -47,19 +47,19 @@
 // Returns the canonical target name for the Android NDK prebuilt versions.
 static const char *getNDKTargetPlatform(const Triple &targetTriple) {
   switch (targetTriple.getArch()) {
-    case Triple::arm:
-      return "armv7a";
-    case Triple::aarch64:
-      return "aarch64";
-    case Triple::x86:
-      return "i686";
-    case Triple::x86_64:
-      return "x86_64";
-    default:
-      llvm::errs()
-          << "No (known) Android NDK prebuilt name for this target platform ('"
-          << targetTriple.str() << "')";
-      return "";
+  case Triple::arm:
+    return "armv7a";
+  case Triple::aarch64:
+    return "aarch64";
+  case Triple::x86:
+    return "i686";
+  case Triple::x86_64:
+    return "x86_64";
+  default:
+    llvm::errs()
+        << "No (known) Android NDK prebuilt name for this target platform ('"
+        << targetTriple.str() << "')";
+    return "";
   }
 }
 
@@ -101,12 +101,13 @@
 //   $ android-ndk-r23/toolchains/llvm/prebuilt/linux-x86_64/bin/ld --version
 //   LLD 12.0.5 (/buildbot/src/android/llvm-toolchain/out/llvm-project/lld ...
 class AndroidLinkerTool : public LinkerTool {
- public:
+public:
   using LinkerTool::LinkerTool;
 
   std::string getSystemToolPath() const override {
     auto toolPath = LinkerTool::getSystemToolPath();
-    if (!toolPath.empty()) return toolPath;
+    if (!toolPath.empty())
+      return toolPath;
 
     // ANDROID_NDK must be set for us to infer the tool path.
     char *androidNDKPath = std::getenv("ANDROID_NDK");
@@ -117,7 +118,7 @@
 
     // Extract the Android version from the `android30` like triple piece.
     llvm::VersionTuple androidEnv = targetTriple.getEnvironmentVersion();
-    unsigned androidVersion = androidEnv.getMajor();  // like '30'
+    unsigned androidVersion = androidEnv.getMajor(); // like '30'
 
     // Select prebuilt toolchain based on both host and target
     // architecture/platform:
@@ -133,8 +134,9 @@
         .str();
   }
 
-  std::optional<Artifacts> linkDynamicLibrary(
-      StringRef libraryName, ArrayRef<Artifact> objectFiles) override {
+  std::optional<Artifacts>
+  linkDynamicLibrary(StringRef libraryName,
+                     ArrayRef<Artifact> objectFiles) override {
     Artifacts artifacts;
 
     // Create the shared object name; if we only have a single input object we
@@ -156,7 +158,7 @@
         // It matters that this flag isn't prefixed with --for-linker=. Doing so
         // results in a dlopen error: 'cannot locate symbol "main" referenced by
         // "iree_dylib_foo.so"'
-        "-nostdlib",  // -nodefaultlibs + -nostartfiles
+        "-nostdlib", // -nodefaultlibs + -nostartfiles
 
         "-o " + artifacts.libraryFile.path,
     };
@@ -216,19 +218,21 @@
     flagsToPrefixForLinker.clear();
 
     auto commandLine = llvm::join(flags, " ");
-    if (failed(runLinkCommand(commandLine))) return std::nullopt;
+    if (failed(runLinkCommand(commandLine)))
+      return std::nullopt;
     return artifacts;
   }
 };
 
-std::unique_ptr<LinkerTool> createAndroidLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
+std::unique_ptr<LinkerTool>
+createAndroidLinkerTool(const llvm::Triple &targetTriple,
+                        LLVMTargetOptions &targetOptions) {
   assert(targetTriple.isAndroid() &&
          "only use the AndroidLinkerTool for Android targets");
   return std::make_unique<AndroidLinkerTool>(targetTriple, targetOptions);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/EmbeddedLinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/EmbeddedLinkerTool.cpp
index 42a6dba..82f47f7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/EmbeddedLinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/EmbeddedLinkerTool.cpp
@@ -38,7 +38,7 @@
 // it to tools or use it ourselves to generate backtraces but since all release
 // usage should be stripped nothing relies upon it.
 class EmbeddedLinkerTool : public LinkerTool {
- public:
+public:
   using LinkerTool::LinkerTool;
 
   std::string getEmbeddedToolPath() const {
@@ -52,13 +52,15 @@
     // Fall back to check for setting the linker explicitly via environment
     // variables.
     char *envVarPath = std::getenv("IREE_LLVM_EMBEDDED_LINKER_PATH");
-    if (envVarPath && envVarPath[0] != '\0') return std::string(envVarPath);
+    if (envVarPath && envVarPath[0] != '\0')
+      return std::string(envVarPath);
 
     // No explicit linker specified, search the install/build dir or env.
     const SmallVector<std::string> &toolNames{"iree-lld", "lld", "ld.lld",
                                               "lld-link"};
     std::string toolPath = findTool(toolNames);
-    if (!toolPath.empty()) return toolPath;
+    if (!toolPath.empty())
+      return toolPath;
 
     llvm::errs()
         << "error: required embedded linker tool (typically `lld`) not found "
@@ -71,9 +73,9 @@
     return "";
   }
 
-  LogicalResult configureModule(
-      llvm::Module *llvmModule,
-      ArrayRef<llvm::Function *> exportedFuncs) override {
+  LogicalResult
+  configureModule(llvm::Module *llvmModule,
+                  ArrayRef<llvm::Function *> exportedFuncs) override {
     for (auto &llvmFunc : *llvmModule) {
       // -fno-plt - prevent PLT on calls to imports.
       llvmFunc.addFnAttr("nonlazybind");
@@ -103,8 +105,9 @@
     return success();
   }
 
-  std::optional<Artifacts> linkDynamicLibrary(
-      StringRef libraryName, ArrayRef<Artifact> objectFiles) override {
+  std::optional<Artifacts>
+  linkDynamicLibrary(StringRef libraryName,
+                     ArrayRef<Artifact> objectFiles) override {
     Artifacts artifacts;
 
     // Create the shared object name; if we only have a single input object we
@@ -118,7 +121,8 @@
     artifacts.libraryFile.close();
 
     std::string embeddedToolPath = getEmbeddedToolPath();
-    if (embeddedToolPath.empty()) return std::nullopt;
+    if (embeddedToolPath.empty())
+      return std::nullopt;
 
     SmallVector<std::string, 8> flags = {
         embeddedToolPath,
@@ -137,7 +141,7 @@
 
     // Avoids including any libc/startup files that initialize the CRT as
     // we don't use any of that. Our shared libraries must be freestanding.
-    flags.push_back("-nostdlib");  // -nodefaultlibs + -nostartfiles
+    flags.push_back("-nostdlib"); // -nodefaultlibs + -nostartfiles
 
     // Statically link all dependencies so we don't have any runtime deps.
     // We cannot have any imports in the module we produce.
@@ -211,12 +215,13 @@
   }
 };
 
-std::unique_ptr<LinkerTool> createEmbeddedLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
+std::unique_ptr<LinkerTool>
+createEmbeddedLinkerTool(const llvm::Triple &targetTriple,
+                         LLVMTargetOptions &targetOptions) {
   return std::make_unique<EmbeddedLinkerTool>(targetTriple, targetOptions);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/LinkerTools.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/LinkerTools.cpp
index 9b25cf7..47a35fd 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/LinkerTools.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/LinkerTools.cpp
@@ -14,20 +14,26 @@
 // TODO(benvanik): add other platforms:
 // createMacLinkerTool using ld64.lld
 
-std::unique_ptr<LinkerTool> createAndroidLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
-std::unique_ptr<LinkerTool> createEmbeddedLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
-std::unique_ptr<LinkerTool> createUnixLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
-std::unique_ptr<LinkerTool> createWasmLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
-std::unique_ptr<LinkerTool> createWindowsLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
+std::unique_ptr<LinkerTool>
+createAndroidLinkerTool(const llvm::Triple &targetTriple,
+                        LLVMTargetOptions &targetOptions);
+std::unique_ptr<LinkerTool>
+createEmbeddedLinkerTool(const llvm::Triple &targetTriple,
+                         LLVMTargetOptions &targetOptions);
+std::unique_ptr<LinkerTool>
+createUnixLinkerTool(const llvm::Triple &targetTriple,
+                     LLVMTargetOptions &targetOptions);
+std::unique_ptr<LinkerTool>
+createWasmLinkerTool(const llvm::Triple &targetTriple,
+                     LLVMTargetOptions &targetOptions);
+std::unique_ptr<LinkerTool>
+createWindowsLinkerTool(const llvm::Triple &targetTriple,
+                        LLVMTargetOptions &targetOptions);
 
 // static
-std::unique_ptr<LinkerTool> LinkerTool::getForTarget(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
+std::unique_ptr<LinkerTool>
+LinkerTool::getForTarget(const llvm::Triple &targetTriple,
+                         LLVMTargetOptions &targetOptions) {
   if (targetOptions.linkEmbedded) {
     return createEmbeddedLinkerTool(targetTriple, targetOptions);
   } else if (targetTriple.isAndroid()) {
@@ -41,7 +47,7 @@
   return createUnixLinkerTool(targetTriple, targetOptions);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/UnixLinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/UnixLinkerTool.cpp
index eceedcb..47852d4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/UnixLinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/UnixLinkerTool.cpp
@@ -20,13 +20,14 @@
 
 // Unix linker (ld-like); for ELF files.
 class UnixLinkerTool : public LinkerTool {
- public:
+public:
   using LinkerTool::LinkerTool;
 
   std::string getSystemToolPath() const override {
     // First check for setting the linker explicitly.
     auto toolPath = LinkerTool::getSystemToolPath();
-    if (!toolPath.empty()) return toolPath;
+    if (!toolPath.empty())
+      return toolPath;
 
     // No explicit linker specified, search the environment for common tools.
     // We want LLD:
@@ -54,14 +55,16 @@
       // of these, at least given current behavior.
       toolPath = findToolInEnvironment({"ld.lld", "ld"});
     }
-    if (!toolPath.empty()) return toolPath;
+    if (!toolPath.empty())
+      return toolPath;
 
     llvm::errs() << "No Unix linker tool found in environment.\n";
     return "";
   }
 
-  std::optional<Artifacts> linkDynamicLibrary(
-      StringRef libraryName, ArrayRef<Artifact> objectFiles) override {
+  std::optional<Artifacts>
+  linkDynamicLibrary(StringRef libraryName,
+                     ArrayRef<Artifact> objectFiles) override {
     Artifacts artifacts;
 
     // Create the shared object name; if we only have a single input object we
@@ -93,7 +96,7 @@
     } else {
       // Avoids including any libc/startup files that initialize the CRT as
       // we don't use any of that. Our shared libraries must be freestanding.
-      flags.push_back("-nostdlib");  // -nodefaultlibs + -nostartfiles
+      flags.push_back("-nostdlib"); // -nodefaultlibs + -nostartfiles
 
       // Statically link all dependencies so we don't have any runtime deps.
       // We cannot have any imports in the module we produce.
@@ -128,22 +131,24 @@
     }
 
     auto commandLine = llvm::join(flags, " ");
-    if (failed(runLinkCommand(commandLine))) return std::nullopt;
+    if (failed(runLinkCommand(commandLine)))
+      return std::nullopt;
     return artifacts;
   }
 
- private:
+private:
   bool targetIsApple() const {
     return targetTriple.isOSDarwin() || targetTriple.isiOS();
   }
 };
 
-std::unique_ptr<LinkerTool> createUnixLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
+std::unique_ptr<LinkerTool>
+createUnixLinkerTool(const llvm::Triple &targetTriple,
+                     LLVMTargetOptions &targetOptions) {
   return std::make_unique<UnixLinkerTool>(targetTriple, targetOptions);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WasmLinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WasmLinkerTool.cpp
index 9e07944..b7b4704 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WasmLinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WasmLinkerTool.cpp
@@ -36,7 +36,7 @@
 // equivalent). For SIMD support, also set
 // `-iree-llvmcpu-target-cpu-features=+simd128`.
 class WasmLinkerTool : public LinkerTool {
- public:
+public:
   using LinkerTool::LinkerTool;
 
   std::string getWasmToolPath() const {
@@ -56,15 +56,16 @@
     // or install directories) for common tools.
     std::string toolPath = findToolFromExecutableDir(
         {"wasm-ld", "iree-lld", "lld", "ld.lld", "lld-link"});
-    if (!toolPath.empty()) return toolPath;
+    if (!toolPath.empty())
+      return toolPath;
 
     llvm::errs() << "No Wasm linker tool specified or discovered\n";
     return "";
   }
 
-  LogicalResult configureModule(
-      llvm::Module *llvmModule,
-      ArrayRef<llvm::Function *> exportedFuncs) override {
+  LogicalResult
+  configureModule(llvm::Module *llvmModule,
+                  ArrayRef<llvm::Function *> exportedFuncs) override {
     // https://lld.llvm.org/WebAssembly.html#exports
     // Note: once we can set --shared this shouldn't be needed, since we set
     // default visibility on exported functions.
@@ -75,8 +76,9 @@
     return success();
   }
 
-  std::optional<Artifacts> linkDynamicLibrary(
-      StringRef libraryName, ArrayRef<Artifact> objectFiles) override {
+  std::optional<Artifacts>
+  linkDynamicLibrary(StringRef libraryName,
+                     ArrayRef<Artifact> objectFiles) override {
     Artifacts artifacts;
 
     // Create the wasm binary file name; if we only have a single input object
@@ -131,17 +133,19 @@
     }
 
     auto commandLine = llvm::join(flags, " ");
-    if (failed(runLinkCommand(commandLine))) return std::nullopt;
+    if (failed(runLinkCommand(commandLine)))
+      return std::nullopt;
     return artifacts;
   }
 };
 
-std::unique_ptr<LinkerTool> createWasmLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
+std::unique_ptr<LinkerTool>
+createWasmLinkerTool(const llvm::Triple &targetTriple,
+                     LLVMTargetOptions &targetOptions) {
   return std::make_unique<WasmLinkerTool>(targetTriple, targetOptions);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WindowsLinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WindowsLinkerTool.cpp
index e6fc5c9..c8c3181 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WindowsLinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMCPU/internal/WindowsLinkerTool.cpp
@@ -19,26 +19,28 @@
 
 // Windows linker (MSVC link.exe-like); for DLL files.
 class WindowsLinkerTool : public LinkerTool {
- public:
+public:
   using LinkerTool::LinkerTool;
 
   std::string getSystemToolPath() const override {
     // First check for setting the linker explicitly.
     auto toolPath = LinkerTool::getSystemToolPath();
-    if (!toolPath.empty()) return toolPath;
+    if (!toolPath.empty())
+      return toolPath;
 
     // No explicit linker specified, search the executable directory (i.e. our
     // own build or install directories) for common tools.
     toolPath = findToolFromExecutableDir({"lld-link"});
-    if (!toolPath.empty()) return toolPath;
+    if (!toolPath.empty())
+      return toolPath;
 
     llvm::errs() << "No Windows linker tool specified or discovered\n";
     return "";
   }
 
-  LogicalResult configureModule(
-      llvm::Module *llvmModule,
-      ArrayRef<llvm::Function *> exportedFuncs) override {
+  LogicalResult
+  configureModule(llvm::Module *llvmModule,
+                  ArrayRef<llvm::Function *> exportedFuncs) override {
     auto &ctx = llvmModule->getContext();
 
     // Create a _DllMainCRTStartup replacement that does not initialize the CRT.
@@ -90,8 +92,9 @@
     return success();
   }
 
-  std::optional<Artifacts> linkDynamicLibrary(
-      StringRef libraryName, ArrayRef<Artifact> objectFiles) override {
+  std::optional<Artifacts>
+  linkDynamicLibrary(StringRef libraryName,
+                     ArrayRef<Artifact> objectFiles) override {
     Artifacts artifacts;
 
     // Create the shared object name; if we only have a single input object we
@@ -232,9 +235,9 @@
     // matrix (dynamic/static and debug/release).
     int libIndex = 0;
     if (targetOptions.optimizerOptLevel.getSpeedupLevel() == 0) {
-      libIndex += 0;  // debug
+      libIndex += 0; // debug
     } else {
-      libIndex += 2;  // release
+      libIndex += 2; // release
     }
     libIndex += targetOptions.linkStatic ? 1 : 0;
 
@@ -272,7 +275,8 @@
     }
 
     auto commandLine = llvm::join(flags, " ");
-    if (failed(runLinkCommand(commandLine))) return std::nullopt;
+    if (failed(runLinkCommand(commandLine)))
+      return std::nullopt;
 
     // PDB file gets generated wtih the same path + .pdb.
     artifacts.debugFile =
@@ -289,12 +293,13 @@
   }
 };
 
-std::unique_ptr<LinkerTool> createWindowsLinkerTool(
-    const llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
+std::unique_ptr<LinkerTool>
+createWindowsLinkerTool(const llvm::Triple &targetTriple,
+                        LLVMTargetOptions &targetOptions) {
   return std::make_unique<WindowsLinkerTool>(targetTriple, targetOptions);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.cpp
index 65fc19d..05868ee 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.cpp
@@ -74,8 +74,9 @@
   return success();
 }
 
-llvm::Expected<std::unique_ptr<llvm::Module>> loadBitcodeObject(
-    IREE::HAL::ExecutableObjectAttr objectAttr, llvm::LLVMContext &context) {
+llvm::Expected<std::unique_ptr<llvm::Module>>
+loadBitcodeObject(IREE::HAL::ExecutableObjectAttr objectAttr,
+                  llvm::LLVMContext &context) {
   // Load the object data into memory.
   auto objectData = objectAttr.loadData();
   if (!objectData) {
@@ -87,16 +88,17 @@
   llvm::MemoryBufferRef bitcodeBufferRef(objectData.value(),
                                          objectAttr.getPath());
   auto bitcodeModuleValue = llvm::parseBitcodeFile(bitcodeBufferRef, context);
-  if (!bitcodeModuleValue) return bitcodeModuleValue;
+  if (!bitcodeModuleValue)
+    return bitcodeModuleValue;
   // NOTE: at this point the bitcode may not have the expected data layout!
   return std::move(bitcodeModuleValue.get());
 }
 
-LogicalResult linkBitcodeObjects(
-    Location loc, llvm::Linker &linker, unsigned linkerFlags,
-    llvm::TargetMachine &targetMachine, ArrayAttr objectAttrs,
-    llvm::LLVMContext &context,
-    ModuleSpecializationCallback specializationCallback) {
+LogicalResult
+linkBitcodeObjects(Location loc, llvm::Linker &linker, unsigned linkerFlags,
+                   llvm::TargetMachine &targetMachine, ArrayAttr objectAttrs,
+                   llvm::LLVMContext &context,
+                   ModuleSpecializationCallback specializationCallback) {
   // Gather only the bitcode objects.
   SmallVector<IREE::HAL::ExecutableObjectAttr> bitcodeObjectAttrs;
   IREE::HAL::ExecutableObjectAttr::filterObjects(objectAttrs, {".bc"},
@@ -139,7 +141,7 @@
   return success();
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.h b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.h
index 872fd46..6020997 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVMLinkerUtils.h
@@ -40,24 +40,25 @@
 
 // Loads a bitcode file specified by the |objectAttr| and specializes it for
 // |targetMachine|.
-llvm::Expected<std::unique_ptr<llvm::Module>> loadBitcodeObject(
-    IREE::HAL::ExecutableObjectAttr objectAttr, llvm::LLVMContext &context);
+llvm::Expected<std::unique_ptr<llvm::Module>>
+loadBitcodeObject(IREE::HAL::ExecutableObjectAttr objectAttr,
+                  llvm::LLVMContext &context);
 
 // Links all .bc objects in |objectAttrs| into |linker|.
-LogicalResult linkBitcodeObjects(
-    Location loc, llvm::Linker &linker, unsigned linkerFlags,
-    llvm::TargetMachine &targetMachine, ArrayAttr objectAttrs,
-    llvm::LLVMContext &context,
-    ModuleSpecializationCallback specializationCallback = {});
+LogicalResult
+linkBitcodeObjects(Location loc, llvm::Linker &linker, unsigned linkerFlags,
+                   llvm::TargetMachine &targetMachine, ArrayAttr objectAttrs,
+                   llvm::LLVMContext &context,
+                   ModuleSpecializationCallback specializationCallback = {});
 
 LogicalResult linkCmdlineBitcodeFile(Location loc, llvm::Linker &linker,
                                      unsigned linkerFlags,
                                      llvm::TargetMachine &targetMachine,
                                      llvm::LLVMContext &context);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  //  IREE_COMPILER_DIALECT_HAL_TARGET_LLVMLINKERUTILS_H_
+#endif //  IREE_COMPILER_DIALECT_HAL_TARGET_LLVMLINKERUTILS_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.cpp
index 59fabac..d3ef018 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.cpp
@@ -28,15 +28,15 @@
                                           StringRef libFile) {
   const char *sdk = "";
   switch (platform) {
-    case MetalTargetPlatform::macOS:
-      sdk = "macosx";
-      break;
-    case MetalTargetPlatform::iOS:
-      sdk = "iphoneos";
-      break;
-    case MetalTargetPlatform::iOSSimulator:
-      sdk = "iphonesimulator";
-      break;
+  case MetalTargetPlatform::macOS:
+    sdk = "macosx";
+    break;
+  case MetalTargetPlatform::iOS:
+    sdk = "iphoneos";
+    break;
+  case MetalTargetPlatform::iOSSimulator:
+    sdk = "iphonesimulator";
+    break;
   }
 
   // Metal shader offline compilation involves two steps:
@@ -57,15 +57,16 @@
 static LogicalResult runSystemCommand(StringRef command) {
   LLVM_DEBUG(llvm::dbgs() << "Running system command: '" << command << "'\n");
   int exitCode = system(command.data());
-  if (exitCode == 0) return success();
+  if (exitCode == 0)
+    return success();
   llvm::errs() << "Failed to run system command '" << command
                << "' with error code: " << exitCode << "\n";
   return failure();
 }
 
-std::unique_ptr<llvm::MemoryBuffer> compileMSLToMetalLib(
-    MetalTargetPlatform targetPlatform, StringRef mslCode,
-    StringRef entryPoint) {
+std::unique_ptr<llvm::MemoryBuffer>
+compileMSLToMetalLib(MetalTargetPlatform targetPlatform, StringRef mslCode,
+                     StringRef entryPoint) {
   SmallString<32> mslFile, airFile, libFile;
   int mslFd = 0;
   llvm::sys::fs::createTemporaryFile(entryPoint, "metal", mslFd, mslFile);
@@ -73,14 +74,15 @@
   llvm::FileRemover mslRemover(mslFile.c_str());
   llvm::FileRemover libRemover(libFile.c_str());
 
-  {  // Write input MSL code to the temporary file.
+  { // Write input MSL code to the temporary file.
     llvm::raw_fd_ostream inputStream(mslFd, /*shouldClose=*/true);
     inputStream << mslCode << "\n";
   }
 
   std::string command =
       getMetalCompileCommand(targetPlatform, mslFile, libFile);
-  if (failed(runSystemCommand(command))) return nullptr;
+  if (failed(runSystemCommand(command)))
+    return nullptr;
 
   auto fileOrErr =
       llvm::MemoryBuffer::getFileOrSTDIN(libFile, /*isText=*/false);
@@ -93,7 +95,7 @@
   return std::move(*fileOrErr);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.h b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.h
index bfa606a..1e2db55 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MSLToMetalLib.h
@@ -19,13 +19,13 @@
 // Invokes system commands to compile the given |mslCode| into a Metal library
 // and returns the library binary code. |fileName| will be used as a hint for
 // creating intermediate files.
-std::unique_ptr<llvm::MemoryBuffer> compileMSLToMetalLib(
-    MetalTargetPlatform targetPlatform, llvm::StringRef mslCode,
-    llvm::StringRef fileName);
+std::unique_ptr<llvm::MemoryBuffer>
+compileMSLToMetalLib(MetalTargetPlatform targetPlatform,
+                     llvm::StringRef mslCode, llvm::StringRef fileName);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_MSLTOMETALLIB_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_MSLTOMETALLIB_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp
index 4696078..d0d4417 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp
@@ -101,7 +101,7 @@
 }
 
 class MetalSPIRVTargetBackend : public TargetBackend {
- public:
+public:
   MetalSPIRVTargetBackend() = default;
 
   // NOTE: we could vary this based on the options such as 'metal-v2'.
@@ -112,8 +112,8 @@
                     IREE::Flow::FlowDialect, spirv::SPIRVDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -130,7 +130,8 @@
     // For now we disable translation if the variant has external object files.
     // We could instead perform linking with those objects (if they're Metal
     // archives, etc).
-    if (variantOp.isExternal()) return;
+    if (variantOp.isExternal())
+      return;
 
     buildSPIRVCodegenPassPipeline(passManager, /*enableFastMath=*/false);
   }
@@ -234,9 +235,10 @@
                                                        threadgroupSizesRef);
 
     if (metalLibs.empty()) {
-      auto shaderSourcesRef = builder.createStringVec(llvm::map_range(
-          mslShaders,
-          [&](const MetalShader &shader) { return shader.source; }));
+      auto shaderSourcesRef = builder.createStringVec(
+          llvm::map_range(mslShaders, [&](const MetalShader &shader) {
+            return shader.source;
+          }));
       iree_hal_metal_ExecutableDef_shader_sources_add(builder,
                                                       shaderSourcesRef);
     } else {
@@ -263,7 +265,7 @@
     return success();
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // If we had multiple target environments we would generate one target attr
@@ -273,8 +275,9 @@
     return ArrayAttr::get(context, targetAttrs);
   }
 
-  IREE::HAL::ExecutableTargetAttr getExecutableTarget(
-      MLIRContext *context, spirv::TargetEnvAttr targetEnv) const {
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context,
+                      spirv::TargetEnvAttr targetEnv) const {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -298,7 +301,7 @@
   static TargetBackendRegistration registration1("metal-spirv", backendFactory);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h
index ae9e96b..849546e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h
@@ -17,9 +17,9 @@
 // Registers the Metal/SPIR-V backends.
 void registerMetalSPIRVTargetBackends();
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_METALSPIRVTARGET_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_METALSPIRVTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalTargetPlatform.h b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalTargetPlatform.h
index 1b3c953..aadce61 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalTargetPlatform.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalTargetPlatform.h
@@ -17,9 +17,9 @@
 /// Metal target platforms.
 enum class MetalTargetPlatform { macOS, iOS, iOSSimulator };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_METALTARGETPLATFORM_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_METALTARGETPLATFORM_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.cpp
index 91e5ca8..0bcc92d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.cpp
@@ -25,16 +25,17 @@
 
 namespace {
 class SPIRVToMSLCompiler : public SPIRV_CROSS_NAMESPACE::CompilerMSL {
- public:
+public:
   using CompilerMSL::CompilerMSL;
 
-  MetalShader::ThreadGroupSize getWorkgroupSizeForEntryPoint(
-      StringRef entryName) {
-    const auto& entryPoint = get_entry_point(
+  MetalShader::ThreadGroupSize
+  getWorkgroupSizeForEntryPoint(StringRef entryName) {
+    const auto &entryPoint = get_entry_point(
         entryName.str(), spv::ExecutionModel::ExecutionModelGLCompute);
-    const auto& workgroupSize = entryPoint.workgroup_size;
+    const auto &workgroupSize = entryPoint.workgroup_size;
     // TODO(antiagainst): support specialization constant.
-    if (workgroupSize.constant != 0) return {0, 0, 0};
+    if (workgroupSize.constant != 0)
+      return {0, 0, 0};
     return {workgroupSize.x, workgroupSize.y, workgroupSize.z};
   }
 
@@ -45,7 +46,7 @@
 
     Descriptor(uint32_t s, uint32_t b) : set(s), binding(b) {}
 
-    friend bool operator<(const Descriptor& l, const Descriptor& r) {
+    friend bool operator<(const Descriptor &l, const Descriptor &r) {
       return std::tie(l.set, l.binding) < std::tie(r.set, r.binding);
     }
   };
@@ -53,36 +54,36 @@
   // Updates `descriptors` with resource set and binding number pairs in
   // increasing order, and `hasPushConstant` if with push constants.
   // Returns true if no unsupported cases are encountered.
-  bool getResources(SmallVectorImpl<Descriptor>* descriptors,
-                    bool* hasPushConstant) {
+  bool getResources(SmallVectorImpl<Descriptor> *descriptors,
+                    bool *hasPushConstant) {
     descriptors->clear();
     *hasPushConstant = false;
 
     // Iterate over all variables in the SPIR-V blob.
     bool hasUnknownCase = false;
     ir.for_each_typed_id<SPIRV_CROSS_NAMESPACE::SPIRVariable>(
-        [&](uint32_t id, SPIRV_CROSS_NAMESPACE::SPIRVariable& var) {
+        [&](uint32_t id, SPIRV_CROSS_NAMESPACE::SPIRVariable &var) {
           auto storage = var.storage;
           switch (storage) {
-              // Non-interface variables. We don't care.
-            case spv::StorageClassFunction:
-            case spv::StorageClassPrivate:
-            case spv::StorageClassWorkgroup:
-              // Builtin variables. We don't care either.
-            case spv::StorageClassInput:
-              return;
-            case spv::StorageClassPushConstant:
-              *hasPushConstant = true;
-              return;
-            case spv::StorageClassUniform:
-            case spv::StorageClassStorageBuffer: {
-              uint32_t setNo = get_decoration(id, spv::DecorationDescriptorSet);
-              uint32_t bindingNo = get_decoration(id, spv::DecorationBinding);
-              descriptors->emplace_back(setNo, bindingNo);
-              return;
-            }
-            default:
-              break;
+            // Non-interface variables. We don't care.
+          case spv::StorageClassFunction:
+          case spv::StorageClassPrivate:
+          case spv::StorageClassWorkgroup:
+            // Builtin variables. We don't care either.
+          case spv::StorageClassInput:
+            return;
+          case spv::StorageClassPushConstant:
+            *hasPushConstant = true;
+            return;
+          case spv::StorageClassUniform:
+          case spv::StorageClassStorageBuffer: {
+            uint32_t setNo = get_decoration(id, spv::DecorationDescriptorSet);
+            uint32_t bindingNo = get_decoration(id, spv::DecorationBinding);
+            descriptors->emplace_back(setNo, bindingNo);
+            return;
+          }
+          default:
+            break;
           }
           hasUnknownCase = true;
         });
@@ -96,13 +97,13 @@
     // family.
     SPIRVToMSLCompiler::Options spvCrossOptions;
     switch (platform) {
-      case IREE::HAL::MetalTargetPlatform::macOS:
-        spvCrossOptions.platform = SPIRVToMSLCompiler::Options::Platform::macOS;
-        break;
-      case IREE::HAL::MetalTargetPlatform::iOS:
-      case IREE::HAL::MetalTargetPlatform::iOSSimulator:
-        spvCrossOptions.platform = SPIRVToMSLCompiler::Options::Platform::iOS;
-        break;
+    case IREE::HAL::MetalTargetPlatform::macOS:
+      spvCrossOptions.platform = SPIRVToMSLCompiler::Options::Platform::macOS;
+      break;
+    case IREE::HAL::MetalTargetPlatform::iOS:
+    case IREE::HAL::MetalTargetPlatform::iOSSimulator:
+      spvCrossOptions.platform = SPIRVToMSLCompiler::Options::Platform::iOS;
+      break;
     }
     spvCrossOptions.msl_version =
         SPIRVToMSLCompiler::Options::make_msl_version(3, 0);
@@ -112,11 +113,12 @@
     return spvCrossOptions;
   }
 };
-}  // namespace
+} // namespace
 
-std::optional<std::pair<MetalShader, std::string>> crossCompileSPIRVToMSL(
-    IREE::HAL::MetalTargetPlatform targetPlatform,
-    llvm::ArrayRef<uint32_t> spvBinary, StringRef entryPoint) {
+std::optional<std::pair<MetalShader, std::string>>
+crossCompileSPIRVToMSL(IREE::HAL::MetalTargetPlatform targetPlatform,
+                       llvm::ArrayRef<uint32_t> spvBinary,
+                       StringRef entryPoint) {
   SPIRVToMSLCompiler spvCrossCompiler(spvBinary.data(), spvBinary.size());
 
   // All spirv-cross operations work on the current entry point. It should be
@@ -131,7 +133,7 @@
 
   // Explicitly set the argument buffer [[id(N)]] location for each SPIR-V
   // resource variable.
-  for (const auto& descriptor : descriptors) {
+  for (const auto &descriptor : descriptors) {
     SPIRV_CROSS_NAMESPACE::MSLResourceBinding binding = {};
     binding.stage = spv::ExecutionModelGLCompute;
     binding.desc_set = descriptor.set;
@@ -161,7 +163,7 @@
   // code, where we may run into the case that we are using reserved keyword for
   // the entry point name, e.g., `abs`. Under such circumstances, it will be
   // revised to avoid collision.
-  const auto& spirvEntryPoint = spvCrossCompiler.get_entry_point(
+  const auto &spirvEntryPoint = spvCrossCompiler.get_entry_point(
       entryPoint.str(), spv::ExecutionModel::ExecutionModelGLCompute);
   LLVM_DEBUG({
     llvm::dbgs() << "Original entry point name: '" << spirvEntryPoint.orig_name
@@ -180,5 +182,5 @@
                         spirvEntryPoint.name);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h
index b882dc7..4da7761 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h
@@ -31,11 +31,12 @@
 // Cross compiles SPIR-V into Metal Shading Language source code for the
 // compute shader with |entryPoint| and returns the MSL source and the new
 // entry point name. Returns std::nullopt on failure.
-std::optional<std::pair<MetalShader, std::string>> crossCompileSPIRVToMSL(
-    IREE::HAL::MetalTargetPlatform targetPlatform,
-    llvm::ArrayRef<uint32_t> spvBinary, StringRef entryPoint);
+std::optional<std::pair<MetalShader, std::string>>
+crossCompileSPIRVToMSL(IREE::HAL::MetalTargetPlatform targetPlatform,
+                       llvm::ArrayRef<uint32_t> spvBinary,
+                       StringRef entryPoint);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_SPIRVTOMSL_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_METALSPIRV_SPIRVTOMSL_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp
index 5bbeb8a..d5187c7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp
@@ -27,18 +27,20 @@
 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 
-static llvm::cl::opt<std::string> clROCMTargetChip(
-    "iree-rocm-target-chip", llvm::cl::desc("ROCm target Chip"),
-    llvm::cl::init("gfx908"));
+static llvm::cl::opt<std::string>
+    clROCMTargetChip("iree-rocm-target-chip",
+                     llvm::cl::desc("ROCm target Chip"),
+                     llvm::cl::init("gfx908"));
 
-static llvm::cl::opt<bool> clROCMLinkBC(
-    "iree-rocm-link-bc",
-    llvm::cl::desc("Whether to try Linking to AMD Bitcodes"),
-    llvm::cl::init(false));
+static llvm::cl::opt<bool>
+    clROCMLinkBC("iree-rocm-link-bc",
+                 llvm::cl::desc("Whether to try Linking to AMD Bitcodes"),
+                 llvm::cl::init(false));
 
-static llvm::cl::opt<std::string> clROCMBitcodeDir(
-    "iree-rocm-bc-dir", llvm::cl::desc("Directory of ROCM Bitcode"),
-    llvm::cl::init("/opt/rocm/amdgcn/bitcode"));
+static llvm::cl::opt<std::string>
+    clROCMBitcodeDir("iree-rocm-bc-dir",
+                     llvm::cl::desc("Directory of ROCM Bitcode"),
+                     llvm::cl::init("/opt/rocm/amdgcn/bitcode"));
 
 namespace mlir {
 namespace iree_compiler {
@@ -73,7 +75,7 @@
   return targetISA;
 }
 class ROCMTargetBackend final : public TargetBackend {
- public:
+public:
   std::string name() const override { return "rocm"; }
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -83,8 +85,8 @@
     registry.insert<IREE::Codegen::IREECodegenDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -105,7 +107,8 @@
     // For now we disable translation if the variant has external object files.
     // We could instead perform linking with those objects (if they're bitcode
     // ala libdevice.bc, etc).
-    if (variantOp.isExternal()) return;
+    if (variantOp.isExternal())
+      return;
 
     buildLLVMGPUTransformPassPipeline(passManager, true);
   }
@@ -149,7 +152,8 @@
     for (auto func : innerModuleOp.getOps<LLVM::LLVMFuncOp>()) {
       int32_t flatWgSize = 1;
       auto *llvmFunc = llvmModule->getFunction(func.getName());
-      if (llvmFunc->isDeclaration()) continue;
+      if (llvmFunc->isDeclaration())
+        continue;
       std::array<int32_t, 3> workgroupSize;
       auto exportOp = exportOps[func.getName()];
       if (std::optional<ArrayAttr> workgroupSizeAttr =
@@ -248,7 +252,7 @@
     return success();
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // If we had multiple target environments we would generate one target attr
@@ -257,8 +261,8 @@
     return ArrayAttr::get(context, targetAttrs);
   }
 
-  IREE::HAL::ExecutableTargetAttr getExecutableTarget(
-      MLIRContext *context) const {
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context) const {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
     // Add some configurations to the `hal.executable.target` attribute.
@@ -288,7 +292,7 @@
       });
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.h b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.h
index 7246592..7077ac0 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.h
@@ -25,9 +25,9 @@
 // Compiles ISAToHsaco Code
 std::string createHsaco(const std::string isa, StringRef name);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_ROCM_ROCMTARGET_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_ROCM_ROCMTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp
index d309fae..c3064c9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTargetUtils.cpp
@@ -60,8 +60,9 @@
   return module;
 }
 
-LogicalResult linkWithBitcodeVector(
-    llvm::Module *module, const std::vector<std::string> &bitcode_path_vector) {
+LogicalResult
+linkWithBitcodeVector(llvm::Module *module,
+                      const std::vector<std::string> &bitcode_path_vector) {
   llvm::Linker linker(*module);
 
   for (auto &bitcode_path : bitcode_path_vector) {
@@ -210,7 +211,7 @@
 }
 //==============Create HSACO End=============//
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp
index cc147d4..c3e366e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.cpp
@@ -48,11 +48,14 @@
           "executable files (sources, benchmarks, intermediates, binaries) "
           "to."),
       llvm::cl::callback([&](const std::string &path) {
-        if (executableSourcesPath.empty()) executableSourcesPath = path;
-        if (executableBenchmarksPath.empty()) executableBenchmarksPath = path;
+        if (executableSourcesPath.empty())
+          executableSourcesPath = path;
+        if (executableBenchmarksPath.empty())
+          executableBenchmarksPath = path;
         if (executableIntermediatesPath.empty())
           executableIntermediatesPath = path;
-        if (executableBinariesPath.empty()) executableBinariesPath = path;
+        if (executableBinariesPath.empty())
+          executableBinariesPath = path;
       }),
       llvm::cl::cat(halTargetOptionsCategory));
 
@@ -97,7 +100,7 @@
   file->keep();
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h
index 87a57e1..cfd71cb 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetBackend.h
@@ -118,7 +118,7 @@
 //      + hal.executable.binary attributes { ... }
 //          data blob...
 class TargetBackend {
- public:
+public:
   virtual ~TargetBackend() = default;
 
   // Returns a name for the backend used to differentiate between other targets.
@@ -135,8 +135,8 @@
   virtual void getDependentDialects(DialectRegistry &registry) const {}
 
   // Returns the default device this backend targets.
-  virtual IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const = 0;
+  virtual IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const = 0;
 
   // Inserts passes used to translate the `hal.executable.variant` op contents.
   // The pass manager will be nested on `hal.executable` such that the pipeline
@@ -169,8 +169,9 @@
   //       module { spirv.module { ... } }
   //     }
   //   }
-  virtual void buildTranslationPassPipeline(
-      IREE::HAL::ExecutableVariantOp variantOp, OpPassManager &passManager) = 0;
+  virtual void
+  buildTranslationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
+                               OpPassManager &passManager) = 0;
 
   // Inserts passes used to link `hal.executable.variant` ops together.
   // The pass manager will be nested on the parent module of `hal.executable`
@@ -242,9 +243,10 @@
   //
   // If no serialization is provided then lowering the parent module into a
   // binary format (such as to the IREE VM) will fail.
-  virtual LogicalResult serializeExecutable(
-      const SerializationOptions &options,
-      IREE::HAL::ExecutableVariantOp variantOp, OpBuilder &executableBuilder) {
+  virtual LogicalResult
+  serializeExecutable(const SerializationOptions &options,
+                      IREE::HAL::ExecutableVariantOp variantOp,
+                      OpBuilder &executableBuilder) {
     assert(false && "unimplemented serializeExecutable");
     return failure();
   }
@@ -262,9 +264,9 @@
                            data.size() * sizeof(T)));
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_TARGETBACKEND_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_TARGETBACKEND_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp
index c0d670a..1434b77 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.cpp
@@ -66,8 +66,8 @@
   }
 }
 
-std::vector<std::string> TargetBackendRegistry::getRegisteredTargetBackends()
-    const {
+std::vector<std::string>
+TargetBackendRegistry::getRegisteredTargetBackends() const {
   std::vector<std::string> result;
   for (auto &entry : registrations) {
     result.push_back(entry.getKey().str());
@@ -77,8 +77,8 @@
   return result;
 }
 
-std::shared_ptr<TargetBackend> TargetBackendRegistry::getTargetBackend(
-    StringRef targetName) const {
+std::shared_ptr<TargetBackend>
+TargetBackendRegistry::getTargetBackend(StringRef targetName) const {
   for (auto &entry : registrations) {
     if (entry.getKey() == targetName) {
       return entry.getValue()->acquire();
@@ -103,8 +103,8 @@
   return matches;
 }
 
-SmallVector<std::string> gatherExecutableTargetNames(
-    IREE::HAL::ExecutableOp executableOp) {
+SmallVector<std::string>
+gatherExecutableTargetNames(IREE::HAL::ExecutableOp executableOp) {
   SmallVector<std::string> targetNames;
   llvm::SmallDenseSet<StringRef> targets;
   executableOp.walk([&](IREE::HAL::ExecutableVariantOp variantOp) {
@@ -132,7 +132,7 @@
   return targetNames;
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h
index 09a5e51..b764609 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/TargetRegistry.h
@@ -32,14 +32,14 @@
 //   vulkan-v1.1-low
 //   vulkan-v1.1-high
 class TargetBackendRegistration {
- public:
+public:
   // TODO: Remove the registerStaticGlobal mode once callers are migrated.
   TargetBackendRegistration(StringRef name, CreateTargetBackendFn fn,
                             bool registerStaticGlobal = true);
 
   std::shared_ptr<TargetBackend> acquire();
 
- private:
+private:
   CreateTargetBackendFn initFn;
   std::once_flag initFlag;
   std::shared_ptr<TargetBackend> cachedValue;
@@ -47,19 +47,19 @@
 
 // A registry of target
 class TargetBackendList {
- public:
+public:
   void add(llvm::StringRef name, CreateTargetBackendFn fn) {
     entries.push_back(std::make_pair(name, fn));
   }
 
- private:
+private:
   llvm::SmallVector<std::pair<llvm::StringRef, CreateTargetBackendFn>> entries;
   friend class TargetBackendRegistry;
 };
 
 // A concrete target backend registry.
 class TargetBackendRegistry {
- public:
+public:
   // Merge from a list of of targets. The registry will own the registration
   // entries.
   void mergeFrom(const TargetBackendList &targets);
@@ -79,10 +79,10 @@
   std::shared_ptr<TargetBackend> getTargetBackend(StringRef targetName) const;
 
   // Returns one backend per entry in |targetNames|.
-  SmallVector<std::shared_ptr<TargetBackend>> getTargetBackends(
-      ArrayRef<std::string> targetNames) const;
+  SmallVector<std::shared_ptr<TargetBackend>>
+  getTargetBackends(ArrayRef<std::string> targetNames) const;
 
- private:
+private:
   llvm::StringMap<TargetBackendRegistration *> registrations;
   llvm::SmallVector<std::unique_ptr<TargetBackendRegistration>>
       ownedRegistrations;
@@ -91,15 +91,15 @@
 };
 
 // Returns a sorted uniqued set of target backends used in the executable.
-SmallVector<std::string> gatherExecutableTargetNames(
-    IREE::HAL::ExecutableOp executableOp);
+SmallVector<std::string>
+gatherExecutableTargetNames(IREE::HAL::ExecutableOp executableOp);
 
 // Returns a sorted uniqued set of target backends used in the entire module.
 SmallVector<std::string> gatherExecutableTargetNames(mlir::ModuleOp moduleOp);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_TARGETREGISTRY_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_TARGETREGISTRY_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
index c029ca4..78b7e44 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
@@ -35,8 +35,9 @@
     llvm::cl::desc("Enables microkernel lowering for vmvx (experimental)"),
     llvm::cl::init(false));
 
-static IREE::HAL::ExecutableTargetAttr getVMVXExecutableTarget(
-    MLIRContext *context, StringRef backend, StringRef format) {
+static IREE::HAL::ExecutableTargetAttr
+getVMVXExecutableTarget(MLIRContext *context, StringRef backend,
+                        StringRef format) {
   SmallVector<NamedAttribute> config;
   config.emplace_back(StringAttr::get(context, "ukernels"),
                       BoolAttr::get(context, clEnableMicrokernels));
@@ -46,7 +47,7 @@
 }
 
 class VMVXTargetBackend final : public TargetBackend {
- public:
+public:
   VMVXTargetBackend() = default;
 
   std::string name() const override { return "vmvx"; }
@@ -57,8 +58,8 @@
                     IREE::LinalgExt::IREELinalgExtDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -70,8 +71,8 @@
         context, b.getStringAttr(deviceID()), configAttr);
   }
 
-  IREE::VM::TargetOptions getTargetOptions(
-      IREE::HAL::ExecutableTargetAttr targetAttr) {
+  IREE::VM::TargetOptions
+  getTargetOptions(IREE::HAL::ExecutableTargetAttr targetAttr) {
     // TODO(benvanik): derive these from a vm target triple.
     auto vmOptions = IREE::VM::TargetOptions::FromFlags::get();
     vmOptions.f32Extension = true;
@@ -148,7 +149,7 @@
     return success();
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // This is where we would multiversion.
@@ -159,7 +160,7 @@
 };
 
 class VMVXInlineTargetBackend final : public TargetBackend {
- public:
+public:
   VMVXInlineTargetBackend() = default;
 
   std::string name() const override { return "vmvx-inline"; }
@@ -169,8 +170,8 @@
         .insert<IREE::Codegen::IREECodegenDialect, IREE::VMVX::VMVXDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -187,7 +188,7 @@
     IREE::VMVX::buildVMVXTransformPassPipeline(passManager);
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // This is where we would multiversion.
@@ -207,7 +208,7 @@
   });
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h
index 4a5b48e..ba8b8e4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h
@@ -17,9 +17,9 @@
 // Registers the VMVX backends.
 void registerVMVXTargetBackends();
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_VMVX_VMVXTARGET_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_VMVX_VMVXTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
index c82c283..bdd82ff 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
@@ -64,9 +64,9 @@
 }
 
 // Returns the Vulkan target environment for conversion.
-static spirv::TargetEnvAttr getSPIRVTargetEnv(
-    const std::string &vulkanTargetEnv, const std::string &vulkanTargetTriple,
-    MLIRContext *context) {
+static spirv::TargetEnvAttr
+getSPIRVTargetEnv(const std::string &vulkanTargetEnv,
+                  const std::string &vulkanTargetTriple, MLIRContext *context) {
   if (!vulkanTargetEnv.empty()) {
     if (auto attr = parseAttribute(vulkanTargetEnv, context)) {
       if (auto vkTargetEnv = llvm::dyn_cast<Vulkan::TargetEnvAttr>(attr)) {
@@ -86,7 +86,7 @@
 }
 
 class VulkanSPIRVTargetBackend : public TargetBackend {
- public:
+public:
   VulkanSPIRVTargetBackend(VulkanSPIRVTargetOptions options)
       : options_(std::move(options)) {}
 
@@ -98,8 +98,8 @@
                     spirv::SPIRVDialect, gpu::GPUDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -120,7 +120,8 @@
     // For now we disable translation if the variant has external object files.
     // We could instead perform linking with those objects (if they're .spv
     // files we could use spirv-link or import them into MLIR and merge here).
-    if (variantOp.isExternal()) return;
+    if (variantOp.isExternal())
+      return;
 
     buildSPIRVCodegenPassPipeline(passManager, /*enableFastMath=*/false);
   }
@@ -224,9 +225,10 @@
     return success();
   }
 
-  LogicalResult serializeExternalExecutable(
-      const SerializationOptions &options,
-      IREE::HAL::ExecutableVariantOp variantOp, OpBuilder &executableBuilder) {
+  LogicalResult
+  serializeExternalExecutable(const SerializationOptions &options,
+                              IREE::HAL::ExecutableVariantOp variantOp,
+                              OpBuilder &executableBuilder) {
     if (!variantOp.getObjects().has_value()) {
       return variantOp.emitOpError()
              << "no objects defined for external variant";
@@ -283,7 +285,7 @@
     return success();
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // If we had multiple target environments we would generate one target attr
@@ -294,8 +296,9 @@
     return ArrayAttr::get(context, targetAttrs);
   }
 
-  IREE::HAL::ExecutableTargetAttr getExecutableTarget(
-      MLIRContext *context, spirv::TargetEnvAttr targetEnv) const {
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context,
+                      spirv::TargetEnvAttr targetEnv) const {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -324,7 +327,7 @@
                                                  backendFactory);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h
index c0dcfa8..49eeea3 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h
@@ -31,9 +31,9 @@
 void registerVulkanSPIRVTargetBackends(
     std::function<VulkanSPIRVTargetOptions()> queryOptions);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_VULKANSPIRV_VULKANSPIRVTARGET_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_VULKANSPIRV_VULKANSPIRVTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.cpp
index 0eca686..2c91dfd 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.cpp
@@ -15,8 +15,8 @@
 namespace IREE {
 namespace HAL {
 
-std::optional<std::string> compileSPIRVToWGSL(
-    llvm::ArrayRef<uint32_t> spvBinary) {
+std::optional<std::string>
+compileSPIRVToWGSL(llvm::ArrayRef<uint32_t> spvBinary) {
   // TODO(scotttodd): reroute to MLIR diagnostics?
   auto diagPrinter = tint::diag::Printer::create(stderr, true);
   tint::diag::Formatter diagFormatter;
@@ -55,7 +55,7 @@
   return result.wgsl;
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.h b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.h
index ae7306a..cee8f2d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/SPIRVToWGSL.h
@@ -19,12 +19,12 @@
 
 // Compiles SPIR-V into WebGPU Shading Language (WGSL) source code.
 // Returns std::nullopt on failure.
-std::optional<std::string> compileSPIRVToWGSL(
-    llvm::ArrayRef<uint32_t> spvBinary);
+std::optional<std::string>
+compileSPIRVToWGSL(llvm::ArrayRef<uint32_t> spvBinary);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_WEBGPU_SPIRVTOWGSL_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_WEBGPU_SPIRVTOWGSL_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp
index 8c6557e..c5085a8 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.cpp
@@ -57,7 +57,7 @@
 }
 
 class WebGPUTargetBackend : public TargetBackend {
- public:
+public:
   WebGPUTargetBackend(WebGPUTargetOptions options)
       : options_(std::move(options)) {}
 
@@ -73,8 +73,8 @@
                     spirv::SPIRVDialect, gpu::GPUDialect>();
   }
 
-  IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
-      MLIRContext *context) const override {
+  IREE::HAL::DeviceTargetAttr
+  getDefaultDeviceTarget(MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -93,7 +93,8 @@
   void buildTranslationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
                                     OpPassManager &passManager) override {
     // For now we disable translation if the variant has external object files.
-    if (variantOp.isExternal()) return;
+    if (variantOp.isExternal())
+      return;
 
     // WebGPU does not support push constants (yet?), so replace loads from
     // push constants with loads from uniform buffers.
@@ -161,7 +162,7 @@
           mlir::StringAttr::get(variantOp->getContext(), symbolName);
 
       symbolUsers.replaceAllUsesWith(entryPointFunc, nameAttr);
-      exportOp.setName(symbolName);  // Same symbol reference? Not in table?
+      exportOp.setName(symbolName); // Same symbol reference? Not in table?
       SymbolTable::setSymbolName(entryPointFunc, symbolName);
 
       // We only have one shader module right now, so all point to index 0.
@@ -249,7 +250,7 @@
     return success();
   }
 
- private:
+private:
   ArrayAttr getExecutableTargets(MLIRContext *context) const {
     SmallVector<Attribute> targetAttrs;
     // If we had multiple target environments we would generate one target attr
@@ -259,8 +260,9 @@
     return ArrayAttr::get(context, targetAttrs);
   }
 
-  IREE::HAL::ExecutableTargetAttr getExecutableTarget(
-      MLIRContext *context, spirv::TargetEnvAttr targetEnv) const {
+  IREE::HAL::ExecutableTargetAttr
+  getExecutableTarget(MLIRContext *context,
+                      spirv::TargetEnvAttr targetEnv) const {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
 
@@ -288,7 +290,7 @@
   static TargetBackendRegistration registration1("webgpu-wgsl", backendFactory);
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.h b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.h
index 19a3c94..45fbfde 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.h
@@ -29,9 +29,9 @@
 void registerWebGPUTargetBackends(
     std::function<WebGPUTargetOptions()> queryOptions);
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_WEBGPU_WEBGPUTARGET_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TARGET_WEBGPU_WEBGPUTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp
index fe3e62f..1dc9eea 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/AssignTargetDevices.cpp
@@ -28,7 +28,7 @@
 
 class AssignTargetDevicesPass
     : public PassWrapper<AssignTargetDevicesPass, OperationPass<ModuleOp>> {
- public:
+public:
   AssignTargetDevicesPass()
       : targetRegistry(TargetBackendRegistry::getGlobal()) {}
   AssignTargetDevicesPass(const AssignTargetDevicesPass &pass)
@@ -106,7 +106,7 @@
                       ArrayAttr::get(moduleOp.getContext(), targetAttrs));
   }
 
- private:
+private:
   ListOption<std::string> targets{*this, "targets",
                                   llvm::cl::desc("List of devices to target."),
                                   llvm::cl::ZeroOrMore};
@@ -114,9 +114,9 @@
   const TargetBackendRegistry &targetRegistry;
 };
 
-std::unique_ptr<OperationPass<ModuleOp>> createAssignTargetDevicesPass(
-    const TargetBackendRegistry &targetRegistry,
-    ArrayRef<std::string> targets) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createAssignTargetDevicesPass(const TargetBackendRegistry &targetRegistry,
+                              ArrayRef<std::string> targets) {
   return std::make_unique<AssignTargetDevicesPass>(targetRegistry, targets);
 }
 
@@ -124,7 +124,7 @@
   return std::make_unique<AssignTargetDevicesPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/BenchmarkBatchDispatches.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/BenchmarkBatchDispatches.cpp
index 6246db7..98342fc 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/BenchmarkBatchDispatches.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/BenchmarkBatchDispatches.cpp
@@ -20,13 +20,13 @@
 class BenchmarkBatchDispatchesPass
     : public PassWrapper<BenchmarkBatchDispatchesPass,
                          OperationPass<func::FuncOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(BenchmarkBatchDispatchesPass)
 
   explicit BenchmarkBatchDispatchesPass(unsigned repeatCount)
       : repeatCount_(repeatCount) {}
 
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<func::FuncDialect, IREE::HAL::HALDialect>();
   }
 
@@ -61,14 +61,14 @@
     }
   }
 
- private:
+private:
   unsigned repeatCount_;
 };
 
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<func::FuncOp>> createBenchmarkBatchDispatchesPass(
-    unsigned repeatCount) {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createBenchmarkBatchDispatchesPass(unsigned repeatCount) {
   return std::make_unique<BenchmarkBatchDispatchesPass>(repeatCount);
 }
 
@@ -76,7 +76,7 @@
   return std::make_unique<BenchmarkBatchDispatchesPass>(2);
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp
index 7f819c5..63b2a4c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp
@@ -40,7 +40,7 @@
 // A pass converting the IREE flow dialect into the IREE HAL dialect.
 class ConvertToHALPass
     : public PassWrapper<ConvertToHALPass, OperationPass<ModuleOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertToHALPass)
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -101,7 +101,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createConvertToHALPass() {
   return std::make_unique<ConvertToHALPass>();
@@ -109,7 +109,7 @@
 
 static PassRegistration<ConvertToHALPass> pass;
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
index c9372e0..9976784 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
@@ -136,9 +136,10 @@
 
 // Appends a global hal.buffer initialized to the size required for all
 // of the bindings in |dispatchParams| (plus alignment).
-static IREE::Util::GlobalOp appendGlobalBuffer(
-    Location loc, StringRef baseName, const DispatchParams &dispatchParams,
-    OpBuilder &moduleBuilder) {
+static IREE::Util::GlobalOp
+appendGlobalBuffer(Location loc, StringRef baseName,
+                   const DispatchParams &dispatchParams,
+                   OpBuilder &moduleBuilder) {
   // Create a global to hold the HAL buffer.
   auto globalOp = moduleBuilder.create<IREE::Util::GlobalOp>(
       loc, (baseName + "_buffer").str(),
@@ -254,7 +255,7 @@
 
   // Push constant values.
   if (int64_t pushConstantCount = layoutAttr.getPushConstants()) {
-    int pushConstantBase = 0;  // always 0 today
+    int pushConstantBase = 0; // always 0 today
     SmallVector<Value> pushConstants;
     pushConstants.reserve(pushConstantCount);
     for (int64_t i = 0; i < pushConstantCount; ++i) {
@@ -279,7 +280,8 @@
   };
   int64_t bufferOffset = 0;
   for (auto binding : dispatchParams.bindings) {
-    if (currentSet != -1 && currentSet != binding.set) flushSet();
+    if (currentSet != -1 && currentSet != binding.set)
+      flushSet();
     currentSet = binding.set;
     IREE::HAL::DescriptorSetBindingValue bindingValue;
     bindingValue.ordinal =
@@ -291,7 +293,8 @@
     bufferOffset =
         IREE::Util::align(bufferOffset + binding.size, kBufferAlignment);
   }
-  if (currentSet != -1) flushSet();
+  if (currentSet != -1)
+    flushSet();
 
   // @executable::@variant::@export
   auto exportRefAttr =
@@ -360,10 +363,10 @@
 
 // Builds a module exporting one function for each dispatch configuration
 // targeting |sourceExecutableOp|.
-static mlir::OwningOpRef<mlir::ModuleOp> buildBenchmarkModule(
-    IREE::HAL::ExecutableOp sourceExecutableOp,
-    IREE::HAL::ExecutableVariantOp sourceVariantOp,
-    const DispatchParamsMap &dispatchParamsMap) {
+static mlir::OwningOpRef<mlir::ModuleOp>
+buildBenchmarkModule(IREE::HAL::ExecutableOp sourceExecutableOp,
+                     IREE::HAL::ExecutableVariantOp sourceVariantOp,
+                     const DispatchParamsMap &dispatchParamsMap) {
   // Empty module with default name.
   // We could use the original module name here to make tracking nicer.
   mlir::OwningOpRef<mlir::ModuleOp> moduleOp =
@@ -406,7 +409,8 @@
   }
 
   // Skip the file when we could not generate any benchmarks.
-  if (!hasAnyBenchmarks) return {};
+  if (!hasAnyBenchmarks)
+    return {};
 
   // Run CSE and the canonicalizer to pretty up the output.
   PassManager passManager(moduleOp->getContext());
@@ -423,15 +427,15 @@
 static void dumpModuleToStream(mlir::ModuleOp moduleOp, StringRef fileName,
                                llvm::raw_ostream &os) {
   OpPrintingFlags flags;
-  flags.useLocalScope();  // could use global scope, but IR gets messy fast
+  flags.useLocalScope(); // could use global scope, but IR gets messy fast
   moduleOp.print(os, flags);
-  os << "\n";  // newline at end of file
+  os << "\n"; // newline at end of file
 }
 
 class DumpExecutableBenchmarksPass
     : public PassWrapper<DumpExecutableBenchmarksPass,
                          OperationPass<ModuleOp>> {
- public:
+public:
   DumpExecutableBenchmarksPass() = default;
   DumpExecutableBenchmarksPass(const DumpExecutableBenchmarksPass &pass) {}
   DumpExecutableBenchmarksPass(StringRef path) { this->path = path.str(); }
@@ -459,7 +463,8 @@
     // filtering out dispatches that have dynamic parameters we don't
     // currently support.
     auto dispatchParamsMap = gatherDispatchParams(moduleOp);
-    if (dispatchParamsMap.empty()) return;
+    if (dispatchParamsMap.empty())
+      return;
 
     // Help people out and mkdir if needed.
     if (!path.empty() && path != "-") {
@@ -472,7 +477,8 @@
            executableOp.getOps<IREE::HAL::ExecutableVariantOp>()) {
         auto benchmarkModuleOp =
             buildBenchmarkModule(executableOp, variantOp, dispatchParamsMap);
-        if (!benchmarkModuleOp) continue;
+        if (!benchmarkModuleOp)
+          continue;
         auto fileName = (moduleName + "_" + executableOp.getName() + "_" +
                          variantOp.getName() + "_benchmark.mlir")
                             .str();
@@ -495,14 +501,14 @@
     }
   }
 
- private:
+private:
   Option<std::string> path{
       *this, "path",
       llvm::cl::desc("Path to write hal.executable benchmarks into.")};
 };
 
-std::unique_ptr<OperationPass<ModuleOp>> createDumpExecutableBenchmarksPass(
-    StringRef path) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createDumpExecutableBenchmarksPass(StringRef path) {
   return std::make_unique<DumpExecutableBenchmarksPass>(path);
 }
 
@@ -510,7 +516,7 @@
   return std::make_unique<DumpExecutableBenchmarksPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableSources.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableSources.cpp
index e626ac2..d384f58 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableSources.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableSources.cpp
@@ -27,12 +27,12 @@
   OpPrintingFlags flags;
   flags.useLocalScope();
   mlir::generateLocationsFromIR(os, filePath, executableOp, flags);
-  os << "\n";  // newline at end of file
+  os << "\n"; // newline at end of file
 }
 
 class DumpExecutableSourcesPass
     : public PassWrapper<DumpExecutableSourcesPass, OperationPass<ModuleOp>> {
- public:
+public:
   DumpExecutableSourcesPass() = default;
   DumpExecutableSourcesPass(const DumpExecutableSourcesPass &pass) {}
   DumpExecutableSourcesPass(StringRef path) { this->path = path.str(); }
@@ -86,14 +86,14 @@
     }
   }
 
- private:
+private:
   Option<std::string> path{
       *this, "path",
       llvm::cl::desc("Path to write hal.executable source files into.")};
 };
 
-std::unique_ptr<OperationPass<ModuleOp>> createDumpExecutableSourcesPass(
-    StringRef path) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createDumpExecutableSourcesPass(StringRef path) {
   return std::make_unique<DumpExecutableSourcesPass>(path);
 }
 
@@ -101,7 +101,7 @@
   return std::make_unique<DumpExecutableSourcesPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ElideRedundantCommands.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ElideRedundantCommands.cpp
index 983956f..d91101f 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ElideRedundantCommands.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ElideRedundantCommands.cpp
@@ -85,7 +85,7 @@
 
 using CommandBufferStateMap = DenseMap<Value, CommandBufferState>;
 
-}  // namespace
+} // namespace
 
 static void processOp(IREE::HAL::CommandBufferExecutionBarrierOp op,
                       CommandBufferState &state) {
@@ -133,7 +133,8 @@
       stateValue = value.value();
     }
   }
-  if (redundantIndices.none()) return success();  // no-op
+  if (redundantIndices.none())
+    return success(); // no-op
 
   // If all bits are set we can just kill the op.
   if (redundantIndices.all()) {
@@ -165,7 +166,8 @@
 static LogicalResult processOp(IREE::HAL::CommandBufferPushDescriptorSetOp op,
                                CommandBufferState &state) {
   auto *setState = state.getDescriptorSet(op.getSet());
-  if (!setState) return failure();
+  if (!setState)
+    return failure();
 
   bool isLayoutEqual = setState->pipelineLayout == op.getPipelineLayout();
   setState->pipelineLayout = op.getPipelineLayout();
@@ -190,7 +192,7 @@
 
   // Bail early if no redundant bindings.
   if (isLayoutEqual && redundantIndices.none()) {
-    return success();  // no-op
+    return success(); // no-op
   }
 
   // If all bits are set we can just kill the op.
@@ -204,7 +206,7 @@
 
 class ElideRedundantCommandsPass
     : public PassWrapper<ElideRedundantCommandsPass, OperationPass<void>> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::HAL::HALDialect>();
   }
@@ -240,7 +242,8 @@
           stateMap[commandBuffer].previousFullBarrier = {};
         };
         for (auto &op : llvm::make_early_inc_range(block.getOperations())) {
-          if (!op.getDialect()) continue;
+          if (!op.getDialect())
+            continue;
           TypeSwitch<Operation *>(&op)
               .Case([&](IREE::HAL::CommandBufferFinalizeOp op) {
                 invalidateState(op.getCommandBuffer());
@@ -293,7 +296,7 @@
 
 static PassRegistration<ElideRedundantCommandsPass> pass;
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/FixupLegacySync.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/FixupLegacySync.cpp
index d36c333..3be57e3 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/FixupLegacySync.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/FixupLegacySync.cpp
@@ -63,7 +63,7 @@
   // Returns an operation waiting on |fence| that is guaranteed to have
   // executed prior to asyncOp. Returns null if no waits found.
   auto beginIt = std::prev(asyncOp->getBlock()->begin());
-  auto endIt = std::prev(asyncOp->getBlock()->end());  // ignore terminator
+  auto endIt = std::prev(asyncOp->getBlock()->end()); // ignore terminator
   auto findPrecedingAwait = [&](Value fence) -> Operation * {
     auto it = std::prev(Block::iterator(asyncOp));
     for (; it != beginIt; --it) {
@@ -76,7 +76,7 @@
           continue;
         }
       } else if (!isSafeToReorder(*it)) {
-        break;  // hit a point we can't scan past
+        break; // hit a point we can't scan past
       }
     }
     return nullptr;
@@ -96,7 +96,7 @@
           continue;
         }
       } else if (!isSafeToReorder(*it)) {
-        break;  // hit a point we can't scan past
+        break; // hit a point we can't scan past
       }
     }
     return nullptr;
@@ -105,7 +105,8 @@
   OpBuilder builder(asyncOp);
   Value timeoutMillis;
   auto makeInfiniteTimeout = [&]() {
-    if (timeoutMillis) return timeoutMillis;
+    if (timeoutMillis)
+      return timeoutMillis;
     timeoutMillis = builder.create<arith::ConstantIntOp>(loc, -1, 32);
     return timeoutMillis;
   };
@@ -189,7 +190,7 @@
 
 static PassRegistration<FixupLegacySyncPass> pass;
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/InlineDeviceSwitches.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/InlineDeviceSwitches.cpp
index 5d1f7ce..a40e7b8 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/InlineDeviceSwitches.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/InlineDeviceSwitches.cpp
@@ -136,7 +136,7 @@
 
 class InlineDeviceSwitchesPass
     : public PassWrapper<InlineDeviceSwitchesPass, OperationPass<void>> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect>();
   }
@@ -169,7 +169,7 @@
 
 static PassRegistration<InlineDeviceSwitchesPass> pass;
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/LinkExecutables.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/LinkExecutables.cpp
index 4908e74..a4f184c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/LinkExecutables.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/LinkExecutables.cpp
@@ -27,7 +27,7 @@
 class LinkTargetExecutablesPass
     : public PassWrapper<LinkTargetExecutablesPass,
                          OperationPass<mlir::ModuleOp>> {
- public:
+public:
   LinkTargetExecutablesPass()
       : targetRegistry(TargetBackendRegistry::getGlobal()) {}
   LinkTargetExecutablesPass(const LinkTargetExecutablesPass &pass)
@@ -72,7 +72,7 @@
     }
   }
 
- private:
+private:
   Option<std::string> target{
       *this, "target",
       llvm::cl::desc("Target backend name whose executables will be linked by "
@@ -81,8 +81,9 @@
   const TargetBackendRegistry &targetRegistry;
 };
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createLinkTargetExecutablesPass(
-    const TargetBackendRegistry &targetRegistry, StringRef target) {
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createLinkTargetExecutablesPass(const TargetBackendRegistry &targetRegistry,
+                                StringRef target) {
   return std::make_unique<LinkTargetExecutablesPass>(targetRegistry, target);
 }
 
@@ -92,7 +93,7 @@
 
 class LinkExecutablesPass
     : public PassWrapper<LinkExecutablesPass, OperationPass<mlir::ModuleOp>> {
- public:
+public:
   LinkExecutablesPass(const TargetBackendRegistry &targetRegistry)
       : targetRegistry(targetRegistry) {}
 
@@ -127,8 +128,8 @@
   const TargetBackendRegistry &targetRegistry;
 };
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createLinkExecutablesPass(
-    const TargetBackendRegistry &targetRegistry) {
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createLinkExecutablesPass(const TargetBackendRegistry &targetRegistry) {
   return std::make_unique<LinkExecutablesPass>(targetRegistry);
 }
 
@@ -137,7 +138,7 @@
       TargetBackendRegistry::getGlobal());
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp
index 4f5d586..ec7dc30 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp
@@ -31,7 +31,8 @@
 namespace HAL {
 
 static std::string getAttrStr(Attribute attr) {
-  if (!attr) return "";
+  if (!attr)
+    return "";
   std::string result;
   llvm::raw_string_ostream os(result);
   attr.print(os, /*elideType=*/true);
@@ -71,7 +72,8 @@
 static Value createPadding(Location loc, uint64_t unalignedLength,
                            OpBuilder &builder) {
   uint64_t padding = llvm::alignTo(unalignedLength, 16) - unalignedLength;
-  if (!padding) return nullptr;
+  if (!padding)
+    return nullptr;
   auto i8Type = builder.getI8Type();
   auto zeroAttr = IntegerAttr::get(i8Type, 0);
   auto dataAttr = DenseElementsAttr::get(
@@ -98,7 +100,7 @@
 class MaterializeDispatchInstrumentationPass
     : public PassWrapper<MaterializeDispatchInstrumentationPass,
                          OperationPass<mlir::ModuleOp>> {
- public:
+public:
   MaterializeDispatchInstrumentationPass() = default;
   MaterializeDispatchInstrumentationPass(
       const MaterializeDispatchInstrumentationPass &pass) {}
@@ -123,7 +125,8 @@
 
   void runOnOperation() override {
     auto moduleOp = getOperation();
-    if (moduleOp.getBody()->empty()) return;
+    if (moduleOp.getBody()->empty())
+      return;
 
     auto moduleBuilder = OpBuilder(&moduleOp.getBody()->front());
     auto i8Type = moduleBuilder.getI8Type();
@@ -184,7 +187,8 @@
       for (auto exportOp :
            executableOp.getOps<IREE::Stream::ExecutableExportOp>()) {
         auto funcOp = exportOp.lookupFunctionRef();
-        if (!funcOp) continue;
+        if (!funcOp)
+          continue;
 
         // Capture the source before we mess with it.
         auto originalSource = getOpStr(funcOp);
@@ -272,7 +276,8 @@
               break;
             }
           }
-          if (!functionId) return;  // not instrumented
+          if (!functionId)
+            return; // not instrumented
 
           // Append dispatch site ID to correlate this op with where it lives in
           // the program and what is being dispatched. Note that multiple
@@ -379,7 +384,7 @@
     }
   }
 
- private:
+private:
   Option<llvm::cl::PowerOf2ByteSize> bufferSize{
       *this,
       "bufferSize",
@@ -397,7 +402,7 @@
   return std::make_unique<MaterializeDispatchInstrumentationPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
index 79b7d1c..9128967 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
@@ -44,9 +44,11 @@
                                  IREE::HAL::ExecutableVariantOp targetOp) {
   auto objectsAttr = sourceOp->getAttrOfType<IREE::HAL::ExecutableObjectsAttr>(
       "hal.executable.objects");
-  if (!objectsAttr) return;
+  if (!objectsAttr)
+    return;
   auto objects = objectsAttr.getApplicableObjects(targetOp.getTarget());
-  if (!objects) return;
+  if (!objects)
+    return;
   targetOp.setObjectsAttr(*objects);
 }
 
@@ -104,7 +106,8 @@
     // Clone any target-specific object files specified.
     if (auto objectsAttr = sourceOp.getObjectsAttr()) {
       auto objects = objectsAttr.getApplicableObjects(targetAttr);
-      if (objects) targetVariantOp.setObjectsAttr(*objects);
+      if (objects)
+        targetVariantOp.setObjectsAttr(*objects);
     }
 
     // Clone inner module contents.
@@ -164,8 +167,9 @@
 }
 
 // Creates an pipeline layout attr from the analysis results.
-static IREE::HAL::PipelineLayoutAttr makePipelineLayoutAttr(
-    const PipelineLayout &pipelineLayout, OpBuilder &builder) {
+static IREE::HAL::PipelineLayoutAttr
+makePipelineLayoutAttr(const PipelineLayout &pipelineLayout,
+                       OpBuilder &builder) {
   SmallVector<IREE::HAL::DescriptorSetLayoutAttr> setLayoutAttrs;
   for (const auto &setLayout : pipelineLayout.setLayouts) {
     SmallVector<IREE::HAL::DescriptorSetBindingAttr> bindingAttrs;
@@ -198,11 +202,12 @@
 }
 
 // Converts the usage of the given !stream.binding |arg| to interface methods.
-static void convertBindingUsage(
-    mlir::func::FuncOp sourceFuncOp, BlockArgument arg,
-    IREE::HAL::DescriptorSetLayoutAttr setLayoutAttr,
-    IREE::HAL::DescriptorSetBindingAttr bindingAttr) {
-  if (arg.use_empty()) return;  // no-op
+static void
+convertBindingUsage(mlir::func::FuncOp sourceFuncOp, BlockArgument arg,
+                    IREE::HAL::DescriptorSetLayoutAttr setLayoutAttr,
+                    IREE::HAL::DescriptorSetBindingAttr bindingAttr) {
+  if (arg.use_empty())
+    return; // no-op
   for (auto &use : llvm::make_early_inc_range(arg.getUses())) {
     auto oldOp = dyn_cast<IREE::Stream::BindingSubspanOp>(use.getOwner());
     assert(oldOp && "bindings are only usable by stream.binding.subspan");
@@ -221,9 +226,10 @@
 
 // Clones |sourceFuncOp| and updates its signature to match the |interfaceOp|
 // and use the HAL interface access primitives.
-static mlir::func::FuncOp cloneFuncWithInterface(
-    mlir::func::FuncOp sourceFuncOp, const PipelineLayout &pipelineLayout,
-    IREE::HAL::PipelineLayoutAttr layoutAttr) {
+static mlir::func::FuncOp
+cloneFuncWithInterface(mlir::func::FuncOp sourceFuncOp,
+                       const PipelineLayout &pipelineLayout,
+                       IREE::HAL::PipelineLayoutAttr layoutAttr) {
   // Clone so that we can do a bunch of unsafe in-place updates.
   auto clonedFuncOp = sourceFuncOp.clone();
 
@@ -244,7 +250,8 @@
   }
   unsigned resourceIdx = 0;
   for (auto arg : entryBlock->getArguments()) {
-    if (!llvm::isa<IREE::Stream::BindingType>(arg.getType())) continue;
+    if (!llvm::isa<IREE::Stream::BindingType>(arg.getType()))
+      continue;
     auto setBinding = pipelineLayout.resourceMap[resourceIdx++];
     auto setLayoutAttr = layoutAttr.getSetLayouts()[setBinding.first];
     auto bindingAttr = setLayoutAttr.getBindings()[setBinding.second];
@@ -259,14 +266,14 @@
 
 // Updates the target entry point symbols of |dispatchOp| to the expanded set of
 // variant exports in |entryPointExpansions|.
-static void updateDispatchTargets(
-    IREE::Stream::CmdDispatchOp dispatchOp,
-    const EntryPointExpansions &entryPointExpansions) {
+static void
+updateDispatchTargets(IREE::Stream::CmdDispatchOp dispatchOp,
+                      const EntryPointExpansions &entryPointExpansions) {
   SmallVector<Attribute> newAttrs;
   for (auto oldAttr : dispatchOp.getEntryPointRefs()) {
     auto it = entryPointExpansions.find(oldAttr);
     if (it == entryPointExpansions.end()) {
-      newAttrs.push_back(oldAttr);  // preserve existing
+      newAttrs.push_back(oldAttr); // preserve existing
       continue;
     }
     for (auto newAttr : it->second) {
@@ -293,11 +300,11 @@
 // Adds the entry point ops with assigned ordinals for each entry function.
 // The entry points will all use the provided |interfaceOp| and be exported with
 // hal.executable.export ops.
-static LogicalResult declareEntryPointOps(
-    IREE::Stream::ExecutableOp sourceExecutableOp,
-    IREE::HAL::ExecutableOp targetExecutableOp,
-    const BindingLayoutAnalysis &layoutAnalysis,
-    EntryPointExpansions &entryPointExpansions) {
+static LogicalResult
+declareEntryPointOps(IREE::Stream::ExecutableOp sourceExecutableOp,
+                     IREE::HAL::ExecutableOp targetExecutableOp,
+                     const BindingLayoutAnalysis &layoutAnalysis,
+                     EntryPointExpansions &entryPointExpansions) {
   auto sourceModuleOp = sourceExecutableOp.getInnerModule();
   auto variantOps =
       targetExecutableOp.getBlock().getOps<IREE::HAL::ExecutableVariantOp>();
@@ -311,7 +318,8 @@
                            .getOps<IREE::Stream::ExecutableExportOp>()) {
     auto sourceFuncOp = sourceModuleOp.lookupSymbol<mlir::func::FuncOp>(
         exportOp.getFunctionRef());
-    if (failed(verifyEntryPointTypes(sourceFuncOp))) return failure();
+    if (failed(verifyEntryPointTypes(sourceFuncOp)))
+      return failure();
 
     // Create the interface for this entry point based on the analysis of its
     // usage within the program.
@@ -384,7 +392,8 @@
   // Drop the temporary target functions. We could avoid an additional clone if
   // we only had one variant but this is relatively small in cost (once per
   // variant).
-  for (auto it : targetFuncOps) it.second->erase();
+  for (auto it : targetFuncOps)
+    it.second->erase();
   targetFuncOps.clear();
 
   return success();
@@ -430,7 +439,8 @@
     assert(exportOp &&
            "must have an entry point corresponding to the parent func");
     auto workgroupSizeAttr = exportOp.getWorkgroupSizeAttr();
-    if (!workgroupSizeAttr) return failure();
+    if (!workgroupSizeAttr)
+      return failure();
 
     uint64_t dimIdx = sizeOp.getDimension().getZExtValue();
     auto dimAttr = workgroupSizeAttr[dimIdx];
@@ -440,7 +450,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 static LogicalResult convertFlowInfoOps(IREE::HAL::ExecutableOp executableOp) {
   RewritePatternSet patterns(executableOp.getContext());
@@ -462,7 +472,7 @@
 
 class MaterializeInterfacesPass
     : public PassWrapper<MaterializeInterfacesPass, OperationPass<ModuleOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(MaterializeInterfacesPass)
 
   MaterializeInterfacesPass() = default;
@@ -501,7 +511,8 @@
         getOperation().getOps<IREE::Stream::ExecutableOp>());
     for (auto sourceOp : sourceOps) {
       auto exportOps = sourceOp.getOps<IREE::Stream::ExecutableExportOp>();
-      if (exportOps.empty()) continue;
+      if (exportOps.empty())
+        continue;
 
       // Gather a list of all #hal.executable.targets that we should produce
       // variants for.
@@ -602,7 +613,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createMaterializeInterfacesPass() {
   return std::make_unique<MaterializeInterfacesPass>();
@@ -612,7 +623,7 @@
   return std::make_unique<MaterializeInterfacesPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp
index 9fa3873..2faed9b 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp
@@ -30,7 +30,7 @@
 class MaterializeResourceCachesPass
     : public PassWrapper<MaterializeResourceCachesPass,
                          OperationPass<ModuleOp>> {
- public:
+public:
   explicit MaterializeResourceCachesPass(TargetOptions targetOptions)
       : targetOptions_(targetOptions) {}
 
@@ -50,7 +50,8 @@
 
   void runOnOperation() override {
     auto moduleOp = getOperation();
-    if (moduleOp.getBody()->empty()) return;
+    if (moduleOp.getBody()->empty())
+      return;
     moduleBuilder = OpBuilder(&moduleOp.getBody()->front());
 
     // Find all relevant ops. If we don't find any we skip the pass as it's
@@ -63,7 +64,8 @@
     SmallVector<IREE::HAL::ExecutableLookupOp> executableLookupOps;
     for (Operation &funcLikeOp : moduleOp.getOps()) {
       auto funcOp = llvm::dyn_cast<FunctionOpInterface>(funcLikeOp);
-      if (!funcOp) continue;
+      if (!funcOp)
+        continue;
       for (auto &block : funcOp.getFunctionBody()) {
         block.walk([&](Operation *op) {
           if (auto lookupOp = dyn_cast<DescriptorSetLayoutLookupOp>(op)) {
@@ -115,7 +117,7 @@
     }
   }
 
- private:
+private:
   IREE::Util::GlobalOp defineDescriptorSetLayoutOp(Location loc,
                                                    ArrayAttr bindingAttrs) {
     auto existingIt = descriptorSetLayoutCache_.find(bindingAttrs);
@@ -148,8 +150,9 @@
     return globalOp;
   }
 
-  IREE::Util::GlobalOp definePipelineLayoutOp(
-      Location loc, IREE::HAL::PipelineLayoutAttr layoutAttr) {
+  IREE::Util::GlobalOp
+  definePipelineLayoutOp(Location loc,
+                         IREE::HAL::PipelineLayoutAttr layoutAttr) {
     auto existingIt = pipelineLayoutCache_.find(layoutAttr);
     if (existingIt != pipelineLayoutCache_.end()) {
       return existingIt->second;
@@ -313,8 +316,8 @@
                                [](OpResult result) -> Value { return result; });
   }
 
-  void replaceDescriptorSetLayoutLookupOp(
-      DescriptorSetLayoutLookupOp &lookupOp) {
+  void
+  replaceDescriptorSetLayoutLookupOp(DescriptorSetLayoutLookupOp &lookupOp) {
     OpBuilder builder(lookupOp);
     auto globalOp =
         defineDescriptorSetLayoutOp(lookupOp.getLoc(), lookupOp.getBindings());
@@ -361,8 +364,8 @@
   int nextUniqueDescriptorSetLayoutId = 0;
 };
 
-std::unique_ptr<OperationPass<ModuleOp>> createMaterializeResourceCachesPass(
-    TargetOptions targetOptions) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createMaterializeResourceCachesPass(TargetOptions targetOptions) {
   return std::make_unique<MaterializeResourceCachesPass>(targetOptions);
 }
 
@@ -371,7 +374,7 @@
   return std::make_unique<MaterializeResourceCachesPass>(options);
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MemoizeDeviceQueries.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MemoizeDeviceQueries.cpp
index a5698e8..8dbf8d7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MemoizeDeviceQueries.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MemoizeDeviceQueries.cpp
@@ -26,7 +26,7 @@
 // support multiple devices we'll need to change this to be per-device.
 class MemoizeDeviceQueriesPass
     : public PassWrapper<MemoizeDeviceQueriesPass, OperationPass<ModuleOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "iree-hal-memoize-device-queries";
   }
@@ -127,7 +127,7 @@
 
 static PassRegistration<MemoizeDeviceQueriesPass> pass;
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
index f1de331..917b9a6 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
@@ -115,7 +115,7 @@
         "will fail compilation."),
 };
 
-}  // namespace
+} // namespace
 
 using FunctionLikeNest = MultiOpNest<func::FuncOp, IREE::Util::InitializerOp>;
 
@@ -230,7 +230,8 @@
         createPreprocessExecutablesPass(command));
   }
 
-  if (compileTo == PipelinePhase::ExecutableSources) return;
+  if (compileTo == PipelinePhase::ExecutableSources)
+    return;
 
   // TODO(benvanik): move translation after conversion; today translation
   // inserts the workgroup count logic we need to convert but we could instead
@@ -245,7 +246,8 @@
   passManager.addNestedPass<IREE::HAL::ExecutableOp>(
       createTranslateExecutablesPass(targetRegistry));
 
-  if (compileTo == PipelinePhase::ExecutableTargets) return;
+  if (compileTo == PipelinePhase::ExecutableTargets)
+    return;
 
   // Substitute hal.executables we've translated with those specified on the
   // command line. This developer feature allows for splicing in hand-authored
@@ -411,7 +413,7 @@
       });
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h
index 998deb5..81285ee 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.h
@@ -78,8 +78,9 @@
 createVerifyTargetEnvironmentPass(const TargetBackendRegistry &targetRegistry);
 
 // Assigns the HAL devices the module will target to the given list of targets.
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createAssignTargetDevicesPass(
-    const TargetBackendRegistry &targetRegistry, ArrayRef<std::string> targets);
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createAssignTargetDevicesPass(const TargetBackendRegistry &targetRegistry,
+                              ArrayRef<std::string> targets);
 
 // Applies fixups to the program for when using legacy HAL devices that only
 // support synchronous execution. Once all devices support async this will be
@@ -103,8 +104,8 @@
 createMaterializeInterfacesPass();
 
 // Dumps individual hal.executable source listings to |path|.
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createDumpExecutableSourcesPass(
-    StringRef path);
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createDumpExecutableSourcesPass(StringRef path);
 
 // Dumps standalone hal.executable benchmarks to |path|.
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
@@ -113,12 +114,12 @@
 // Substitutes hal.executable ops by parsing |substitutions| in
 // `executable_name=file.xxx` strings. File paths may be absolute or relative to
 // the paths specified on `--iree-hal-executable-object-search-path=`.
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createSubstituteExecutablesPass(
-    ArrayRef<std::string> substitutions = {});
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createSubstituteExecutablesPass(ArrayRef<std::string> substitutions = {});
 // Substitutes hal.executable ops with files in the given |searchPath| matching
 // the symbol name.
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createSubstituteExecutablesPass(
-    std::string searchPath);
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createSubstituteExecutablesPass(std::string searchPath);
 
 // Preprocess each executable with either a pass pipeline or external tool.
 std::unique_ptr<OperationPass<IREE::HAL::ExecutableOp>>
@@ -143,12 +144,13 @@
 // together (if that makes sense). For example, the LLVM AOT backend may combine
 // all executable targets for the same architecture into a single executable and
 // link it as a shared library.
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createLinkExecutablesPass(
-    const TargetBackendRegistry &targetRegistry);
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createLinkExecutablesPass(const TargetBackendRegistry &targetRegistry);
 
 // Links executables for the specified |target| backend.
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createLinkTargetExecutablesPass(
-    const TargetBackendRegistry &targetRegistry, StringRef target);
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createLinkTargetExecutablesPass(const TargetBackendRegistry &targetRegistry,
+                                StringRef target);
 
 // Resolves hal.executable.export references to ordinals.
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
@@ -187,8 +189,8 @@
 
 // Repeats dispatches `iree-hal-repeat-dispatch-num` times, which is 1 by
 // default.
-std::unique_ptr<OperationPass<func::FuncOp>> createBenchmarkBatchDispatchesPass(
-    unsigned repeatCount);
+std::unique_ptr<OperationPass<func::FuncOp>>
+createBenchmarkBatchDispatchesPass(unsigned repeatCount);
 
 //===----------------------------------------------------------------------===//
 // Register all Passes
@@ -222,9 +224,9 @@
   createVerifyTargetEnvironmentPass(TargetBackendRegistry::getGlobal());
 }
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_DIALECT_HAL_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/PreprocessExecutables.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/PreprocessExecutables.cpp
index a0e6258..218dbac 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/PreprocessExecutables.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/PreprocessExecutables.cpp
@@ -125,7 +125,7 @@
     llvm::raw_fd_ostream inputStream(inputFd, /*shouldClose=*/true);
     executableOp.print(inputStream,
                        OpPrintingFlags().useLocalScope().enableDebugInfo());
-    inputStream << "\n";  // newline at end of file
+    inputStream << "\n"; // newline at end of file
   }
 
   // LLVM wants all the args split up to launch the command so we tokenize here.
@@ -138,10 +138,11 @@
   auto Tokenize = llvm::cl::TokenizeWindowsCommandLine;
 #else
   auto Tokenize = llvm::cl::TokenizeGNUCommandLine;
-#endif  // _WIN32
+#endif // _WIN32
   Tokenize(command, stringSaver, rawArgs, /*MarkEOLs=*/false);
   SmallVector<StringRef> args;
-  for (auto rawArg : rawArgs) args.push_back(StringRef(rawArg));
+  for (auto rawArg : rawArgs)
+    args.push_back(StringRef(rawArg));
 
   // Try to find the tool either by absolute path or by looking it up in env.
   auto tool = findTool(args[0].str());
@@ -152,7 +153,8 @@
 
   LLVM_DEBUG({
     llvm::dbgs() << "Launching hal.executable preprocessor: ";
-    for (auto arg : args) llvm::dbgs() << arg << " ";
+    for (auto arg : args)
+      llvm::dbgs() << arg << " ";
     llvm::dbgs() << " 1> " << stdoutFile.str() << " 2> " << stderrFile.str()
                  << "\n";
   });
@@ -216,7 +218,7 @@
 class PreprocessExecutablesPass
     : public PassWrapper<PreprocessExecutablesPass,
                          OperationPass<IREE::HAL::ExecutableOp>> {
- public:
+public:
   PreprocessExecutablesPass() = default;
   PreprocessExecutablesPass(const PreprocessExecutablesPass &pass) {}
   PreprocessExecutablesPass(std::optional<std::string> pipeline,
@@ -274,7 +276,7 @@
     }
   }
 
- private:
+private:
   Option<std::string> pipeline{
       *this,
       "pipeline",
@@ -315,7 +317,7 @@
   return std::make_unique<PreprocessExecutablesPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ResolveExportOrdinals.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ResolveExportOrdinals.cpp
index b09dbf5..e2b782c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ResolveExportOrdinals.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ResolveExportOrdinals.cpp
@@ -16,7 +16,7 @@
 
 class ResolveCommandBufferDispatchOrdinals
     : public OpRewritePattern<IREE::HAL::CommandBufferDispatchSymbolOp> {
- public:
+public:
   using OpRewritePattern<
       IREE::HAL::CommandBufferDispatchSymbolOp>::OpRewritePattern;
   LogicalResult matchAndRewrite(IREE::HAL::CommandBufferDispatchSymbolOp op,
@@ -45,12 +45,12 @@
 class ResolveCommandBufferDispatchIndirectOrdinals
     : public OpRewritePattern<
           IREE::HAL::CommandBufferDispatchIndirectSymbolOp> {
- public:
+public:
   using OpRewritePattern<
       IREE::HAL::CommandBufferDispatchIndirectSymbolOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::CommandBufferDispatchIndirectSymbolOp op,
-      PatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::CommandBufferDispatchIndirectSymbolOp op,
+                  PatternRewriter &rewriter) const override {
     auto symbol = SymbolTable::lookupNearestSymbolFrom(op, op.getEntryPoint());
     assert(symbol && "missing ExecutableEntryPoint symbol");
     auto exportOp = cast<IREE::HAL::ExecutableExportOp>(symbol);
@@ -74,7 +74,7 @@
 
 class ResolveExportOrdinalsPass
     : public PassWrapper<ResolveExportOrdinalsPass, OperationPass<ModuleOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "iree-hal-resolve-export-ordinals";
   }
@@ -101,7 +101,7 @@
 
 static PassRegistration<ResolveExportOrdinalsPass> pass;
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/SerializeExecutables.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/SerializeExecutables.cpp
index 074468e..b8e337e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/SerializeExecutables.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/SerializeExecutables.cpp
@@ -27,7 +27,7 @@
 class SerializeTargetExecutablesPass
     : public PassWrapper<SerializeTargetExecutablesPass,
                          OperationPass<IREE::HAL::ExecutableOp>> {
- public:
+public:
   SerializeTargetExecutablesPass()
       : targetRegistry(TargetBackendRegistry::getGlobal()) {}
   SerializeTargetExecutablesPass(const SerializeTargetExecutablesPass &pass)
@@ -90,7 +90,8 @@
     auto variantOps = llvm::to_vector(
         executableOp.getBlock().getOps<IREE::HAL::ExecutableVariantOp>());
     for (auto variantOp : variantOps) {
-      if (variantOp.getTarget().getBackend().getValue() != target) continue;
+      if (variantOp.getTarget().getBackend().getValue() != target)
+        continue;
       OpBuilder executableBuilder(variantOp);
       // Ask the target backend to serialize the executable. Note that it
       // may create one or more hal.executable.binary ops in the case of
@@ -105,7 +106,7 @@
     }
   }
 
- private:
+private:
   Option<std::string> target{
       *this, "target",
       llvm::cl::desc(
@@ -144,14 +145,13 @@
 class SerializeExecutablesPass
     : public PassWrapper<SerializeExecutablesPass,
                          OperationPass<IREE::HAL::ExecutableOp>> {
- public:
+public:
   SerializeExecutablesPass()
       : targetRegistry(TargetBackendRegistry::getGlobal()) {}
   SerializeExecutablesPass(const TargetBackendRegistry &targetRegistry,
                            int debugLevel, std::string dumpIntermediatesPath,
                            std::string dumpBinariesPath)
-      : targetRegistry(targetRegistry),
-        debugLevel(debugLevel),
+      : targetRegistry(targetRegistry), debugLevel(debugLevel),
         dumpIntermediatesPath(dumpIntermediatesPath),
         dumpBinariesPath(dumpBinariesPath) {}
 
@@ -177,7 +177,7 @@
     }
   }
 
- private:
+private:
   const TargetBackendRegistry &targetRegistry;
   int debugLevel;
   std::string dumpIntermediatesPath;
@@ -197,7 +197,7 @@
   return std::make_unique<SerializeExecutablesPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/SubstituteExecutables.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/SubstituteExecutables.cpp
index 12b2c8a..d7ce7f5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/SubstituteExecutables.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/SubstituteExecutables.cpp
@@ -27,9 +27,9 @@
 //
 // To support round-tripping with --iree-hal-dump-executable-sources-to= we
 // support stripping file names of |prefix| when present.
-static LogicalResult scanSearchPath(
-    std::string prefix, StringRef searchPath,
-    std::unordered_map<std::string, std::string> &substitutions) {
+static LogicalResult
+scanSearchPath(std::string prefix, StringRef searchPath,
+               std::unordered_map<std::string, std::string> &substitutions) {
   if (!llvm::sys::fs::is_directory(searchPath)) {
     llvm::errs() << "iree-hal-substitute-executables source path `"
                  << searchPath << "` not found or not a directory\n";
@@ -41,25 +41,26 @@
        dir != dir_end && !ec; dir.increment(ec)) {
     auto childPath = dir->path();
     llvm::sys::fs::file_status status;
-    if (llvm::sys::fs::status(childPath, status)) continue;
+    if (llvm::sys::fs::status(childPath, status))
+      continue;
     switch (status.type()) {
-      case llvm::sys::fs::file_type::regular_file:
-      case llvm::sys::fs::file_type::symlink_file:
-      case llvm::sys::fs::file_type::type_unknown: {
-        // File we can access.
-        auto childName = llvm::sys::path::stem(childPath);
-        if (!childName.empty() && childName != "." && childName != "..") {
-          if (childName.starts_with(prefix)) {
-            // Strip prefix.
-            childName = childName.substr(prefix.size());
-          }
-          substitutions[std::string(childName)] = childPath;
+    case llvm::sys::fs::file_type::regular_file:
+    case llvm::sys::fs::file_type::symlink_file:
+    case llvm::sys::fs::file_type::type_unknown: {
+      // File we can access.
+      auto childName = llvm::sys::path::stem(childPath);
+      if (!childName.empty() && childName != "." && childName != "..") {
+        if (childName.starts_with(prefix)) {
+          // Strip prefix.
+          childName = childName.substr(prefix.size());
         }
-        break;
+        substitutions[std::string(childName)] = childPath;
       }
-      default:
-        // Directory/etc we skip.
-        break;
+      break;
+    }
+    default:
+      // Directory/etc we skip.
+      break;
     }
   }
   if (ec) {
@@ -96,12 +97,14 @@
 
 // Loads the MLIR at |filePath| and replaces |executableOp| with an executable
 // with the same name from the file.
-static LogicalResult replaceExecutableOpWithMLIR(
-    IREE::HAL::ExecutableOp executableOp, StringRef filePath) {
+static LogicalResult
+replaceExecutableOpWithMLIR(IREE::HAL::ExecutableOp executableOp,
+                            StringRef filePath) {
   // Load the replacement IR. It may have any mix of stuff in it including
   // multiple other executables.
   auto rootOpRef = loadModuleObject(executableOp.getContext(), filePath);
-  if (!rootOpRef) return failure();
+  if (!rootOpRef)
+    return failure();
   IREE::HAL::ExecutableOp replacementOp;
   if (auto moduleOp = dyn_cast<mlir::ModuleOp>(rootOpRef.get())) {
     // We expect a `hal.executable` with the same name as the one we are
@@ -140,8 +143,9 @@
 }
 
 // Drops the implementation of |executableOp| and links against |filePath|.
-static LogicalResult externalizeExecutableOp(
-    IREE::HAL::ExecutableOp executableOp, StringRef filePath) {
+static LogicalResult
+externalizeExecutableOp(IREE::HAL::ExecutableOp executableOp,
+                        StringRef filePath) {
   // Can't support multiple variants on this path. We could allow some magic way
   // to specify the full #hal.executable.objects dictionary but that's a stretch
   // for this developer tool.
@@ -160,7 +164,8 @@
   auto fileObjectAttr = builder.getAttr<IREE::HAL::ExecutableObjectAttr>(
       builder.getStringAttr(filePath), nullptr);
   auto fileContents = fileObjectAttr.loadData();
-  if (!fileContents) return failure();
+  if (!fileContents)
+    return failure();
 
   // Link the referenced object file contents. We fully replace the existing
   // objects in case there were any as this does entire executable replacement -
@@ -187,8 +192,9 @@
   return success();
 }
 
-static LogicalResult substituteExecutableOp(
-    IREE::HAL::ExecutableOp executableOp, StringRef filePath) {
+static LogicalResult
+substituteExecutableOp(IREE::HAL::ExecutableOp executableOp,
+                       StringRef filePath) {
   if (filePath.ends_with_insensitive(".mlir") ||
       filePath.ends_with_insensitive(".mlirbc")) {
     return replaceExecutableOpWithMLIR(executableOp, filePath);
@@ -199,7 +205,7 @@
 
 class SubstituteExecutablesPass
     : public PassWrapper<SubstituteExecutablesPass, OperationPass<ModuleOp>> {
- public:
+public:
   SubstituteExecutablesPass() = default;
   SubstituteExecutablesPass(const SubstituteExecutablesPass &pass) {}
   SubstituteExecutablesPass(ArrayRef<std::string> substitutions) {
@@ -251,7 +257,8 @@
       uniqueSubstitutions[std::string(key)] = value;
     }
 
-    if (uniqueSubstitutions.empty()) return;  // no-op
+    if (uniqueSubstitutions.empty())
+      return; // no-op
 
     // Walk each substitution and process the matching executable if found.
     for (auto &[executableName, filePath] : uniqueSubstitutions) {
@@ -279,7 +286,7 @@
     }
   }
 
- private:
+private:
   ListOption<std::string> substitutions{
       *this, "substitutions",
       llvm::cl::desc(
@@ -289,13 +296,13 @@
       llvm::cl::desc("Path to source executable substitutions from.")};
 };
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createSubstituteExecutablesPass(
-    ArrayRef<std::string> substitutions) {
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createSubstituteExecutablesPass(ArrayRef<std::string> substitutions) {
   return std::make_unique<SubstituteExecutablesPass>(substitutions);
 }
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createSubstituteExecutablesPass(
-    std::string searchPath) {
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createSubstituteExecutablesPass(std::string searchPath) {
   return std::make_unique<SubstituteExecutablesPass>(std::move(searchPath));
 }
 
@@ -303,7 +310,7 @@
   return std::make_unique<SubstituteExecutablesPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/TranslateExecutables.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/TranslateExecutables.cpp
index e9ea98f..5745d6a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/TranslateExecutables.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/TranslateExecutables.cpp
@@ -28,7 +28,7 @@
 class TranslateTargetExecutableVariantsPass
     : public PassWrapper<TranslateTargetExecutableVariantsPass,
                          OperationPass<IREE::HAL::ExecutableVariantOp>> {
- public:
+public:
   TranslateTargetExecutableVariantsPass()
       : targetRegistry(TargetBackendRegistry::getGlobal()) {}
   TranslateTargetExecutableVariantsPass(
@@ -59,7 +59,8 @@
 
   void runOnOperation() override {
     auto variantOp = getOperation();
-    if (variantOp.getTarget().getBackend().getValue() != target) return;
+    if (variantOp.getTarget().getBackend().getValue() != target)
+      return;
 
     auto targetBackend = targetRegistry.getTargetBackend(target);
     if (!targetBackend) {
@@ -77,7 +78,7 @@
     }
   }
 
- private:
+private:
   Option<std::string> target{
       *this, "target",
       llvm::cl::desc(
@@ -100,7 +101,7 @@
 class TranslateExecutablesPass
     : public PassWrapper<TranslateExecutablesPass,
                          OperationPass<IREE::HAL::ExecutableOp>> {
- public:
+public:
   TranslateExecutablesPass()
       : targetRegistry(TargetBackendRegistry::getGlobal()) {}
   TranslateExecutablesPass(const TranslateExecutablesPass &pass)
@@ -155,7 +156,7 @@
   return std::make_unique<TranslateExecutablesPass>();
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp
index 6616430..361c48a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/VerifyTargetEnvironment.cpp
@@ -26,7 +26,7 @@
 
 class VerifyTargetEnvironmentPass
     : public PassWrapper<VerifyTargetEnvironmentPass, OperationPass<ModuleOp>> {
- public:
+public:
   VerifyTargetEnvironmentPass(const TargetBackendRegistry &targetRegistry)
       : targetRegistry(targetRegistry) {}
 
@@ -57,7 +57,8 @@
         break;
       }
     }
-    if (!anyNonExecutableOps) return;
+    if (!anyNonExecutableOps)
+      return;
 
     // Must have targets specified.
     auto targetsAttr = moduleOp->getAttrOfType<ArrayAttr>("hal.device.targets");
@@ -104,8 +105,8 @@
   const TargetBackendRegistry &targetRegistry;
 };
 
-std::unique_ptr<OperationPass<ModuleOp>> createVerifyTargetEnvironmentPass(
-    const TargetBackendRegistry &targetRegistry) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createVerifyTargetEnvironmentPass(const TargetBackendRegistry &targetRegistry) {
   return std::make_unique<VerifyTargetEnvironmentPass>(targetRegistry);
 }
 
@@ -114,7 +115,7 @@
       TargetBackendRegistry::getGlobal());
 });
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Utils/DeviceSwitchBuilder.h b/compiler/src/iree/compiler/Dialect/HAL/Utils/DeviceSwitchBuilder.h
index ca95b3d..abb93b5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Utils/DeviceSwitchBuilder.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Utils/DeviceSwitchBuilder.h
@@ -20,16 +20,13 @@
 
 // See DeviceSwitchBuilder for details.
 class DeviceSwitchCaseBuilder {
- public:
+public:
   DeviceSwitchCaseBuilder(Location loc, TypeRange resultTypes, Value device,
                           Attribute initialCondition,
                           SmallVectorImpl<IREE::HAL::DeviceSwitchOp> &caseOps,
                           OpBuilder &builder)
-      : loc_(loc),
-        resultTypes_(resultTypes),
-        device_(device),
-        initialCondition_(initialCondition),
-        caseOps_(caseOps),
+      : loc_(loc), resultTypes_(resultTypes), device_(device),
+        initialCondition_(initialCondition), caseOps_(caseOps),
         builder_(builder) {}
 
   // Result types that each region must return.
@@ -68,7 +65,7 @@
     return nest(conditionAttr).addRegion();
   }
 
- private:
+private:
   Location loc_;
   SmallVector<Type> resultTypes_;
   Value device_;
@@ -96,12 +93,10 @@
 // region that captured them. You must query the returned Region entry block
 // arguments to use them within the region.
 class DeviceSwitchBuilder {
- public:
+public:
   DeviceSwitchBuilder(Location loc, TypeRange resultTypes, Value device,
                       OpBuilder builder)
-      : loc_(loc),
-        resultTypes_(resultTypes),
-        device_(device),
+      : loc_(loc), resultTypes_(resultTypes), device_(device),
         builder_(builder) {}
 
   // Pushes a new condition onto the stack and returns a builder that must have
@@ -134,7 +129,7 @@
     return switchOp;
   }
 
- private:
+private:
   Location loc_;
   SmallVector<Type> resultTypes_;
   Value device_;
@@ -144,12 +139,10 @@
 
 // Rewriter-compatible version of DeviceSwitchBuilder.
 class DeviceSwitchRewriter {
- public:
+public:
   DeviceSwitchRewriter(Location loc, TypeRange resultTypes, Value device,
                        ConversionPatternRewriter &rewriter)
-      : loc_(loc),
-        resultTypes_(resultTypes),
-        device_(device),
+      : loc_(loc), resultTypes_(resultTypes), device_(device),
         rewriter_(rewriter) {}
 
   // Pushes a new condition onto the stack and returns a builder that must have
@@ -198,7 +191,7 @@
 
   ConversionPatternRewriter &getRewriter() const { return rewriter_; }
 
- private:
+private:
   Location loc_;
   SmallVector<Type> resultTypes_;
   Value device_;
@@ -206,9 +199,9 @@
   ConversionPatternRewriter &rewriter_;
 };
 
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_HAL_UTILS_DEVICE_SWITCH_BUILDER_H_
+#endif // IREE_COMPILER_DIALECT_HAL_UTILS_DEVICE_SWITCH_BUILDER_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp
index c6b7593..8a95c1e 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp
@@ -54,7 +54,7 @@
 #else
 void Partition::dump(AsmState &asmState) {}
 void PartitionSet::dump(AsmState &asmState) {}
-#endif  // !NDEBUG
+#endif // !NDEBUG
 
 LogicalResult Partition::verify(Location loc) {
   // Ensure all ops are compatible with the partition affinity.
@@ -102,7 +102,8 @@
 LogicalResult PartitionSet::verify(Location loc) {
   // Verify each partition is consistent.
   for (auto &partition : partitions) {
-    if (failed(partition.verify(loc))) return failure();
+    if (failed(partition.verify(loc)))
+      return failure();
   }
 
   // Ensure a correct topological order of partitions. This only checks the
@@ -127,7 +128,8 @@
 }
 
 void PartitionSet::topologicalSort() {
-  if (partitions.empty()) return;
+  if (partitions.empty())
+    return;
 
   SetVector<Partition *> unsortedSet;
   DenseMap<Value, SmallVector<Partition *>> consumers;
@@ -156,7 +158,8 @@
       }
     }
   };
-  for (auto *partition : unsortedSet) postorderWalk(partition);
+  for (auto *partition : unsortedSet)
+    postorderWalk(partition);
 
   SmallVector<Partition> sortedSet;
   sortedSet.reserve(partitions.size());
@@ -172,13 +175,14 @@
   return partitionStreamableOpsReference(config, block);
 }
 
-PartitionSet partitionRegionConcurrency(
-    IREE::Stream::PartitioningConfigAttr config, Block *block) {
+PartitionSet
+partitionRegionConcurrency(IREE::Stream::PartitioningConfigAttr config,
+                           Block *block) {
   // Only one algorithm today.
   return partitionRegionConcurrencyReference(config, block);
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h
index 197c0d0..282cc0a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h
@@ -108,8 +108,9 @@
 // ops in the block will be covered by a partition.
 PartitionSet partitionStreamableOps(IREE::Stream::PartitioningConfigAttr config,
                                     Block *block);
-PartitionSet partitionRegionConcurrency(
-    IREE::Stream::PartitioningConfigAttr config, Block *block);
+PartitionSet
+partitionRegionConcurrency(IREE::Stream::PartitioningConfigAttr config,
+                           Block *block);
 
 //===----------------------------------------------------------------------===//
 // Reference partitioning
@@ -117,17 +118,19 @@
 
 // Naive clustering based solely on correctness with no cost model or weighting.
 // Produces the largest possible streams for any given block. Unsatisfactory.
-PartitionSet partitionStreamableOpsReference(
-    IREE::Stream::PartitioningConfigAttr config, Block *block);
+PartitionSet
+partitionStreamableOpsReference(IREE::Stream::PartitioningConfigAttr config,
+                                Block *block);
 
 // Similarly poor algorithm to partitionStreamableOpsReference but for use
 // within partitioned streams to produce waves of concurrently executable work.
-PartitionSet partitionRegionConcurrencyReference(
-    IREE::Stream::PartitioningConfigAttr config, Block *block);
+PartitionSet
+partitionRegionConcurrencyReference(IREE::Stream::PartitioningConfigAttr config,
+                                    Block *block);
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_ANALYSIS_PARTITIONING_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_ANALYSIS_PARTITIONING_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp
index d740b84..1fa2d88 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp
@@ -42,8 +42,9 @@
 // a real implementation would do. We want cost modeling for tie breakers when
 // an op could be in multiple partitions, cloning for ops that are not worth
 // spanning partitions (like splats), etc.
-PartitionSet partitionStreamableOpsReference(
-    IREE::Stream::PartitioningConfigAttr config, Block *block) {
+PartitionSet
+partitionStreamableOpsReference(IREE::Stream::PartitioningConfigAttr config,
+                                Block *block) {
   PartitionSet partitionSet;
 
   struct PartitionBuilder {
@@ -126,7 +127,8 @@
     llvm::BitVector consumers(builders.size(), /*t=*/false);
     for (auto user : op.getUsers()) {
       auto userInfoIt = opInfos.find(user);
-      if (userInfoIt == opInfos.end()) continue;
+      if (userInfoIt == opInfos.end())
+        continue;
       auto &userInfo = userInfoIt->second;
       LLVM_DEBUG({
         llvm::dbgs() << "Testing user:\n";
@@ -292,8 +294,9 @@
 
 // This looks to extract a single level of concurrency; we should be recursively
 // dividing the block to identify both serial and concurrent regions.
-PartitionSet partitionRegionConcurrencyReference(
-    IREE::Stream::PartitioningConfigAttr config, Block *block) {
+PartitionSet
+partitionRegionConcurrencyReference(IREE::Stream::PartitioningConfigAttr config,
+                                    Block *block) {
   PartitionSet waveSet;
 
   auto favor = config.getFavor().getValue();
@@ -358,7 +361,8 @@
     // dependency chain down the use-def chain to a wave.
     for (auto user : op.getUsers()) {
       auto userInfoIt = opInfos.find(user);
-      if (userInfoIt == opInfos.end()) continue;
+      if (userInfoIt == opInfos.end())
+        continue;
       auto &userInfo = userInfoIt->second;
       LLVM_DEBUG({
         llvm::dbgs() << "Testing user:\n";
@@ -456,7 +460,7 @@
   return waveSet;
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.cpp b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.cpp
index 875387f..4a3383e 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.cpp
@@ -46,12 +46,13 @@
   return access == ResourceAccessBitfield::Write;
 }
 static bool isReadWrite(ResourceAccessBitfield access) {
-  return bitEnumContainsAny(
-      access, ResourceAccessBitfield::Read | ResourceAccessBitfield::Write);
+  return bitEnumContainsAny(access, ResourceAccessBitfield::Read |
+                                        ResourceAccessBitfield::Write);
 }
 
 static bool doesRangeOverlap(AsyncAccessRange &lhs, AsyncAccessRange &rhs) {
-  if (lhs.resource != rhs.resource) return false;
+  if (lhs.resource != rhs.resource)
+    return false;
 
   if (lhs.end == rhs.start || lhs.start == rhs.end) {
     // Adjacent but not overlapping.
@@ -156,7 +157,7 @@
   return false;
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.h b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.h
index 126e10d..d546b5f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceHazards.h
@@ -25,7 +25,7 @@
 // analyzed and the hazards between them will be available for querying via the
 // lookup functions.
 class ResourceHazardAnalysis {
- public:
+public:
   explicit ResourceHazardAnalysis(Operation *rootOp);
   ~ResourceHazardAnalysis();
 
@@ -40,13 +40,13 @@
   // allowed to run while operating on the same resource.
   bool hasHazard(Operation *producerOp, Operation *consumerOp);
 
- private:
+private:
   std::unique_ptr<AsmState> asmState;
 };
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_ANALYSIS_RESOURCE_HAZARDS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_ANALYSIS_RESOURCE_HAZARDS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp
index f32953f..054f27a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp
@@ -59,7 +59,7 @@
 class AbstractResourceUsage
     : public DFX::StateWrapper<DFX::BitIntegerState<uint16_t, 4095, 0>,
                                ElementT> {
- public:
+public:
   using BaseType =
       DFX::StateWrapper<DFX::BitIntegerState<uint16_t, 4095, 0>, ElementT>;
 
@@ -67,7 +67,7 @@
   enum {
     NOT_INDIRECT = 1u << 0,
     NOT_EXTERNAL = 1u << 1,
-    NOT_MUTATED = 1u << 2,  // beyond definition
+    NOT_MUTATED = 1u << 2, // beyond definition
     NOT_CONSTANT = 1u << 3,
     NOT_TRANSFER_READ = 1u << 4,
     NOT_TRANSFER_WRITE = 1u << 5,
@@ -136,27 +136,39 @@
 
   const std::string getAsStr(AsmState &asmState) const override {
     std::string str;
-    if (!isValidState()) return "*";
+    if (!isValidState())
+      return "*";
     auto append = [&](const char *part) {
-      if (!str.empty()) str += '|';
+      if (!str.empty())
+        str += '|';
       str += part;
     };
-    if (!this->isAssumed(NOT_INDIRECT)) append("indirect");
+    if (!this->isAssumed(NOT_INDIRECT))
+      append("indirect");
     append(this->isAssumed(NOT_EXTERNAL) ? "internal" : "external");
     append(this->isAssumed(NOT_MUTATED) ? "immutable" : "mutable");
-    if (!this->isAssumed(NOT_CONSTANT)) append("constant");
-    if (!this->isAssumed(NOT_TRANSFER_READ)) append("transfer_read");
-    if (!this->isAssumed(NOT_TRANSFER_WRITE)) append("transfer_write");
-    if (!this->isAssumed(NOT_STAGING_READ)) append("staging_read");
-    if (!this->isAssumed(NOT_STAGING_WRITE)) append("staging_write");
-    if (!this->isAssumed(NOT_DISPATCH_READ)) append("dispatch_read");
-    if (!this->isAssumed(NOT_DISPATCH_WRITE)) append("dispatch_write");
-    if (!this->isAssumed(NOT_GLOBAL_READ)) append("global_read");
-    if (!this->isAssumed(NOT_GLOBAL_WRITE)) append("global_write");
+    if (!this->isAssumed(NOT_CONSTANT))
+      append("constant");
+    if (!this->isAssumed(NOT_TRANSFER_READ))
+      append("transfer_read");
+    if (!this->isAssumed(NOT_TRANSFER_WRITE))
+      append("transfer_write");
+    if (!this->isAssumed(NOT_STAGING_READ))
+      append("staging_read");
+    if (!this->isAssumed(NOT_STAGING_WRITE))
+      append("staging_write");
+    if (!this->isAssumed(NOT_DISPATCH_READ))
+      append("dispatch_read");
+    if (!this->isAssumed(NOT_DISPATCH_WRITE))
+      append("dispatch_write");
+    if (!this->isAssumed(NOT_GLOBAL_READ))
+      append("global_read");
+    if (!this->isAssumed(NOT_GLOBAL_WRITE))
+      append("global_write");
     return str.empty() ? "*" : str;
   }
 
- protected:
+protected:
   explicit AbstractResourceUsage(const Position &pos) : BaseType(pos) {}
 
   // Add known bits based on the static type information available.
@@ -164,42 +176,42 @@
   void initializeFromType(IREE::Stream::ResourceType type) {
     BaseType::intersectAssumedBits(BEST_STATE);
     switch (type.getLifetime()) {
-      case Lifetime::Unknown:
-        break;
-      case Lifetime::External:
-        BaseType::intersectAssumedBits(BEST_STATE & ~NOT_EXTERNAL);
-        BaseType::addKnownBits(NOT_CONSTANT | NOT_STAGING_READ |
-                               NOT_STAGING_WRITE);
-        break;
-      case Lifetime::Staging:
-        BaseType::intersectAssumedBits(
-            BEST_STATE & (~NOT_STAGING_READ | ~NOT_STAGING_WRITE |
-                          ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
-        BaseType::addKnownBits(NOT_EXTERNAL | NOT_CONSTANT | NOT_DISPATCH_READ |
-                               NOT_DISPATCH_WRITE | NOT_GLOBAL_READ |
-                               NOT_GLOBAL_WRITE);
-        break;
-      case Lifetime::Transient:
-        BaseType::intersectAssumedBits(
-            BEST_STATE & (~NOT_DISPATCH_READ | ~NOT_DISPATCH_WRITE |
-                          ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
-        BaseType::addKnownBits(NOT_EXTERNAL | NOT_CONSTANT | NOT_STAGING_READ |
-                               NOT_STAGING_WRITE);
-        break;
-      case Lifetime::Variable:
-        BaseType::intersectAssumedBits(
-            BEST_STATE & (~NOT_GLOBAL_READ | ~NOT_GLOBAL_WRITE |
-                          ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
-        BaseType::addKnownBits(NOT_EXTERNAL | NOT_CONSTANT | NOT_STAGING_READ |
-                               NOT_STAGING_WRITE);
-        break;
-      case Lifetime::Constant:
-        BaseType::intersectAssumedBits(
-            BEST_STATE &
-            (~NOT_CONSTANT | ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
-        BaseType::addKnownBits(NOT_MUTATED | NOT_EXTERNAL | NOT_STAGING_READ |
-                               NOT_STAGING_WRITE);
-        break;
+    case Lifetime::Unknown:
+      break;
+    case Lifetime::External:
+      BaseType::intersectAssumedBits(BEST_STATE & ~NOT_EXTERNAL);
+      BaseType::addKnownBits(NOT_CONSTANT | NOT_STAGING_READ |
+                             NOT_STAGING_WRITE);
+      break;
+    case Lifetime::Staging:
+      BaseType::intersectAssumedBits(
+          BEST_STATE & (~NOT_STAGING_READ | ~NOT_STAGING_WRITE |
+                        ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
+      BaseType::addKnownBits(NOT_EXTERNAL | NOT_CONSTANT | NOT_DISPATCH_READ |
+                             NOT_DISPATCH_WRITE | NOT_GLOBAL_READ |
+                             NOT_GLOBAL_WRITE);
+      break;
+    case Lifetime::Transient:
+      BaseType::intersectAssumedBits(
+          BEST_STATE & (~NOT_DISPATCH_READ | ~NOT_DISPATCH_WRITE |
+                        ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
+      BaseType::addKnownBits(NOT_EXTERNAL | NOT_CONSTANT | NOT_STAGING_READ |
+                             NOT_STAGING_WRITE);
+      break;
+    case Lifetime::Variable:
+      BaseType::intersectAssumedBits(
+          BEST_STATE & (~NOT_GLOBAL_READ | ~NOT_GLOBAL_WRITE |
+                        ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
+      BaseType::addKnownBits(NOT_EXTERNAL | NOT_CONSTANT | NOT_STAGING_READ |
+                             NOT_STAGING_WRITE);
+      break;
+    case Lifetime::Constant:
+      BaseType::intersectAssumedBits(
+          BEST_STATE &
+          (~NOT_CONSTANT | ~NOT_TRANSFER_READ | ~NOT_TRANSFER_WRITE));
+      BaseType::addKnownBits(NOT_MUTATED | NOT_EXTERNAL | NOT_STAGING_READ |
+                             NOT_STAGING_WRITE);
+      break;
     }
   }
 };
@@ -207,7 +219,7 @@
 // Starts with the best assumed state of the value never being used for anything
 // and then works towards a worst state of it being used for everything.
 class ValueResourceUsage : public AbstractResourceUsage<DFX::ValueElement> {
- public:
+public:
   using BaseType = AbstractResourceUsage<DFX::ValueElement>;
 
   static ValueResourceUsage &createForPosition(const Position &pos,
@@ -223,7 +235,7 @@
 
   static const char ID;
 
- private:
+private:
   explicit ValueResourceUsage(const Position &pos) : BaseType(pos) {}
 
   // Starts analysis of the |value| with known bits based on its resource type.
@@ -237,7 +249,8 @@
   // itself is under analysis.
   void updateFromDefiningOp(Value value, OpResult result, DFX::Solver &solver) {
     // Some tied uses route through ops that change types - ignore those.
-    if (!llvm::isa<IREE::Stream::ResourceType>(result.getType())) return;
+    if (!llvm::isa<IREE::Stream::ResourceType>(result.getType()))
+      return;
 
     TypeSwitch<Operation *, void>(result.getOwner())
         .Case([&](mlir::arith::SelectOp op) {
@@ -263,12 +276,12 @@
           auto globalType = llvm::cast<IREE::Stream::ResourceType>(
               globalInfo->op.getGlobalType());
           switch (globalType.getLifetime()) {
-            case IREE::Stream::Lifetime::Constant:
-              removeAssumedBits(NOT_CONSTANT);
-              break;
-            case IREE::Stream::Lifetime::Variable:
-            default:
-              break;
+          case IREE::Stream::Lifetime::Constant:
+            removeAssumedBits(NOT_CONSTANT);
+            break;
+          case IREE::Stream::Lifetime::Variable:
+          default:
+            break;
           }
           auto resultUsage = solver.getElementFor<ValueResourceUsage>(
               *this, Position::forValue(op.getLoadedGlobalValue()),
@@ -450,7 +463,8 @@
   // This walks through tied uses as well.
   void updateFromUse(Value value, OpOperand &operand, DFX::Solver &solver) {
     // Some tied uses route through ops that change types - ignore those.
-    if (!llvm::isa<IREE::Stream::ResourceType>(operand.get().getType())) return;
+    if (!llvm::isa<IREE::Stream::ResourceType>(operand.get().getType()))
+      return;
 
     auto *userOp = operand.getOwner();
     unsigned operandIdx = operand.getOperandNumber();
@@ -502,12 +516,12 @@
           auto globalType = llvm::cast<IREE::Stream::ResourceType>(
               globalInfo->op.getGlobalType());
           switch (globalType.getLifetime()) {
-            case IREE::Stream::Lifetime::Constant:
-              removeAssumedBits(NOT_CONSTANT);
-              break;
-            case IREE::Stream::Lifetime::Variable:
-            default:
-              break;
+          case IREE::Stream::Lifetime::Constant:
+            removeAssumedBits(NOT_CONSTANT);
+            break;
+          case IREE::Stream::Lifetime::Variable:
+          default:
+            break;
           }
         })
         .Case([&](IREE::Util::GlobalStoreIndirectOpInterface op) {
@@ -703,7 +717,8 @@
 ResourceUsageAnalysis::tryLookupResourceUsage(Value value) {
   auto resourceUsage =
       solver.lookupElementFor<ValueResourceUsage>(Position::forValue(value));
-  if (!resourceUsage) return std::nullopt;
+  if (!resourceUsage)
+    return std::nullopt;
   return resourceUsage->getAssumedUsage();
 }
 
@@ -733,7 +748,7 @@
   return solver.run();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.h b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.h
index bf70dff..e8e0d7f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.h
@@ -24,7 +24,7 @@
 enum class ResourceUsageBitfield : uint32_t {
   Indirect = 1u << 0,
   External = 1u << 1,
-  Mutated = 1u << 2,  // beyond definition
+  Mutated = 1u << 2, // beyond definition
   Constant = 1u << 3,
   TransferRead = 1u << 4,
   TransferWrite = 1u << 5,
@@ -65,7 +65,7 @@
 // All `!stream.resource` SSA values will be analyzed and their usage will be
 // available for querying via the lookup functions.
 class ResourceUsageAnalysis {
- public:
+public:
   explicit ResourceUsageAnalysis(Operation *rootOp);
   ~ResourceUsageAnalysis();
 
@@ -84,15 +84,15 @@
   // Returns the analyzed resource usage of the |value| resource, if analyzed.
   std::optional<ResourceUsageBitfield> tryLookupResourceUsage(Value value);
 
- private:
+private:
   Explorer explorer;
   llvm::BumpPtrAllocator allocator;
   DFX::Solver solver;
 };
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_ANALYSIS_RESOURCE_USAGE_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_ANALYSIS_RESOURCE_USAGE_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
index 1f4f70d..0ed4955 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
@@ -24,7 +24,8 @@
 // Returns the stream affinity based on the given flow dialect |op|.
 // Returns an empty attribute when no affinity is specified.
 static IREE::Stream::AffinityAttr getAffinityFor(Operation *op) {
-  if (!op) return {};
+  if (!op)
+    return {};
   // TODO(benvanik): support upstream interfaces or something more generic?
   // We may want to allow users to come in with raw string forms or something
   // that we parse and map to an attribute. That would prevent the need for
@@ -51,9 +52,9 @@
 struct ConvertTensorReshapeOp
     : public OpConversionPattern<IREE::Flow::TensorReshapeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorReshapeOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorReshapeOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto unknownType = rewriter.getType<IREE::Stream::ResourceType>();
     auto source =
         consumeTensorOperand(op.getLoc(), adaptor.getSource(), rewriter);
@@ -70,9 +71,9 @@
 struct ConvertTensorAllocaOp
     : public OpConversionPattern<IREE::Flow::TensorAllocaOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorAllocaOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorAllocaOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type unknownType = IREE::Stream::ResourceType::get(getContext());
     auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(),
                                         op.getResultDims(), rewriter);
@@ -85,9 +86,9 @@
 struct ConvertTensorEmptyOp
     : public OpConversionPattern<IREE::Flow::TensorEmptyOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorEmptyOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorEmptyOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type unknownType = IREE::Stream::ResourceType::get(getContext());
     auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(),
                                         op.getResultDims(), rewriter);
@@ -101,9 +102,9 @@
 struct ConvertTensorSplatOp
     : public OpConversionPattern<IREE::Flow::TensorSplatOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorSplatOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorSplatOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto unknownType = rewriter.getType<IREE::Stream::ResourceType>();
     auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(),
                                         op.getResultDims(), rewriter);
@@ -117,9 +118,9 @@
 struct ConvertTensorCloneOp
     : public OpConversionPattern<IREE::Flow::TensorCloneOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorCloneOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorCloneOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto unknownType = rewriter.getType<IREE::Stream::ResourceType>();
     auto operand =
         consumeTensorOperand(op.getLoc(), adaptor.getOperand(), rewriter);
@@ -134,9 +135,9 @@
 struct ConvertTensorSliceOp
     : public OpConversionPattern<IREE::Flow::TensorSliceOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorSliceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorSliceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto unknownType = rewriter.getType<IREE::Stream::ResourceType>();
     auto source =
         consumeTensorOperand(op.getLoc(), adaptor.getSource(), rewriter);
@@ -154,9 +155,9 @@
 struct ConvertTensorUpdateOp
     : public OpConversionPattern<IREE::Flow::TensorUpdateOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorUpdateOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorUpdateOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto update =
         consumeTensorOperand(op.getLoc(), adaptor.getUpdate(), rewriter);
     auto target =
@@ -173,9 +174,9 @@
 struct ConvertTensorLoadOp
     : public OpConversionPattern<IREE::Flow::TensorLoadOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorLoadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorLoadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType = getTypeConverter()->convertType(op.getResult().getType());
     auto source =
         consumeTensorOperand(op.getLoc(), adaptor.getSource(), rewriter);
@@ -201,9 +202,9 @@
 struct ConvertTensorStoreOp
     : public OpConversionPattern<IREE::Flow::TensorStoreOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorStoreOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorStoreOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto target =
         consumeTensorOperand(op.getLoc(), adaptor.getTarget(), rewriter);
 
@@ -240,9 +241,9 @@
 struct ConvertTensorTraceOp
     : public OpConversionPattern<IREE::Flow::TensorTraceOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::TensorTraceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::TensorTraceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Value> exportedTensors;
     for (auto [tensorOperand, resourceOperand] :
          llvm::zip_equal(op.getOperands(), adaptor.getOperands())) {
@@ -274,9 +275,9 @@
 struct ConvertChannelDefaultOp
     : public OpConversionPattern<IREE::Flow::ChannelDefaultOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::ChannelDefaultOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::ChannelDefaultOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto affinityAttr = IREE::Stream::AffinityAttr::lookup(op);
     rewriter.replaceOpWithNewOp<IREE::Stream::ChannelCreateOp>(
         op, /*id=*/Value{},
@@ -290,9 +291,9 @@
 struct ConvertChannelSplitOp
     : public OpConversionPattern<IREE::Flow::ChannelSplitOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::ChannelSplitOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::ChannelSplitOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::Stream::ChannelSplitOp>(
         op, adaptor.getChannel(), adaptor.getColor(), adaptor.getKey());
     return success();
@@ -302,9 +303,9 @@
 struct ConvertChannelRankOp
     : public OpConversionPattern<IREE::Flow::ChannelRankOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::ChannelRankOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::ChannelRankOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::Stream::ChannelRankOp>(
         op, adaptor.getOperands());
     return success();
@@ -314,9 +315,9 @@
 struct ConvertChannelCountOp
     : public OpConversionPattern<IREE::Flow::ChannelCountOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::ChannelCountOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::ChannelCountOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::Stream::ChannelCountOp>(
         op, adaptor.getOperands());
     return success();
@@ -326,9 +327,9 @@
 struct ConvertAllGatherOp
     : public OpConversionPattern<IREE::Flow::CollectiveAllGatherOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::CollectiveAllGatherOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::CollectiveAllGatherOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto shape = llvm::cast<ShapedType>(op.getSource().getType());
     auto collectiveAttr = IREE::Stream::CollectiveAttr::get(
         op.getContext(), IREE::Stream::CollectiveKind::AllGather,
@@ -361,9 +362,9 @@
 struct ConvertAllReduceOp
     : public OpConversionPattern<IREE::Flow::CollectiveAllReduceOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::CollectiveAllReduceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::CollectiveAllReduceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto shape = llvm::cast<ShapedType>(op.getType());
     auto collectiveAttr = IREE::Stream::CollectiveAttr::get(
         op.getContext(), IREE::Stream::CollectiveKind::AllReduce,
@@ -396,9 +397,9 @@
 struct ConvertAllToAllOp
     : public OpConversionPattern<IREE::Flow::CollectiveAllToAllOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::CollectiveAllToAllOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::CollectiveAllToAllOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto shape = llvm::cast<ShapedType>(op.getSource().getType());
     auto collectiveAttr = IREE::Stream::CollectiveAttr::get(
         op.getContext(), IREE::Stream::CollectiveKind::AllToAll,
@@ -431,9 +432,9 @@
 struct ConvertReduceScatterOp
     : public OpConversionPattern<IREE::Flow::CollectiveReduceScatterOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::CollectiveReduceScatterOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::CollectiveReduceScatterOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto shape = llvm::cast<ShapedType>(op.getType());
     auto collectiveAttr = IREE::Stream::CollectiveAttr::get(
         op.getContext(), IREE::Stream::CollectiveKind::ReduceScatter,
@@ -466,9 +467,9 @@
 struct ConvertCollectiveSendRecvOp
     : public OpConversionPattern<IREE::Flow::CollectiveSendRecvOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::CollectiveSendRecvOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::CollectiveSendRecvOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto shape = llvm::cast<ShapedType>(op.getType());
     auto collectiveAttr = IREE::Stream::CollectiveAttr::get(
         op.getContext(), IREE::Stream::CollectiveKind::SendRecv,
@@ -514,9 +515,9 @@
 
 struct ConvertDispatchOp : public OpConversionPattern<IREE::Flow::DispatchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::DispatchOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::DispatchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Zero is going to be used for each operand to start.
     auto zeroOffset = rewriter.create<arith::ConstantIndexOp>(op.getLoc(), 0);
 
@@ -581,9 +582,9 @@
 
 struct ConvertFuncOp : public OpConversionPattern<IREE::Flow::FuncOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::FuncOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::FuncOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto convertType = [&](Type type) -> Type {
       if (llvm::isa<TensorType>(type)) {
         // Tensors become resources without sizes. The default type converter
@@ -616,9 +617,9 @@
 
 struct ConvertCallOp : public OpConversionPattern<IREE::Flow::CallOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::CallOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::CallOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Zero is going to be used for each operand to start.
     auto zeroOffset = rewriter.create<arith::ConstantIndexOp>(op.getLoc(), 0);
 
@@ -690,7 +691,8 @@
                             IREE::Flow::DispatchTensorType tensorType,
                             Value zero, OpBuilder &builder) {
   // No uses: don't need a binding op.
-  if (arg.use_empty()) return true;
+  if (arg.use_empty())
+    return true;
 
   // Find the dynamic dimension SSA values of the argument within the region.
   // If the flow dialect properly modeled dimension associations we wouldn't
@@ -705,7 +707,8 @@
     IREE::Flow::DispatchTieShapeOp tieShapeOp;
     for (auto user : arg.getUsers()) {
       tieShapeOp = dyn_cast<IREE::Flow::DispatchTieShapeOp>(user);
-      if (tieShapeOp) break;
+      if (tieShapeOp)
+        break;
     }
     if (tieShapeOp) {
       // Found a tie shape op - we'll insert ourselves there.
@@ -757,9 +760,9 @@
 struct ConvertExecutableOp
     : public OpConversionPattern<IREE::Flow::ExecutableOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::ExecutableOp flowOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::ExecutableOp flowOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // flow.executable -> stream.executable
     auto streamOp = rewriter.create<IREE::Stream::ExecutableOp>(
         flowOp.getLoc(), flowOp.getSymName());
@@ -790,7 +793,8 @@
     // Dispatch tensor arguments become bindings and all others are preserved as
     // adaptor. Note that we only touch public (exported) functions.
     for (auto funcOp : moduleOp.getOps<mlir::func::FuncOp>()) {
-      if (!funcOp.isPublic()) continue;
+      if (!funcOp.isPublic())
+        continue;
 
       SmallVector<Type> newTypes;
       newTypes.reserve(funcOp.getNumArguments());
@@ -831,16 +835,16 @@
 
 struct ConvertReturnOp : public OpConversionPattern<IREE::Flow::ReturnOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Flow::ReturnOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Flow::ReturnOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::Stream::ReturnOp>(op,
                                                         adaptor.getOperands());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateFlowToStreamConversionPatterns(MLIRContext *context,
                                             TypeConverter &typeConverter,
@@ -880,5 +884,5 @@
   populateFlowToStreamConversionPatterns(context, typeConverter, patterns);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.h b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.h
index 4f22c1e..24abfae 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.h
@@ -25,7 +25,7 @@
                                             TypeConverter &typeConverter,
                                             RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_CONVERSION_FLOWTOSTREAM_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_CONVERSION_FLOWTOSTREAM_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp
index 033c226..6c39a05 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp
@@ -25,9 +25,9 @@
 struct ConvertTensorImportOp
     : public OpConversionPattern<IREE::HAL::TensorImportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::TensorImportOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::TensorImportOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto sourceType = op.getSource().getType();
     auto targetType = op.getTargetEncoding();
     if (!llvm::isa<IREE::HAL::BufferType>(sourceType) &&
@@ -140,9 +140,9 @@
 struct ConvertTensorExportOp
     : public OpConversionPattern<IREE::HAL::TensorExportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::TensorExportOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::TensorExportOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto sourceType = op.getSourceEncoding();
     auto targetType = op.getTarget().getType();
     if (!llvm::isa<IREE::HAL::BufferType>(targetType) &&
@@ -219,9 +219,9 @@
 struct ConvertTensorBarrierOp
     : public OpConversionPattern<IREE::HAL::TensorBarrierOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::TensorBarrierOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::TensorBarrierOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto timepointType = rewriter.getType<IREE::Stream::TimepointType>();
     SmallVector<Value> signaledResources;
     SmallVector<Value> signaledTimepoints;
@@ -244,7 +244,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateHALToStreamConversionPatterns(MLIRContext *context,
                                            TypeConverter &typeConverter,
@@ -278,5 +278,5 @@
   populateHALToStreamConversionPatterns(context, typeConverter, patterns);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.h b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.h
index c3d4e81..58845d0 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.h
@@ -25,7 +25,7 @@
                                            TypeConverter &typeConverter,
                                            RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_CONVERSION_HALTOSTREAM_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_CONVERSION_HALTOSTREAM_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp
index e183da7..9cb800b 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp
@@ -43,5 +43,5 @@
   return ConvertedTensor();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h
index 996efd2..383e3f4 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h
@@ -29,7 +29,7 @@
 ConvertedTensor consumeTensorOperand(Location loc, Value operand,
                                      OpBuilder &builder);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_CONVERSION_PATTERN_UTILS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_CONVERSION_PATTERN_UTILS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertConstantOps.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertConstantOps.cpp
index bacdffd..57d0683 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertConstantOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertConstantOps.cpp
@@ -19,14 +19,15 @@
 namespace {
 
 struct ConvertTensorConstantOp : public OpConversionPattern<arith::ConstantOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::ConstantOp constantOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::ConstantOp constantOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle tensor types - other arith.constant types (like i32) are
     // ignored.
-    if (!llvm::isa<TensorType>(constantOp.getType())) return failure();
+    if (!llvm::isa<TensorType>(constantOp.getType()))
+      return failure();
 
     Type constantType = IREE::Stream::ResourceType::get(
         getContext(), IREE::Stream::Lifetime::Constant);
@@ -48,7 +49,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStandardConstantToStreamPatterns(
     MLIRContext *context, ConversionTarget &conversionTarget,
@@ -61,5 +62,5 @@
   patterns.insert<ConvertTensorConstantOp>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp
index e0c955d..dc04db2 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp
@@ -26,9 +26,9 @@
 struct FuncOpSignatureConversion
     : public OpConversionPattern<mlir::func::FuncOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::func::FuncOp funcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::FuncOp funcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto &typeConverter = *getTypeConverter();
 
     // Convert the input signature types.
@@ -65,8 +65,9 @@
   }
 };
 
-static SmallVector<Value> expandResourceOperands(
-    Location loc, ValueRange operands, ConversionPatternRewriter &rewriter) {
+static SmallVector<Value>
+expandResourceOperands(Location loc, ValueRange operands,
+                       ConversionPatternRewriter &rewriter) {
   SmallVector<Value> expandedOperands;
   expandedOperands.reserve(operands.size());
   for (auto operand : operands) {
@@ -87,9 +88,9 @@
 
 struct CallOpConversion : public OpConversionPattern<mlir::func::CallOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::func::CallOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::CallOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Expand any resource operands to resource + size.
     auto expandedOperands =
         expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
@@ -145,9 +146,9 @@
 
 struct ReturnOpConversion : public OpConversionPattern<mlir::func::ReturnOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::func::ReturnOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::ReturnOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Expand any resource operands to resource + size.
     auto expandedOperands =
         expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
@@ -158,9 +159,9 @@
 
 struct BranchOpConversion : public OpConversionPattern<mlir::cf::BranchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::cf::BranchOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::cf::BranchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Expand any resource operands to resource + size.
     auto expandedOperands = expandResourceOperands(
         op.getLoc(), adaptor.getDestOperands(), rewriter);
@@ -173,9 +174,9 @@
 struct CondBranchOpConversion
     : public OpConversionPattern<mlir::cf::CondBranchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::cf::CondBranchOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::cf::CondBranchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Expand any resource operands to resource + size.
     auto trueDestOperands = expandResourceOperands(
         op.getLoc(), adaptor.getTrueDestOperands(), rewriter);
@@ -190,11 +191,12 @@
 
 struct SelectOpConversion : public OpConversionPattern<mlir::arith::SelectOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::arith::SelectOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::arith::SelectOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle selects where the operands are tensors (resources).
-    if (!llvm::isa<TensorType>(op.getTrueValue().getType())) return failure();
+    if (!llvm::isa<TensorType>(op.getTrueValue().getType()))
+      return failure();
     auto trueOperand =
         consumeTensorOperand(op.getLoc(), adaptor.getTrueValue(), rewriter);
     auto falseOperand =
@@ -212,7 +214,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStandardStructuralToStreamPatterns(
     MLIRContext *context, ConversionTarget &conversionTarget,
@@ -263,5 +265,5 @@
           typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp
index 79cbade..eb4cd1c 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp
@@ -43,5 +43,5 @@
                                              typeConverter, patterns);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.h b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.h
index 47c3100..a44ffed 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.h
@@ -20,7 +20,7 @@
     MLIRContext *context, ConversionTarget &conversionTarget,
     TypeConverter &typeConverter, RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_CONVERSION_STANDARDTOSTREAM_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_CONVERSION_STANDARDTOSTREAM_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
index 1c49281..d910417 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
@@ -34,7 +34,8 @@
 };
 
 static bool isExpandedType(Type type) {
-  if (llvm::isa<TensorType>(type)) return true;
+  if (llvm::isa<TensorType>(type))
+    return true;
   if (auto ptrType = llvm::dyn_cast<IREE::Util::PtrType>(type)) {
     return isExpandedType(ptrType);
   }
@@ -43,7 +44,7 @@
 
 template <typename T>
 class BaseGlobalConversionPattern : public OpConversionPattern<T> {
- public:
+public:
   BaseGlobalConversionPattern(
       std::shared_ptr<GlobalExpansionState> expansionState,
       TypeConverter &typeConverter, MLIRContext *context,
@@ -51,18 +52,19 @@
       : OpConversionPattern<T>(typeConverter, context, benefit),
         expansionState(std::move(expansionState)) {}
 
- protected:
+protected:
   mutable std::shared_ptr<GlobalExpansionState> expansionState;
 };
 
 struct GlobalOpExpansion
     : public BaseGlobalConversionPattern<IREE::Util::GlobalOp> {
   using BaseGlobalConversionPattern::BaseGlobalConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalOp globalOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalOp globalOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only apply to expanded types (tensors/etc).
-    if (!isExpandedType(globalOp.getType())) return failure();
+    if (!isExpandedType(globalOp.getType()))
+      return failure();
 
     SmallVector<Type> newTypes;
     if (failed(getTypeConverter()->convertType(globalOp.getType(), newTypes))) {
@@ -144,11 +146,12 @@
 struct GlobalLoadOpExpansion
     : public BaseGlobalConversionPattern<IREE::Util::GlobalLoadOp> {
   using BaseGlobalConversionPattern::BaseGlobalConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalLoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalLoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only apply to expanded types (tensors/etc).
-    if (!isExpandedType(loadOp.getType())) return failure();
+    if (!isExpandedType(loadOp.getType()))
+      return failure();
     auto &expandedGlobal = expansionState->globalMap[adaptor.getGlobal()];
 
     // Insert a load/transfer to the unknown resource lifetime.
@@ -176,11 +179,12 @@
 struct GlobalStoreOpExpansion
     : public BaseGlobalConversionPattern<IREE::Util::GlobalStoreOp> {
   using BaseGlobalConversionPattern::BaseGlobalConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalStoreOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalStoreOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only apply to expanded types (tensors/etc).
-    if (!isExpandedType(storeOp.getValue().getType())) return failure();
+    if (!isExpandedType(storeOp.getValue().getType()))
+      return failure();
     auto &expandedGlobal = expansionState->globalMap[adaptor.getGlobal()];
 
     // Insert a transfer/store to the global with unknown lifetime. Lifetime
@@ -203,7 +207,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilToStreamConversionPatterns(MLIRContext *context,
                                             TypeConverter &typeConverter,
@@ -222,7 +226,8 @@
   typeConverter.addConversion([=](IREE::Util::PtrType type,
                                   SmallVectorImpl<Type> &resultTypes) {
     // Expand pointers to tensors to [resource, sizeof resource] pointers.
-    if (!isExpandedType(type)) return failure();
+    if (!isExpandedType(type))
+      return failure();
     resultTypes.push_back(
         IREE::Util::PtrType::get(IREE::Stream::ResourceType::get(context)));
     resultTypes.push_back(IREE::Util::PtrType::get(IndexType::get(context)));
@@ -232,7 +237,8 @@
   typeConverter.addConversion(
       [=](IREE::Util::PtrType type, SmallVectorImpl<Type> &resultTypes) {
         // Expand pointers to tensors to [ptr<resource>, ptr<sizeof resource>].
-        if (!isExpandedType(type.getTargetType())) return failure();
+        if (!isExpandedType(type.getTargetType()))
+          return failure();
         resultTypes.push_back(IREE::Stream::ResourceType::get(context));
         resultTypes.push_back(IndexType::get(context));
         return success();
@@ -270,5 +276,5 @@
   populateUtilToStreamConversionPatterns(context, typeConverter, patterns);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.h b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.h
index 9aa1830..cef78c6 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.h
@@ -25,7 +25,7 @@
                                             TypeConverter &typeConverter,
                                             RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_CONVERSION_UTILTOSTREAM_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_CONVERSION_UTILTOSTREAM_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.cpp b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.cpp
index 7ce4f0f..a80a869 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.cpp
@@ -92,7 +92,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 StreamDialect::StreamDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<StreamDialect>()) {
@@ -128,7 +128,7 @@
   return nullptr;
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.h b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.h
index 3712132..3edfb87 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamDialect.h
@@ -17,7 +17,7 @@
 namespace Stream {
 
 class StreamDialect : public Dialect {
- public:
+public:
   explicit StreamDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "stream"; }
 
@@ -32,14 +32,14 @@
   Type parseType(DialectAsmParser &parser) const override;
   void printType(Type type, DialectAsmPrinter &p) const override;
 
- private:
+private:
   void registerAttributes();
   void registerTypes();
 };
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_IR_STREAMDIALECT_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_IR_STREAMDIALECT_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
index 7b63e0a..0149f4a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
@@ -55,8 +55,10 @@
 //   0xCDCDCDCD : i32 -> 0xCD : i8
 static APInt computeRequiredPatternBits(APInt pattern) {
   // Special case for well-known constant values.
-  if (pattern.isZero()) return APInt(8, 0u);
-  if (pattern.isAllOnes()) return APInt(8, 0xFF);
+  if (pattern.isZero())
+    return APInt(8, 0u);
+  if (pattern.isAllOnes())
+    return APInt(8, 0xFF);
 
   // Extend up to a power of two bit width. This makes the value easier to work
   // with as we'll be dealing with one of 4 sizes (1/2/4/8b).
@@ -70,58 +72,58 @@
   uint64_t byteWidth = bitWidth / 8;
   uint64_t value = pattern.getZExtValue();
   switch (byteWidth) {
-    case 1:
-      // Can't go smaller than 1 byte.
-      return pattern;
-    case 2: {
-      uint64_t b0 = value & 0xFF;
-      uint64_t b1 = (value >> 8) & 0xFF;
-      if (b0 == b1) {
-        // 0xAAAA : i16 => 0xAA : i8
-        return APInt(8, value & 0xFF);
-      }
-      return pattern;
+  case 1:
+    // Can't go smaller than 1 byte.
+    return pattern;
+  case 2: {
+    uint64_t b0 = value & 0xFF;
+    uint64_t b1 = (value >> 8) & 0xFF;
+    if (b0 == b1) {
+      // 0xAAAA : i16 => 0xAA : i8
+      return APInt(8, value & 0xFF);
     }
-    case 4: {
-      uint64_t b0 = value & 0xFF;
-      uint64_t b1 = (value >> 8) & 0xFF;
-      uint64_t b2 = (value >> 16) & 0xFF;
-      uint64_t b3 = (value >> 24) & 0xFF;
-      if (b0 == b1 && b0 == b2 && b0 == b3) {
-        // 0xAAAAAAAA : i32 => 0xAA : i8
-        return APInt(8, b0);
-      } else if (b0 == b2 && b1 == b3) {
-        // 0xAABBAABB : i32 => 0xAABB : i16
-        return APInt(16, b0 | (b1 << 8));
-      }
-      return pattern;
+    return pattern;
+  }
+  case 4: {
+    uint64_t b0 = value & 0xFF;
+    uint64_t b1 = (value >> 8) & 0xFF;
+    uint64_t b2 = (value >> 16) & 0xFF;
+    uint64_t b3 = (value >> 24) & 0xFF;
+    if (b0 == b1 && b0 == b2 && b0 == b3) {
+      // 0xAAAAAAAA : i32 => 0xAA : i8
+      return APInt(8, b0);
+    } else if (b0 == b2 && b1 == b3) {
+      // 0xAABBAABB : i32 => 0xAABB : i16
+      return APInt(16, b0 | (b1 << 8));
     }
-    case 8: {
-      uint64_t b0 = value & 0xFF;
-      uint64_t b1 = (value >> 8) & 0xFF;
-      uint64_t b2 = (value >> 16) & 0xFF;
-      uint64_t b3 = (value >> 24) & 0xFF;
-      uint64_t b4 = (value >> 32) & 0xFF;
-      uint64_t b5 = (value >> 40) & 0xFF;
-      uint64_t b6 = (value >> 48) & 0xFF;
-      uint64_t b7 = (value >> 56) & 0xFF;
-      if (b0 == b1 && b0 == b2 && b0 == b3 && b0 == b4 && b0 == b5 &&
-          b0 == b6 && b0 == b7) {
-        // 0xAAAAAAAAAAAAAAAA : i64 => 0xAA : i8
-        return APInt(8, b0);
-      } else if ((b0 == b2 && b0 == b4 && b0 == b6) &&
-                 (b1 == b3 && b1 == b5 && b1 == b7)) {
-        // 0xAABBAABBAABBAABB : i64 => 0xAABB : i16
-        return APInt(16, b0 | (b1 << 8));
-      } else if (b0 == b4 && b1 == b5 && b2 == b6 && b3 == b7) {
-        // 0xAABBCCDDAABBCCDD : i64 => 0xAABBCCDD : i32
-        return APInt(32, b0 | (b1 << 8) | (b2 << 16) | (b3 << 32));
-      }
-      return pattern;
+    return pattern;
+  }
+  case 8: {
+    uint64_t b0 = value & 0xFF;
+    uint64_t b1 = (value >> 8) & 0xFF;
+    uint64_t b2 = (value >> 16) & 0xFF;
+    uint64_t b3 = (value >> 24) & 0xFF;
+    uint64_t b4 = (value >> 32) & 0xFF;
+    uint64_t b5 = (value >> 40) & 0xFF;
+    uint64_t b6 = (value >> 48) & 0xFF;
+    uint64_t b7 = (value >> 56) & 0xFF;
+    if (b0 == b1 && b0 == b2 && b0 == b3 && b0 == b4 && b0 == b5 && b0 == b6 &&
+        b0 == b7) {
+      // 0xAAAAAAAAAAAAAAAA : i64 => 0xAA : i8
+      return APInt(8, b0);
+    } else if ((b0 == b2 && b0 == b4 && b0 == b6) &&
+               (b1 == b3 && b1 == b5 && b1 == b7)) {
+      // 0xAABBAABBAABBAABB : i64 => 0xAABB : i16
+      return APInt(16, b0 | (b1 << 8));
+    } else if (b0 == b4 && b1 == b5 && b2 == b6 && b3 == b7) {
+      // 0xAABBCCDDAABBCCDD : i64 => 0xAABBCCDD : i32
+      return APInt(32, b0 | (b1 << 8) | (b2 << 16) | (b3 << 24));
     }
-    default:
-      // Unhandled bit width.
-      return pattern;
+    return pattern;
+  }
+  default:
+    // Unhandled bit width.
+    return pattern;
   }
 }
 
@@ -145,7 +147,8 @@
 
   // Try narrowing the pattern.
   auto newPattern = computeRequiredPatternBits(oldPattern);
-  if (newPattern.getBitWidth() == oldPattern.getBitWidth()) return patternAttr;
+  if (newPattern.getBitWidth() == oldPattern.getBitWidth())
+    return patternAttr;
 
   // Wrap the result in an attribute - note that it is always an integer.
   return IntegerAttr::get(
@@ -165,7 +168,8 @@
       return failure();
     }
     auto newPatternAttr = tryNarrowPatternBits(oldPatternAttr);
-    if (newPatternAttr == oldPatternAttr) return failure();
+    if (newPatternAttr == oldPatternAttr)
+      return failure();
 
     // Replace the pattern on the op with the new one.
     auto narrowValue =
@@ -183,10 +187,13 @@
 //    stream.yield
 //  }
 static std::optional<IREE::Stream::YieldOp> getYieldIfOnlyOp(Block &block) {
-  if (block.empty()) return std::nullopt;
-  if (&block.front() != &block.back()) return std::nullopt;
+  if (block.empty())
+    return std::nullopt;
+  if (&block.front() != &block.back())
+    return std::nullopt;
   auto yieldOp = dyn_cast<IREE::Stream::YieldOp>(block.back());
-  if (yieldOp) return yieldOp;
+  if (yieldOp)
+    return yieldOp;
   return std::nullopt;
 }
 
@@ -249,7 +256,8 @@
   // If the sinking is to a different block, then it okay, since for any later
   // sinkings, this reduces the problem to stable sinking within a single
   // block (handled below).
-  if (toBeSunkOp->getBlock() != targetOp->getBlock()) return true;
+  if (toBeSunkOp->getBlock() != targetOp->getBlock())
+    return true;
 
   SmallPtrSet<Operation *, 4> producerOps;
   if (allowUseDefPruning) {
@@ -266,9 +274,11 @@
                                         Block::iterator(targetOp))) {
     // If the intervening op that is not even a sink candidate itself,
     // then it cannot fight.
-    if (!isSinkCandidate(&op)) return true;
+    if (!isSinkCandidate(&op))
+      return true;
     // If the op is pruned by use-def chains, then it won't fight.
-    if (allowUseDefPruning && !producerOps.contains(&op)) return true;
+    if (allowUseDefPruning && !producerOps.contains(&op))
+      return true;
   }
   return false;
 }
@@ -276,7 +286,8 @@
 // Sinks |op| down to |targetOp|, ensuring that we don't oscillate.
 // Returns success if the op was sunk and failure if sinking was not needed.
 static LogicalResult sinkOp(Operation *op, Operation *targetOp) {
-  if (!canStablySinkTo(op, targetOp)) return failure();
+  if (!canStablySinkTo(op, targetOp))
+    return failure();
   op->moveBefore(targetOp);
   return success();
 }
@@ -308,7 +319,8 @@
       : OpRewritePattern<Op>(context, /*benefit=*/1000) {}
   LogicalResult matchAndRewrite(Op op,
                                 PatternRewriter &rewriter) const override {
-    if (!op.use_empty()) return failure();
+    if (!op.use_empty())
+      return failure();
     rewriter.eraseOp(op);
     return success();
   }
@@ -328,11 +340,13 @@
   using OpRewritePattern<Op>::OpRewritePattern;
   LogicalResult matchAndRewrite(Op cloneOp,
                                 PatternRewriter &rewriter) const override {
-    if (cloneOp.use_empty()) return failure();
+    if (cloneOp.use_empty())
+      return failure();
     auto sourceOp =
         cloneOp.getSource()
             .template getDefiningOp<IREE::Stream::StreamableOpInterface>();
-    if (!sourceOp || !sourceOp.preferCloneToConsumers()) return failure();
+    if (!sourceOp || !sourceOp.preferCloneToConsumers())
+      return failure();
     for (auto &use :
          llvm::make_early_inc_range(cloneOp.getResult().getUses())) {
       rewriter.setInsertionPoint(use.getOwner());
@@ -351,7 +365,8 @@
 static bool materializeCOW(Location loc, Value rootValue, OpBuilder &builder) {
   auto valueType =
       llvm::dyn_cast<IREE::Stream::ResourceType>(rootValue.getType());
-  if (!valueType) return false;
+  if (!valueType)
+    return false;
 
   // If our rootValue is a constant then we need to ensure that we aren't
   // tied to a constant operand. If we are we need to clone to a
@@ -370,7 +385,8 @@
   SmallVector<TiedUse> tiedUses;
   unsigned untiedUses = 0;
   for (auto &use : rootValue.getUses()) {
-    if (isa<IREE::Stream::TimepointAwaitOp>(use.getOwner())) continue;
+    if (isa<IREE::Stream::TimepointAwaitOp>(use.getOwner()))
+      continue;
     auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(use.getOwner());
     bool isTied = tiedOp && tiedOp.isOperandTied(use.getOperandNumber());
     if (isTied) {
@@ -464,7 +480,7 @@
     for (auto yieldOp : op.template getOps<IREE::Stream::YieldOp>()) {
       for (auto result : llvm::enumerate(yieldOp.getResourceOperands())) {
         if (op.getTiedResultOperandIndex(result.index()).has_value()) {
-          continue;  // Already tied.
+          continue; // Already tied.
         }
         auto baseValue =
             IREE::Util::TiedOpInterface::findTiedBaseValue(result.value());
@@ -521,7 +537,8 @@
     bool isImmediate =
         op.getAwaitTimepoint() && isa_and_nonnull<TimepointImmediateOp>(
                                       op.getAwaitTimepoint().getDefiningOp());
-    if (!isImmediate) return failure();
+    if (!isImmediate)
+      return failure();
     rewriter.updateRootInPlace(
         op, [&]() { op.getAwaitTimepointMutable().clear(); });
     return success();
@@ -553,7 +570,8 @@
             operand.index(), awaitOp.getTiedResultOperand(operand.value())));
       }
     }
-    if (replacements.empty()) return failure();
+    if (replacements.empty())
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       auto newTimepoint = joinAwaitTimepoints(
           op.getLoc(), op.getAwaitTimepoint(), newTimepoints, rewriter);
@@ -568,7 +586,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // stream.resource.alloc
@@ -630,7 +648,8 @@
   LogicalResult matchAndRewrite(ResourceSizeOp op,
                                 PatternRewriter &rewriter) const override {
     auto selectOp = op.getOperand().getDefiningOp<mlir::arith::SelectOp>();
-    if (!selectOp) return failure();
+    if (!selectOp)
+      return failure();
     auto trueSize = rewriter.createOrFold<IREE::Stream::ResourceSizeOp>(
         op.getLoc(), selectOp.getTrueValue(), op.getAffinityAttr());
     auto falseSize = rewriter.createOrFold<IREE::Stream::ResourceSizeOp>(
@@ -641,7 +660,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ResourceSizeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
@@ -688,7 +707,8 @@
   LogicalResult matchAndRewrite(ResourceLoadOp op,
                                 PatternRewriter &rewriter) const override {
     auto subviewOp = ResourceSubviewOp::findSubviewOp(op.getSource());
-    if (!subviewOp) return failure();
+    if (!subviewOp)
+      return failure();
     auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
         fusedLoc, subviewOp.getSourceOffset(), op.getSourceOffset());
@@ -701,7 +721,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ResourceLoadOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
@@ -732,7 +752,8 @@
   LogicalResult matchAndRewrite(ResourceStoreOp op,
                                 PatternRewriter &rewriter) const override {
     auto subviewOp = ResourceSubviewOp::findSubviewOp(op.getTarget());
-    if (!subviewOp) return failure();
+    if (!subviewOp)
+      return failure();
     auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
         fusedLoc, subviewOp.getSourceOffset(), op.getTargetOffset());
@@ -745,7 +766,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ResourceStoreOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -798,7 +819,8 @@
                                 PatternRewriter &rewriter) const override {
     // Offset is optional.
     auto baseOffset = op.getOffset();
-    if (!baseOffset) return failure();
+    if (!baseOffset)
+      return failure();
 
     // We always strip the offset here.
     rewriter.updateRootInPlace(op, [&]() { op.getOffsetMutable().clear(); });
@@ -856,7 +878,8 @@
         break;
       }
     }
-    if (!orderChanged) return failure();
+    if (!orderChanged)
+      return failure();
 
     // TODO(benvanik): compact the slice ranges.
 
@@ -887,7 +910,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ResourcePackOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
@@ -916,7 +939,8 @@
   LogicalResult matchAndRewrite(ResourceSubviewOp op,
                                 PatternRewriter &rewriter) const override {
     auto parentOp = ResourceSubviewOp::findSubviewOp(op.getSource());
-    if (!parentOp) return failure();
+    if (!parentOp)
+      return failure();
     auto fusedLoc = rewriter.getFusedLoc({parentOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
         fusedLoc, parentOp.getSourceOffset(), op.getSourceOffset());
@@ -943,12 +967,14 @@
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(mlir::arith::SelectOp op,
                                 PatternRewriter &rewriter) const override {
-    if (!llvm::isa<IREE::Stream::ResourceType>(op.getType())) return failure();
+    if (!llvm::isa<IREE::Stream::ResourceType>(op.getType()))
+      return failure();
     auto trueSubview = dyn_cast_or_null<IREE::Stream::ResourceSubviewOp>(
         op.getTrueValue().getDefiningOp());
     auto falseSubview = dyn_cast_or_null<IREE::Stream::ResourceSubviewOp>(
         op.getFalseValue().getDefiningOp());
-    if (!trueSubview || !falseSubview) return failure();
+    if (!trueSubview || !falseSubview)
+      return failure();
     if (trueSubview.getSource() != falseSubview.getSource() ||
         trueSubview.getResultSize() != falseSubview.getResultSize()) {
       return failure();
@@ -964,7 +990,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ResourceSubviewOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                     MLIRContext *context) {
@@ -1036,7 +1062,8 @@
                                 PatternRewriter &rewriter) const override {
     auto shapedType =
         llvm::dyn_cast<ShapedType>(constantOp.getResultEncoding());
-    if (!shapedType) return failure();
+    if (!shapedType)
+      return failure();
 
     // See if any dim (including dynamic ones) is known zero.
     // It's still possible for empty tensors to slip through if their dynamic
@@ -1056,7 +1083,8 @@
         break;
       }
     }
-    if (!anyZeroDims) return failure();
+    if (!anyZeroDims)
+      return failure();
 
     // Definitely empty if here.
     auto resultSize = rewriter.createOrFold<IREE::Stream::TensorSizeOfOp>(
@@ -1112,7 +1140,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TensorConstantOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                    MLIRContext *context) {
@@ -1159,7 +1187,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TensorCloneOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
@@ -1251,7 +1279,8 @@
   LogicalResult matchAndRewrite(Op producerOp,
                                 PatternRewriter &rewriter) const override {
     auto users = llvm::to_vector(producerOp->getUsers());
-    if (users.size() == 0) return failure();
+    if (users.size() == 0)
+      return failure();
 
     // If we have a single user then we can sink right to it.
     if (users.size() == 1) {
@@ -1300,7 +1329,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncAllocaOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
@@ -1322,7 +1351,8 @@
   LogicalResult matchAndRewrite(AsyncConstantOp constantOp,
                                 PatternRewriter &rewriter) const override {
     auto value = constantOp.getValue();
-    if (!value.isSplat()) return failure();
+    if (!value.isSplat())
+      return failure();
 
     auto splatElementAttr =
         llvm::dyn_cast<SplatElementsAttr>(value).getSplatValue<TypedAttr>();
@@ -1335,7 +1365,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncConstantOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -1404,7 +1434,8 @@
                                 PatternRewriter &rewriter) const override {
     auto splatOp =
         sliceOp.getSource().getDefiningOp<IREE::Stream::AsyncSplatOp>();
-    if (!splatOp) return failure();
+    if (!splatOp)
+      return failure();
     rewriter.replaceOpWithNewOp<IREE::Stream::AsyncSplatOp>(
         sliceOp, sliceOp.getResult().getType(), splatOp.getValue(),
         sliceOp.getResultSize(), sliceOp.getAffinityAttr());
@@ -1412,7 +1443,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncSliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -1465,7 +1496,8 @@
                                 PatternRewriter &rewriter) const override {
     auto splatOp = dyn_cast_or_null<IREE::Stream::AsyncSplatOp>(
         fillOp.getTarget().getDefiningOp());
-    if (!splatOp) return failure();
+    if (!splatOp)
+      return failure();
     if (splatOp.getValue() != fillOp.getValue()) {
       return rewriter.notifyMatchFailure(fillOp,
                                          "fill patterns are not compatible");
@@ -1495,7 +1527,8 @@
                                 PatternRewriter &rewriter) const override {
     auto sourceOp = dyn_cast_or_null<IREE::Stream::AsyncFillOp>(
         fillOp.getTarget().getDefiningOp());
-    if (!sourceOp) return failure();
+    if (!sourceOp)
+      return failure();
     if (!sourceOp.getResult().hasOneUse()) {
       // Note that hazard analysis could make this work if we can guarantee that
       // the source result is only ever sliced out to a range that doesn't
@@ -1552,7 +1585,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncFillOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
@@ -1592,7 +1625,8 @@
                                 PatternRewriter &rewriter) const override {
     auto splatOp =
         updateOp.getUpdate().getDefiningOp<IREE::Stream::AsyncSplatOp>();
-    if (!splatOp) return failure();
+    if (!splatOp)
+      return failure();
     rewriter.replaceOpWithNewOp<IREE::Stream::AsyncFillOp>(
         updateOp, updateOp.getResult().getType(), updateOp.getTarget(),
         updateOp.getTargetSize(), updateOp.getTargetOffset(),
@@ -1641,7 +1675,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncUpdateOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
@@ -1685,7 +1719,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncCopyOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
@@ -1746,10 +1780,12 @@
       auto source = originTransferOp.getSource();
       auto previousTransferOp =
           dyn_cast_or_null<AsyncTransferOp>(source.getDefiningOp());
-      if (!previousTransferOp) break;
+      if (!previousTransferOp)
+        break;
       originTransferOp = previousTransferOp;
     }
-    if (originTransferOp == transferOp) return failure();
+    if (originTransferOp == transferOp)
+      return failure();
     rewriter.replaceOpWithNewOp<AsyncTransferOp>(
         transferOp, transferOp.getResult().getType(),
         originTransferOp.getSource(), originTransferOp.getSourceSize(),
@@ -1759,7 +1795,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncTransferOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -1782,10 +1818,12 @@
   LogicalResult matchAndRewrite(AsyncLoadOp loadOp,
                                 PatternRewriter &rewriter) const override {
     auto loadedValue = loadOp.getResult();
-    if (!loadedValue.hasOneUse()) return failure();
+    if (!loadedValue.hasOneUse())
+      return failure();
     auto bitcastOp =
         dyn_cast<arith::BitcastOp>(*loadedValue.getUsers().begin());
-    if (!bitcastOp) return failure();
+    if (!bitcastOp)
+      return failure();
     rewriter.updateRootInPlace(
         loadOp, [&]() { loadedValue.setType(bitcastOp.getType()); });
     rewriter.replaceOp(bitcastOp, loadedValue);
@@ -1793,7 +1831,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncLoadOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
@@ -1829,7 +1867,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncStoreOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -1879,11 +1917,13 @@
     SmallVector<SubviewCapture> captures;
     for (auto operand : llvm::enumerate(op.getResourceOperands())) {
       auto subviewOp = ResourceSubviewOp::findSubviewOp(operand.value());
-      if (!subviewOp) continue;
+      if (!subviewOp)
+        continue;
       captures.push_back(
           SubviewCapture{static_cast<unsigned>(operand.index()), subviewOp});
     }
-    if (captures.empty()) return failure();
+    if (captures.empty())
+      return failure();
     rewriter.startRootUpdate(op);
 
     auto &entryBlock = op.getBody().front();
@@ -1948,7 +1988,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void AsyncExecuteOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
@@ -1993,7 +2033,8 @@
   LogicalResult matchAndRewrite(CmdFlushOp op,
                                 PatternRewriter &rewriter) const override {
     auto subviewOp = ResourceSubviewOp::findSubviewOp(op.getTarget());
-    if (!subviewOp) return failure();
+    if (!subviewOp)
+      return failure();
     setInsertionPointToParentExecutionScope(op, rewriter);
     auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
@@ -2007,7 +2048,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdFlushOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                              MLIRContext *context) {
@@ -2034,7 +2075,8 @@
   LogicalResult matchAndRewrite(CmdInvalidateOp op,
                                 PatternRewriter &rewriter) const override {
     auto subviewOp = ResourceSubviewOp::findSubviewOp(op.getTarget());
-    if (!subviewOp) return failure();
+    if (!subviewOp)
+      return failure();
     setInsertionPointToParentExecutionScope(op, rewriter);
     auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
@@ -2048,7 +2090,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdInvalidateOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -2074,7 +2116,8 @@
   LogicalResult matchAndRewrite(CmdDiscardOp op,
                                 PatternRewriter &rewriter) const override {
     auto subviewOp = ResourceSubviewOp::findSubviewOp(op.getTarget());
-    if (!subviewOp) return failure();
+    if (!subviewOp)
+      return failure();
     setInsertionPointToParentExecutionScope(op, rewriter);
     auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
@@ -2088,7 +2131,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdDiscardOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -2114,7 +2157,8 @@
   LogicalResult matchAndRewrite(CmdFillOp op,
                                 PatternRewriter &rewriter) const override {
     auto subviewOp = ResourceSubviewOp::findSubviewOp(op.getTarget());
-    if (!subviewOp) return failure();
+    if (!subviewOp)
+      return failure();
     setInsertionPointToParentExecutionScope(op, rewriter);
     auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
@@ -2128,7 +2172,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdFillOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                             MLIRContext *context) {
@@ -2156,7 +2200,8 @@
                                 PatternRewriter &rewriter) const override {
     auto sourceSubviewOp = ResourceSubviewOp::findSubviewOp(op.getSource());
     auto targetSubviewOp = ResourceSubviewOp::findSubviewOp(op.getTarget());
-    if (!sourceSubviewOp && !targetSubviewOp) return failure();
+    if (!sourceSubviewOp && !targetSubviewOp)
+      return failure();
     setInsertionPointToParentExecutionScope(op, rewriter);
     if (sourceSubviewOp) {
       auto fusedLoc =
@@ -2184,7 +2229,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdCopyOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                             MLIRContext *context) {
@@ -2221,16 +2266,19 @@
     bool anySubviewOps = false;
     for (auto operand : op.getResources()) {
       auto subviewOp = ResourceSubviewOp::findSubviewOp(operand);
-      if (subviewOp) anySubviewOps = true;
+      if (subviewOp)
+        anySubviewOps = true;
       resourceSubviewOps.push_back(subviewOp);
     }
-    if (!anySubviewOps) return failure();
+    if (!anySubviewOps)
+      return failure();
     rewriter.startRootUpdate(op);
 
     setInsertionPointToParentExecutionScope(op, rewriter);
     for (auto [resourceIndex, subviewOp] :
          llvm::enumerate(resourceSubviewOps)) {
-      if (!subviewOp) continue;
+      if (!subviewOp)
+        continue;
       auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
       auto newOffset = rewriter.createOrFold<arith::AddIOp>(
           fusedLoc, subviewOp.getSourceOffset(),
@@ -2249,7 +2297,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdCollectiveOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -2285,18 +2333,21 @@
          llvm::enumerate(op.getResourceOperands())) {
       if (llvm::isa<IREE::Stream::ResourceType>(operand.getType())) {
         auto subviewOp = ResourceSubviewOp::findSubviewOp(operand);
-        if (subviewOp) anySubviewOps = true;
+        if (subviewOp)
+          anySubviewOps = true;
         resourceSubviewOps.push_back({operandIndex, subviewOp});
       }
     }
-    if (!anySubviewOps) return failure();
+    if (!anySubviewOps)
+      return failure();
     rewriter.startRootUpdate(op);
 
     setInsertionPointToParentExecutionScope(op, rewriter);
     for (auto [resourceIndex, resourceSubviewOp] :
          llvm::enumerate(resourceSubviewOps)) {
       auto [operandIndex, subviewOp] = resourceSubviewOp;
-      if (!subviewOp) continue;
+      if (!subviewOp)
+        continue;
       auto fusedLoc = rewriter.getFusedLoc({subviewOp.getLoc(), op.getLoc()});
       auto newOffset = rewriter.createOrFold<arith::AddIOp>(
           fusedLoc, subviewOp.getSourceOffset(),
@@ -2317,7 +2368,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdCallOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                             MLIRContext *context) {
@@ -2353,11 +2404,13 @@
     SmallVector<SubviewCapture> captures;
     for (auto operand : llvm::enumerate(op.getResourceOperands())) {
       auto subviewOp = ResourceSubviewOp::findSubviewOp(operand.value());
-      if (!subviewOp) continue;
+      if (!subviewOp)
+        continue;
       captures.push_back(
           SubviewCapture{static_cast<unsigned>(operand.index()), subviewOp});
     }
-    if (captures.empty()) return failure();
+    if (captures.empty())
+      return failure();
     rewriter.startRootUpdate(op);
 
     auto &entryBlock = op.getBody().front();
@@ -2408,7 +2461,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdExecuteOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -2444,7 +2497,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmdSerialOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
@@ -2555,7 +2608,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TimepointChainExternalOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -2646,7 +2699,8 @@
         newTimepoints.insert(timepoint);
       }
     }
-    if (!didExpand) return failure();
+    if (!didExpand)
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       op.getAwaitTimepointsMutable().assign(newTimepoints.takeVector());
     });
@@ -2654,7 +2708,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TimepointJoinOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -2677,7 +2731,8 @@
   // TODO(benvanik): data flow analysis/at least walk up tied ops. For now we
   // err on the conservative side and only check for a few common scenarios.
   auto *definingOp = resource.getDefiningOp();
-  if (!definingOp) return false;
+  if (!definingOp)
+    return false;
   return TypeSwitch<Operation *, bool>(definingOp)
       .Case<IREE::Stream::ResourceAllocOp, IREE::Stream::TensorImportOp>(
           [](auto op) { return true; })
@@ -2713,8 +2768,8 @@
 // Walks up the tied op SSA def chain to find a stream.timepoint.await op that
 // produces the resource. Returns nullptr if no await op is found or local
 // analysis cannot determine the source (spans across a branch, etc).
-static std::pair<IREE::Stream::TimepointAwaitOp, Value> findSourceAwaitOp(
-    Value resource) {
+static std::pair<IREE::Stream::TimepointAwaitOp, Value>
+findSourceAwaitOp(Value resource) {
   Value baseResource = resource;
   while (auto definingOp = dyn_cast_or_null<IREE::Util::TiedOpInterface>(
              baseResource.getDefiningOp())) {
@@ -2723,7 +2778,8 @@
       return {awaitOp, baseResource};
     }
     auto tiedValue = definingOp.getTiedResultOperand(baseResource);
-    if (!tiedValue) break;
+    if (!tiedValue)
+      break;
     baseResource = tiedValue;
   }
   return {nullptr, nullptr};
@@ -2745,7 +2801,8 @@
     // Try to find an await op. This may traverse through any number of tied ops
     // along the way.
     auto [awaitOp, baseResource] = findSourceAwaitOp(barrierOp.getResource());
-    if (!awaitOp) return failure();
+    if (!awaitOp)
+      return failure();
 
     // TODO(benvanik): move this to a pass that can do IPO. Local analysis is
     // insufficient for this. For now we conservatively ignore any case where
@@ -2766,7 +2823,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TimepointBarrierOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                      MLIRContext *context) {
@@ -2827,7 +2884,8 @@
         }
       }
     }
-    if (!commonDominator) return failure();
+    if (!commonDominator)
+      return failure();
 
     // Find the first use within the dominator block (if any) so that we
     // can sink down to it.
@@ -2842,7 +2900,8 @@
 
     // If sinking to `firstUserInDominator` could result in patterns
     // fighting each other, then don't sink.
-    if (!canStablySinkTo(op, firstUserInDominator)) return failure();
+    if (!canStablySinkTo(op, firstUserInDominator))
+      return failure();
 
     rewriter.updateRootInPlace(op,
                                [&]() { op->moveBefore(firstUserInDominator); });
@@ -2862,7 +2921,8 @@
     for (auto operand : llvm::enumerate(op.getResourceOperands())) {
       auto subviewOp =
           operand.value().getDefiningOp<IREE::Stream::ResourceSubviewOp>();
-      if (!subviewOp) continue;
+      if (!subviewOp)
+        continue;
       didChange = true;
       unsigned operandIdx = static_cast<unsigned>(operand.index());
 
@@ -2899,7 +2959,8 @@
 static bool areAllOperandsDefinedBy(Operation *op, Operation *insertionPoint,
                                     DominanceInfo &dominanceInfo) {
   for (auto operand : op->getOperands()) {
-    if (!dominanceInfo.dominates(operand, insertionPoint)) return false;
+    if (!dominanceInfo.dominates(operand, insertionPoint))
+      return false;
   }
   return true;
 }
@@ -2927,9 +2988,12 @@
       // TODO(benvanik): make this handle joins/ties; today we get blocked
       // there. We rely on other canonicalizers to sink things such that
       // (hopefully) we get them directly accessible here.
-      if (use.getOwner() == op) continue;
-      if (op->getBlock() != use.getOwner()->getBlock()) continue;
-      if (dominanceInfo.dominates(use.getOwner(), op)) continue;
+      if (use.getOwner() == op)
+        continue;
+      if (op->getBlock() != use.getOwner()->getBlock())
+        continue;
+      if (dominanceInfo.dominates(use.getOwner(), op))
+        continue;
       auto awaitOp = dyn_cast<TimepointAwaitOp>(use.getOwner());
       if (!awaitOp ||
           !AffinityAttr::areCompatible(
@@ -2948,7 +3012,8 @@
       }
       coveredOps.push_back(awaitOp);
     }
-    if (coveredOps.empty()) return failure();
+    if (coveredOps.empty())
+      return failure();
     coveredOps.push_back(op);
 
     // Sort the ops by their definition order; this gives us a deterministic
@@ -3010,7 +3075,7 @@
       replacements.push_back(std::make_pair(result, resultIdx));
     }
     if (newOperands.size() == op.getResourceOperands().size()) {
-      return failure();  // No change.
+      return failure(); // No change.
     }
 
     // Create replacement op with deduped operands/results.
@@ -3031,7 +3096,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TimepointAwaitOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                    MLIRContext *context) {
@@ -3086,7 +3151,7 @@
   return {};
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
index ca3f989..82b8024 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
@@ -50,9 +50,9 @@
 }
 
 // Verifies that a dispatch |op|'s |workload| matches that of the |exportOp|.
-static LogicalResult verifyDispatchWorkload(
-    Operation *op, IREE::Stream::ExecutableExportOp exportOp,
-    ValueRange workload) {
+static LogicalResult
+verifyDispatchWorkload(Operation *op, IREE::Stream::ExecutableExportOp exportOp,
+                       ValueRange workload) {
   // If the target has a workgroup count computation function we can verify that
   // the workload here matches what is expected.
   if (!exportOp.getWorkgroupCount().empty()) {
@@ -143,8 +143,10 @@
       availableResources.insert(result);
     }
     for (auto operand : op.getOperands()) {
-      if (!operand) continue;
-      if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType())) continue;
+      if (!operand)
+        continue;
+      if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType()))
+        continue;
       if (!availableResources.contains(operand)) {
         return op.emitOpError() << "used resource not listed in explicit "
                                    "captures (or produced internally)";
@@ -192,7 +194,8 @@
   DenseSet<Value> processedValues;
   SmallVector<Value> worklist;
   auto enqueueValue = [&](Value value) {
-    if (processedValues.contains(value)) return;
+    if (processedValues.contains(value))
+      return;
     processedValues.insert(value);
     worklist.push_back(value);
   };
@@ -232,7 +235,8 @@
       if (auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(user)) {
         auto tiedIndices = tiedOp.getTiedResultOperandIndices();
         for (int64_t tiedIndex : tiedIndices) {
-          if (tiedIndex == IREE::Util::TiedOpInterface::kUntiedIndex) continue;
+          if (tiedIndex == IREE::Util::TiedOpInterface::kUntiedIndex)
+            continue;
           auto operand = user->getOperand(tiedIndex);
           if (operand == value) {
             // Tied operand.
@@ -255,7 +259,8 @@
                                      ArrayRef<unsigned> excludedResultIndices) {
   for (auto &block : region.getBlocks()) {
     auto yieldOp = dyn_cast<IREE::Stream::YieldOp>(block.getTerminator());
-    if (!yieldOp) continue;
+    if (!yieldOp)
+      continue;
     llvm::SmallVector<Value> newOperands;
     for (auto i : llvm::reverse(excludedResultIndices)) {
       yieldOp.getResourceOperandsMutable().erase(i);
@@ -351,10 +356,12 @@
   p << ")";
   if (!resultTypes.empty()) {
     p << " -> ";
-    if (resultTypes.size() != 1) p << "(";
+    if (resultTypes.size() != 1)
+      p << "(";
     printShapedResultList(p, op, operands, operandTypes, operandSizes,
                           resultTypes, resultSizes, tiedOperands);
-    if (resultTypes.size() != 1) p << ")";
+    if (resultTypes.size() != 1)
+      p << ")";
   }
   p << " ";
   p.printRegion(body, /*printEntryBlockArgs=*/false,
@@ -447,7 +454,8 @@
   auto indexType = parser.getBuilder().getIndexType();
   SmallVector<Attribute> lifetimeRangeValues;
   do {
-    if (failed(parser.parseOptionalLSquare())) break;
+    if (failed(parser.parseOptionalLSquare()))
+      break;
     IntegerAttr lifetimeStart;
     IntegerAttr lifetimeEnd;
     OpAsmParser::UnresolvedOperand dynamicSliceSize;
@@ -471,7 +479,8 @@
                                  ArrayAttr lifetimeIntervals,
                                  ValueRange dynamicSliceSizes,
                                  TypeRange packedOffsetTypes) {
-  if (packedOffsetTypes.empty()) return;
+  if (packedOffsetTypes.empty())
+    return;
   for (unsigned i = 0; i < packedOffsetTypes.size(); ++i) {
     auto lifetimeStart = lifetimeIntervals[i * 2];
     auto lifetimeEnd = lifetimeIntervals[i * 2 + 1];
@@ -483,7 +492,8 @@
     p.printAttributeWithoutType(lifetimeEnd);
     p << "] = ";
     p.printOperand(sliceSize);
-    if (i < packedOffsetTypes.size() - 1) p << ",";
+    if (i < packedOffsetTypes.size() - 1)
+      p << ",";
   }
   p.printNewline();
 }
@@ -521,14 +531,16 @@
 static void printConstantValueList(OpAsmPrinter &p, Operation *op,
                                    TypeRange resultTypes,
                                    ValueRange resultSizes, ArrayAttr values) {
-  if (resultTypes.empty()) return;
+  if (resultTypes.empty())
+    return;
   for (unsigned i = 0; i < resultTypes.size(); ++i) {
     p.printNewline();
     p << "  ";
     printSizeAwareType(p, op, resultTypes[i], resultSizes[i]);
     p << " = ";
     p.printAttribute(values[i]);
-    if (i < resultTypes.size() - 1) p << ",";
+    if (i < resultTypes.size() - 1)
+      p << ",";
   }
 }
 
@@ -582,11 +594,13 @@
 
 static void printWorkgroupCountRegion(OpAsmPrinter &p, Operation *op,
                                       Region &body) {
-  if (body.empty()) return;
+  if (body.empty())
+    return;
   p << "workgroups(";
   auto args = body.getArguments();
   for (unsigned i = 0; i < args.size(); ++i) {
-    if (i > 0) p << ", ";
+    if (i > 0)
+      p << ", ";
     p.printRegionArgument(args[i]);
   }
   p << ")";
@@ -743,13 +757,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
 
-::std::optional<unsigned> ResourceSubviewOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+ResourceSubviewOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> ResourceSubviewOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 // static
@@ -795,13 +809,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
 
-::std::optional<unsigned> TensorImportOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+TensorImportOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> TensorImportOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 //===----------------------------------------------------------------------===//
@@ -822,13 +836,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
 
-::std::optional<unsigned> TensorExportOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+TensorExportOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> TensorExportOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 //===----------------------------------------------------------------------===//
@@ -965,13 +979,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> TensorUpdateOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+TensorUpdateOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> TensorUpdateOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 //===----------------------------------------------------------------------===//
@@ -992,13 +1006,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> TensorFillOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+TensorFillOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> TensorFillOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 //===----------------------------------------------------------------------===//
@@ -1041,13 +1055,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> TensorStoreOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+TensorStoreOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> TensorStoreOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 //===----------------------------------------------------------------------===//
@@ -1162,13 +1176,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> AsyncFillOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+AsyncFillOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> AsyncFillOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void AsyncFillOp::getAsyncAccessRanges(
@@ -1196,13 +1210,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> AsyncUpdateOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+AsyncUpdateOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> AsyncUpdateOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void AsyncUpdateOp::getAsyncAccessRanges(
@@ -1239,13 +1253,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> AsyncCopyOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+AsyncCopyOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> AsyncCopyOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void AsyncCopyOp::getAsyncAccessRanges(
@@ -1265,18 +1279,18 @@
 static const char *getCollectiveParamKeyword(Attribute opAttr) {
   auto attr = llvm::cast<IREE::Stream::CollectiveAttr>(opAttr);
   switch (attr.getKind()) {
-    case IREE::Stream::CollectiveKind::Broadcast:
-      return "source";
-    case IREE::Stream::CollectiveKind::Reduce:
-      return "target";
-    case IREE::Stream::CollectiveKind::Send:
-      return "target";
-    case IREE::Stream::CollectiveKind::Recv:
-      return "source";
-    case IREE::Stream::CollectiveKind::SendRecv:
-      return "source_target_pair";
-    default:
-      return nullptr;
+  case IREE::Stream::CollectiveKind::Broadcast:
+    return "source";
+  case IREE::Stream::CollectiveKind::Reduce:
+    return "target";
+  case IREE::Stream::CollectiveKind::Send:
+    return "target";
+  case IREE::Stream::CollectiveKind::Recv:
+    return "source";
+  case IREE::Stream::CollectiveKind::SendRecv:
+    return "source_target_pair";
+  default:
+    return nullptr;
   }
 }
 
@@ -1284,7 +1298,8 @@
     OpAsmParser &parser, Attribute opAttr,
     std::optional<OpAsmParser::UnresolvedOperand> &optionalParamValue) {
   const char *keyword = getCollectiveParamKeyword(opAttr);
-  if (!keyword) return success();  // optional
+  if (!keyword)
+    return success(); // optional
   OpAsmParser::UnresolvedOperand paramValue;
   if (failed(parser.parseKeyword(keyword)) || failed(parser.parseLParen()) ||
       failed(parser.parseOperand(paramValue)) || failed(parser.parseRParen())) {
@@ -1336,13 +1351,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> AsyncCollectiveOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+AsyncCollectiveOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> AsyncCollectiveOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 void AsyncCollectiveOp::getAsyncAccessRanges(
@@ -1404,13 +1419,13 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getTarget());
 }
 
-::std::optional<unsigned> AsyncStoreOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // target
+::std::optional<unsigned>
+AsyncStoreOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // target
 }
 
 SmallVector<int64_t> AsyncStoreOp::getTiedResultOperandIndices() {
-  return {0};  // target
+  return {0}; // target
 }
 
 //===----------------------------------------------------------------------===//
@@ -1423,13 +1438,16 @@
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resourceOffsets,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resourceEnds,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resourceLengths) {
-  if (failed(parser.parseLParen())) return failure();
+  if (failed(parser.parseLParen()))
+    return failure();
   // Handle the case of no operands specially.
-  if (succeeded(parser.parseOptionalRParen())) return success();
+  if (succeeded(parser.parseOptionalRParen()))
+    return success();
   do {
     // All entries at least have an %operand.
     resourceOperands.emplace_back();
-    if (failed(parser.parseOperand(resourceOperands.back()))) return failure();
+    if (failed(parser.parseOperand(resourceOperands.back())))
+      return failure();
     // Resources have a range.
     if (succeeded(parser.parseOptionalLSquare())) {
       resourceOffsets.emplace_back();
@@ -1445,7 +1463,8 @@
       }
     }
   } while (succeeded(parser.parseOptionalComma()));
-  if (failed(parser.parseRParen())) return failure();
+  if (failed(parser.parseRParen()))
+    return failure();
   return success();
 }
 
@@ -1499,8 +1518,8 @@
   return success();
 }
 
-LogicalResult AsyncDispatchOp::verifySymbolUses(
-    SymbolTableCollection &symbolTable) {
+LogicalResult
+AsyncDispatchOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   Operation *op = getOperation();
   auto exportOp =
       symbolTable.lookupNearestSymbolFrom<IREE::Stream::ExecutableExportOp>(
@@ -1526,7 +1545,7 @@
 }
 
 std::pair<unsigned, unsigned> AsyncDispatchOp::getTiedOperandsIndexAndLength() {
-  return getODSOperandIndexAndLength(1);  // $operands
+  return getODSOperandIndexAndLength(1); // $operands
 }
 
 void AsyncDispatchOp::getAsyncAccessRanges(
@@ -1534,7 +1553,8 @@
   unsigned rangeIndex = 0;
   unsigned tiedOperandBase = getTiedOperandsIndexAndLength().first;
   for (auto [operandIndex, operand] : llvm::enumerate(getResourceOperands())) {
-    if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType())) continue;
+    if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType()))
+      continue;
     ResourceAccessBitfield access = ResourceAccessBitfield::Read;
     auto tiedResults = getOperandTiedResults(tiedOperandBase + operandIndex);
     if (!tiedResults.empty()) {
@@ -1602,10 +1622,12 @@
 
 bool AsyncFuncOp::isResultTied(int resultIndex) {
   auto tiedOperandsAttr = getTiedOperandsAttr();
-  if (!tiedOperandsAttr) return false;
+  if (!tiedOperandsAttr)
+    return false;
   auto indexAttr = llvm::dyn_cast_if_present<IntegerAttr>(
       tiedOperandsAttr.getValue()[resultIndex]);
-  if (!indexAttr) return false;
+  if (!indexAttr)
+    return false;
   return indexAttr.getInt() != IREE::Util::TiedOpInterface::kUntiedIndex;
 }
 
@@ -1655,8 +1677,8 @@
   return success();
 }
 
-LogicalResult AsyncCallOp::verifySymbolUses(
-    SymbolTableCollection &symbolTable) {
+LogicalResult
+AsyncCallOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   Operation *op = getOperation();
   auto calleeOp =
       symbolTable.lookupNearestSymbolFrom<IREE::Stream::AsyncFuncOp>(
@@ -1674,7 +1696,8 @@
            << expectedType << " but callee is " << calleeType;
   }
   auto typesCompatible = [](Type actual, Type expected) {
-    if (actual == expected) return true;
+    if (actual == expected)
+      return true;
     auto calleeResource = llvm::dyn_cast<IREE::Stream::ResourceType>(actual);
     auto expectedResource =
         llvm::dyn_cast<IREE::Stream::ResourceType>(expected);
@@ -1713,7 +1736,7 @@
 }
 
 std::pair<unsigned, unsigned> AsyncCallOp::getTiedOperandsIndexAndLength() {
-  return getODSOperandIndexAndLength(0);  // $operands
+  return getODSOperandIndexAndLength(0); // $operands
 }
 
 void AsyncCallOp::getAsyncAccessRanges(
@@ -1721,7 +1744,8 @@
   unsigned rangeIndex = 0;
   unsigned tiedOperandBase = getTiedOperandsIndexAndLength().first;
   for (auto [operandIndex, operand] : llvm::enumerate(getResourceOperands())) {
-    if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType())) continue;
+    if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType()))
+      continue;
     ResourceAccessBitfield access = ResourceAccessBitfield::Read;
     auto tiedResults = getOperandTiedResults(tiedOperandBase + operandIndex);
     if (!tiedResults.empty()) {
@@ -1763,7 +1787,8 @@
   state.addOperands(operands);
   state.addOperands(operandSizes);
   state.addOperands(resultSizes);
-  if (awaitTimepoint) state.addOperands(awaitTimepoint);
+  if (awaitTimepoint)
+    state.addOperands(awaitTimepoint);
   state.addAttributes(attributes);
   state.attributes.erase(IREE::Util::TiedOpInterface::getStorageAttrName());
   state.addAttribute(IREE::Util::TiedOpInterface::getStorageAttrName(),
@@ -1798,8 +1823,8 @@
   return {0, getResults().size()};
 }
 
-OperandRange AsyncExecuteOp::getSuccessorEntryOperands(
-    std::optional<unsigned> index) {
+OperandRange
+AsyncExecuteOp::getSuccessorEntryOperands(std::optional<unsigned> index) {
   assert(index && index.value() == 0 && "invalid region index");
   return getResourceOperands();
 }
@@ -1820,13 +1845,15 @@
 // Gets the async access ranges for the generic stream execution op capturing
 // resources.
 template <typename Op>
-static void getExecutionAsyncAccessRanges(
-    Op op, SmallVectorImpl<AsyncAccessRange> &ranges) {
+static void
+getExecutionAsyncAccessRanges(Op op,
+                              SmallVectorImpl<AsyncAccessRange> &ranges) {
   unsigned tiedOperandBase = op.getTiedOperandsIndexAndLength().first;
   for (auto [i, operand, operandSize] : llvm::zip_equal(
            llvm::seq<unsigned>(0, op.getResourceOperands().size()),
            op.getResourceOperands(), op.getResourceOperandSizes())) {
-    if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType())) continue;
+    if (!llvm::isa<IREE::Stream::ResourceType>(operand.getType()))
+      continue;
     ResourceAccessBitfield access = ResourceAccessBitfield::Read;
     auto tiedResults = op.getOperandTiedResults(tiedOperandBase + i);
     if (!tiedResults.empty()) {
@@ -1864,8 +1891,8 @@
 
 bool AsyncExecuteOp::canClosureContainOp(Operation *op) { return false; }
 
-IREE::Util::ValueAccess AsyncExecuteOp::getOperandAccess(
-    unsigned operandIndex) {
+IREE::Util::ValueAccess
+AsyncExecuteOp::getOperandAccess(unsigned operandIndex) {
   auto arg = getBody().getArgument(operandIndex);
   return computeValueAccess(arg);
 }
@@ -1904,7 +1931,8 @@
 
   auto &block = newBody.front();
   BitVector eraseIndices(block.getNumArguments());
-  for (auto i : excludedOperandIndices) eraseIndices.set(i);
+  for (auto i : excludedOperandIndices)
+    eraseIndices.set(i);
   block.eraseArguments(eraseIndices);
   return newOp;
 }
@@ -1951,8 +1979,8 @@
   return success();
 }
 
-OperandRange AsyncConcurrentOp::getSuccessorEntryOperands(
-    std::optional<unsigned> index) {
+OperandRange
+AsyncConcurrentOp::getSuccessorEntryOperands(std::optional<unsigned> index) {
   assert(index && index.value() == 0 && "invalid region index");
   return getResourceOperands();
 }
@@ -1985,14 +2013,14 @@
 
 bool AsyncConcurrentOp::canClosureContainOp(Operation *op) { return false; }
 
-IREE::Util::ValueAccess AsyncConcurrentOp::getOperandAccess(
-    unsigned operandIndex) {
+IREE::Util::ValueAccess
+AsyncConcurrentOp::getOperandAccess(unsigned operandIndex) {
   auto arg = getBody().getArgument(operandIndex);
   return computeValueAccess(arg);
 }
 
-IREE::Util::ValueAccess AsyncConcurrentOp::getResultAccess(
-    unsigned resultIndex) {
+IREE::Util::ValueAccess
+AsyncConcurrentOp::getResultAccess(unsigned resultIndex) {
   auto yieldOp = cast<YieldOp>(getBody().getBlocks().front().getTerminator());
   return computeValueAccess(yieldOp.getOperand(resultIndex));
 }
@@ -2023,7 +2051,8 @@
   eraseStreamRegionResults(newBody, excludedResultIndices);
   auto &block = newBody.front();
   BitVector eraseIndices(block.getNumArguments());
-  for (auto i : excludedOperandIndices) eraseIndices.set(i);
+  for (auto i : excludedOperandIndices)
+    eraseIndices.set(i);
   block.eraseArguments(eraseIndices);
   return newOp;
 }
@@ -2110,19 +2139,19 @@
       IREE::Stream::ResourceAccessBitfield::None,
   };
   switch (getOp().getKind()) {
-    default:
-      requiredCount = 2;  // send & recv
-      requiredAccess[0] = IREE::Stream::ResourceAccessBitfield::Read;
-      requiredAccess[1] = IREE::Stream::ResourceAccessBitfield::Write;
-      break;
-    case IREE::Stream::CollectiveKind::Send:
-      requiredCount = 1;  // send
-      requiredAccess[0] = IREE::Stream::ResourceAccessBitfield::Read;
-      break;
-    case IREE::Stream::CollectiveKind::Recv:
-      requiredCount = 1;  // recv
-      requiredAccess[0] = IREE::Stream::ResourceAccessBitfield::Write;
-      break;
+  default:
+    requiredCount = 2; // send & recv
+    requiredAccess[0] = IREE::Stream::ResourceAccessBitfield::Read;
+    requiredAccess[1] = IREE::Stream::ResourceAccessBitfield::Write;
+    break;
+  case IREE::Stream::CollectiveKind::Send:
+    requiredCount = 1; // send
+    requiredAccess[0] = IREE::Stream::ResourceAccessBitfield::Read;
+    break;
+  case IREE::Stream::CollectiveKind::Recv:
+    requiredCount = 1; // recv
+    requiredAccess[0] = IREE::Stream::ResourceAccessBitfield::Write;
+    break;
   }
   if (resourceCount != requiredCount) {
     return op->emitOpError()
@@ -2161,8 +2190,8 @@
   return success();
 }
 
-LogicalResult CmdDispatchOp::verifySymbolUses(
-    SymbolTableCollection &symbolTable) {
+LogicalResult
+CmdDispatchOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   Operation *op = getOperation();
   auto entryPointRefs = getEntryPointRefs();
   if (entryPointRefs.empty()) {
@@ -2199,13 +2228,16 @@
   if (succeeded(parser.parseOptionalLBrace())) {
     do {
       SymbolRefAttr entryPointAttr;
-      if (failed(parser.parseAttribute(entryPointAttr))) return failure();
+      if (failed(parser.parseAttribute(entryPointAttr)))
+        return failure();
       entryPointAttrs.push_back(entryPointAttr);
     } while (succeeded(parser.parseOptionalComma()));
-    if (failed(parser.parseRBrace())) return failure();
+    if (failed(parser.parseRBrace()))
+      return failure();
   } else {
     SymbolRefAttr entryPointAttr;
-    if (failed(parser.parseAttribute(entryPointAttr))) return failure();
+    if (failed(parser.parseAttribute(entryPointAttr)))
+      return failure();
     entryPointAttrs.push_back(entryPointAttr);
   }
   entryPointAttrsArray = parser.getBuilder().getArrayAttr(entryPointAttrs);
@@ -2269,13 +2301,11 @@
   return success();
 }
 
-static void printDispatchResources(OpAsmPrinter &p, Operation *op,
-                                   ValueRange resources,
-                                   TypeRange resourceTypes,
-                                   ValueRange resourceSizes,
-                                   ValueRange resourceOffsets,
-                                   ValueRange resourceLengths,
-                                   ArrayAttr resourceAccesses) {
+static void
+printDispatchResources(OpAsmPrinter &p, Operation *op, ValueRange resources,
+                       TypeRange resourceTypes, ValueRange resourceSizes,
+                       ValueRange resourceOffsets, ValueRange resourceLengths,
+                       ArrayAttr resourceAccesses) {
   for (size_t i = 0; i < resources.size(); ++i) {
     auto resource = resources[i];
     auto resourceType = resourceTypes[i];
@@ -2307,18 +2337,20 @@
     p.printOperand(resourceLength);
     p << "] : ";
     printSizeAwareType(p, op, resourceType, resourceSize);
-    if (i < resources.size() - 1) p << ",";
+    if (i < resources.size() - 1)
+      p << ",";
   }
 }
 
 // This is sloppy because the function has interleaved bindings and operands;
 // if we had our own op we could just reuse the map we have for operands.
 // static
-SmallVector<unsigned> CmdDispatchOp::makeOperandToArgMap(
-    mlir::func::FuncOp funcOp) {
-  unsigned operandCount = llvm::count_if(
-      funcOp.getArgumentTypes(),
-      [](Type type) { return !llvm::isa<IREE::Stream::BindingType>(type); });
+SmallVector<unsigned>
+CmdDispatchOp::makeOperandToArgMap(mlir::func::FuncOp funcOp) {
+  unsigned operandCount =
+      llvm::count_if(funcOp.getArgumentTypes(), [](Type type) {
+        return !llvm::isa<IREE::Stream::BindingType>(type);
+      });
   SmallVector<unsigned> map(operandCount);
   unsigned operandIdx = 0;
   for (auto it : llvm::enumerate(funcOp.getArgumentTypes())) {
@@ -2332,11 +2364,12 @@
 }
 
 // static
-SmallVector<unsigned> CmdDispatchOp::makeResourceToArgMap(
-    mlir::func::FuncOp funcOp) {
-  unsigned operandCount = llvm::count_if(
-      funcOp.getArgumentTypes(),
-      [](Type type) { return llvm::isa<IREE::Stream::BindingType>(type); });
+SmallVector<unsigned>
+CmdDispatchOp::makeResourceToArgMap(mlir::func::FuncOp funcOp) {
+  unsigned operandCount =
+      llvm::count_if(funcOp.getArgumentTypes(), [](Type type) {
+        return llvm::isa<IREE::Stream::BindingType>(type);
+      });
   SmallVector<unsigned> map(operandCount);
   unsigned operandIdx = 0;
   for (auto it : llvm::enumerate(funcOp.getArgumentTypes())) {
@@ -2395,7 +2428,8 @@
   SmallVector<Attribute> argAttrsVec;
   do {
     OpAsmParser::UnresolvedOperand arg;
-    if (failed(parser.parseOperand(arg))) return failure();
+    if (failed(parser.parseOperand(arg)))
+      return failure();
     bool hasOffsetLength = false;
     OpAsmParser::UnresolvedOperand offsetArg;
     OpAsmParser::UnresolvedOperand lengthArg;
@@ -2433,9 +2467,10 @@
   return success();
 }
 
-static ParseResult parseDispatchFunctionResultList(
-    OpAsmParser &parser, SmallVectorImpl<Type> &resultTypes,
-    ArrayAttr &resultAttrs) {
+static ParseResult
+parseDispatchFunctionResultList(OpAsmParser &parser,
+                                SmallVectorImpl<Type> &resultTypes,
+                                ArrayAttr &resultAttrs) {
   SmallVector<Attribute> resultAttrsVec;
   SmallVector<int64_t> tiedOperandIndices;
   do {
@@ -2469,7 +2504,8 @@
         p.printOptionalAttrDict(attrs.getValue());
       }
     }
-    if (i < resultTypes.size() - 1) p << ", ";
+    if (i < resultTypes.size() - 1)
+      p << ", ";
   }
 }
 
@@ -2480,7 +2516,8 @@
   SmallVector<OpAsmParser::UnresolvedOperand> args;
   SmallVector<Type> argTypes;
   SmallVector<Type> resultTypes;
-  if (failed(parser.parseLParen())) return failure();
+  if (failed(parser.parseLParen()))
+    return failure();
   if (failed(parser.parseOptionalRParen())) {
     if (failed(parseDispatchFunctionArgumentList(parser, args, argTypes,
                                                  argAttrs)) ||
@@ -2513,7 +2550,8 @@
   auto functionType = llvm::cast<FunctionType>(functionTypeAttr.getValue());
   p << "(";
   for (size_t argIndex = 0; argIndex < functionType.getNumInputs();) {
-    if (argIndex) p << ", ";
+    if (argIndex)
+      p << ", ";
     int baseArgIndex = argIndex;
     auto type = functionType.getInput(baseArgIndex);
     p << "%arg";
@@ -2521,9 +2559,9 @@
     if (llvm::isa<IREE::Stream::ResourceType>(type)) {
       p << "[%arg" << (baseArgIndex + 1) << " for %arg" << (baseArgIndex + 2)
         << "]";
-      argIndex += 3;  // <resource, offset, length>
+      argIndex += 3; // <resource, offset, length>
     } else {
-      argIndex += 1;  // unmodified arg
+      argIndex += 1; // unmodified arg
     }
     p << ": ";
     p.printType(type);
@@ -2539,9 +2577,11 @@
   auto resultTypes = functionType.getResults();
   if (!resultTypes.empty()) {
     p << " -> ";
-    if (resultTypes.size() != 1) p << "(";
+    if (resultTypes.size() != 1)
+      p << "(";
     printDispatchFunctionResultList(p, op, resultTypes, resultAttrs);
-    if (resultTypes.size() != 1) p << ")";
+    if (resultTypes.size() != 1)
+      p << ")";
   }
 }
 
@@ -2603,9 +2643,11 @@
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resourceOffsets,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resourceLengths,
     ArrayAttr &resourceAccesses) {
-  if (failed(parser.parseLParen())) return failure();
+  if (failed(parser.parseLParen()))
+    return failure();
   // Handle the case of no operands specially.
-  if (succeeded(parser.parseOptionalRParen())) return success();
+  if (succeeded(parser.parseOptionalRParen()))
+    return success();
   SmallVector<Attribute> accessAttrs;
   do {
     StringRef accessStr;
@@ -2644,7 +2686,8 @@
     }
   } while (succeeded(parser.parseOptionalComma()));
   resourceAccesses = parser.getBuilder().getArrayAttr(accessAttrs);
-  if (failed(parser.parseRParen())) return failure();
+  if (failed(parser.parseRParen()))
+    return failure();
   return success();
 }
 
@@ -2690,7 +2733,8 @@
       // Primitive/custom type.
       p.printOperand(operand);
     }
-    if (i < resourceOperands.size() - 1) p << ", ";
+    if (i < resourceOperands.size() - 1)
+      p << ", ";
   }
   p << ")";
 }
@@ -2706,7 +2750,8 @@
   state.addTypes(IREE::Stream::TimepointType::get(builder.getContext()));
   state.addOperands(operands);
   state.addOperands(operandSizes);
-  if (awaitTimepoint) state.addOperands(awaitTimepoint);
+  if (awaitTimepoint)
+    state.addOperands(awaitTimepoint);
   state.addAttributes(attributes);
   state.attributes.erase(getOperandSegmentSizeAttr());
   state.addAttribute(getOperandSegmentSizeAttr(),
@@ -2740,13 +2785,14 @@
     return failure();
   }
   for (auto &nestedOp : op.getBody().front()) {
-    if (failed(verifyCmdOp(&nestedOp))) return failure();
+    if (failed(verifyCmdOp(&nestedOp)))
+      return failure();
   }
   return success();
 }
 
-OperandRange CmdExecuteOp::getSuccessorEntryOperands(
-    std::optional<unsigned> index) {
+OperandRange
+CmdExecuteOp::getSuccessorEntryOperands(std::optional<unsigned> index) {
   assert(index && index.value() == 0 && "invalid region index");
   return getResourceOperands();
 }
@@ -2802,7 +2848,8 @@
   newBody.takeBody(getClosureBodyRegion());
   auto &block = newBody.front();
   BitVector eraseIndices(block.getNumArguments());
-  for (auto i : excludedOperandIndices) eraseIndices.set(i);
+  for (auto i : excludedOperandIndices)
+    eraseIndices.set(i);
   block.eraseArguments(eraseIndices);
   return newOp;
 }
@@ -2814,7 +2861,8 @@
 LogicalResult CmdSerialOp::verify() {
   CmdSerialOp op = *this;
   for (auto &nestedOp : op.getBody().front()) {
-    if (failed(verifyCmdOp(&nestedOp))) return failure();
+    if (failed(verifyCmdOp(&nestedOp)))
+      return failure();
   }
   return success();
 }
@@ -2839,7 +2887,8 @@
 LogicalResult CmdConcurrentOp::verify() {
   CmdConcurrentOp op = *this;
   for (auto &nestedOp : op.getBody().front()) {
-    if (failed(verifyCmdOp(&nestedOp))) return failure();
+    if (failed(verifyCmdOp(&nestedOp)))
+      return failure();
   }
   return success();
 }
@@ -2884,8 +2933,8 @@
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getResource());
 }
 
-::std::optional<unsigned> TimepointBarrierOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
+::std::optional<unsigned>
+TimepointBarrierOp::getTiedResultOperandIndex(unsigned resultIndex) {
   return {0};
 }
 
@@ -2917,7 +2966,7 @@
                      builder.getDenseI32ArrayAttr({
                          static_cast<int32_t>(operands.size()),
                          static_cast<int32_t>(operandSizes.size()),
-                         static_cast<int32_t>(1),  // timepoint
+                         static_cast<int32_t>(1), // timepoint
                      }));
 }
 
@@ -2932,8 +2981,8 @@
   return success();
 }
 
-::std::optional<unsigned> TimepointAwaitOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
+::std::optional<unsigned>
+TimepointAwaitOp::getTiedResultOperandIndex(unsigned resultIndex) {
   return {resultIndex};
 }
 
@@ -3023,7 +3072,8 @@
 ::mlir::func::FuncOp ExecutableExportOp::lookupFunctionRef() {
   auto executableOp =
       this->getOperation()->getParentOfType<IREE::Stream::ExecutableOp>();
-  if (!executableOp) return {};
+  if (!executableOp)
+    return {};
   return executableOp.getInnerModule().lookupSymbol<::mlir::func::FuncOp>(
       getFunctionRef());
 }
@@ -3047,8 +3097,8 @@
 // stream.return
 //===----------------------------------------------------------------------===//
 
-MutableOperandRange ReturnOp::getMutableSuccessorOperands(
-    std::optional<unsigned> index) {
+MutableOperandRange
+ReturnOp::getMutableSuccessorOperands(std::optional<unsigned> index) {
   return getOperandsMutable();
 }
 
@@ -3056,19 +3106,19 @@
 // stream.yield
 //===----------------------------------------------------------------------===//
 
-MutableOperandRange YieldOp::getMutableSuccessorOperands(
-    std::optional<unsigned> index) {
+MutableOperandRange
+YieldOp::getMutableSuccessorOperands(std::optional<unsigned> index) {
   return getResourceOperandsMutable();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // TableGen definitions (intentionally last)
 //===----------------------------------------------------------------------===//
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/Stream/IR/StreamOps.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamOps.cpp.inc" // IWYU pragma: keep
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.h b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.h
index 8b17a0f..da8f900 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.h
@@ -28,6 +28,6 @@
 #include "mlir/Interfaces/ViewLikeInterface.h"
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/Stream/IR/StreamOps.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Stream/IR/StreamOps.h.inc" // IWYU pragma: export
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_IR_STREAMOPS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_IR_STREAMOPS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTraits.h b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTraits.h
index cc79a6d..45c925d 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTraits.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTraits.h
@@ -16,25 +16,25 @@
 
 template <typename ConcreteType>
 class TensorPhaseOp : public OpTrait::TraitBase<ConcreteType, TensorPhaseOp> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) { return success(); }
 };
 
 template <typename ConcreteType>
 class AsyncPhaseOp : public OpTrait::TraitBase<ConcreteType, AsyncPhaseOp> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) { return success(); }
 };
 
 template <typename ConcreteType>
 class CmdPhaseOp : public OpTrait::TraitBase<ConcreteType, CmdPhaseOp> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) { return success(); }
 };
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace OpTrait
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace OpTrait
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_IR_STREAMTRAITS_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_IR_STREAMTRAITS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp
index 8a922ca..2c25108 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp
@@ -14,10 +14,10 @@
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Stream/IR/StreamAttrs.cpp.inc"  // IWYU pragma: keep
-#include "iree/compiler/Dialect/Stream/IR/StreamEnums.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamAttrs.cpp.inc" // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamEnums.cpp.inc" // IWYU pragma: keep
 #define GET_TYPEDEF_CLASSES
-#include "iree/compiler/Dialect/Stream/IR/StreamTypes.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamTypes.cpp.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -75,7 +75,8 @@
 
 // static
 Attribute ResourceConfigAttr::parse(AsmParser &p, Type type) {
-  if (failed(p.parseLess()) || failed(p.parseLBrace())) return {};
+  if (failed(p.parseLess()) || failed(p.parseLBrace()))
+    return {};
 
   int64_t maxAllocationSize = 0;
   int64_t minBufferOffsetAlignment = 0;
@@ -105,7 +106,8 @@
     }
     (void)p.parseOptionalComma();
   }
-  if (failed(p.parseGreater())) return {};
+  if (failed(p.parseGreater()))
+    return {};
 
   return ResourceConfigAttr::get(
       p.getContext(), maxAllocationSize, minBufferOffsetAlignment,
@@ -126,10 +128,13 @@
 }
 
 // static
-ResourceConfigAttr ResourceConfigAttr::intersectBufferConstraints(
-    ResourceConfigAttr lhs, ResourceConfigAttr rhs) {
-  if (!lhs) return rhs;
-  if (!rhs) return lhs;
+ResourceConfigAttr
+ResourceConfigAttr::intersectBufferConstraints(ResourceConfigAttr lhs,
+                                               ResourceConfigAttr rhs) {
+  if (!lhs)
+    return rhs;
+  if (!rhs)
+    return lhs;
   Builder b(lhs.getContext());
   return ResourceConfigAttr::get(
       b.getContext(),
@@ -144,8 +149,8 @@
 }
 
 // static
-ResourceConfigAttr ResourceConfigAttr::getDefaultHostConstraints(
-    MLIRContext *context) {
+ResourceConfigAttr
+ResourceConfigAttr::getDefaultHostConstraints(MLIRContext *context) {
   // Picked to represent what we kind of want on CPU today.
   // We should be able to get rid of queries for this from real programs and
   // only use this during testing by ensuring affinities are always assigned.
@@ -162,13 +167,15 @@
   while (op) {
     // Use an override if specified.
     auto attr = op->getAttrOfType<ResourceConfigAttr>(attrId);
-    if (attr) return attr;
+    if (attr)
+      return attr;
     // See if the affinity specified provides a resource configuration.
     if (auto affinityOp = llvm::dyn_cast<AffinityOpInterface>(op)) {
       auto affinityAttr = affinityOp.getAffinity();
       if (affinityAttr) {
         auto attr = affinityAttr.getResourceConfigAttr();
-        if (attr) return attr;
+        if (attr)
+          return attr;
       }
     }
     op = op->getParentOp();
@@ -183,11 +190,13 @@
 
 Attribute TimepointAttr::parse(AsmParser &p, Type type) {
   StringRef timeStr;
-  if (failed(p.parseLess())) return {};
+  if (failed(p.parseLess()))
+    return {};
   if (failed(p.parseKeyword(&timeStr))) {
     return {};
   }
-  if (failed(p.parseGreater())) return {};
+  if (failed(p.parseGreater()))
+    return {};
   if (timeStr != "immediate") {
     p.emitError(p.getCurrentLocation(),
                 "only immediate timepoint attrs are supported");
@@ -211,19 +220,22 @@
   while (op) {
     if (auto affinityOp = llvm::dyn_cast<AffinityOpInterface>(op)) {
       auto affinity = affinityOp.getAffinity();
-      if (affinity) return affinity;
+      if (affinity)
+        return affinity;
     }
     auto attr = op->getAttrOfType<AffinityAttr>(attrId);
-    if (attr) return attr;
+    if (attr)
+      return attr;
     op = op->getParentOp();
   }
-  return {};  // No affinity found; let caller decide what to do.
+  return {}; // No affinity found; let caller decide what to do.
 }
 
 // static
 bool AffinityAttr::areCompatible(AffinityAttr desiredAffinity,
                                  AffinityAttr requiredAffinity) {
-  if (desiredAffinity == requiredAffinity) return true;
+  if (desiredAffinity == requiredAffinity)
+    return true;
   if ((desiredAffinity && !requiredAffinity) ||
       (requiredAffinity && !desiredAffinity)) {
     return true;
@@ -234,8 +246,10 @@
 
 // static
 bool AffinityAttr::canExecuteTogether(AffinityAttr lhs, AffinityAttr rhs) {
-  if (lhs == rhs) return true;
-  if ((lhs && !rhs) || (rhs && !lhs)) return true;
+  if (lhs == rhs)
+    return true;
+  if ((lhs && !rhs) || (rhs && !lhs))
+    return true;
   return lhs.isExecutableWith(rhs);
 }
 
@@ -245,13 +259,15 @@
 
 Attribute PartitioningConfigAttr::parse(AsmParser &p, Type type) {
   std::string favorStr;
-  if (failed(p.parseLess())) return {};
+  if (failed(p.parseLess()))
+    return {};
   if (succeeded(p.parseOptionalStar())) {
     favorStr = "size";
   } else if (failed(p.parseString(&favorStr))) {
     return {};
   }
-  if (failed(p.parseGreater())) return {};
+  if (failed(p.parseGreater()))
+    return {};
   auto favor = symbolizeFavor(favorStr);
   if (!favor.has_value()) {
     p.emitError(p.getNameLoc(), "unknown favor value: ") << favorStr;
@@ -272,7 +288,8 @@
   auto attrId = StringAttr::get(op->getContext(), "stream.partitioning");
   while (op) {
     auto attr = op->getAttrOfType<PartitioningConfigAttr>(attrId);
-    if (attr) return attr;
+    if (attr)
+      return attr;
     op = op->getParentOp();
   }
   // No config found; use defaults.
@@ -312,13 +329,15 @@
 
 Type ResourceType::parse(AsmParser &p) {
   StringRef lifetimeStr;
-  if (failed(p.parseLess())) return {};
+  if (failed(p.parseLess()))
+    return {};
   if (succeeded(p.parseOptionalStar())) {
     lifetimeStr = "*";
   } else if (failed(p.parseKeyword(&lifetimeStr))) {
     return {};
   }
-  if (failed(p.parseGreater())) return {};
+  if (failed(p.parseGreater()))
+    return {};
   auto lifetime = parseLifetime(lifetimeStr);
   if (!lifetime.has_value()) {
     p.emitError(p.getNameLoc(), "unknown lifetime value: ") << lifetimeStr;
@@ -360,9 +379,9 @@
 // Dialect registration
 //===----------------------------------------------------------------------===//
 
-#include "iree/compiler/Dialect/Stream/IR/StreamAttrInterfaces.cpp.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/Stream/IR/StreamOpInterfaces.cpp.inc"  // IWYU pragma: keep
-#include "iree/compiler/Dialect/Stream/IR/StreamTypeInterfaces.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamAttrInterfaces.cpp.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/Stream/IR/StreamOpInterfaces.cpp.inc" // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamTypeInterfaces.cpp.inc" // IWYU pragma: keep
 
 void StreamDialect::registerAttributes() {
   // Register command line flags:
@@ -374,18 +393,18 @@
 
   addAttributes<
 #define GET_ATTRDEF_LIST
-#include "iree/compiler/Dialect/Stream/IR/StreamAttrs.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamAttrs.cpp.inc" // IWYU pragma: keep
       >();
 }
 
 void StreamDialect::registerTypes() {
   addTypes<
 #define GET_TYPEDEF_LIST
-#include "iree/compiler/Dialect/Stream/IR/StreamTypes.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamTypes.cpp.inc" // IWYU pragma: keep
       >();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.h b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.h
index 0c0d239..eae1d47 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamTypes.h
@@ -23,7 +23,7 @@
 #include "mlir/Support/LLVM.h"
 
 // clang-format off: must be included after all LLVM/MLIR headers.
-#include "iree/compiler/Dialect/Stream/IR/StreamEnums.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Stream/IR/StreamEnums.h.inc" // IWYU pragma: export
 // clang-format on
 
 // It's unfortunate this is required.
@@ -35,10 +35,12 @@
   static FailureOr<mlir::iree_compiler::IREE::Stream::CollectiveReductionOp>
   parse(AsmParser &parser) {
     std::string value;
-    if (parser.parseKeywordOrString(&value)) return failure();
+    if (parser.parseKeywordOrString(&value))
+      return failure();
     auto result = mlir::iree_compiler::IREE::Stream::symbolizeEnum<
         mlir::iree_compiler::IREE::Stream::CollectiveReductionOp>(value);
-    if (!result.has_value()) return failure();
+    if (!result.has_value())
+      return failure();
     return result.value();
   }
 };
@@ -53,30 +55,30 @@
   return printer;
 }
 
-}  // namespace mlir
+} // namespace mlir
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Stream/IR/StreamAttrs.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamAttrs.h.inc" // IWYU pragma: keep
 // clang-format on
 
-#include "iree/compiler/Dialect/Stream/IR/StreamAttrInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Stream/IR/StreamAttrInterfaces.h.inc" // IWYU pragma: export
 
 namespace mlir {
 namespace iree_compiler {
 namespace IREE {
 namespace Stream {
 
-#include "iree/compiler/Dialect/Stream/IR/StreamTypeInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Stream/IR/StreamTypeInterfaces.h.inc" // IWYU pragma: export
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_TYPEDEF_CLASSES
-#include "iree/compiler/Dialect/Stream/IR/StreamTypes.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/IR/StreamTypes.h.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -87,16 +89,16 @@
 struct AsyncAccessRange {
   ResourceAccessBitfield access;
   Value resource;
-  Value start;  // may be nullptr to indicate 0
+  Value start; // may be nullptr to indicate 0
   Value end;
   Value length;
 };
 
-#include "iree/compiler/Dialect/Stream/IR/StreamOpInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Stream/IR/StreamOpInterfaces.h.inc" // IWYU pragma: export
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_IR_STREAMTYPES_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_IR_STREAMTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/AnnotateDispatchArguments.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/AnnotateDispatchArguments.cpp
index d23566c..3d9a859 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/AnnotateDispatchArguments.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/AnnotateDispatchArguments.cpp
@@ -41,8 +41,8 @@
 // TODO(benvanik): move to Util/Analysis/ as this would be useful in other
 // passes as well and only depends on util.align and upstream ops.
 
-static std::string getPVSAsStr(
-    const DFX::PotentialConstantIntValuesState &pvs) {
+static std::string
+getPVSAsStr(const DFX::PotentialConstantIntValuesState &pvs) {
   std::string str;
   llvm::raw_string_ostream sstream(str);
   sstream << "pvs: ";
@@ -69,7 +69,7 @@
 class GlobalPVS : public DFX::StateWrapper<
                       DFX::PotentialConstantIntValuesState,
                       DFX::TypedOperationElement<IREE::Util::GlobalOp>> {
- public:
+public:
   using BaseType =
       DFX::StateWrapper<DFX::PotentialConstantIntValuesState,
                         DFX::TypedOperationElement<IREE::Util::GlobalOp>>;
@@ -90,7 +90,7 @@
     return getPVSAsStr(getState());
   }
 
- private:
+private:
   explicit GlobalPVS(const Position &pos) : BaseType(pos) {}
 
   void initializeOperation(IREE::Util::GlobalOp globalOp,
@@ -104,7 +104,7 @@
 
 class ValuePVS : public DFX::StateWrapper<DFX::PotentialConstantIntValuesState,
                                           DFX::ValueElement> {
- public:
+public:
   using BaseType = DFX::StateWrapper<DFX::PotentialConstantIntValuesState,
                                      DFX::ValueElement>;
 
@@ -123,7 +123,7 @@
     return getPVSAsStr(getState());
   }
 
- private:
+private:
   explicit ValuePVS(const Position &pos) : BaseType(pos) {}
 
   void initializeValue(Value value, DFX::Solver &solver) override {
@@ -216,7 +216,8 @@
   auto *globalInfo = solver.getExplorer().getGlobalInfo(globalOp);
   for (auto use : globalInfo->uses) {
     auto storeOp = dyn_cast<IREE::Util::GlobalStoreOpInterface>(use);
-    if (!storeOp) continue;
+    if (!storeOp)
+      continue;
     auto value = solver.getElementFor<ValuePVS>(
         *this, Position::forValue(storeOp.getStoredGlobalValue()),
         DFX::Resolution::REQUIRED);
@@ -235,7 +236,7 @@
 using AlignmentStateType = DFX::IncIntegerState<uint64_t, kMaximumAlignment, 1>;
 class ValueAlignment
     : public DFX::StateWrapper<AlignmentStateType, DFX::ValueElement> {
- public:
+public:
   using BaseType = DFX::StateWrapper<AlignmentStateType, DFX::ValueElement>;
 
   static ValueAlignment &createForPosition(const Position &pos,
@@ -263,7 +264,7 @@
            std::to_string(getAssumedAlignment().valueOrOne().value());
   }
 
- private:
+private:
   explicit ValueAlignment(const Position &pos) : BaseType(pos) {}
 
   void initializeValue(Value value, DFX::Solver &solver) override {
@@ -274,7 +275,8 @@
   }
 
   static llvm::MaybeAlign computeAlignment(const ValuePVS::SetTy &set) {
-    if (set.empty()) return llvm::MaybeAlign();
+    if (set.empty())
+      return llvm::MaybeAlign();
     llvm::MaybeAlign alignment;
     for (auto value : set) {
       APInt valueDivisor = (value & (~(value - 1)));
@@ -326,7 +328,7 @@
 const char ValueAlignment::ID = 0;
 
 class ArgumentAnalysis {
- public:
+public:
   explicit ArgumentAnalysis(Operation *rootOp)
       : explorer(rootOp, TraversalAction::SHALLOW),
         solver(explorer, allocator) {
@@ -372,10 +374,11 @@
 
   // Returns a list of dispatch sites in arbitrary order to the given
   // |exportOp|.
-  ArrayRef<IREE::Stream::CmdDispatchOp> getDispatchSites(
-      IREE::Stream::ExecutableExportOp exportOp) {
+  ArrayRef<IREE::Stream::CmdDispatchOp>
+  getDispatchSites(IREE::Stream::ExecutableExportOp exportOp) {
     auto it = entryDispatchMap.find(exportOp);
-    if (it == entryDispatchMap.end()) return {};
+    if (it == entryDispatchMap.end())
+      return {};
     return it->second;
   }
 
@@ -384,7 +387,8 @@
   llvm::MaybeAlign getAlignmentFor(Value value) {
     auto element =
         solver.lookupElementFor<ValueAlignment>(Position::forValue(value));
-    if (!element) return llvm::MaybeAlign();
+    if (!element)
+      return llvm::MaybeAlign();
     return element->getAssumedAlignment();
   }
 
@@ -396,8 +400,9 @@
 
   // Returns the potential constant values across all dispatch sites to
   // |exportOp| for the operand at |operandIdx|.
-  DFX::PotentialConstantIntValuesState getOperandPVS(
-      IREE::Stream::ExecutableExportOp exportOp, unsigned operandIdx) {
+  DFX::PotentialConstantIntValuesState
+  getOperandPVS(IREE::Stream::ExecutableExportOp exportOp,
+                unsigned operandIdx) {
     DFX::PotentialConstantIntValuesState state;
     for (auto dispatchOp : getDispatchSites(exportOp)) {
       auto element = solver.lookupElementFor<ValuePVS>(
@@ -414,13 +419,15 @@
 
   // Returns the minimum alignment across all dispatch sites to |exportOp| for
   // the operand at |operandIdx|.
-  llvm::MaybeAlign getOperandAlignment(
-      IREE::Stream::ExecutableExportOp exportOp, unsigned operandIdx) {
+  llvm::MaybeAlign
+  getOperandAlignment(IREE::Stream::ExecutableExportOp exportOp,
+                      unsigned operandIdx) {
     llvm::MaybeAlign alignment;
     for (auto dispatchOp : getDispatchSites(exportOp)) {
       auto element = solver.lookupElementFor<ValueAlignment>(
           Position::forValue(dispatchOp.getUniformOperands()[operandIdx]));
-      if (!element || !element->isValidState()) return llvm::MaybeAlign();
+      if (!element || !element->isValidState())
+        return llvm::MaybeAlign();
       alignment = commonAlignment(alignment, element->getAssumedAlignment());
     }
     if (alignment.valueOrOne().value() == kMaximumAlignment) {
@@ -431,13 +438,15 @@
 
   // Returns the minimum alignment across all dispatch sites to |exportOp| for
   // the resource offset at |resourceIdx|.
-  llvm::MaybeAlign getResourceOffsetAlignment(
-      IREE::Stream::ExecutableExportOp exportOp, unsigned resourceIdx) {
+  llvm::MaybeAlign
+  getResourceOffsetAlignment(IREE::Stream::ExecutableExportOp exportOp,
+                             unsigned resourceIdx) {
     llvm::MaybeAlign alignment;
     for (auto dispatchOp : getDispatchSites(exportOp)) {
       auto element = solver.lookupElementFor<ValueAlignment>(
           Position::forValue(dispatchOp.getResourceOffsets()[resourceIdx]));
-      if (!element || !element->isValidState()) return llvm::MaybeAlign();
+      if (!element || !element->isValidState())
+        return llvm::MaybeAlign();
       alignment = commonAlignment(alignment, element->getAssumedAlignment());
     }
     if (alignment.valueOrOne().value() == kMaximumAlignment) {
@@ -449,7 +458,7 @@
     return alignment;
   }
 
- private:
+private:
   Explorer explorer;
   llvm::BumpPtrAllocator allocator;
   DFX::Solver solver;
@@ -495,7 +504,8 @@
       llvm::sort(potentialValues, [](Attribute lhs, Attribute rhs) {
         auto lhsInt = llvm::dyn_cast<IntegerAttr>(lhs);
         auto rhsInt = llvm::dyn_cast<IntegerAttr>(rhs);
-        if (!lhsInt || !rhsInt) return false;
+        if (!lhsInt || !rhsInt)
+          return false;
         return lhsInt.getValue().ult(rhsInt.getValue());
       });
       auto potentialValuesAttr = ArrayAttr::get(context, potentialValues);
@@ -531,7 +541,7 @@
 
 class AnnotateDispatchArgumentsPass
     : public AnnotateDispatchArgumentsBase<AnnotateDispatchArgumentsPass> {
- public:
+public:
   AnnotateDispatchArgumentsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -556,14 +566,14 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createAnnotateDispatchArgumentsPass() {
   return std::make_unique<AnnotateDispatchArgumentsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp
index 549d365..f3b56a1 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp
@@ -115,14 +115,16 @@
 // Returns true if |op| has tensor I/O that is not yet imported/exported using
 // the stream ops that capture encodings and shapes.
 static bool doesOperationNeedWrapping(Operation *op) {
-  return llvm::any_of(
-             op->getOperands(),
-             [&](Value operand) {
-               if (!llvm::isa<TensorType>(operand.getType())) return false;
-               return !isa_and_nonnull<TensorExportOp>(operand.getDefiningOp());
-             }) ||
+  return llvm::any_of(op->getOperands(),
+                      [&](Value operand) {
+                        if (!llvm::isa<TensorType>(operand.getType()))
+                          return false;
+                        return !isa_and_nonnull<TensorExportOp>(
+                            operand.getDefiningOp());
+                      }) ||
          llvm::any_of(op->getResults(), [&](Value result) {
-           if (!llvm::isa<TensorType>(result.getType())) return false;
+           if (!llvm::isa<TensorType>(result.getType()))
+             return false;
            return !llvm::all_of(result.getUsers(), [&](Operation *user) {
              return isa<TensorImportOp>(user);
            });
@@ -134,10 +136,11 @@
 struct GenericResourcePattern : public ConversionPattern {
   GenericResourcePattern(MLIRContext *context, TypeConverter &converter)
       : ConversionPattern(converter, MatchAnyOpTypeTag(), 0, context) {}
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!doesOperationNeedWrapping(op)) return failure();
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!doesOperationNeedWrapping(op))
+      return failure();
 
     // Export resources into tensor operands for the op to consume.
     SmallVector<Value> newOperands;
@@ -164,7 +167,8 @@
     rewriter.setInsertionPointAfter(op);
     for (auto result : op->getResults()) {
       auto tensorType = llvm::dyn_cast<TensorType>(result.getType());
-      if (!tensorType) continue;
+      if (!tensorType)
+        continue;
 
       auto dynamicDims =
           IREE::Util::buildDynamicDimsForValue(op->getLoc(), result, rewriter);
@@ -180,7 +184,7 @@
 };
 
 class ConvertToStreamPass : public ConvertToStreamBase<ConvertToStreamPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect>();
     registry.insert<mlir::arith::ArithDialect>();
@@ -265,13 +269,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createConvertToStreamPass() {
   return std::make_unique<ConvertToStreamPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
index 6b66eea..b493916 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
@@ -128,17 +128,18 @@
     for (auto [name, globalOp] : usageInfo.resourceGlobalOps) {
       auto globalType =
           llvm::dyn_cast<IREE::Stream::ResourceType>(globalOp.getType());
-      if (!globalType) continue;
+      if (!globalType)
+        continue;
       // TODO(benvanik): analyze size in UsageInfo where possible.
       switch (globalType.getLifetime()) {
-        case IREE::Stream::Lifetime::Constant:
-          ++constantCount;
-          break;
-        case IREE::Stream::Lifetime::Variable:
-          ++variableCount;
-          break;
-        default:
-          continue;
+      case IREE::Stream::Lifetime::Constant:
+        ++constantCount;
+        break;
+      case IREE::Stream::Lifetime::Variable:
+        ++variableCount;
+        break;
+      default:
+        continue;
       }
     }
     for (auto constantOp : usageInfo.bufferConstantOps) {
@@ -332,10 +333,10 @@
     const UsageInfo &usageInfo, IREE::Stream::ExecutableOp executableOp,
     IREE::Stream::ExecutableExportOp exportOp, llvm::raw_fd_ostream &os) {
   auto funcOp = exportOp.lookupFunctionRef();
-  prettyPrintItemHeader(
-      llvm::formatv("stream.executable.export @{0}::@{1}",
-                    executableOp.getName(), exportOp.getName()),
-      os);
+  prettyPrintItemHeader(llvm::formatv("stream.executable.export @{0}::@{1}",
+                                      executableOp.getName(),
+                                      exportOp.getName()),
+                        os);
   os << "// ";
   prettyPrintOpBreadcrumb(funcOp, os);
   os << "\n";
@@ -440,12 +441,14 @@
     TypeSwitch<Operation *>(op)
         .Case<IREE::Stream::CmdSerialOp>([&](auto op) {
           ++depth;
-          for (auto &nestedOp : op.getBody().front()) dumpRow(&nestedOp);
+          for (auto &nestedOp : op.getBody().front())
+            dumpRow(&nestedOp);
           --depth;
         })
         .Case<IREE::Stream::CmdConcurrentOp>([&](auto op) {
           ++depth;
-          for (auto &nestedOp : op.getBody().front()) dumpRow(&nestedOp);
+          for (auto &nestedOp : op.getBody().front())
+            dumpRow(&nestedOp);
           --depth;
         })
         .Case<IREE::Stream::CmdFillOp>([&](auto op) {
@@ -464,7 +467,8 @@
           auto workload = op.getWorkload();
           SmallString<32> workloadStr;
           for (unsigned i = 0; i < workload.size(); ++i) {
-            if (i > 0) workloadStr.append(";");
+            if (i > 0)
+              workloadStr.append(";");
             APInt dimValue;
             if (matchPattern(workload[i], m_ConstantInt(&dimValue))) {
               dimValue.toString(workloadStr, 10, /*signed=*/true);
@@ -566,25 +570,26 @@
 // Opens a canonical |filePath| for text output.
 // An empty path can be used to target stderr and `-` will go to stdout.
 // If the file cannot be opened stderr will be used.
-static std::unique_ptr<llvm::raw_fd_ostream> openOutputFile(
-    StringRef filePath) {
+static std::unique_ptr<llvm::raw_fd_ostream>
+openOutputFile(StringRef filePath) {
   if (filePath.empty()) {
-    return std::make_unique<llvm::raw_fd_ostream>(2, false);  // stderr
+    return std::make_unique<llvm::raw_fd_ostream>(2, false); // stderr
   } else if (filePath == "-") {
-    return std::make_unique<llvm::raw_fd_ostream>(1, false);  // stdout
+    return std::make_unique<llvm::raw_fd_ostream>(1, false); // stdout
   } else {
     std::error_code ec;
     auto result = std::make_unique<llvm::raw_fd_ostream>(
         filePath, ec, llvm::sys::fs::OF_TextWithCRLF);
-    if (!ec) return result;
+    if (!ec)
+      return result;
     llvm::errs() << "Error opening iree-stream-dump-statistics output file '"
                  << filePath << "'\n";
-    return std::make_unique<llvm::raw_fd_ostream>(2, false);  // stderr.
+    return std::make_unique<llvm::raw_fd_ostream>(2, false); // stderr.
   }
 }
 
 class DumpStatisticsPass : public DumpStatisticsBase<DumpStatisticsPass> {
- public:
+public:
   DumpStatisticsPass() = default;
   DumpStatisticsPass(DumpOutputFormat outputFormat, std::string outputFile) {
     this->outputFormat = outputFormat;
@@ -597,7 +602,8 @@
   }
 
   void runOnOperation() override {
-    if (outputFormat == DumpOutputFormat::None) return;
+    if (outputFormat == DumpOutputFormat::None)
+      return;
 
     // Open the output file we'll be streaming to.
     // Since we are processing the entire module at once we overwrite the file.
@@ -609,33 +615,34 @@
     usageInfo.analyze(moduleOp);
 
     switch (outputFormat) {
-      case DumpOutputFormat::Pretty:
-      case DumpOutputFormat::Verbose:
-        prettyPrintUsageInfo(usageInfo,
-                             outputFormat == DumpOutputFormat::Verbose, *os);
-        break;
-      case DumpOutputFormat::CSV:
-        dumpCSVTables(usageInfo, *os);
-        break;
-      case DumpOutputFormat::JSON:
-        dumpJSONStructures(usageInfo, *os);
-        break;
-      default:
-        break;
+    case DumpOutputFormat::Pretty:
+    case DumpOutputFormat::Verbose:
+      prettyPrintUsageInfo(usageInfo, outputFormat == DumpOutputFormat::Verbose,
+                           *os);
+      break;
+    case DumpOutputFormat::CSV:
+      dumpCSVTables(usageInfo, *os);
+      break;
+    case DumpOutputFormat::JSON:
+      dumpJSONStructures(usageInfo, *os);
+      break;
+    default:
+      break;
     }
 
     os->flush();
   }
 };
 
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createDumpStatisticsPass(
-    DumpOutputFormat outputFormat, std::string outputFile) {
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createDumpStatisticsPass(DumpOutputFormat outputFormat,
+                         std::string outputFile) {
   return std::make_unique<DumpStatisticsPass>(outputFormat, outputFile);
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideAsyncCopies.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideAsyncCopies.cpp
index c37e20f..ab64d37 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideAsyncCopies.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideAsyncCopies.cpp
@@ -45,7 +45,7 @@
 class LastUsers
     : public DFX::StateWrapper<DFX::PotentialValuesState<Operation *>,
                                DFX::ValueElement> {
- public:
+public:
   using BaseType = DFX::StateWrapper<DFX::PotentialValuesState<Operation *>,
                                      DFX::ValueElement>;
 
@@ -71,7 +71,7 @@
     return std::string("last users: ") + std::to_string(getAssumedSet().size());
   }
 
- private:
+private:
   explicit LastUsers(const Position &pos) : BaseType(pos) {}
 
   void initializeValue(Value value, DFX::Solver &solver) override {
@@ -119,7 +119,7 @@
 class ArgumentSemantics
     : public DFX::StateWrapper<DFX::BitIntegerState<uint8_t, 3, 0>,
                                DFX::ValueElement> {
- public:
+public:
   using BaseType =
       DFX::StateWrapper<DFX::BitIntegerState<uint8_t, 3, 0>, DFX::ValueElement>;
 
@@ -165,7 +165,8 @@
   const std::string getAsStr(AsmState &asmState) const override {
     std::string str;
     auto append = [&](const char *part) {
-      if (!str.empty()) str += '|';
+      if (!str.empty())
+        str += '|';
       str += part;
     };
     append(this->isAssumed(NOT_MUTATED) ? "immutable" : "mutable");
@@ -173,7 +174,7 @@
     return str.empty() ? "*" : str;
   }
 
- private:
+private:
   explicit ArgumentSemantics(const Position &pos) : BaseType(pos) {}
 
   // Returns true if |operand| is tied to a result on its owner indicating an
@@ -181,7 +182,8 @@
   static bool isTiedUse(OpOperand &operand) {
     if (auto tiedOp =
             dyn_cast<IREE::Util::TiedOpInterface>(operand.getOwner())) {
-      if (tiedOp.isOperandTied(operand.getOperandNumber())) return true;
+      if (tiedOp.isOperandTied(operand.getOperandNumber()))
+        return true;
     }
     return false;
   }
@@ -286,7 +288,7 @@
 // that to insert stream-ordered deallocs and know when timepoints have been
 // discard as they go out of scope. For now this strictly checks last use.
 class LastUseAnalysis {
- public:
+public:
   explicit LastUseAnalysis(Operation *rootOp)
       : explorer(rootOp, TraversalAction::SHALLOW),
         solver(explorer, allocator) {
@@ -309,7 +311,8 @@
     // Seed all block arguments throughout the program.
     for (auto callableOp : getTopLevelOps()) {
       auto *region = callableOp.getCallableRegion();
-      if (!region) continue;
+      if (!region)
+        continue;
       for (auto &block : *region) {
         for (auto arg : block.getArguments()) {
           if (llvm::isa<IREE::Stream::ResourceType>(arg.getType())) {
@@ -335,7 +338,8 @@
   bool isArgMoved(BlockArgument arg) {
     auto argumentSemantics =
         solver.lookupElementFor<ArgumentSemantics>(Position::forValue(arg));
-    if (!argumentSemantics) return false;
+    if (!argumentSemantics)
+      return false;
     return argumentSemantics->getAssumedByValue();
   }
 
@@ -346,7 +350,7 @@
     return lastUsers.isAssumedLastUser(userOp);
   }
 
- private:
+private:
   Explorer explorer;
   llvm::BumpPtrAllocator allocator;
   DFX::Solver solver;
@@ -431,7 +435,8 @@
 // Returns true if the op was elided.
 static bool tryElideCloneOp(IREE::Stream::AsyncCloneOp cloneOp,
                             LastUseAnalysis &analysis) {
-  if (!isSafeToElideCloneOp(cloneOp, analysis)) return false;
+  if (!isSafeToElideCloneOp(cloneOp, analysis))
+    return false;
   cloneOp.replaceAllUsesWith(cloneOp.getSource());
   cloneOp.erase();
   return true;
@@ -445,7 +450,8 @@
   for (auto &block : region) {
     for (auto cloneOp : llvm::make_early_inc_range(
              block.getOps<IREE::Stream::AsyncCloneOp>())) {
-      if (!isSafeToElideCloneOp(cloneOp, analysis)) continue;
+      if (!isSafeToElideCloneOp(cloneOp, analysis))
+        continue;
       cloneOp.replaceAllUsesWith(cloneOp.getSource());
       cloneOp.erase();
       didChange = true;
@@ -474,7 +480,7 @@
 // copies are elided: we are guaranteed to reach a fixed point as we are only
 // removing copies in this pass and not introducing any new ops.
 class ElideAsyncCopiesPass : public ElideAsyncCopiesBase<ElideAsyncCopiesPass> {
- public:
+public:
   ElideAsyncCopiesPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -484,7 +490,8 @@
 
   void runOnOperation() override {
     auto moduleOp = getOperation();
-    if (moduleOp.getBody()->empty()) return;
+    if (moduleOp.getBody()->empty())
+      return;
 
     // Try analyzing the program and eliding the unneeded copies until we reach
     // a fixed point (no more copies can be elided).
@@ -504,10 +511,12 @@
       bool didChange = false;
       for (auto callableOp : analysis.getTopLevelOps()) {
         auto *region = callableOp.getCallableRegion();
-        if (!region) continue;
+        if (!region)
+          continue;
         didChange = tryElideAsyncCopiesInRegion(*region, analysis) || didChange;
       }
-      if (!didChange) break;
+      if (!didChange)
+        break;
     }
     if (iterationCount == maxIterationCount) {
       // If you find yourself hitting this we can evaluate increasing the
@@ -523,13 +532,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createElideAsyncCopiesPass() {
   return std::make_unique<ElideAsyncCopiesPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp
index 67e99b3..ff19d69 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp
@@ -54,7 +54,7 @@
 class IsGlobalImmediate
     : public DFX::StateWrapper<
           DFX::BooleanState, DFX::TypedOperationElement<IREE::Util::GlobalOp>> {
- public:
+public:
   using BaseType =
       DFX::StateWrapper<DFX::BooleanState,
                         DFX::TypedOperationElement<IREE::Util::GlobalOp>>;
@@ -77,7 +77,7 @@
     return std::string("is_immediate: ") + std::to_string(isAssumed());
   }
 
- private:
+private:
   explicit IsGlobalImmediate(const Position &pos) : BaseType(pos) {}
 
   void initializeOperation(IREE::Util::GlobalOp globalOp,
@@ -125,7 +125,7 @@
 // Boolean state will be set to false if any sources are non-immediate.
 class IsImmediate
     : public DFX::StateWrapper<DFX::BooleanState, DFX::ValueElement> {
- public:
+public:
   using BaseType = DFX::StateWrapper<DFX::BooleanState, DFX::ValueElement>;
 
   static IsImmediate &createForPosition(const Position &pos,
@@ -146,7 +146,7 @@
     return std::string("is_immediate: ") + std::to_string(isAssumed());
   }
 
- private:
+private:
   explicit IsImmediate(const Position &pos) : BaseType(pos) {}
 
   void initializeValue(Value value, DFX::Solver &solver) override {
@@ -310,7 +310,7 @@
 class TimepointCoverage
     : public DFX::StateWrapper<DFX::PotentialValuesState<Value>,
                                DFX::ValueElement> {
- public:
+public:
   using BaseType =
       DFX::StateWrapper<DFX::PotentialValuesState<Value>, DFX::ValueElement>;
 
@@ -351,7 +351,7 @@
     return str;
   }
 
- private:
+private:
   explicit TimepointCoverage(const Position &pos) : BaseType(pos) {}
 
   void initializeValue(Value value, DFX::Solver &solver) override {
@@ -511,7 +511,7 @@
 const char TimepointCoverage::ID = 0;
 
 class TimepointCoverageAnalysis {
- public:
+public:
   explicit TimepointCoverageAnalysis(Operation *rootOp)
       : explorer(rootOp, TraversalAction::SHALLOW),
         solver(explorer, allocator) {
@@ -595,7 +595,8 @@
     };
     for (auto callableOp : getTopLevelOps()) {
       auto *region = callableOp.getCallableRegion();
-      if (!region || region->empty()) continue;
+      if (!region || region->empty())
+        continue;
       seedRegion(*region);
     }
 
@@ -612,7 +613,8 @@
 
   // Returns true if |value| is known to be immediately resolved.
   bool isImmediate(Value value) {
-    if (isDefinedImmediate(value)) return true;
+    if (isDefinedImmediate(value))
+      return true;
     auto &isImmediate =
         solver.getOrCreateElementFor<IsImmediate>(Position::forValue(value));
     return isImmediate.isValidState() && isImmediate.isKnown();
@@ -622,14 +624,15 @@
   bool unionTransitivelyReachedTimepoints(Value value, SetVector<Value> &set) {
     auto coverage = solver.getOrCreateElementFor<TimepointCoverage>(
         Position::forValue(value));
-    if (!coverage.isValidState() || coverage.isUndefContained()) return false;
+    if (!coverage.isValidState() || coverage.isUndefContained())
+      return false;
     for (auto reached : coverage.getAssumedSet()) {
       set.insert(reached);
     }
     return true;
   }
 
- private:
+private:
   Explorer explorer;
   llvm::BumpPtrAllocator allocator;
   DFX::Solver solver;
@@ -638,9 +641,9 @@
 
 // Prunes |possibleTimepoints| into a set of required timepoints.
 // Any timepoints not in the resulting set are required.
-static SetVector<Value> buildRequiredCoverageSet(
-    SmallVector<Value> possibleTimepoints,
-    TimepointCoverageAnalysis &analysis) {
+static SetVector<Value>
+buildRequiredCoverageSet(SmallVector<Value> possibleTimepoints,
+                         TimepointCoverageAnalysis &analysis) {
   // Build a map that effectively tracks an incoming edge counter for each
   // timepoint. Values with no incoming edges are required.
   DenseMap<Value, int> coverageMap;
@@ -654,7 +657,8 @@
     if (isValid) {
       for (auto reachedTimepoint : reachedTimepoints) {
         // TODO(benvanik): avoid self-references so we don't need this check.
-        if (reachedTimepoint == possibleTimepoint) continue;
+        if (reachedTimepoint == possibleTimepoint)
+          continue;
         ++coverageMap[reachedTimepoint];
       }
     }
@@ -706,7 +710,8 @@
   // Elides |elidedTimepoint| by replacing all its uses by |op| with an
   // immediate timepoint value.
   auto elideTimepointOperand = [&](Operation *op, Value elidedTimepoint) {
-    if (isDefinedImmediate(elidedTimepoint)) return;  // already immediate
+    if (isDefinedImmediate(elidedTimepoint))
+      return; // already immediate
     auto immediateTimepoint = makeImmediate(elidedTimepoint, OpBuilder(op));
     elidedTimepoint.replaceUsesWithIf(
         immediateTimepoint,
@@ -717,8 +722,10 @@
   // Elides all timepoint operands of |op| that are immediately resolved.
   auto elideTimepointOperands = [&](Operation *op) {
     for (auto operand : llvm::make_early_inc_range(op->getOperands())) {
-      if (!llvm::isa<IREE::Stream::TimepointType>(operand.getType())) continue;
-      if (isDefinedImmediate(operand)) continue;
+      if (!llvm::isa<IREE::Stream::TimepointType>(operand.getType()))
+        continue;
+      if (isDefinedImmediate(operand))
+        continue;
       if (analysis.isImmediate(operand)) {
         LLVM_DEBUG({
           llvm::dbgs() << "  >>> eliding known-immediate operand ";
@@ -733,8 +740,10 @@
   // Elides |elidedTimepoint| by replacing all its uses with an immediate
   // timepoint value. The original value will end up with zero uses.
   auto elideTimepointResult = [&](Operation *op, Value elidedTimepoint) {
-    if (elidedTimepoint.use_empty()) return;          // no-op
-    if (isDefinedImmediate(elidedTimepoint)) return;  // already immediate
+    if (elidedTimepoint.use_empty())
+      return; // no-op
+    if (isDefinedImmediate(elidedTimepoint))
+      return; // already immediate
     OpBuilder afterBuilder(op);
     afterBuilder.setInsertionPointAfterValue(elidedTimepoint);
     Value immediateTimepoint =
@@ -753,8 +762,10 @@
     //  %imm0 = immediate
     //  %imm1 = immediate
     for (auto result : llvm::reverse(op->getResults())) {
-      if (!llvm::isa<IREE::Stream::TimepointType>(result.getType())) continue;
-      if (isDefinedImmediate(result)) continue;
+      if (!llvm::isa<IREE::Stream::TimepointType>(result.getType()))
+        continue;
+      if (isDefinedImmediate(result))
+        continue;
       if (analysis.isImmediate(result)) {
         LLVM_DEBUG({
           llvm::dbgs() << "  >>> eliding known-immediate result ";
@@ -772,7 +783,8 @@
   auto processTimelineOp = [&](IREE::Stream::TimelineOpInterface op) {
     auto resultTimepoint = op.getResultTimepoint();
     auto awaitTimepoints = op.getAwaitTimepoints();
-    if (awaitTimepoints.empty()) return;
+    if (awaitTimepoints.empty())
+      return;
 
     LLVM_DEBUG({
       llvm::dbgs() << "[ElideTimepoints] pruning " << op->getName()
@@ -819,7 +831,8 @@
     }
 
     // If there's only one timepoint we don't have to worry with coverage.
-    if (possibleTimepoints.size() <= 1) return;
+    if (possibleTimepoints.size() <= 1)
+      return;
 
     // Perform the analysis on the possible timepoints to find which are covered
     // by others and elide all of those known-covered.
@@ -890,7 +903,7 @@
 //   %timepoint1 = ... await(%timepoint0)
 //   %timepoint2 = %timepoint1
 class ElideTimepointsPass : public ElideTimepointsBase<ElideTimepointsPass> {
- public:
+public:
   ElideTimepointsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -900,7 +913,8 @@
 
   void runOnOperation() override {
     auto moduleOp = getOperation();
-    if (moduleOp.getBody()->empty()) return;
+    if (moduleOp.getBody()->empty())
+      return;
 
     // Perform whole-program analysis to find for each timepoint what other
     // timepoints are known to be reached.
@@ -917,21 +931,23 @@
     // fixed-point iteration continues.
     for (auto callableOp : analysis.getTopLevelOps()) {
       auto *region = callableOp.getCallableRegion();
-      if (!region || region->empty()) continue;
+      if (!region || region->empty())
+        continue;
       didChange = tryElideTimepointsInRegion(*region, analysis) || didChange;
     }
 
-    if (didChange) signalFixedPointModified(moduleOp);
+    if (didChange)
+      signalFixedPointModified(moduleOp);
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createElideTimepointsPass() {
   return std::make_unique<ElideTimepointsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EmplaceAllocations.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EmplaceAllocations.cpp
index 13c6639..d93d604 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EmplaceAllocations.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EmplaceAllocations.cpp
@@ -59,11 +59,13 @@
   for (auto [resultIndex, result] : llvm::enumerate(dispatchOp.getResults())) {
     // Ignore results with multiple users. We could potentially place these but
     // that makes tracking much more complicated.
-    if (!result.hasOneUse()) continue;
+    if (!result.hasOneUse())
+      continue;
     // Ignore already-tied operands.
     // TODO(benvanik): update tied range if we want to place into a superset?
     auto operandIndex = dispatchOp.getTiedResultOperandIndex(resultIndex);
-    if (operandIndex.has_value()) continue;
+    if (operandIndex.has_value())
+      continue;
 
     // Find potential.
     Value targetResource;
@@ -75,7 +77,8 @@
     Value targetResultSize;
     Operation *userOp = *result.user_begin();
     if (auto updateOp = dyn_cast<IREE::Stream::AsyncUpdateOp>(userOp)) {
-      if (updateOp.getUpdate() != result) continue;
+      if (updateOp.getUpdate() != result)
+        continue;
       if (!IREE::Util::tryMoveProducerBefore(updateOp.getTarget(),
                                              dispatchOp)) {
         // Failed to move while keeping valid SSA dominance.
@@ -95,7 +98,8 @@
       targetResult = updateOp.getResult();
       targetResultSize = updateOp.getTargetSize();
     }
-    if (!targetResource) continue;
+    if (!targetResource)
+      continue;
 
     // Add operand and tie the result.
     operandIndex = dispatchOp.getResourceOperands().size();
@@ -126,7 +130,8 @@
   bool didChange = false;
   for (auto &block : region.getBlocks()) {
     for (auto &op : block) {
-      if (!op.hasTrait<OpTrait::IREE::Stream::AsyncPhaseOp>()) continue;
+      if (!op.hasTrait<OpTrait::IREE::Stream::AsyncPhaseOp>())
+        continue;
       // TODO(benvanik): support placement for more ops e.g. copies/collectives.
       didChange = TypeSwitch<Operation *, bool>(&op)
                       // TODO(#11249): support in-place collective ops.
@@ -145,7 +150,7 @@
 
 class EmplaceAllocationsPass
     : public EmplaceAllocationsBase<EmplaceAllocationsPass> {
- public:
+public:
   EmplaceAllocationsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -165,13 +170,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<>> createEmplaceAllocationsPass() {
   return std::make_unique<EmplaceAllocationsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp
index d15a9db..ec196de 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/EncodeTensors.cpp
@@ -58,7 +58,8 @@
 static RankedTensorType alignTensorType(RankedTensorType originalType) {
   Type elementType = originalType.getElementType();
   Type alignedType = legalizeStorageElementType(elementType);
-  if (alignedType == elementType) return originalType;
+  if (alignedType == elementType)
+    return originalType;
   return RankedTensorType::get(originalType.getShape(), alignedType,
                                originalType.getEncoding());
 }
@@ -76,7 +77,8 @@
   // Map from absolute dimension index to the compact dynamic index.
   unsigned di = 0;
   for (unsigned j = 0; j < i; ++j) {
-    if (tensorType.isDynamicDim(j)) ++di;
+    if (tensorType.isDynamicDim(j))
+      ++di;
   }
   return dynamicDims[di];
 }
@@ -592,7 +594,7 @@
 
 class EncodeHostTensorsPass
     : public EncodeHostTensorsBase<EncodeHostTensorsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect>();
     registry.insert<mlir::arith::ArithDialect>();
@@ -622,11 +624,12 @@
 
 // Aligns the element type of a !flow.dispatch.tensor<> to a byte-aligned power
 // of 2 bit width.
-static IREE::Flow::DispatchTensorType alignDispatchTensorType(
-    IREE::Flow::DispatchTensorType originalType) {
+static IREE::Flow::DispatchTensorType
+alignDispatchTensorType(IREE::Flow::DispatchTensorType originalType) {
   Type elementType = originalType.getBoundElementType();
   Type alignedType = legalizeStorageElementType(elementType);
-  if (alignedType == elementType) return originalType;
+  if (alignedType == elementType)
+    return originalType;
   return IREE::Flow::DispatchTensorType::get(
       originalType.getAccess(), originalType.getShape(), alignedType);
 }
@@ -652,7 +655,8 @@
     // Align the element type, if needed.
     IREE::Flow::DispatchTensorType alignedType =
         alignDispatchTensorType(originalType);
-    if (originalType == alignedType) return failure();  // already aligned.
+    if (originalType == alignedType)
+      return failure(); // already aligned.
 
     // Directly swap the type with the one, changing all uses in the IR.
     // This works because
@@ -676,7 +680,8 @@
 
     // Align the element type, if needed.
     RankedTensorType alignedType = alignTensorType(targetType);
-    if (targetType == alignedType) return failure();  // already aligned.
+    if (targetType == alignedType)
+      return failure(); // already aligned.
 
     // Loads always truncate from an byte aligned type to a sub-byte one.
     assert(targetType.getElementTypeBitWidth() <
@@ -709,7 +714,8 @@
 
     // Align the element type, if needed.
     RankedTensorType alignedType = alignTensorType(sourceType);
-    if (sourceType == alignedType) return failure();  // already aligned.
+    if (sourceType == alignedType)
+      return failure(); // already aligned.
 
     // Stores always extend from a sub-byte aligned type to a byte aligned one.
     assert(sourceType.getElementTypeBitWidth() <
@@ -731,7 +737,7 @@
 
 class EncodeDeviceTensorsPass
     : public EncodeDeviceTensorsBase<EncodeDeviceTensorsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect>();
     registry.insert<mlir::arith::ArithDialect>();
@@ -752,7 +758,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<>> createEncodeHostTensorsPass() {
   return std::make_unique<EncodeHostTensorsPass>();
@@ -762,7 +768,7 @@
   return std::make_unique<EncodeDeviceTensorsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/FoldUniformOperands.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/FoldUniformOperands.cpp
index c5b504e..365f847 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/FoldUniformOperands.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/FoldUniformOperands.cpp
@@ -43,9 +43,9 @@
 //  ->
 //   stream.cmd.dispatch @foo(%0, %1 : index, index)
 // + deduped arguments in the executable
-static void deduplicateOperands(
-    mlir::func::FuncOp funcOp,
-    SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
+static void
+deduplicateOperands(mlir::func::FuncOp funcOp,
+                    SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
   auto &entryBlock = funcOp.front();
   auto anyDispatchOp = dispatchOps.front();
   unsigned operandCount = anyDispatchOp.getUniformOperands().size();
@@ -73,15 +73,16 @@
   llvm::BitVector sameValues(operandCount);
   llvm::BitVector deadOperandsMap(operandCount);
   auto uniformDupeIndexMap =
-      llvm::to_vector(llvm::seq(0u, operandCount));  // old -> new
+      llvm::to_vector(llvm::seq(0u, operandCount)); // old -> new
   for (unsigned idx = 0; idx < operandCount; ++idx) {
-    if (deadOperandsMap.test(idx)) continue;
+    if (deadOperandsMap.test(idx))
+      continue;
     // Each bit represents an operand that duplicates the operand at idx.
     // We walk all the sites and AND their masks together to get the safe
     // set of duplicate operands.
     // Example for %0: (%a, %b, %a) -> b001
     // Example for %1: (%a, %b, %a) -> b000
-    sameValues.set();  // note reused
+    sameValues.set(); // note reused
     for (auto &dupeIndexMap : dupeIndexMaps) {
       for (unsigned i = 0; i < operandCount; ++i) {
         if (i == idx || dupeIndexMap[i] != idx) {
@@ -106,7 +107,7 @@
 
   // Build a map of old duplicate arguments to their base arguments.
   auto argReplacementMap =
-      llvm::to_vector(llvm::seq(0u, funcOp.getNumArguments()));  // old -> new
+      llvm::to_vector(llvm::seq(0u, funcOp.getNumArguments())); // old -> new
   auto operandToArgMap =
       IREE::Stream::CmdDispatchOp::makeOperandToArgMap(funcOp);
   for (auto dupe : llvm::enumerate(uniformDupeIndexMap)) {
@@ -121,7 +122,8 @@
     llvm::interleaveComma(deadOperandsMap.set_bits(), llvm::dbgs());
     llvm::dbgs() << "\n";
     for (auto replacement : llvm::enumerate(argReplacementMap)) {
-      if (replacement.index() == replacement.value()) continue;
+      if (replacement.index() == replacement.value())
+        continue;
       llvm::dbgs() << "  %arg" << replacement.index() << " -> %arg"
                    << replacement.value() << "\n";
     }
@@ -132,7 +134,8 @@
   for (auto replacement : llvm::enumerate(argReplacementMap)) {
     unsigned deadIdx = replacement.index();
     unsigned liveIdx = replacement.value();
-    if (deadIdx == liveIdx) continue;
+    if (deadIdx == liveIdx)
+      continue;
     deadArgMap.set(deadIdx);
     entryBlock.getArgument(deadIdx).replaceAllUsesWith(
         entryBlock.getArgument(liveIdx));
@@ -140,7 +143,8 @@
 
   // Update each dispatch site to remove duplicates.
   SmallVector<unsigned> deadOperands;
-  for (auto idx : deadOperandsMap.set_bits()) deadOperands.push_back(idx);
+  for (auto idx : deadOperandsMap.set_bits())
+    deadOperands.push_back(idx);
   for (auto dispatchOp : dispatchOps) {
     for (auto idx : llvm::reverse(deadOperands)) {
       dispatchOp.getUniformOperandsMutable().erase(idx);
@@ -165,9 +169,9 @@
 //   stream.cmd.dispatch @foo(%c100 : index)
 //   stream.cmd.dispatch @foo(%c101 : index)
 // + inlined %c1 in the executable
-static void inlineUniformConstants(
-    mlir::func::FuncOp funcOp,
-    SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
+static void
+inlineUniformConstants(mlir::func::FuncOp funcOp,
+                       SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
   auto &entryBlock = funcOp.front();
   auto anyDispatchOp = dispatchOps.front();
   unsigned operandCount = anyDispatchOp.getUniformOperands().size();
@@ -178,7 +182,8 @@
   llvm::BitVector uniformOperandMap(operandCount, /*t=*/true);
   for (auto dispatchOp : dispatchOps) {
     for (unsigned idx = 0; idx < operandCount; ++idx) {
-      if (!uniformOperandMap.test(idx)) continue;
+      if (!uniformOperandMap.test(idx))
+        continue;
       auto value = dispatchOp.getUniformOperands()[idx];
       APInt intValue;
       if (!matchPattern(value, m_ConstantInt(&intValue))) {
@@ -208,7 +213,8 @@
     llvm::dbgs() << "inlineUniformConstants for " << funcOp.getSymName()
                  << "\n";
     for (unsigned i = 0; i < operandValues.size(); ++i) {
-      if (!operandValues[i].has_value()) continue;
+      if (!operandValues[i].has_value())
+        continue;
       llvm::dbgs() << "  operand " << i << " = " << operandValues[i].value()
                    << "\n";
     }
@@ -233,7 +239,8 @@
 
   // Update each dispatch site to remove duplicates.
   SmallVector<unsigned> deadOperands;
-  for (auto idx : uniformOperandMap.set_bits()) deadOperands.push_back(idx);
+  for (auto idx : uniformOperandMap.set_bits())
+    deadOperands.push_back(idx);
   for (auto dispatchOp : dispatchOps) {
     for (auto idx : llvm::reverse(deadOperands)) {
       dispatchOp.getUniformOperandsMutable().erase(idx);
@@ -252,7 +259,7 @@
 
 class FoldUniformOperandsPass
     : public FoldUniformOperandsBase<FoldUniformOperandsPass> {
- public:
+public:
   FoldUniformOperandsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -280,7 +287,8 @@
       for (auto exportOp :
            executableOp.getOps<IREE::Stream::ExecutableExportOp>()) {
         auto &dispatchOps = entryDispatchMap[exportOp];
-        if (dispatchOps.empty()) continue;  // no-op if no dispatches
+        if (dispatchOps.empty())
+          continue; // no-op if no dispatches
 
         auto funcOp = exportOp.lookupFunctionRef();
 
@@ -296,13 +304,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createFoldUniformOperandsPass() {
   return std::make_unique<FoldUniformOperandsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/FuseDispatchBindings.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/FuseDispatchBindings.cpp
index 4b90e35..667abd7 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/FuseDispatchBindings.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/FuseDispatchBindings.cpp
@@ -39,10 +39,9 @@
 struct BindingRange {
   BindingRange() = default;
   BindingRange(IREE::Stream::CmdDispatchOp dispatchOp, unsigned idx)
-      : idx(idx),
-        access(llvm::cast<IREE::Stream::ResourceAccessBitfieldAttr>(
-                   dispatchOp.getResourceAccesses()[idx])
-                   .getValue()),
+      : idx(idx), access(llvm::cast<IREE::Stream::ResourceAccessBitfieldAttr>(
+                             dispatchOp.getResourceAccesses()[idx])
+                             .getValue()),
         resource(dispatchOp.getResources()[idx]),
         resourceSize(dispatchOp.getResourceSizes()[idx]),
         offset(dispatchOp.getResourceOffsets()[idx]),
@@ -76,9 +75,10 @@
 // those we can prove are the same. We could in the future introduce new entry
 // points if we had minor divergence in order to gain more fusion in the common
 // cases.
-static SmallVector<Binding> findCorrelatedBindings(
-    unsigned bindingCount, ArrayRef<IREE::Stream::CmdDispatchOp> dispatchOps,
-    bool aliasMutableBindings) {
+static SmallVector<Binding>
+findCorrelatedBindings(unsigned bindingCount,
+                       ArrayRef<IREE::Stream::CmdDispatchOp> dispatchOps,
+                       bool aliasMutableBindings) {
   // For each dispatch build equivalence classes indicating which bindings are
   // from the same base resource. Note that not all dispatches will have the
   // same duplicate bindings (though we hope they do!).
@@ -149,7 +149,8 @@
   llvm::BitVector handledBindings(bindingCount, /*t=*/false);
   for (unsigned i = 0; i < bindingCount; ++i) {
     // Ignore bindings we've already covered earlier during iteration.
-    if (handledBindings.test(i)) continue;
+    if (handledBindings.test(i))
+      continue;
 
     // Build new binding.
     Binding binding;
@@ -311,12 +312,13 @@
 }
 
 // Fuses bindings on an |exportOp| based on all |dispatchOps| invoking it.
-static void fuseDispatchBindings(
-    IREE::Stream::ExecutableOp executableOp,
-    IREE::Stream::ExecutableExportOp exportOp,
-    ArrayRef<IREE::Stream::CmdDispatchOp> dispatchOps,
-    MemoizedCmdZeros &memoizedZeros) {
-  if (dispatchOps.empty()) return;  // no-op if no dispatches
+static void
+fuseDispatchBindings(IREE::Stream::ExecutableOp executableOp,
+                     IREE::Stream::ExecutableExportOp exportOp,
+                     ArrayRef<IREE::Stream::CmdDispatchOp> dispatchOps,
+                     MemoizedCmdZeros &memoizedZeros) {
+  if (dispatchOps.empty())
+    return; // no-op if no dispatches
   auto anyDispatchOp = dispatchOps.front();
   unsigned bindingCount = anyDispatchOp.getResources().size();
 
@@ -403,7 +405,7 @@
   for (auto dispatchOp : dispatchOps) {
     updateDispatchSite(dispatchOp, bindings, memoizedZeros);
   }
-  bindings.clear();  // invalidated above
+  bindings.clear(); // invalidated above
 }
 
 //===----------------------------------------------------------------------===//
@@ -412,7 +414,7 @@
 
 class FuseDispatchBindingsPass
     : public FuseDispatchBindingsBase<FuseDispatchBindingsPass> {
- public:
+public:
   FuseDispatchBindingsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -457,14 +459,14 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createFuseDispatchBindingsPass() {
   return std::make_unique<FuseDispatchBindingsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/LayoutSlices.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/LayoutSlices.cpp
index 53b6447..87c38d0 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/LayoutSlices.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/LayoutSlices.cpp
@@ -41,10 +41,11 @@
 // Slice packed offset SSA values will be updated and start at the given
 // |baseOffset|. Returns |baseOffset| + the total size of the allocation
 // aligned to the requirements of |resourceConfig|.
-static Value packSlicesWithNoAliasing(
-    IREE::Stream::ResourcePackOp packOp, Value baseOffset,
-    ArrayRef<Slice> slices, IREE::Stream::ResourceConfigAttr resourceConfig,
-    IndexSet &indexSet, OpBuilder &builder) {
+static Value
+packSlicesWithNoAliasing(IREE::Stream::ResourcePackOp packOp, Value baseOffset,
+                         ArrayRef<Slice> slices,
+                         IREE::Stream::ResourceConfigAttr resourceConfig,
+                         IndexSet &indexSet, OpBuilder &builder) {
   auto loc = packOp.getLoc();
   int64_t offsetAlignment = resourceConfig.getMinBufferOffsetAlignment();
   int64_t rangeAlignment = resourceConfig.getMinBufferRangeAlignment();
@@ -81,10 +82,11 @@
 // Slice packed offset SSA values will be updated and start at the given
 // |baseOffset|. Returns |baseOffset| + the total size of the allocation
 // aligned to the requirements of |resourceConfig|.
-static Value packStaticSlicesGreedily(
-    IREE::Stream::ResourcePackOp packOp, Value baseOffset,
-    ArrayRef<Slice> slices, IREE::Stream::ResourceConfigAttr resourceConfig,
-    IndexSet &indexSet, OpBuilder &builder) {
+static Value
+packStaticSlicesGreedily(IREE::Stream::ResourcePackOp packOp, Value baseOffset,
+                         ArrayRef<Slice> slices,
+                         IREE::Stream::ResourceConfigAttr resourceConfig,
+                         IndexSet &indexSet, OpBuilder &builder) {
   int64_t offsetAlignment = resourceConfig.getMinBufferOffsetAlignment();
   int64_t rangeAlignment = resourceConfig.getMinBufferRangeAlignment();
 
@@ -123,8 +125,8 @@
         bestOffset = alignedOffset;
         bestOffsetFit = reservation.staticOffset - currentOffset;
       }
-      currentOffset = std::max(
-          currentOffset, reservation.staticOffset + reservation.staticSize);
+      currentOffset = std::max(currentOffset, reservation.staticOffset +
+                                                  reservation.staticSize);
     }
     if (bestOffset == UNASSIGNED) {
       bestOffset = IREE::Util::align(currentOffset, offsetAlignment);
@@ -172,10 +174,11 @@
 // Slice packed offset SSA values will be updated and start at the given
 // |baseOffset|. Returns |baseOffset| + the total size of the allocation
 // aligned to the requirements of |resourceConfig|.
-static Value packDynamicSlicesConservatively(
-    IREE::Stream::ResourcePackOp packOp, Value baseOffset,
-    ArrayRef<Slice> slices, IREE::Stream::ResourceConfigAttr resourceConfig,
-    IndexSet &indexSet, OpBuilder &builder) {
+static Value
+packDynamicSlicesConservatively(IREE::Stream::ResourcePackOp packOp,
+                                Value baseOffset, ArrayRef<Slice> slices,
+                                IREE::Stream::ResourceConfigAttr resourceConfig,
+                                IndexSet &indexSet, OpBuilder &builder) {
   auto loc = packOp.getLoc();
   int64_t offsetAlignment = resourceConfig.getMinBufferOffsetAlignment();
   int64_t rangeAlignment = resourceConfig.getMinBufferRangeAlignment();
@@ -208,7 +211,8 @@
       SmallVector<const Slice *> slices;
       bool intersects(const Slice &slice) const {
         for (auto *binSlice : slices) {
-          if (binSlice->intersects(slice)) return true;
+          if (binSlice->intersects(slice))
+            return true;
         }
         return false;
       }
@@ -245,7 +249,7 @@
 //===----------------------------------------------------------------------===//
 
 class LayoutSlicesPass : public LayoutSlicesBase<LayoutSlicesPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect>();
     registry.insert<mlir::arith::ArithDialect>();
@@ -316,13 +320,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<InterfacePass<CallableOpInterface>> createLayoutSlicesPass() {
   return std::make_unique<LayoutSlicesPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp
index 024e611..8d5d694 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeBuiltins.cpp
@@ -45,10 +45,10 @@
 
 // TODO(#13984): memset emulation of all types is required for CUDA graphs due
 // to a driver bug. Once it's fixed we can remove this global flag.
-static llvm::cl::opt<bool> clEmulateMemset(
-    "iree-stream-emulate-memset",
-    llvm::cl::desc("Emulate all memset types with dispatches."),
-    llvm::cl::init(false));
+static llvm::cl::opt<bool>
+    clEmulateMemset("iree-stream-emulate-memset",
+                    llvm::cl::desc("Emulate all memset types with dispatches."),
+                    llvm::cl::init(false));
 
 namespace mlir {
 namespace iree_compiler {
@@ -69,8 +69,8 @@
 };
 
 // Returns the support for the given type based on the resource configuration.
-static TypeSupport queryTypeSupport(
-    IREE::Stream::ResourceConfigAttr resourceConfig, Type type) {
+static TypeSupport
+queryTypeSupport(IREE::Stream::ResourceConfigAttr resourceConfig, Type type) {
   unsigned bitWidth = IREE::Util::getTypeBitWidth(type);
 
   if (bitWidth > 32) {
@@ -125,7 +125,7 @@
 // Tracks all builtin modules required by the program and the locations of the
 // ops that require them.
 class RequiredModules {
- public:
+public:
   // Inserts a module into the required module set for use by the op at |loc|.
   void insert(Location loc, StringRef moduleFile) {
     modules[moduleFile.str()].push_back(loc);
@@ -144,9 +144,9 @@
     return success();
   }
 
- private:
+private:
   // A map of builtin module filename to the locations using it.
-  std::map<std::string, SmallVector<Location>> modules;  // ordered
+  std::map<std::string, SmallVector<Location>> modules; // ordered
 };
 
 // Returns an OpBuilder at a point safe to insert arith/etc ops.
@@ -168,25 +168,24 @@
   unsigned bitWidth = pattern.getType().getIntOrFloatBitWidth();
   StringRef builtinName;
   switch (bitWidth) {
-    case 8:
-      builtinName = "__builtin_splat_i8";
-      requiredModules.insert(loc, "splat_i8.mlir");
-      break;
-    case 16:
-      builtinName = "__builtin_splat_i16";
-      requiredModules.insert(loc, "splat_i16.mlir");
-      break;
-    case 32:
-      builtinName = "__builtin_splat_i32";
-      requiredModules.insert(loc, "splat_i32.mlir");
-      break;
-    case 64:
-      builtinName = "__builtin_splat_i64";
-      requiredModules.insert(loc, "splat_i64.mlir");
-      break;
-    default:
-      return splatOp.emitOpError()
-             << "has no builtin for bit width " << bitWidth;
+  case 8:
+    builtinName = "__builtin_splat_i8";
+    requiredModules.insert(loc, "splat_i8.mlir");
+    break;
+  case 16:
+    builtinName = "__builtin_splat_i16";
+    requiredModules.insert(loc, "splat_i16.mlir");
+    break;
+  case 32:
+    builtinName = "__builtin_splat_i32";
+    requiredModules.insert(loc, "splat_i32.mlir");
+    break;
+  case 64:
+    builtinName = "__builtin_splat_i64";
+    requiredModules.insert(loc, "splat_i64.mlir");
+    break;
+  default:
+    return splatOp.emitOpError() << "has no builtin for bit width " << bitWidth;
   }
 
   auto arithBuilder = getParentBuilder(splatOp);
@@ -234,17 +233,17 @@
   auto resourceConfig = IREE::Stream::ResourceConfigAttr::lookup(splatOp);
   auto pattern = splatOp.getValue();
   switch (queryTypeSupport(resourceConfig, pattern.getType())) {
-    case TypeSupport::Native:
-      // Already ok!
-      return success();
-    case TypeSupport::Builtin:
-      return replaceBuiltinSplatOp(splatOp, pattern, requiredModules);
-    default:
-    case TypeSupport::Unsupported:
-      return splatOp.emitOpError()
-             << "has unsupported fill pattern type "
-             << splatOp.getValue().getType() << " (tried converting to "
-             << pattern.getType() << ")";
+  case TypeSupport::Native:
+    // Already ok!
+    return success();
+  case TypeSupport::Builtin:
+    return replaceBuiltinSplatOp(splatOp, pattern, requiredModules);
+  default:
+  case TypeSupport::Unsupported:
+    return splatOp.emitOpError()
+           << "has unsupported fill pattern type "
+           << splatOp.getValue().getType() << " (tried converting to "
+           << pattern.getType() << ")";
   }
 }
 
@@ -255,25 +254,24 @@
   unsigned bitWidth = pattern.getType().getIntOrFloatBitWidth();
   StringRef builtinName;
   switch (bitWidth) {
-    case 8:
-      builtinName = "__builtin_fill_i8";
-      requiredModules.insert(loc, "fill_i8.mlir");
-      break;
-    case 16:
-      builtinName = "__builtin_fill_i16";
-      requiredModules.insert(loc, "fill_i16.mlir");
-      break;
-    case 32:
-      builtinName = "__builtin_fill_i32";
-      requiredModules.insert(loc, "fill_i32.mlir");
-      break;
-    case 64:
-      builtinName = "__builtin_fill_i64";
-      requiredModules.insert(loc, "fill_i64.mlir");
-      break;
-    default:
-      return fillOp.emitOpError()
-             << "has no builtin for bit width " << bitWidth;
+  case 8:
+    builtinName = "__builtin_fill_i8";
+    requiredModules.insert(loc, "fill_i8.mlir");
+    break;
+  case 16:
+    builtinName = "__builtin_fill_i16";
+    requiredModules.insert(loc, "fill_i16.mlir");
+    break;
+  case 32:
+    builtinName = "__builtin_fill_i32";
+    requiredModules.insert(loc, "fill_i32.mlir");
+    break;
+  case 64:
+    builtinName = "__builtin_fill_i64";
+    requiredModules.insert(loc, "fill_i64.mlir");
+    break;
+  default:
+    return fillOp.emitOpError() << "has no builtin for bit width " << bitWidth;
   }
 
   auto arithBuilder = getParentBuilder(fillOp);
@@ -331,17 +329,17 @@
   auto resourceConfig = IREE::Stream::ResourceConfigAttr::lookup(fillOp);
   auto pattern = fillOp.getValue();
   switch (queryTypeSupport(resourceConfig, pattern.getType())) {
-    case TypeSupport::Native:
-      // Already ok!
-      return success();
-    case TypeSupport::Builtin:
-      return replaceBuiltinFillOp(fillOp, pattern, requiredModules);
-    default:
-    case TypeSupport::Unsupported:
-      return fillOp.emitOpError()
-             << "has unsupported fill pattern type "
-             << fillOp.getValue().getType() << " (tried converting to "
-             << pattern.getType() << ")";
+  case TypeSupport::Native:
+    // Already ok!
+    return success();
+  case TypeSupport::Builtin:
+    return replaceBuiltinFillOp(fillOp, pattern, requiredModules);
+  default:
+  case TypeSupport::Unsupported:
+    return fillOp.emitOpError()
+           << "has unsupported fill pattern type "
+           << fillOp.getValue().getType() << " (tried converting to "
+           << pattern.getType() << ")";
   }
 }
 
@@ -351,7 +349,7 @@
 
 class MaterializeBuiltinsPass
     : public MaterializeBuiltinsBase<MaterializeBuiltinsPass> {
- public:
+public:
   MaterializeBuiltinsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -369,7 +367,8 @@
 
   void runOnOperation() override {
     auto moduleOp = getOperation();
-    if (moduleOp.getBody()->empty()) return;
+    if (moduleOp.getBody()->empty())
+      return;
 
     // Find and replace (if needed) ops that we want to turn into builtins
     // across the entire program.
@@ -405,7 +404,7 @@
   return std::make_unique<MaterializeBuiltinsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeCopyOnWrite.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeCopyOnWrite.cpp
index 956f613..363d9ef 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeCopyOnWrite.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/MaterializeCopyOnWrite.cpp
@@ -43,19 +43,22 @@
 static bool isSafeToElideCOW(Value operand, IREE::Stream::ResourceType type) {
   // Can't do anything with block args without analysis - we don't know if the
   // value they carry is the last user (move semantics).
-  if (llvm::isa<BlockArgument>(operand)) return false;
+  if (llvm::isa<BlockArgument>(operand))
+    return false;
 
   // If our value is a constant then we need to ensure that we aren't
   // tied to a constant operand. If we are we need to clone to a
   // non-constant value. We could make this work in cases where constants are
   // being initialized, however those are best modeled as transfer operations
   // where no mutations will occur on the constant transfer target.
-  if (type.getLifetime() == IREE::Stream::Lifetime::Constant) return false;
+  if (type.getLifetime() == IREE::Stream::Lifetime::Constant)
+    return false;
 
   // If there's more than one user we can't make a local decision. It's
   // expensive to query relative operation order within a block and within a
   // region the lifetime of values may vary - all things we can't tell here.
-  if (!operand.hasOneUse()) return false;
+  if (!operand.hasOneUse())
+    return false;
 
   // We are the only user and the value is contained entirely within the
   // current region. We by construction know we do not need to worry.
@@ -73,8 +76,10 @@
   // has to wait until a subsequent pass.
   auto resourceType =
       llvm::dyn_cast<IREE::Stream::ResourceType>(operand.get().getType());
-  if (!resourceType) return false;
-  if (isSafeToElideCOW(operand.get(), resourceType)) return false;
+  if (!resourceType)
+    return false;
+  if (isSafeToElideCOW(operand.get(), resourceType))
+    return false;
 
   // Materialize a clone operation just for the operand provided.
   auto sizeAwareType =
@@ -104,7 +109,8 @@
   auto tiedOperandIndices = tiedOp.getTiedResultOperandIndices();
   for (unsigned i = 0; i < tiedOperandIndices.size(); ++i) {
     int64_t operandIdx = tiedOperandIndices[i];
-    if (operandIdx == IREE::Util::TiedOpInterface::kUntiedIndex) continue;
+    if (operandIdx == IREE::Util::TiedOpInterface::kUntiedIndex)
+      continue;
     auto &operand = tiedOp->getOpOperand(operandIdx);
     didChange =
         materializeOperandCOW(tiedOp.getLoc(), operand, affinity, builder) ||
@@ -120,7 +126,8 @@
   bool didChange = false;
   for (auto &block : region.getBlocks()) {
     for (auto &op : block) {
-      if (!op.hasTrait<OpTrait::IREE::Stream::AsyncPhaseOp>()) continue;
+      if (!op.hasTrait<OpTrait::IREE::Stream::AsyncPhaseOp>())
+        continue;
       didChange =
           TypeSwitch<Operation *, bool>(&op)
               .Case<IREE::Stream::TensorImportOp, IREE::Stream::TensorExportOp,
@@ -159,7 +166,7 @@
 // and also makes it easy to disable copy elision to ferret out issues.
 class MaterializeCopyOnWritePass
     : public MaterializeCopyOnWriteBase<MaterializeCopyOnWritePass> {
- public:
+public:
   MaterializeCopyOnWritePass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -179,13 +186,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<>> createMaterializeCopyOnWritePass() {
   return std::make_unique<MaterializeCopyOnWritePass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/OutlineConstants.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/OutlineConstants.cpp
index 2982cf1..ae2ceee 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/OutlineConstants.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/OutlineConstants.cpp
@@ -45,7 +45,8 @@
   SmallVector<ConstantDef> results;
   for (auto callableOp : moduleOp.getOps<CallableOpInterface>()) {
     auto *region = callableOp.getCallableRegion();
-    if (!region) continue;
+    if (!region)
+      continue;
     for (auto &block : *region) {
       for (auto &op : block.getOperations()) {
         if (auto constantOp = dyn_cast<arith::ConstantOp>(op)) {
@@ -64,7 +65,7 @@
 }
 
 class OutlineConstantsPass : public OutlineConstantsBase<OutlineConstantsPass> {
- public:
+public:
   OutlineConstantsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -75,7 +76,8 @@
 
   void runOnOperation() override {
     auto moduleOp = getOperation();
-    if (moduleOp.getBody()->empty()) return;
+    if (moduleOp.getBody()->empty())
+      return;
 
     SymbolTable moduleSymbols(moduleOp);
     std::string baseName = "_constant";
@@ -89,7 +91,7 @@
       auto globalOp = moduleBuilder.create<IREE::Util::GlobalOp>(
           def.op->getLoc(), baseName, /*isMutable=*/false, def.type, def.value);
       globalOp.setPrivate();
-      moduleSymbols.insert(globalOp);  // uniques name
+      moduleSymbols.insert(globalOp); // uniques name
       replacements.emplace_back(def.op, globalOp);
 
       // Prevent the variable from being re-inlined if the canonicalizer runs.
@@ -127,7 +129,7 @@
   return std::make_unique<OutlineConstantsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackAllocations.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackAllocations.cpp
index 0cfce76..4e4ed62 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackAllocations.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackAllocations.cpp
@@ -34,7 +34,7 @@
 //===----------------------------------------------------------------------===//
 
 class PackAllocationsPass : public PackAllocationsBase<PackAllocationsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect>();
     registry.insert<IREE::Stream::StreamDialect>();
@@ -62,7 +62,8 @@
     // are mutually exclusive.
     parentOp.walk([&](IREE::Stream::ResourceAllocOp allocOp) {
       // If just one result then ignore (nothing to pack).
-      if (allocOp.getResults().size() == 1) return;
+      if (allocOp.getResults().size() == 1)
+        return;
       auto resourceType = allocOp.getResults().front().getType();
 
       // NOTE: this is risky: we are assuming right now that all of the
@@ -105,14 +106,14 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<InterfacePass<CallableOpInterface>>
 createPackAllocationsPass() {
   return std::make_unique<PackAllocationsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackConstants.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackConstants.cpp
index d523782..b1fe2eb 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackConstants.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackConstants.cpp
@@ -142,7 +142,8 @@
   SmallVector<Attribute> values;
   int64_t offset = 0;
   for (auto &constantSpan : storageBuffer.spans) {
-    if (constantSpan.length == 0) continue;
+    if (constantSpan.length == 0)
+      continue;
 
     int64_t start = constantSpan.offset;
     int64_t end = start + constantSpan.length;
@@ -185,9 +186,10 @@
 // Assume that |slices| have been ordered by prior passes and that order may
 // have some performance-sensitivity (constants are grouped by
 // locality/lifetime/etc).
-static SmallVector<StorageResource, 8> computePackingMap(
-    ArrayRef<ConstantSlice> slices,
-    IREE::Stream::ResourceConfigAttr resourceConfig, MLIRContext *context) {
+static SmallVector<StorageResource, 8>
+computePackingMap(ArrayRef<ConstantSlice> slices,
+                  IREE::Stream::ResourceConfigAttr resourceConfig,
+                  MLIRContext *context) {
   // This is literally all my brain has brain for right now. The ideal here is
   // that we have a basic static (and ideally profile-guided) sorting pass
   // that keeps constant values that are accessed sorted together.
@@ -254,11 +256,12 @@
 // issue an async copy from source to result. To avoid a bunch of overhead when
 // there are multiple storage buffers we invert the logic so that we put all the
 // async copies into a single region.
-static UploadResult buildStagingUpload(
-    Location loc, IREE::Stream::AffinityAttr affinityAttr,
-    IREE::Stream::ResourceType resourceType,
-    ArrayRef<StorageResource> storageResources, ArrayRef<Value> storageBuffers,
-    IndexSet &indexSet, OpBuilder &builder) {
+static UploadResult
+buildStagingUpload(Location loc, IREE::Stream::AffinityAttr affinityAttr,
+                   IREE::Stream::ResourceType resourceType,
+                   ArrayRef<StorageResource> storageResources,
+                   ArrayRef<Value> storageBuffers, IndexSet &indexSet,
+                   OpBuilder &builder) {
   UploadResult uploadResult;
   auto stagingType = builder.getType<IREE::Stream::ResourceType>(
       IREE::Stream::Lifetime::Staging);
@@ -321,7 +324,8 @@
   auto executeOp = builder.create<IREE::Stream::CmdExecuteOp>(
       loc, /*awaitTimepoint=*/Value{}, capturedResources,
       capturedResourceSizes);
-  if (affinityAttr) executeOp.setAffinityAttr(affinityAttr);
+  if (affinityAttr)
+    executeOp.setAffinityAttr(affinityAttr);
   uploadResult.timepoint = executeOp.getResultTimepoint();
 
   // Map captured resources into the execution region.
@@ -434,7 +438,8 @@
                        constantsOp.getValues())) {
     auto resourceType =
         llvm::cast<IREE::Stream::ResourceType>(result.getType());
-    if (resourceType.getLifetime() != lifetime) continue;
+    if (resourceType.getLifetime() != lifetime)
+      continue;
     slices.push_back(ConstantSlice{
         result,
         resultSize,
@@ -446,7 +451,8 @@
   // will need and where each value will be placed.
   auto storageResources =
       computePackingMap(slices, resourceConfig, constantsOp.getContext());
-  if (storageResources.empty()) return nullptr;
+  if (storageResources.empty())
+    return nullptr;
 
   // Emit rodata storage for the constant values.
   // As our upload paths may vary this ensures that we are only emitting
@@ -508,7 +514,7 @@
 // never a case where this matters by construction; which is a feature :P
 
 class PackConstantsPass : public PackConstantsBase<PackConstantsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect>();
     registry.insert<mlir::arith::ArithDialect>();
@@ -545,7 +551,8 @@
                              resourceConfig, indexSet, builder)) {
         timepoints.push_back(timepoint);
       }
-      if (timepoints.empty()) return;
+      if (timepoints.empty())
+        return;
 
       // Join on storage timepoints for our transitive dependencies to await.
       // We could do this at a finer granularity if we were to split the
@@ -564,13 +571,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<InterfacePass<CallableOpInterface>> createPackConstantsPass() {
   return std::make_unique<PackConstantsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackDispatchOperands.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackDispatchOperands.cpp
index 7dfb410..e83aa08 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackDispatchOperands.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PackDispatchOperands.cpp
@@ -232,14 +232,16 @@
   // Preserve the arg attrs on either the final op or the function argument
   // if none was required.
   if (auto definingOp = value.getDefiningOp()) {
-    if (oldArgAttr) definingOp->setAttrs(oldArgAttr);
+    if (oldArgAttr)
+      definingOp->setAttrs(oldArgAttr);
     newArgAttrs.push_back(nullptr);
   } else {
     newArgAttrs.push_back(oldArgAttr);
   }
   // Note that if we had decomposed the arg we'll expect that there are two attr
   // dicts for the two new args.
-  if (wasDecomposed) newArgAttrs.push_back(nullptr);
+  if (wasDecomposed)
+    newArgAttrs.push_back(nullptr);
 
   return value;
 }
@@ -283,7 +285,7 @@
 
 class PackDispatchOperandsPass
     : public PackDispatchOperandsBase<PackDispatchOperandsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::arith::ArithDialect>();
     registry.insert<mlir::complex::ComplexDialect>();
@@ -320,13 +322,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createPackDispatchOperandsPass() {
   return std::make_unique<PackDispatchOperandsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PassDetail.h b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PassDetail.h
index b9b45c3..01597e3 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PassDetail.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PassDetail.h
@@ -30,11 +30,11 @@
 };
 
 #define GEN_PASS_CLASSES
-#include "iree/compiler/Dialect/Stream/Transforms/Passes.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Stream/Transforms/Passes.h.inc" // IWYU pragma: keep
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_TRANSFORMS_PASS_DETAIL_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_TRANSFORMS_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.cpp
index 508553a..b3233e6 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.cpp
@@ -384,8 +384,8 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/Dialect/Stream/Transforms/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/Dialect/Stream/Transforms/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerStreamPasses() {
   // Generated.
@@ -395,7 +395,7 @@
   registerStreamTransformPassPipelines();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.h b/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.h
index a7cf768..91aef3b 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.h
@@ -179,9 +179,9 @@
 
 void registerStreamPasses();
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_STREAM_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_DIALECT_STREAM_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp
index 8524717..f0fab6b 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp
@@ -60,7 +60,8 @@
   // Gather all of the resource globals in the root.
   for (auto &region : rootOp->getRegions()) {
     for (auto globalOp : region.getOps<IREE::Util::GlobalOp>()) {
-      if (!llvm::isa<IREE::Stream::ResourceType>(globalOp.getType())) continue;
+      if (!llvm::isa<IREE::Stream::ResourceType>(globalOp.getType()))
+        continue;
       expandedGlobals[globalOp.getName()].resourceOp = globalOp;
     }
   }
@@ -102,7 +103,8 @@
 // Expands resources in the given |types| list to (timepoint, resource).
 // This could be changed to some iterator magic to avoid the alloc.
 static SmallVector<Type> expandTypes(TypeRange types) {
-  if (types.empty()) return {};
+  if (types.empty())
+    return {};
   auto timepointType =
       IREE::Stream::TimepointType::get(types.front().getContext());
   SmallVector<Type> newTypes;
@@ -176,7 +178,8 @@
   if (auto sizeAwareOp = dyn_cast_or_null<IREE::Util::SizeAwareOpInterface>(
           resourceValue.getDefiningOp())) {
     auto sizeValue = sizeAwareOp.getResultSizeFromValue(resourceValue);
-    if (sizeValue) return sizeValue;
+    if (sizeValue)
+      return sizeValue;
   }
 
   // Try first to scan uses in the IR. Since we carry the shape in most ops we
@@ -184,9 +187,11 @@
   for (auto &use : resourceValue.getUses()) {
     auto sizeAwareOp =
         dyn_cast<IREE::Util::SizeAwareOpInterface>(use.getOwner());
-    if (!sizeAwareOp) continue;
+    if (!sizeAwareOp)
+      continue;
     auto sizeValue = sizeAwareOp.getOperandSize(use.getOperandNumber());
-    if (!sizeValue) continue;
+    if (!sizeValue)
+      continue;
     if (sizeValue.getParentRegion()->isProperAncestor(
             builder.getInsertionBlock()->getParent())) {
       // Size value found and implicitly captured; we can reuse (could be
@@ -216,19 +221,23 @@
 static void expandRegion(Region &region, bool canModifyEntryBlock,
                          ExpandedGlobalMap &globalMap,
                          IRMapping resourceTimepointMap) {
-  if (region.empty()) return;
+  if (region.empty())
+    return;
 
   // Update all block arguments.
   auto timepointType = IREE::Stream::TimepointType::get(region.getContext());
   for (auto &block : region.getBlocks()) {
-    if (!llvm::any_of(block.getArgumentTypes(), isResourceType)) continue;
-    if (block.isEntryBlock() && !canModifyEntryBlock) continue;
+    if (!llvm::any_of(block.getArgumentTypes(), isResourceType))
+      continue;
+    if (block.isEntryBlock() && !canModifyEntryBlock)
+      continue;
 
     // Insert and build a list of expanded (timepoint, resource) pairs.
     SmallVector<std::pair<Value, Value>> expansions;
     for (int i = block.getNumArguments() - 1; i >= 0; --i) {
       auto resourceArg = block.getArgument(i);
-      if (!isResourceType(resourceArg.getType())) continue;
+      if (!isResourceType(resourceArg.getType()))
+        continue;
       auto timepointArg =
           block.insertArgument(i, timepointType, resourceArg.getLoc());
       expansions.push_back(std::make_pair(timepointArg, resourceArg));
@@ -289,7 +298,8 @@
 static void expandGlobalLoadOp(IREE::Util::GlobalLoadOp op,
                                ExpandedGlobalMap &globalMap,
                                IRMapping &resourceTimepointMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
   OpBuilder builder(op);
   auto &expandedGlobal = globalMap[op.getGlobal()];
   auto timepoint =
@@ -336,7 +346,8 @@
 static void expandGlobalStoreOp(IREE::Util::GlobalStoreOp op,
                                 ExpandedGlobalMap &globalMap,
                                 IRMapping &resourceTimepointMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
   OpBuilder builder(op);
   auto timepointOperand = consumeTimepoint(op.getLoc(), op.getValue(),
                                            resourceTimepointMap, builder);
@@ -358,10 +369,12 @@
 // external and resolved later on. We can't modify their signatures.
 static bool isPublicOrExternal(CallableOpInterface callableOp) {
   if (auto symbolOp = dyn_cast<SymbolOpInterface>(callableOp.getOperation())) {
-    if (symbolOp.isPublic()) return true;
+    if (symbolOp.isPublic())
+      return true;
   }
   auto *region = callableOp.getCallableRegion();
-  if (!region || region->empty()) return true;
+  if (!region || region->empty())
+    return true;
   return false;
 }
 
@@ -409,12 +422,14 @@
 //  stream.timepoint.await %rt, %t
 static void expandCallOp(mlir::func::CallOp op,
                          IRMapping &resourceTimepointMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
 
   // Ignore calls to public/external functions.
   auto calleeOp = SymbolTable::lookupNearestSymbolFrom<CallableOpInterface>(
       op, op.getCalleeAttr());
-  if (isPublicOrExternal(calleeOp)) return;
+  if (isPublicOrExternal(calleeOp))
+    return;
 
   // Build the new call op with expanded operands and results.
   OpBuilder builder(op);
@@ -461,8 +476,10 @@
 //  return %t, %0
 static void expandReturnOp(mlir::func::ReturnOp op,
                            IRMapping &resourceTimepointMap) {
-  if (!usesResources(op)) return;
-  if (isPublicOrExternal(op->getParentOfType<mlir::func::FuncOp>())) return;
+  if (!usesResources(op))
+    return;
+  if (isPublicOrExternal(op->getParentOfType<mlir::func::FuncOp>()))
+    return;
   OpBuilder builder(op);
   auto operands = expandOperands(op.getLoc(), op.getOperands(),
                                  resourceTimepointMap, builder);
@@ -483,7 +500,8 @@
 //    %1 = stream.timepoint.await %a, %b
 static void expandBranchOp(mlir::cf::BranchOp op,
                            IRMapping &resourceTimepointMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
   OpBuilder builder(op);
   auto operands = expandOperands(op.getLoc(), op.getDestOperands(),
                                  resourceTimepointMap, builder);
@@ -493,7 +511,8 @@
 
 static void expandCondBranchOp(mlir::cf::CondBranchOp op,
                                IRMapping &resourceTimepointMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
   OpBuilder builder(op);
   builder.create<mlir::cf::CondBranchOp>(
       op.getLoc(), op.getCondition(), op.getTrueDest(),
@@ -603,7 +622,7 @@
 // always awaited, with the elision/deduplication/etc left until cleanup.
 class PropagateTimepointsPass
     : public PropagateTimepointsBase<PropagateTimepointsPass> {
- public:
+public:
   PropagateTimepointsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -630,13 +649,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createPropagateTimepointsPass() {
   return std::make_unique<PropagateTimepointsPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
index ab612a9..03afdc0 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
@@ -83,7 +83,8 @@
   // Returns true if a change was made.
   bool applyArgTransition(BlockArgument arg, PatternRewriter &rewriter) const {
     auto oldType = llvm::dyn_cast<IREE::Stream::ResourceType>(arg.getType());
-    if (!oldType) return false;
+    if (!oldType)
+      return false;
     auto newUsage = analysis.lookupResourceUsage(arg);
     auto newLifetime = convertUsageToLifetime(newUsage);
     auto newType = rewriter.getType<IREE::Stream::ResourceType>(newLifetime);
@@ -105,7 +106,8 @@
   bool applyResultTransition(Operation *op, Value result,
                              PatternRewriter &rewriter) const {
     auto oldType = llvm::dyn_cast<IREE::Stream::ResourceType>(result.getType());
-    if (!oldType) return false;
+    if (!oldType)
+      return false;
     auto newUsage = analysis.lookupResourceUsage(result);
     auto newLifetime = convertUsageToLifetime(newUsage);
     auto newType = rewriter.getType<IREE::Stream::ResourceType>(newLifetime);
@@ -142,7 +144,8 @@
                              IREE::Stream::AffinityAttr affinityAttr,
                              PatternRewriter &rewriter) const {
     auto oldType = llvm::dyn_cast<IREE::Stream::ResourceType>(result.getType());
-    if (!oldType) return false;
+    if (!oldType)
+      return false;
     auto newUsage = analysis.lookupResourceUsage(result);
     auto newLifetime = convertUsageToLifetime(newUsage);
     auto newType = rewriter.getType<IREE::Stream::ResourceType>(newLifetime);
@@ -275,7 +278,8 @@
     }
 
     // Blocks and nested operations:
-    if (this->applyRegionTransitions(op, rewriter)) didChange = true;
+    if (this->applyRegionTransitions(op, rewriter))
+      didChange = true;
 
     return success(didChange);
   }
@@ -295,7 +299,8 @@
     for (unsigned i = 0; i < op->getNumResults(); ++i) {
       auto result = op->getResult(i);
       if (llvm::isa<IREE::Stream::ResourceType>(result.getType())) {
-        if (this->applyResultTransition(op, result, rewriter)) didChange = true;
+        if (this->applyResultTransition(op, result, rewriter))
+          didChange = true;
       }
     }
     if (didChange) {
@@ -385,7 +390,7 @@
 //===----------------------------------------------------------------------===//
 
 class RefineUsagePass : public RefineUsageBase<RefineUsagePass> {
- public:
+public:
   RefineUsagePass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -396,7 +401,8 @@
 
   void runOnOperation() override {
     auto moduleOp = getOperation();
-    if (moduleOp.getBody()->empty()) return;
+    if (moduleOp.getBody()->empty())
+      return;
 
     // Run analysis on the entire module.
     ResourceUsageAnalysis analysis(moduleOp);
@@ -418,13 +424,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createRefineUsagePass() {
   return std::make_unique<RefineUsagePass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleAllocation.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleAllocation.cpp
index 82c6cf8..87b18de 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleAllocation.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleAllocation.cpp
@@ -90,7 +90,8 @@
     // Tied results reuse their operand buffer.
     auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(op);
     for (auto result : op.getResults()) {
-      if (!llvm::isa<IREE::Stream::ResourceType>(result.getType())) continue;
+      if (!llvm::isa<IREE::Stream::ResourceType>(result.getType()))
+        continue;
       if (tiedOp) {
         auto tiedOperand = tiedOp.getTiedResultOperand(result);
         if (tiedOperand) {
@@ -116,8 +117,8 @@
 // Builds a map of value aliases from aliasee to a set of aliasers.
 // Only values that alias will be present in the map. The map may contain
 // values nested within the |executeOp|.
-static ValueAliasingMap computeExecutionRegionValueAliases(
-    IREE::Stream::AsyncExecuteOp executeOp) {
+static ValueAliasingMap
+computeExecutionRegionValueAliases(IREE::Stream::AsyncExecuteOp executeOp) {
   ValueAliasingMap valueAliases;
   computeRegionValueAliases(executeOp, valueAliases);
   return valueAliases;
@@ -132,7 +133,7 @@
 struct LivenessInterval {
   int start = 0;
   int end = 0;
-  int ordinal = -1;  // unique per value
+  int ordinal = -1; // unique per value
   Value value;
   bool operator<(const LivenessInterval &rhs) const {
     return ordinal < rhs.ordinal;
@@ -151,9 +152,9 @@
 // constituent ranges - including block arguments. Note that not all values will
 // have buffers allocated to them - we are just tracking transitive SSA value
 // lifetime.
-static LivenessIntervalList computeExecutionRegionLivenessIntervals(
-    IREE::Stream::AsyncExecuteOp executeOp,
-    const ValueAliasingMap &valueAliases) {
+static LivenessIntervalList
+computeExecutionRegionLivenessIntervals(IREE::Stream::AsyncExecuteOp executeOp,
+                                        const ValueAliasingMap &valueAliases) {
   // Perform a liveness analysis on the execution region.
   // Fragments have a single block and as such the live-in/live-out block
   // information derived here applies to the entire stream region.
@@ -173,7 +174,8 @@
   SmallPtrSet<Value, 16> liveOuts;
   auto yieldOp = cast<IREE::Stream::YieldOp>(streamBlock->back());
   for (auto returnValue : yieldOp.getResourceOperands()) {
-    if (!llvm::isa<IREE::Stream::ResourceType>(returnValue.getType())) continue;
+    if (!llvm::isa<IREE::Stream::ResourceType>(returnValue.getType()))
+      continue;
     liveOuts.insert(returnValue);
   }
 
@@ -182,7 +184,8 @@
   LivenessIntervalMap valueIntervals;
   int ordinal = 0;
   for (Value value : streamBlock->getArguments()) {
-    if (!llvm::isa<IREE::Stream::ResourceType>(value.getType())) continue;
+    if (!llvm::isa<IREE::Stream::ResourceType>(value.getType()))
+      continue;
     LivenessInterval interval;
     interval.start = LIVE_IN;
     if (liveOuts.contains(value)) {
@@ -208,8 +211,10 @@
       // the duration of the region.
       concurrentOp.walk([&](Operation *op) {
         for (auto value : op->getResults()) {
-          if (!llvm::isa<IREE::Stream::ResourceType>(value.getType())) continue;
-          if (!value.use_empty()) continue;
+          if (!llvm::isa<IREE::Stream::ResourceType>(value.getType()))
+            continue;
+          if (!value.use_empty())
+            continue;
           LivenessInterval interval;
           interval.start = start;
           interval.end = start;
@@ -220,7 +225,8 @@
       });
     }
     for (auto value : op.getResults()) {
-      if (!llvm::isa<IREE::Stream::ResourceType>(value.getType())) continue;
+      if (!llvm::isa<IREE::Stream::ResourceType>(value.getType()))
+        continue;
       LivenessInterval interval;
       interval.start = start;
       if (liveOuts.contains(value)) {
@@ -290,9 +296,7 @@
       : resource(resource), resourceSize(resourceSize) {}
   explicit ResourceRange(Value resource, Value resourceSize, Value offset,
                          Value length)
-      : resource(resource),
-        resourceSize(resourceSize),
-        offset(offset),
+      : resource(resource), resourceSize(resourceSize), offset(offset),
         length(length) {}
 
   Value resource = nullptr;
@@ -346,7 +350,8 @@
   // Returns a memoized ConstantIndexOp of |value|.
   Value lookupOrCreateIndex(int64_t value) {
     auto it = indexConstantMap.find(value);
-    if (it != indexConstantMap.end()) return it->second;
+    if (it != indexConstantMap.end())
+      return it->second;
     auto constantValue = OpBuilder(rootOp).createOrFold<arith::ConstantIndexOp>(
         rootOp->getLoc(), value);
     indexConstantMap.insert(std::make_pair(value, constantValue));
@@ -356,8 +361,10 @@
   // Performs a memoized add (as many adds of offsets or lengths are redundant).
   Value add(Location loc, Value lhs, Value rhs) {
     // TODO(benvanik): memoize - if worth it. Needs profiling.
-    if (matchPattern(lhs, m_Zero())) return rhs;
-    if (matchPattern(rhs, m_Zero())) return lhs;
+    if (matchPattern(lhs, m_Zero()))
+      return rhs;
+    if (matchPattern(rhs, m_Zero()))
+      return lhs;
     auto result = OpBuilder(rootOp).createOrFold<arith::AddIOp>(loc, lhs, rhs);
     return result;
   }
@@ -366,7 +373,8 @@
   // All aliases of |resource| will also be mapped.
   void mapResourceRange(Value resource, ResourceRange resourceRange,
                         AsmState *asmState) {
-    if (resourceRangeMap.count(resource)) return;
+    if (resourceRangeMap.count(resource))
+      return;
 
     if (!resourceRange.offset && !resourceRange.length) {
       resourceRange.offset = lookupOrCreateIndex(0);
@@ -438,7 +446,7 @@
     }
   }
 
- private:
+private:
   Operation *rootOp;
 
   // All values that have aliases mapped to a set of all of the values they
@@ -453,9 +461,9 @@
   DenseMap<Value, ResourceRange> resourceRangeMap;
 };
 
-static LogicalResult applyResourceSubviewOp(
-    IREE::Stream::ResourceSubviewOp asyncOp, AllocationScope &scope,
-    OpBuilder builder) {
+static LogicalResult
+applyResourceSubviewOp(IREE::Stream::ResourceSubviewOp asyncOp,
+                       AllocationScope &scope, OpBuilder builder) {
   // Allocation should have taken care of this by propagating the range.
   // By the time we walk to this op there should be no more users.
   asyncOp.erase();
@@ -565,9 +573,9 @@
   return success();
 }
 
-static LogicalResult applyAsyncCollectiveOp(
-    IREE::Stream::AsyncCollectiveOp asyncOp, AllocationScope &scope,
-    OpBuilder builder) {
+static LogicalResult
+applyAsyncCollectiveOp(IREE::Stream::AsyncCollectiveOp asyncOp,
+                       AllocationScope &scope, OpBuilder builder) {
   SmallVector<Value> newResources;
   SmallVector<Value> newResourceSizes;
   SmallVector<Value> newResourceOffsets;
@@ -734,11 +742,11 @@
   for (auto [i, oldInput] : llvm::enumerate(oldFunctionType.getInputs())) {
     auto oldArgAttr = asyncOp.getArgAttrDict(i);
     if (llvm::isa<IREE::Stream::ResourceType>(oldInput)) {
-      newInputs.push_back(oldInput);  // resource
+      newInputs.push_back(oldInput); // resource
       newArgAttrs.push_back(oldArgAttr);
-      newInputs.push_back(indexType);  // offset
+      newInputs.push_back(indexType); // offset
       newArgAttrs.push_back(nullptr);
-      newInputs.push_back(indexType);  // length
+      newInputs.push_back(indexType); // length
       newArgAttrs.push_back(nullptr);
     } else {
       newInputs.push_back(oldInput);
@@ -755,11 +763,11 @@
         // Tied results reuse the operands they are tied to.
         continue;
       }
-      newInputs.push_back(oldResult);  // resource
+      newInputs.push_back(oldResult); // resource
       newArgAttrs.push_back(oldResultAttr);
-      newInputs.push_back(indexType);  // offset
+      newInputs.push_back(indexType); // offset
       newArgAttrs.push_back(nullptr);
-      newInputs.push_back(indexType);  // length
+      newInputs.push_back(indexType); // length
       newArgAttrs.push_back(nullptr);
     } else {
       newResults.push_back(oldResult);
@@ -857,9 +865,9 @@
 static LogicalResult applyAsyncAllocations(Region &region,
                                            AllocationScope &scope);
 
-static LogicalResult applyAsyncConcurrentOp(
-    IREE::Stream::AsyncConcurrentOp asyncOp, AllocationScope &scope,
-    OpBuilder builder) {
+static LogicalResult
+applyAsyncConcurrentOp(IREE::Stream::AsyncConcurrentOp asyncOp,
+                       AllocationScope &scope, OpBuilder builder) {
   // Remove operands from the yield now that we aren't returning anything.
   // Must do this before we recurse so that the ops we are transforming have no
   // uses.
@@ -897,7 +905,8 @@
   auto ops = llvm::map_to_vector(llvm::reverse(block),
                                  [&](Operation &op) { return &op; });
   for (auto *op : ops) {
-    if (op->hasTrait<OpTrait::IsTerminator>()) continue;
+    if (op->hasTrait<OpTrait::IsTerminator>())
+      continue;
     if (failed(TypeSwitch<Operation *, LogicalResult>(op)
                    .Case([&](IREE::Stream::ResourceSubviewOp op) {
                      return applyResourceSubviewOp(op, scope, OpBuilder(op));
@@ -969,9 +978,9 @@
 // Performs allocation for all local transients in the execution region (those
 // !stream.resource<transient> values that don't escape). A new allocation op
 // will be inserted using |externalBuilder| and mappings added to |scope|.
-static std::optional<TransientAllocation> allocateLocalTransients(
-    IREE::Stream::AsyncExecuteOp executeOp, AllocationScope &scope,
-    OpBuilder &externalBuilder) {
+static std::optional<TransientAllocation>
+allocateLocalTransients(IREE::Stream::AsyncExecuteOp executeOp,
+                        AllocationScope &scope, OpBuilder &externalBuilder) {
   // Track which values we've already reserved. This makes it easier to early-
   // exit on aliased values.
   SmallPtrSet<Value, 16> coveredValues;
@@ -988,7 +997,8 @@
     assert(value && "must have value for interval");
     auto valueType =
         llvm::dyn_cast<IREE::Stream::ResourceType>(value.getType());
-    if (!valueType) continue;
+    if (!valueType)
+      continue;
 
     // Only handle transient buffers (created/used/dropped within the stream).
     if (valueInterval.start == LIVE_IN || valueInterval.end == LIVE_OUT) {
@@ -1078,7 +1088,8 @@
 // Returns true if |value| has one use and it is a stream.yield op.
 static bool isOnlyUseYield(Value value) {
   for (auto *user : value.getUsers()) {
-    if (!isa<IREE::Stream::YieldOp>(user)) return false;
+    if (!isa<IREE::Stream::YieldOp>(user))
+      return false;
   }
   return true;
 }
@@ -1086,12 +1097,14 @@
 // Extracts stream.async.constant ops from |executeOp| into their own dedicated
 // stream.resource.constants upload op. The uploaded constants will be captured
 // by the region for use within as if they had still existed in there.
-static std::optional<ConstantAllocation> extractConstants(
-    IREE::Stream::AsyncExecuteOp executeOp, OpBuilder &externalBuilder) {
+static std::optional<ConstantAllocation>
+extractConstants(IREE::Stream::AsyncExecuteOp executeOp,
+                 OpBuilder &externalBuilder) {
   // Gather all constant ops from the region, if any.
   auto constantOps =
       llvm::to_vector(executeOp.getOps<IREE::Stream::AsyncConstantOp>());
-  if (constantOps.empty()) return std::nullopt;
+  if (constantOps.empty())
+    return std::nullopt;
 
   // Allocate a new constant upload op and insert a subview for each constant.
   SmallVector<Location> locs;
@@ -1172,8 +1185,8 @@
 
 // Produces parameters for one or more result allocations composed of an ordered
 // set of |reservations| with matching lifetimes.
-static ResultAllocation reserveResultAllocation(
-    ArrayRef<ResultReservation> reservations) {
+static ResultAllocation
+reserveResultAllocation(ArrayRef<ResultReservation> reservations) {
   // We want deterministic ordering of the allocations for each lifetime type
   // so we build them all here and then just nuke the ones we don't end up
   // using.
@@ -1235,8 +1248,8 @@
 
 // Performs allocation for all results and local region transients of the given
 // |executeOp| region. IR will be inserted around the op in its parent block.
-static LogicalResult allocateExecutionRegion(
-    IREE::Stream::AsyncExecuteOp executeOp) {
+static LogicalResult
+allocateExecutionRegion(IREE::Stream::AsyncExecuteOp executeOp) {
   LLVM_DEBUG(llvm::dbgs() << "[[ Allocating execution region ]]\n");
 
   AllocationScope scope(executeOp);
@@ -1330,7 +1343,8 @@
     // Replace results of escaping uploads with the upload values.
     for (auto &reservation : constantAllocation->reservations) {
       auto result = findTiedYieldResult(reservation.constantOp.getResult());
-      if (!result) continue;
+      if (!result)
+        continue;
       result.replaceAllUsesWith(reservation.resource);
       handledResults.insert(result);
       LLVM_DEBUG({
@@ -1536,7 +1550,8 @@
   executeOp.getResultTimepoint().replaceAllUsesWith(
       newExecuteOp.getResultTimepoint());
   for (auto replacement : resultReplacements) {
-    if (!replacement.second) continue;  // handled already
+    if (!replacement.second)
+      continue; // handled already
     replacement.first.replaceAllUsesWith(replacement.second);
   }
   scope.replaceRootOp(newExecuteOp);
@@ -1653,7 +1668,7 @@
 
 class ScheduleAllocationPass
     : public ScheduleAllocationBase<ScheduleAllocationPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::func::FuncDialect>();
     registry.insert<mlir::arith::ArithDialect>();
@@ -1693,13 +1708,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createScheduleAllocationPass() {
   return std::make_unique<ScheduleAllocationPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleConcurrency.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleConcurrency.cpp
index 6fce209..f4cd556 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleConcurrency.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleConcurrency.cpp
@@ -55,12 +55,14 @@
     Operation *insertionPt = nullptr;
     for (auto in : partition->ins) {
       auto *definingOp = in.getDefiningOp();
-      if (!definingOp) continue;
-      if (definingOp->getBlock() != parentBlock) continue;
+      if (!definingOp)
+        continue;
+      if (definingOp->getBlock() != parentBlock)
+        continue;
       if (!insertionPt) {
-        insertionPt = definingOp;  // first defining op
+        insertionPt = definingOp; // first defining op
       } else if (insertionPt->isBeforeInBlock(definingOp)) {
-        insertionPt = definingOp;  // moving insertion point down
+        insertionPt = definingOp; // moving insertion point down
       }
     }
     OpBuilder parentBuilder(context);
@@ -82,7 +84,8 @@
       resultTypes.push_back(out.getType());
       auto resultSize = IREE::Util::SizeAwareTypeInterface::queryValueSize(
           fusedLoc, out, parentBuilder);
-      if (resultSize) resultSizes.push_back(resultSize);
+      if (resultSize)
+        resultSizes.push_back(resultSize);
     }
     SmallVector<Value> operands;
     SmallVector<Type> operandTypes;
@@ -91,12 +94,14 @@
     operandTypes.reserve(partition->ins.size());
     operandSizes.reserve(partition->ins.size());
     for (auto in : partition->ins) {
-      if (!llvm::isa<IREE::Stream::ResourceType>(in.getType())) continue;
+      if (!llvm::isa<IREE::Stream::ResourceType>(in.getType()))
+        continue;
       operands.push_back(in);
       operandTypes.push_back(in.getType());
       auto operandSize = IREE::Util::SizeAwareTypeInterface::queryValueSize(
           fusedLoc, in, parentBuilder);
-      if (operandSize) operandSizes.push_back(operandSize);
+      if (operandSize)
+        operandSizes.push_back(operandSize);
     }
 
     // TODO(benvanik): tie operands, or leave to canonicalization.
@@ -130,7 +135,8 @@
   //
   // Returns true if the operation was cloned into the partition.
   bool visit(Operation *op) {
-    if (!partition->ops.contains(op)) return false;
+    if (!partition->ops.contains(op))
+      return false;
 
     // Clone the op into the partition and remap it.
     auto *clonedOp = builder.clone(*op, mapping);
@@ -154,7 +160,8 @@
       results.push_back(newResult);
       auto resultSize = IREE::Util::SizeAwareTypeInterface::queryValueSize(
           concurrentOp.getLoc(), newResult, builder);
-      if (resultSize) resultSizes.push_back(resultSize);
+      if (resultSize)
+        resultSizes.push_back(resultSize);
     }
     builder.create<IREE::Stream::YieldOp>(concurrentOp.getLoc(), results,
                                           resultSizes);
@@ -169,7 +176,7 @@
 
 class ScheduleConcurrencyPass
     : public ScheduleConcurrencyBase<ScheduleConcurrencyPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Stream::StreamDialect>();
     registry.insert<IREE::Util::UtilDialect>();
@@ -183,7 +190,8 @@
     }
     for (auto executeOp :
          parentOp.getCallableRegion()->getOps<IREE::Stream::AsyncExecuteOp>()) {
-      if (failed(runOnRegion(executeOp))) return signalPassFailure();
+      if (failed(runOnRegion(executeOp)))
+        return signalPassFailure();
     }
   }
 
@@ -199,8 +207,10 @@
     // Compute a set of partitions covering all of the streamable ops in the
     // execution region.
     auto waveSet = partitionRegionConcurrency(configAttr, block);
-    if (waveSet.empty()) return success();
-    if (failed(waveSet.verify(parentOp.getLoc()))) return failure();
+    if (waveSet.empty())
+      return success();
+    if (failed(waveSet.verify(parentOp.getLoc())))
+      return failure();
 
     // Create partition builders for each partition.
     // We'll clone ops into each and insert them into the block at the
@@ -209,7 +219,8 @@
     SmallVector<WavePartitionBuilder> partitionBuilders;
     partitionBuilders.reserve(waveSet.size());
     for (auto partition : llvm::enumerate(waveSet.partitions)) {
-      if (partition.value().ops.size() == 1) continue;
+      if (partition.value().ops.size() == 1)
+        continue;
       partitionBuilders.push_back(WavePartitionBuilder(block, partition.index(),
                                                        &partition.value(),
                                                        mapping, &getContext()));
@@ -222,7 +233,8 @@
     // creates a lot of new IR (up to O(op*partitions)).
     SetVector<Operation *> deadOps;
     for (auto &op : *block) {
-      if (op.hasTrait<OpTrait::IsTerminator>()) continue;
+      if (op.hasTrait<OpTrait::IsTerminator>())
+        continue;
       bool handled = false;
       for (auto &partitionBuilder : partitionBuilders) {
         handled = partitionBuilder.visit(&op) || handled;
@@ -260,14 +272,14 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<InterfacePass<CallableOpInterface>>
 createScheduleConcurrencyPass() {
   return std::make_unique<ScheduleConcurrencyPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleExecution.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleExecution.cpp
index 73c9b2b..46f4002 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleExecution.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ScheduleExecution.cpp
@@ -56,11 +56,12 @@
     // This is at the last op in the partition.
     Operation *insertionPt = nullptr;
     for (auto *op : partition->ops) {
-      if (op->getBlock() != parentBlock) continue;
+      if (op->getBlock() != parentBlock)
+        continue;
       if (!insertionPt) {
-        insertionPt = op;  // first defining op
+        insertionPt = op; // first defining op
       } else if (insertionPt->isBeforeInBlock(op)) {
-        insertionPt = op;  // moving insertion point down
+        insertionPt = op; // moving insertion point down
       }
     }
     OpBuilder parentBuilder(context);
@@ -82,7 +83,8 @@
       resultTypes.push_back(out.getType());
       auto resultSize = IREE::Util::SizeAwareTypeInterface::queryValueSize(
           fusedLoc, out, parentBuilder);
-      if (resultSize) resultSizes.push_back(resultSize);
+      if (resultSize)
+        resultSizes.push_back(resultSize);
     }
     SmallVector<Value> operands;
     SmallVector<Type> operandTypes;
@@ -91,12 +93,14 @@
     operandTypes.reserve(partition->ins.size());
     operandSizes.reserve(partition->ins.size());
     for (auto in : partition->ins) {
-      if (!llvm::isa<IREE::Stream::ResourceType>(in.getType())) continue;
+      if (!llvm::isa<IREE::Stream::ResourceType>(in.getType()))
+        continue;
       operands.push_back(in);
       operandTypes.push_back(in.getType());
       auto operandSize = IREE::Util::SizeAwareTypeInterface::queryValueSize(
           fusedLoc, in, parentBuilder);
-      if (operandSize) operandSizes.push_back(operandSize);
+      if (operandSize)
+        operandSizes.push_back(operandSize);
     }
 
     // TODO(benvanik): tie operands, or leave to canonicalization.
@@ -132,7 +136,8 @@
   //
   // Returns true if the operation was cloned into the partition.
   bool visit(Operation *op) {
-    if (!partition->ops.contains(op)) return false;
+    if (!partition->ops.contains(op))
+      return false;
 
     // Clone the op into the partition and remap it.
     auto *clonedOp = builder.clone(*op, mapping);
@@ -166,7 +171,8 @@
       results.push_back(newResult);
       auto resultSize = IREE::Util::SizeAwareTypeInterface::queryValueSize(
           executeOp.getLoc(), newResult, builder);
-      if (resultSize) resultSizes.push_back(resultSize);
+      if (resultSize)
+        resultSizes.push_back(resultSize);
     }
     builder.create<IREE::Stream::YieldOp>(executeOp.getLoc(), results,
                                           resultSizes);
@@ -196,7 +202,8 @@
   }
   llvm::SmallSetVector<Block *, 8> markedBlocks;
   std::function<void(Block *)> visit = [&](Block *block) {
-    if (markedBlocks.count(block) > 0) return;
+    if (markedBlocks.count(block) > 0)
+      return;
     for (auto *childBlock : dominanceInfo.getNode(block)->children()) {
       visit(childBlock->getBlock());
     }
@@ -212,7 +219,7 @@
 
 class ScheduleExecutionPass
     : public ScheduleExecutionBase<ScheduleExecutionPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Stream::StreamDialect>();
     registry.insert<IREE::Util::UtilDialect>();
@@ -239,7 +246,8 @@
       // Compute a set of partitions covering all of the streamable ops in the
       // block.
       auto partitionSet = partitionStreamableOps(configAttr, block);
-      if (partitionSet.empty()) continue;
+      if (partitionSet.empty())
+        continue;
       if (failed(partitionSet.verify(parentOp.getLoc()))) {
         return signalPassFailure();
       }
@@ -262,7 +270,8 @@
       // creates a lot of new IR (up to O(op*partitions)).
       SetVector<Operation *> deadOps;
       for (auto &op : *block) {
-        if (op.hasTrait<OpTrait::IsTerminator>()) continue;
+        if (op.hasTrait<OpTrait::IsTerminator>())
+          continue;
         for (auto &partitionBuilder : partitionBuilders) {
           partitionBuilder.visit(&op);
         }
@@ -327,14 +336,14 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<InterfacePass<CallableOpInterface>>
 createScheduleExecutionPass() {
   return std::make_unique<ScheduleExecutionPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/SpecializeDispatches.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/SpecializeDispatches.cpp
index 4d66449..97cddb0 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/SpecializeDispatches.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/SpecializeDispatches.cpp
@@ -54,9 +54,9 @@
 // Builds a constant table composed of unique per-dispatch constant values.
 // Each dispatch gets a row in the table that can be selected based on the
 // dispatch ordinal.
-static ConstantTable buildConstantTable(
-    mlir::func::FuncOp funcOp,
-    SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
+static ConstantTable
+buildConstantTable(mlir::func::FuncOp funcOp,
+                   SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps) {
   auto anyDispatchOp = dispatchOps.front();
   unsigned operandCount = anyDispatchOp.getUniformOperands().size();
 
@@ -64,7 +64,8 @@
   llvm::BitVector constantOperandMap(operandCount, /*t=*/true);
   for (auto dispatchOp : dispatchOps) {
     for (unsigned idx = 0; idx < operandCount; ++idx) {
-      if (!constantOperandMap.test(idx)) continue;
+      if (!constantOperandMap.test(idx))
+        continue;
       auto value = dispatchOp.getUniformOperands()[idx];
       Attribute constantValue;
       if (!matchPattern(value, m_Constant(&constantValue))) {
@@ -85,7 +86,8 @@
   DenseMap<Type, ConstantSet> typeSets;
   SmallVector<Type> typeOrder;
   for (unsigned idx = 0; idx < operandCount; ++idx) {
-    if (!constantOperandMap.test(idx)) continue;
+    if (!constantOperandMap.test(idx))
+      continue;
     auto operandType = anyDispatchOp.getUniformOperands()[idx].getType();
     auto &set = typeSets[operandType];
     if (!set.type) {
@@ -280,18 +282,20 @@
 // Since we've already deduplicated things we (in theory) don't have to worry
 // about introducing divergence. There's potential for later deduping to happen
 // while performing a second round of specialization per-backend.
-static void specializeDispatches(
-    IREE::Stream::ExecutableOp executableOp,
-    IREE::Stream::ExecutableExportOp exportOp,
-    SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps,
-    MemoizedCmdConstants &memoizedConstants) {
-  if (dispatchOps.empty()) return;  // no-op if no dispatches
+static void
+specializeDispatches(IREE::Stream::ExecutableOp executableOp,
+                     IREE::Stream::ExecutableExportOp exportOp,
+                     SmallVector<IREE::Stream::CmdDispatchOp> &dispatchOps,
+                     MemoizedCmdConstants &memoizedConstants) {
+  if (dispatchOps.empty())
+    return; // no-op if no dispatches
 
   auto funcOp = exportOp.lookupFunctionRef();
 
   // Build a constant table for unique per-dispatch constant values.
   auto constantTable = buildConstantTable(funcOp, dispatchOps);
-  if (constantTable.coveredOperands.none()) return;
+  if (constantTable.coveredOperands.none())
+    return;
 
   LLVM_DEBUG({
     AsmState asmState(executableOp->getParentOp());
@@ -327,7 +331,7 @@
 
 class SpecializeDispatchesPass
     : public SpecializeDispatchesBase<SpecializeDispatchesPass> {
- public:
+public:
   SpecializeDispatchesPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -363,14 +367,14 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createSpecializeDispatchesPass() {
   return std::make_unique<SpecializeDispatchesPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyAsyncAccessRanges.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyAsyncAccessRanges.cpp
index f447ed9..bc94f5a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyAsyncAccessRanges.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyAsyncAccessRanges.cpp
@@ -24,15 +24,17 @@
 namespace {
 
 static std::optional<int64_t> matchConstant(Value value) {
-  if (!value) return std::nullopt;
+  if (!value)
+    return std::nullopt;
   APInt constant;
-  if (!matchPattern(value, m_ConstantInt(&constant))) return std::nullopt;
+  if (!matchPattern(value, m_ConstantInt(&constant)))
+    return std::nullopt;
   return constant.getSExtValue();
 }
 
-static LogicalResult verifyAsyncAccessRange(
-    IREE::Stream::AsyncAccessOpInterface accessOp,
-    IREE::Stream::AsyncAccessRange &range) {
+static LogicalResult
+verifyAsyncAccessRange(IREE::Stream::AsyncAccessOpInterface accessOp,
+                       IREE::Stream::AsyncAccessRange &range) {
   auto start = matchConstant(range.start);
   auto length = matchConstant(range.length);
   auto end = matchConstant(range.end);
@@ -94,8 +96,8 @@
 
 // Statically verifies that the ranges used by |accessOp| are in bounds.
 // Emits errors for all ranges declared on the op that are invalid.
-static LogicalResult verifyAsyncAccessOp(
-    IREE::Stream::AsyncAccessOpInterface accessOp) {
+static LogicalResult
+verifyAsyncAccessOp(IREE::Stream::AsyncAccessOpInterface accessOp) {
   SmallVector<AsyncAccessRange> ranges;
   accessOp.getAsyncAccessRanges(ranges);
   bool allSucceeded = true;
@@ -109,7 +111,7 @@
 
 class VerifyAsyncAccessRangesPass
     : public VerifyAsyncAccessRangesBase<VerifyAsyncAccessRangesPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Stream::StreamDialect>();
   }
@@ -130,14 +132,14 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createVerifyAsyncAccessRangesPass() {
   return std::make_unique<VerifyAsyncAccessRangesPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyLowerings.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyLowerings.cpp
index 7004495..99ad8a2 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyLowerings.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/VerifyLowerings.cpp
@@ -31,7 +31,7 @@
 //===----------------------------------------------------------------------===//
 
 class Verifier {
- public:
+public:
   enum class Legality {
     LEGAL,
     RECURSIVELY_LEGAL,
@@ -103,29 +103,31 @@
       // Check for op legality - can skip the expensive work if known-illegal.
       auto legality = getOpLegality(op);
       switch (legality) {
-        case Legality::LEGAL:
-          // Op itself is legal but may not have valid operands/results.
-          break;
-        case Legality::RECURSIVELY_LEGAL:
-          // If the entire op w/ nested ops is legal then skip.
-          return WalkResult::skip();
-        default:
-        case Legality::ILLEGAL:
-          // Early-exit on illegal ops without recursing.
-          emitIllegalOpError(op);
-          foundAnyIllegal = true;
-          return WalkResult::skip();
+      case Legality::LEGAL:
+        // Op itself is legal but may not have valid operands/results.
+        break;
+      case Legality::RECURSIVELY_LEGAL:
+        // If the entire op w/ nested ops is legal then skip.
+        return WalkResult::skip();
+      default:
+      case Legality::ILLEGAL:
+        // Early-exit on illegal ops without recursing.
+        emitIllegalOpError(op);
+        foundAnyIllegal = true;
+        return WalkResult::skip();
       }
 
       // Check types for operands/results.
       for (auto operandType : llvm::enumerate(op->getOperandTypes())) {
-        if (isTypeLegal(operandType.value())) continue;
+        if (isTypeLegal(operandType.value()))
+          continue;
         emitIllegalTypeError(op, "operand", operandType.index(),
                              operandType.value());
         foundAnyIllegal = true;
       }
       for (auto resultType : llvm::enumerate(op->getResultTypes())) {
-        if (isTypeLegal(resultType.value())) continue;
+        if (isTypeLegal(resultType.value()))
+          continue;
         emitIllegalTypeError(op, "result", resultType.index(),
                              resultType.value());
         foundAnyIllegal = true;
@@ -136,7 +138,7 @@
     return success(!foundAnyIllegal);
   }
 
- private:
+private:
   Legality getOpLegality(Operation *op) {
     auto opName = op->getName();
 
@@ -251,7 +253,7 @@
 namespace {
 
 class VerifyInputPass : public VerifyInputBase<VerifyInputPass> {
- public:
+public:
   VerifyInputPass() = default;
 
   void runOnOperation() override {
@@ -269,7 +271,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createVerifyInputPass() {
   return std::make_unique<VerifyInputPass>();
@@ -298,7 +300,7 @@
 
 class VerifyLoweringToTensorsPass
     : public VerifyLoweringToTensorsBase<VerifyLoweringToTensorsPass> {
- public:
+public:
   VerifyLoweringToTensorsPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -319,7 +321,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createVerifyLoweringToTensorsPass() {
@@ -334,7 +336,7 @@
 
 class VerifyLoweringToAsyncPass
     : public VerifyLoweringToAsyncBase<VerifyLoweringToAsyncPass> {
- public:
+public:
   VerifyLoweringToAsyncPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -368,7 +370,8 @@
           }
 
           // Allow metadata ops outside of execution regions.
-          if (op.isMetadata()) return Verifier::Legality::LEGAL;
+          if (op.isMetadata())
+            return Verifier::Legality::LEGAL;
 
           // TODO(benvanik): execution region interface to make this generic.
           if (!op->template getParentOfType<IREE::Stream::AsyncExecuteOp>()) {
@@ -385,7 +388,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
 createVerifyLoweringToAsyncPass() {
@@ -400,7 +403,7 @@
 
 class VerifyLoweringToCmdPass
     : public VerifyLoweringToCmdBase<VerifyLoweringToCmdPass> {
- public:
+public:
   VerifyLoweringToCmdPass() = default;
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -429,13 +432,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createVerifyLoweringToCmdPass() {
   return std::make_unique<VerifyLoweringToCmdPass>();
 }
 
-}  // namespace Stream
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Stream
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.cpp
index dd1099d..318fd8a 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.cpp
@@ -76,7 +76,7 @@
   return str;
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.h
index e87d27a..8b82be4 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.h
@@ -26,7 +26,7 @@
 class ConstantAttributePVS
     : public DFX::StateWrapper<DFX::PotentialValuesState<Attribute>,
                                DFX::ValueElement> {
- public:
+public:
   using BaseType = DFX::StateWrapper<DFX::PotentialValuesState<Attribute>,
                                      DFX::ValueElement>;
   using BaseType::BaseType;
@@ -46,14 +46,14 @@
 
   const std::string getAsStr(AsmState &asmState) const override;
 
- private:
+private:
   void initializeValue(Value value, DFX::Solver &solver) override;
   ChangeStatus updateValue(Value value, DFX::Solver &solver) override;
 };
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_IREE_UTIL_ANALYSIS_ATTRIBUTES_POTENTIAL_VALUES_H_
+#endif // IREE_COMPILER_DIALECT_IREE_UTIL_ANALYSIS_ATTRIBUTES_POTENTIAL_VALUES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.cpp
index fc0614e..2582214 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.cpp
@@ -9,7 +9,7 @@
 #include "iree/compiler/Dialect/Util/Analysis/Attributes/PotentialValues.h"
 #include "llvm/Support/Debug.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Linalg/IR/Linalg.h"  // TODO: Remove
+#include "mlir/Dialect/Linalg/IR/Linalg.h" // TODO: Remove
 #include "mlir/Dialect/Math/IR/Math.h"
 
 #define DEBUG_TYPE "iree-util-attributes"
@@ -46,19 +46,20 @@
 }
 
 std::string FloatRangeStats::getAsStr(AsmState &asmState) const {
-  if (!valid) return std::string("<<INVALID>>");
+  if (!valid)
+    return std::string("<<INVALID>>");
   std::string s("[");
   s += std::to_string(minValue);
   s += ", ";
   s += std::to_string(maxValue);
   s += ", ";
   switch (truncationFlag) {
-    case TRUNC_UNKNOWN:
-      s += "!trunc";
-      break;
-    case TRUNC:
-      s += "TRUNC";
-      break;
+  case TRUNC_UNKNOWN:
+    s += "!trunc";
+    break;
+  case TRUNC:
+    s += "TRUNC";
+    break;
   }
   s += "]";
   return s;
@@ -262,7 +263,7 @@
   return s;
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.h
index bd32c35..37ed030 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Attributes/Range.h
@@ -139,7 +139,7 @@
     assumed += rhsAssumed;
   }
 
- private:
+private:
   FloatRangeStats assumed = FloatRangeStats::getInvalid();
   FloatRangeStats known = FloatRangeStats::getWidest();
 };
@@ -147,7 +147,7 @@
 // Attribute known floating point range and flags to an IR Value.
 class FloatRangeValueElement
     : public DFX::StateWrapper<FloatRangeState, DFX::ValueElement> {
- public:
+public:
   using BaseType = DFX::StateWrapper<FloatRangeState, DFX::ValueElement>;
   using BaseType::BaseType;
 
@@ -167,12 +167,12 @@
   }
   const std::string getAsStr(AsmState &asmState) const override;
 
- private:
+private:
   void initializeValue(Value value, DFX::Solver &solver) override;
   ChangeStatus updateValue(Value value, DFX::Solver &solver) override;
 };
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.cpp
index a24486f..8c49e80 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.cpp
@@ -28,12 +28,13 @@
 namespace {
 OpOperand *findOperandFor(Operation *op, Value input) {
   for (OpOperand &operand : op->getOpOperands()) {
-    if (operand.get() == input) return &operand;
+    if (operand.get() == input)
+      return &operand;
   }
   return nullptr;
 }
 
-}  // namespace
+} // namespace
 
 bool ConstExprAnalysis::ConstValueInfo::hasNonAnalyzedConsumer() const {
   // The analysis cannot represent zero-result operations, so detect that
@@ -53,11 +54,14 @@
   // Populate the constant roots for globals.
   explorer.forEachGlobal([&](const Explorer::GlobalInfo *info) {
     // Rely on globals having been canonicalized to immutable correctly.
-    if (info->op.isGlobalMutable()) return;
-    if (info->isIndirect) return;
+    if (info->op.isGlobalMutable())
+      return;
+    if (info->isIndirect)
+      return;
     for (auto *use : info->uses) {
       auto loadOp = llvm::dyn_cast<GlobalLoadOp>(use);
-      if (!loadOp) continue;
+      if (!loadOp)
+        continue;
       constantRoots[loadOp.getResult()] = loadOp;
     }
   });
@@ -98,7 +102,8 @@
     iterWorklist.clear();
     iterWorklist.swap(worklist);
     for (ConstValueInfo *info : iterWorklist) {
-      if (info->state != ConstValueInfo::UNKNOWN) continue;
+      if (info->state != ConstValueInfo::UNKNOWN)
+        continue;
       bool allConstants = true;
       for (ConstValueInfo *producerInfo : info->producers) {
         if (producerInfo->state == ConstValueInfo::UNKNOWN) {
@@ -151,8 +156,8 @@
   }
 }
 
-ConstExprAnalysis::ConstValueInfo *ConstExprAnalysis::addInfo(
-    Value constValue) {
+ConstExprAnalysis::ConstValueInfo *
+ConstExprAnalysis::addInfo(Value constValue) {
   auto info = std::make_unique<ConstValueInfo>(constValue);
   constInfoMap[constValue] = info.get();
   allocedConstInfos.push_back(std::move(info));
@@ -163,7 +168,8 @@
   ConstExprOpInfo opInfo = ConstExprOpInfo::getForOp(op);
   for (auto result : op->getResults()) {
     auto foundIt = constInfoMap.find(result);
-    if (foundIt != constInfoMap.end()) continue;
+    if (foundIt != constInfoMap.end())
+      continue;
 
     // Generate new info record.
     auto *valueInfo = addInfo(result);
@@ -198,7 +204,8 @@
 void ConstExprAnalysis::print(raw_ostream &os) const {
   os << "\nFOUND CONSTANTS:\n----------------\n";
   for (auto &info : allocedConstInfos) {
-    if (info->state != ConstValueInfo::CONSTANT || info->isRoot) continue;
+    if (info->state != ConstValueInfo::CONSTANT || info->isRoot)
+      continue;
     if (!info->roots.empty()) {
       os << "\n::" << info->constValue << "\n";
       os << "    WITH ROOTS:\n";
@@ -260,7 +267,8 @@
     bool madeChange = false;
     for (auto *info : worklist) {
       Decision *decision = getDecision(info);
-      if (decision->getOutcome() != UNDECIDED) continue;
+      if (decision->getOutcome() != UNDECIDED)
+        continue;
       makeDecision(info, decision);
 
       if (decision->getOutcome() != UNDECIDED) {
@@ -321,7 +329,8 @@
   if (!hasLegalEscape) {
     for (auto *consumerInfo : info->consumers) {
       Decision *consumerDecision = getDecision(consumerInfo);
-      if (consumerDecision->getOutcome() != DISABLE_HOIST) continue;
+      if (consumerDecision->getOutcome() != DISABLE_HOIST)
+        continue;
 
       Operation *consumerOp = consumerInfo->getOperation();
       OpOperand *consumerOperand = findOperandFor(consumerOp, info->constValue);
@@ -346,7 +355,7 @@
   decision->enableHoist();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.h
index 91ff33b..65d7ab8 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/ConstExpr.h
@@ -26,7 +26,7 @@
 //
 // Modifying any of the analyzed operations invalidates this analysis.
 class ConstExprAnalysis {
- public:
+public:
   struct ConstValueInfo;
   explicit ConstExprAnalysis(Operation *rootOp);
 
@@ -42,7 +42,8 @@
   // an operation's results will either all be const-expr or not, so we just
   // check the first. 0-result ops cannot be const-expr.
   const ConstValueInfo *lookup(Operation *queryOp) const {
-    if (queryOp->getNumResults() == 0) return nullptr;
+    if (queryOp->getNumResults() == 0)
+      return nullptr;
     return lookup(queryOp->getResult(0));
   }
 
@@ -51,7 +52,8 @@
   // existing constants returns false.
   bool isConstExprValue(Value queryValue) const {
     ConstValueInfo *found = constInfoMap.lookup(queryValue);
-    if (!found) return false;
+    if (!found)
+      return false;
     return found->state == ConstValueInfo::CONSTANT && !found->isRoot;
   }
 
@@ -59,7 +61,8 @@
   // an operation's results will either all be const-expr or not, so we just
   // check the first. 0-result ops cannot be const-expr.
   bool isConstExprOperation(Operation *queryOp) const {
-    if (queryOp->getNumResults() == 0) return false;
+    if (queryOp->getNumResults() == 0)
+      return false;
     return isConstExprValue(queryOp->getResult(0));
   }
 
@@ -124,7 +127,7 @@
     }
   };
 
- private:
+private:
   // Expands the frontier to include all results of a given op in an UNKNOWN
   // state. This also checks that all of its operands are known, adding
   // them recusrively if not.
@@ -159,7 +162,7 @@
 //
 // The default base class will hoist everything that is eligible.
 class ConstExprHoistingPolicy {
- public:
+public:
   using Worklist = llvm::SmallVector<const ConstExprAnalysis::ConstValueInfo *>;
   enum Outcome {
     UNDECIDED = 0,
@@ -167,7 +170,7 @@
     DISABLE_HOIST = 2,
   };
   class Decision {
-   public:
+  public:
     void disableHoist() {
       assert(outcome == UNDECIDED &&
              "can only disable hoisting of an undecided decision");
@@ -181,7 +184,7 @@
 
     Outcome getOutcome() const { return outcome; }
 
-   private:
+  private:
     Outcome outcome = UNDECIDED;
   };
 
@@ -191,7 +194,7 @@
     return &decisions[info];
   }
 
- private:
+private:
   // At initialization time, makes any fixed decisions. This hook can only
   // make decisions that do not depend on any const-exprs outside of what is
   // passed.
@@ -217,9 +220,9 @@
   return os;
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_IREE_UTIL_ANALYSIS_CONSTANT_CONST_EXPR_H_
+#endif // IREE_COMPILER_DIALECT_IREE_UTIL_ANALYSIS_CONSTANT_CONST_EXPR_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp
index 1d7654f..195bced 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp
@@ -48,7 +48,7 @@
   return info;
 }
 
-}  // namespace
+} // namespace
 
 void registerConstExprDependentDialects(DialectRegistry &registry) {
   registry.insert<IREE::Util::UtilDialect>();
@@ -158,7 +158,7 @@
   return true;
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.h
index 343d04e..3c81f3b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.h
@@ -54,9 +54,9 @@
 // This is used to exclude certain operands that we never want in globals.
 bool isHoistableConstExprConsumingOperand(OpOperand *operand);
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_IREE_UTIL_ANALYSIS_CONSTANT_OP_ORACLE_H_
+#endif // IREE_COMPILER_DIALECT_IREE_UTIL_ANALYSIS_CONSTANT_OP_ORACLE_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.cpp
index 16ab385..33cb73f 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.cpp
@@ -29,7 +29,7 @@
   }
 };
 
-}  // end namespace llvm
+} // end namespace llvm
 
 namespace mlir {
 namespace iree_compiler {
@@ -51,11 +51,12 @@
 
   std::error_code ec;
   llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::OF_TextWithCRLF);
-  if (!ec) llvm::WriteGraph(file, this);
+  if (!ec)
+    llvm::WriteGraph(file, this);
 
   callTimes++;
 }
 
-}  // namespace DFX
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace DFX
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.h
index cf2c526..d8471c6 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/DepGraph.h
@@ -33,12 +33,12 @@
 
 // The data structure for the nodes of a dependency graph
 class DepGraphNode {
- public:
+public:
   using DepTy = llvm::PointerIntPair<DepGraphNode *, 1>;
 
   virtual ~DepGraphNode() = default;
 
- protected:
+protected:
   // Set of dependency graph nodes which should be updated if this one
   // is updated. The bit encodes if it is optional.
   TinyPtrVector<DepTy> deps;
@@ -50,7 +50,7 @@
 
   operator AbstractElement *() { return cast<AbstractElement>(this); }
 
- public:
+public:
   using iterator = llvm::mapped_iterator<TinyPtrVector<DepTy>::iterator,
                                          decltype(&DepGetVal)>;
   using aaiterator = llvm::mapped_iterator<TinyPtrVector<DepTy>::iterator,
@@ -99,9 +99,9 @@
   AsmState &asmState;
 };
 
-}  // namespace DFX
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace DFX
+} // namespace iree_compiler
+} // namespace mlir
 
 namespace llvm {
 
@@ -141,6 +141,6 @@
   static nodes_iterator nodes_end(DFXDepGraph *graph) { return graph->end(); }
 };
 
-}  // end namespace llvm
+} // end namespace llvm
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_DEPGRAPH_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_DEPGRAPH_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.cpp
index 13272c2..6b02035 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.cpp
@@ -17,7 +17,8 @@
 
 ChangeStatus AbstractElement::update(Solver &solver) {
   ChangeStatus changeStatus = ChangeStatus::UNCHANGED;
-  if (getState().isAtFixpoint()) return changeStatus;
+  if (getState().isAtFixpoint())
+    return changeStatus;
 
   LLVM_DEBUG({
     llvm::dbgs() << "[Solver] updating: ";
@@ -75,6 +76,6 @@
   print(llvm::dbgs(), asmState);
 }
 
-}  // namespace DFX
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace DFX
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.h
index 281b1c0..ddf8f3a 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Element.h
@@ -27,7 +27,7 @@
 // Each element represents some assumed and known knowledge anchored on a
 // specific position in the IR such as a Value or Operation.
 class AbstractElement : public Position, public DepGraphNode {
- public:
+public:
   using StateType = AbstractState;
 
   AbstractElement(const Position &pos) : Position(pos) {}
@@ -74,7 +74,7 @@
 
   friend class Solver;
 
- protected:
+protected:
   // Hook for the solver to trigger an update of the internal state.
   //
   // If this attribute is already fixed this method will return UNCHANGED,
@@ -145,7 +145,8 @@
   ChangeStatus updateImpl(Solver &solver) override {
     if (isOperation()) {
       auto op = dyn_cast<OpT>(getOperation());
-      if (op) return updateOperation(op, solver);
+      if (op)
+        return updateOperation(op, solver);
     }
     return getState().indicatePessimisticFixpoint();
   }
@@ -184,8 +185,8 @@
   virtual ChangeStatus updateValue(Value value, Solver &solver) = 0;
 };
 
-}  // namespace DFX
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace DFX
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_ELEMENT_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_ELEMENT_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.cpp
index 83311cb..b898a20 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.cpp
@@ -116,7 +116,8 @@
 
       // Use the invalidElements vector to propagate invalid states fast
       // transitively without requiring updates.
-      if (!elementState.isValidState()) invalidElements.insert(element);
+      if (!elementState.isValidState())
+        invalidElements.insert(element);
     }
 
     // Add elements to the changed set if they have been created in the last
@@ -142,7 +143,8 @@
   SmallPtrSet<AbstractElement *, 32> visitedElements;
   for (size_t i = 0; i < changedElements.size(); i++) {
     auto *changedElement = changedElements[i];
-    if (!visitedElements.insert(changedElement).second) continue;
+    if (!visitedElements.insert(changedElement).second)
+      continue;
 
     auto &elementState = changedElement->getState();
     if (!elementState.isAtFixpoint()) {
@@ -184,7 +186,8 @@
     // will not change and we can indicate that right away.
     elementState.indicateOptimisticFixpoint();
   }
-  if (!elementState.isAtFixpoint()) rememberDependences();
+  if (!elementState.isAtFixpoint())
+    rememberDependences();
 
   // Verify the stack is balanced by ensuring we pop the vector we pushed above.
   auto *poppedDependencies = dependenceStack.pop_back_val();
@@ -198,12 +201,15 @@
 void Solver::recordDependence(const AbstractElement &fromElement,
                               const AbstractElement &toElement,
                               Resolution resolution) {
-  if (resolution == Resolution::NONE) return;
+  if (resolution == Resolution::NONE)
+    return;
   // If we are outside of an update, thus before the actual fixpoint iteration
   // started (= when we create elements), we do not track dependences because we
   // will put all elements into the initial worklist anyway.
-  if (dependenceStack.empty()) return;
-  if (fromElement.getState().isAtFixpoint()) return;
+  if (dependenceStack.empty())
+    return;
+  if (fromElement.getState().isAtFixpoint())
+    return;
   dependenceStack.back()->push_back({&fromElement, &toElement, resolution});
 }
 
@@ -225,6 +231,6 @@
 
 void Solver::dumpGraph() { depGraph.dumpGraph(); }
 
-}  // namespace DFX
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace DFX
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.h
index 9e7a7a9..7ae92a8 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/Solver.h
@@ -44,14 +44,12 @@
 // while it is in-use. Modifying the IR invalidates the state and may lead to
 // crashes as pointer references into the IR structure are retained.
 class Solver {
- public:
+public:
   // Creates a solver that uses |explorer| for walking the IR tree and
   // |allocator| for transient allocations of abstract elements.
   explicit Solver(Explorer &explorer, llvm::BumpPtrAllocator &allocator)
-      : explorer(explorer),
-        asmState(explorer.getAsmState()),
-        allocator(allocator),
-        depGraph(explorer.getAsmState()) {}
+      : explorer(explorer), asmState(explorer.getAsmState()),
+        allocator(allocator), depGraph(explorer.getAsmState()) {}
   ~Solver();
 
   // Initialized explorer for walking the IR.
@@ -96,11 +94,10 @@
   //
   // NOTE: |forceUpdate| is ignored in any stage other than the update stage.
   template <typename ElementT>
-  const ElementT &getOrCreateElementFor(Position pos,
-                                        const AbstractElement *queryingElement,
-                                        Resolution resolution,
-                                        bool forceUpdate = false,
-                                        bool updateAfterInit = true) {
+  const ElementT &
+  getOrCreateElementFor(Position pos, const AbstractElement *queryingElement,
+                        Resolution resolution, bool forceUpdate = false,
+                        bool updateAfterInit = true) {
     if (auto *elementPtr =
             lookupElementFor<ElementT>(pos, queryingElement, resolution,
                                        /*allowInvalidState=*/true)) {
@@ -173,7 +170,8 @@
     // Lookup the abstract element of type ElementT and if found return it after
     // registering a dependence of queryingElement on the one returned element.
     auto *elementPtr = elementMap.lookup({&ElementT::ID, pos});
-    if (!elementPtr) return nullptr;
+    if (!elementPtr)
+      return nullptr;
     auto *element = static_cast<ElementT *>(elementPtr);
 
     // Do not register a dependence on an element with an invalid state.
@@ -245,7 +243,7 @@
   // Dumps a .dot of the constraint dependency graph to a file.
   void dumpGraph();
 
- protected:
+protected:
   friend DepGraph;
 
   Explorer &explorer;
@@ -311,8 +309,8 @@
   SmallVector<DependenceVector *, 16> dependenceStack;
 };
 
-}  // namespace DFX
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace DFX
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_SOLVER_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_SOLVER_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.cpp
index a6f3c15..f75a093 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.cpp
@@ -25,12 +25,14 @@
   if (!S.isValidState()) {
     os << "full-set";
   } else {
-    for (auto &it : S.getAssumedSet()) os << it << ", ";
-    if (S.isUndefContained()) os << "undef ";
+    for (auto &it : S.getAssumedSet())
+      os << it << ", ";
+    if (S.isUndefContained())
+      os << "undef ";
   }
   os << "} >)";
   return os;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.h
index 1ad543d..dc87bc9 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/DFX/State.h
@@ -116,13 +116,13 @@
   // Returns the assumed state encoding.
   base_t getAssumed() const { return assumed; }
 
-  bool operator==(
-      const IntegerStateBase<base_t, BestState, WorstState> &rhs) const {
+  bool
+  operator==(const IntegerStateBase<base_t, BestState, WorstState> &rhs) const {
     return this->getAssumed() == rhs.getAssumed() &&
            this->getKnown() == rhs.getKnown();
   }
-  bool operator!=(
-      const IntegerStateBase<base_t, BestState, WorstState> &rhs) const {
+  bool
+  operator!=(const IntegerStateBase<base_t, BestState, WorstState> &rhs) const {
     return !(*this == rhs);
   }
 
@@ -148,7 +148,7 @@
     joinAND(rhs.getAssumed(), rhs.getKnown());
   }
 
- protected:
+protected:
   // Handles a new known value |value|. Subtype dependent.
   virtual void handleNewKnownValue(base_t value) = 0;
 
@@ -194,12 +194,14 @@
   // Sets the assumed value to |value| but never below the known one.
   void setAssumed(bool value) { assumed &= (known | value); }
 
- private:
+private:
   void handleNewKnownValue(base_t value) override {
-    if (value) known = (assumed = value);
+    if (value)
+      known = (assumed = value);
   }
   void handleNewAssumedValue(base_t value) override {
-    if (!value) assumed = known;
+    if (!value)
+      assumed = known;
   }
 
   void joinOR(base_t assumedValue, base_t knownValue) override {
@@ -260,7 +262,7 @@
     return intersectAssumedBits(~BitsEncoding);
   }
 
- private:
+private:
   void handleNewKnownValue(base_t value) override { addKnownBits(value); }
   void handleNewAssumedValue(base_t value) override {
     intersectAssumedBits(value);
@@ -295,8 +297,8 @@
 
   // Returns the best possible representable state.
   static constexpr base_t getBestState() { return BestState; }
-  static constexpr base_t getBestState(
-      const IncIntegerState<BaseTy, BestState, WorstState> &) {
+  static constexpr base_t
+  getBestState(const IncIntegerState<BaseTy, BestState, WorstState> &) {
     return getBestState();
   }
 
@@ -315,7 +317,7 @@
     return *this;
   }
 
- private:
+private:
   void handleNewKnownValue(base_t value) override { takeKnownMaximum(value); }
   void handleNewAssumedValue(base_t value) override {
     takeAssumedMinimum(value);
@@ -357,7 +359,7 @@
     return *this;
   }
 
- private:
+private:
   void handleNewKnownValue(base_t value) override { takeKnownMinimum(value); }
   void handleNewAssumedValue(base_t value) override {
     takeAssumedMaximum(value);
@@ -422,9 +424,12 @@
   }
 
   bool operator==(const PotentialValuesState &rhs) const {
-    if (isValidState() != rhs.isValidState()) return false;
-    if (!isValidState() && !rhs.isValidState()) return true;
-    if (isUndefContained() != rhs.isUndefContained()) return false;
+    if (isValidState() != rhs.isValidState())
+      return false;
+    if (!isValidState() && !rhs.isValidState())
+      return true;
+    if (isUndefContained() != rhs.isUndefContained())
+      return false;
     return set == rhs.getAssumedSet();
   }
 
@@ -466,7 +471,7 @@
     return *this;
   }
 
- private:
+private:
   // Checks the size of this set and invalidates when the size exceeds the
   // specified maxPotentialValues threshold.
   void checkAndInvalidate() {
@@ -483,7 +488,8 @@
 
   // Inserts an element into this set.
   void insert(const MemberTy &c) {
-    if (!isValidState()) return;
+    if (!isValidState())
+      return;
     set.insert(c);
     checkAndInvalidate();
   }
@@ -491,13 +497,15 @@
   // Takes union with |rhs|.
   void unionWith(const PotentialValuesState &rhs) {
     // If this is a full set, do nothing.
-    if (!isValidState()) return;
+    if (!isValidState())
+      return;
     // If rhs is full set, change L to a full set.
     if (!rhs.isValidState()) {
       indicatePessimisticFixpoint();
       return;
     }
-    for (const MemberTy &c : rhs.set) set.insert(c);
+    for (const MemberTy &c : rhs.set)
+      set.insert(c);
     undefIsContained |= rhs.isUndefContained();
     checkAndInvalidate();
   }
@@ -511,7 +519,8 @@
   // Takes intersection with |rhs|.
   void intersectWith(const PotentialValuesState &rhs) {
     // If rhs is a full set, do nothing.
-    if (!rhs.isValidState()) return;
+    if (!rhs.isValidState())
+      return;
     // If this is a full set, change this to rhs.
     if (!isValidState()) {
       *this = rhs;
@@ -519,7 +528,8 @@
     }
     SetTy intersectSet;
     for (const MemberTy &c : set) {
-      if (rhs.set.count(c)) intersectSet.insert(c);
+      if (rhs.set.count(c))
+        intersectSet.insert(c);
     }
     set = intersectSet;
     undefIsContained &= rhs.isUndefContained();
@@ -569,7 +579,7 @@
   const StateType &getState() const override { return *this; }
 };
 
-}  // namespace DFX
+} // namespace DFX
 
 //===----------------------------------------------------------------------===//
 // Debugging utilities
@@ -581,17 +591,18 @@
                               const DFX::AbstractState &state);
 
 template <typename base_ty, base_ty BestState, base_ty WorstState>
-llvm::raw_ostream &operator<<(
-    llvm::raw_ostream &os,
-    const DFX::IntegerStateBase<base_ty, BestState, WorstState> &state) {
+llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const DFX::IntegerStateBase<base_ty, BestState, WorstState> &state) {
   return os << "(" << state.getKnown() << "-" << state.getAssumed() << ")"
             << static_cast<const DFX::AbstractState &>(state);
 }
 
-llvm::raw_ostream &operator<<(
-    llvm::raw_ostream &os, const DFX::PotentialConstantIntValuesState &state);
+llvm::raw_ostream &
+operator<<(llvm::raw_ostream &os,
+           const DFX::PotentialConstantIntValuesState &state);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_STATE_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_DFX_STATE_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp
index 0f6bc79..dc7120a 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp
@@ -33,7 +33,8 @@
   // I don't know if there's a better way to do this - the interface doesn't
   // help.
   auto operandRange = branchOp.getSuccessorOperands(successorIdx);
-  if (operandRange.empty()) return std::nullopt;
+  if (operandRange.empty())
+    return std::nullopt;
   unsigned beginIdx =
       operandRange.getForwardedOperands().getBeginOperandIndex();
   if (operandIdx >= beginIdx && operandIdx < beginIdx + operandRange.size()) {
@@ -46,15 +47,15 @@
 Explorer::Explorer(Operation *rootOp, TraversalAction defaultAction)
     : rootOp(rootOp),
       asmState(rootOp, OpPrintingFlags().elideLargeElementsAttrs()),
-      callGraph(rootOp),
-      defaultAction(defaultAction),
+      callGraph(rootOp), defaultAction(defaultAction),
       analysisManager(rootOp, /*passInstrumentor=*/nullptr) {}
 
 Explorer::~Explorer() = default;
 
 TraversalAction Explorer::getTraversalAction(Operation *op) {
   auto opIt = opActions.find(op->getName());
-  if (opIt != opActions.end()) return opIt->second;
+  if (opIt != opActions.end())
+    return opIt->second;
   auto *dialect = op->getDialect();
   if (!dialect) {
     // Unregistered dialect/op - ignore.
@@ -66,7 +67,8 @@
     return TraversalAction::IGNORE;
   }
   auto dialectIt = dialectActions.find(dialect->getNamespace());
-  if (dialectIt != dialectActions.end()) return dialectIt->second;
+  if (dialectIt != dialectActions.end())
+    return dialectIt->second;
   return defaultAction;
 }
 
@@ -96,11 +98,13 @@
   // today we don't yet have the actions specified so we can't.
 
   auto allUses = symbolTable.getSymbolUses(&symbolTableOp->getRegion(0));
-  if (!allUses.has_value()) return;
+  if (!allUses.has_value())
+    return;
   for (auto use : allUses.value()) {
     auto *symbolOp =
         symbolTable.lookupNearestSymbolFrom(use.getUser(), use.getSymbolRef());
-    if (!isa_and_nonnull<IREE::Util::GlobalOpInterface>(symbolOp)) continue;
+    if (!isa_and_nonnull<IREE::Util::GlobalOpInterface>(symbolOp))
+      continue;
     auto &globalInfo = globalInfos[symbolOp];
     globalInfo.op = cast<IREE::Util::GlobalOpInterface>(symbolOp);
     if (isa<IREE::Util::GlobalAddressOpInterface>(use.getUser())) {
@@ -131,10 +135,11 @@
   });
 }
 
-const Explorer::GlobalInfo *Explorer::getGlobalInfo(
-    IREE::Util::GlobalOpInterface globalOp) {
+const Explorer::GlobalInfo *
+Explorer::getGlobalInfo(IREE::Util::GlobalOpInterface globalOp) {
   auto it = globalInfos.find(globalOp);
-  if (it == globalInfos.end()) return nullptr;
+  if (it == globalInfos.end())
+    return nullptr;
   return &it->second;
 }
 
@@ -144,9 +149,11 @@
   auto &symbolTable = symbolTables.getSymbolTable(symbolTableOp);
   auto op = symbolTable.lookupNearestSymbolFrom<IREE::Util::GlobalOpInterface>(
       from, StringAttr::get(from->getContext(), globalName));
-  if (!op) return nullptr;
+  if (!op)
+    return nullptr;
   auto it = globalInfos.find(op);
-  if (it == globalInfos.end()) return nullptr;
+  if (it == globalInfos.end())
+    return nullptr;
   return &it->second;
 }
 
@@ -157,7 +164,8 @@
 }
 
 bool Explorer::mayValuesAlias(Value a, Value b) {
-  if (a == b) return true;
+  if (a == b)
+    return true;
   bool mayAlias = false;
   auto traversalResult = walkTransitiveUses(a, [&](OpOperand &value) {
     mayAlias = value.get() == b;
@@ -184,7 +192,8 @@
     LLVM_DEBUG(llvm::dbgs()
                << "? entering scc slice with " << scc.size() << " callables\n");
     for (auto *node : scc) {
-      if (node->isExternal()) continue;
+      if (node->isExternal())
+        continue;
 
       // Ensure we want to step into this region.
       // Note that SCC returns every function like in the whole program,
@@ -192,7 +201,8 @@
       auto &callableRegion = *node->getCallableRegion();
       auto *callableOp = callableRegion.getParentOp();
       auto action = getTraversalAction(callableOp);
-      if (action == TraversalAction::IGNORE) continue;
+      if (action == TraversalAction::IGNORE)
+        continue;
       bool validInPlace = true;
       for (auto *parentOp = callableOp->getParentOp(); parentOp != rootOp;
            parentOp = parentOp->getParentOp()) {
@@ -210,8 +220,10 @@
       LLVM_DEBUG(llvm::dbgs() << "   + entering callable region @"
                               << getRegionName(callableRegion) << "\n");
       auto emitResult = recursiveWalk(callableOp, fn);
-      if (emitResult.wasInterrupted()) break;
-      if (emitResult.wasSkipped()) continue;
+      if (emitResult.wasInterrupted())
+        break;
+      if (emitResult.wasSkipped())
+        continue;
     }
   }
 
@@ -231,8 +243,10 @@
   LLVM_DEBUG(llvm::dbgs() << "  == emitting op " << getOpName(parentOp)
                           << "\n");
   auto emitResult = fn(parentOp);
-  if (emitResult.wasInterrupted()) return WalkResult::interrupt();
-  if (emitResult.wasSkipped()) return WalkResult::advance();
+  if (emitResult.wasInterrupted())
+    return WalkResult::interrupt();
+  if (emitResult.wasSkipped())
+    return WalkResult::advance();
 
   if (parentOp->getNumRegions() == 0 ||
       parentAction != TraversalAction::RECURSE) {
@@ -246,7 +260,8 @@
     for (auto &block : region.getBlocks()) {
       for (auto &op : block) {
         auto opResult = recursiveWalk(&op, fn);
-        if (opResult.wasInterrupted()) return WalkResult::interrupt();
+        if (opResult.wasInterrupted())
+          return WalkResult::interrupt();
       }
     }
   }
@@ -263,7 +278,8 @@
     LLVM_DEBUG(llvm::dbgs()
                << "? entering scc slice with " << scc.size() << " callables\n");
     for (auto *node : scc) {
-      if (node->isExternal()) continue;
+      if (node->isExternal())
+        continue;
 
       // Ensure we want to step into this region.
       // Note that SCC returns every function like in the whole program,
@@ -271,7 +287,8 @@
       auto &callableRegion = *node->getCallableRegion();
       auto *callableOp = callableRegion.getParentOp();
       auto action = getTraversalAction(callableOp);
-      if (action == TraversalAction::IGNORE) continue;
+      if (action == TraversalAction::IGNORE)
+        continue;
       bool validInPlace = true;
       for (auto *parentOp = callableOp->getParentOp(); parentOp != rootOp;
            parentOp = parentOp->getParentOp()) {
@@ -289,8 +306,10 @@
       LLVM_DEBUG(llvm::dbgs() << "   + entering callable region @"
                               << getRegionName(callableRegion) << "\n");
       auto emitResult = recursiveWalkValues(callableOp, visitedValues, fn);
-      if (emitResult.wasInterrupted()) break;
-      if (emitResult.wasSkipped()) continue;
+      if (emitResult.wasInterrupted())
+        break;
+      if (emitResult.wasSkipped())
+        continue;
     }
   }
 
@@ -331,7 +350,8 @@
           result.printAsOperand(llvm::dbgs(), asmState);
           llvm::dbgs() << "\n";
         });
-        if (fn(result).wasInterrupted()) return WalkResult::interrupt();
+        if (fn(result).wasInterrupted())
+          return WalkResult::interrupt();
       }
     }
   }
@@ -359,26 +379,29 @@
               arg.printAsOperand(llvm::dbgs(), asmState);
               llvm::dbgs() << "\n";
             });
-            if (fn(arg).wasInterrupted()) return WalkResult::interrupt();
+            if (fn(arg).wasInterrupted())
+              return WalkResult::interrupt();
           }
         }
       }
       for (auto &op : block) {
         auto opResult = recursiveWalkValues(&op, visitedValues, fn);
-        if (opResult.wasInterrupted()) return WalkResult::interrupt();
+        if (opResult.wasInterrupted())
+          return WalkResult::interrupt();
       }
     }
   }
   return WalkResult::advance();
 }
 
-TraversalResult Explorer::walkIncomingCalls(
-    CallableOpInterface callableOp,
-    std::function<WalkResult(CallOpInterface)> fn) {
+TraversalResult
+Explorer::walkIncomingCalls(CallableOpInterface callableOp,
+                            std::function<WalkResult(CallOpInterface)> fn) {
   auto it = callGraphInv.find(callableOp.getCallableRegion());
   if (it != callGraphInv.end()) {
     for (auto &callOp : it->second) {
-      if (fn(callOp).wasInterrupted()) break;
+      if (fn(callOp).wasInterrupted())
+        break;
     }
   }
   bool isPublic = false;
@@ -422,7 +445,8 @@
       return WalkResult::advance();
     };
     for (auto &region : regionOp->getRegions()) {
-      if (enumerateTerminatorOps(region).wasInterrupted()) break;
+      if (enumerateTerminatorOps(region).wasInterrupted())
+        break;
     }
   } else if (auto parentFuncOp =
                  llvm::dyn_cast<FunctionOpInterface>(parentOp)) {
@@ -443,7 +467,8 @@
               terminatorOp->print(llvm::dbgs(), asmState);
               llvm::dbgs() << "\n";
             });
-            if (fn(terminatorOp).wasInterrupted()) break;
+            if (fn(terminatorOp).wasInterrupted())
+              break;
           }
         }
       }
@@ -550,7 +575,8 @@
        ++successorIdx) {
     auto successorOperandIdx =
         mapSuccessorOperand(branchOp, successorIdx, operandIdx);
-    if (!successorOperandIdx.has_value()) continue;
+    if (!successorOperandIdx.has_value())
+      continue;
     auto *targetBlock = branchOp->getSuccessor(successorIdx);
     auto blockArg = targetBlock->getArgument(*successorOperandIdx);
     if (fn(targetBlock, blockArg).wasInterrupted()) {
@@ -669,7 +695,8 @@
                             << loadOp.getGlobalName() << ":\n");
     for (auto *user : globalInfo->uses) {
       auto storeOp = dyn_cast<IREE::Util::GlobalStoreOpInterface>(user);
-      if (!storeOp) continue;
+      if (!storeOp)
+        continue;
       LLVM_DEBUG({
         llvm::dbgs() << "   + queuing stored value from ";
         storeOp.print(llvm::dbgs(), asmState);
@@ -711,7 +738,8 @@
   do {
     // Pop the next work item; avoiding processing values more than once.
     auto work = worklist.pop_back_val();
-    if (!processedValues.insert(work.getAsOpaquePointer()).second) continue;
+    if (!processedValues.insert(work.getAsOpaquePointer()).second)
+      continue;
 
     LLVM_DEBUG({
       llvm::dbgs() << "   ? working on ";
@@ -741,8 +769,10 @@
     LLVM_DEBUG(llvm::dbgs() << "  == emitting op "
                             << definingOp->getName().getStringRef() << "\n");
     auto fnResult = fn(resultValue);
-    if (fnResult.wasInterrupted()) break;
-    if (fnResult.wasSkipped()) continue;
+    if (fnResult.wasInterrupted())
+      break;
+    if (fnResult.wasSkipped())
+      continue;
 
     // If the op is tied we may need to walk up to the operand the result is
     // tied to.
@@ -931,7 +961,8 @@
                             << storeOp.getGlobalName() << ":\n");
     for (auto *user : globalInfo->uses) {
       auto loadOp = dyn_cast<IREE::Util::GlobalLoadOpInterface>(user);
-      if (!loadOp) continue;
+      if (!loadOp)
+        continue;
       LLVM_DEBUG({
         llvm::dbgs() << "   + queuing loaded value from ";
         loadOp.print(llvm::dbgs(), asmState);
@@ -958,7 +989,8 @@
     // times!).
     for (auto &use : work.getUses()) {
       auto *ownerOp = use.getOwner();
-      if (!processedValues.insert(&use).second) continue;
+      if (!processedValues.insert(&use).second)
+        continue;
 
       auto action = getTraversalAction(ownerOp);
       if (action == TraversalAction::IGNORE) {
@@ -971,7 +1003,8 @@
       // Emit for the op itself.
       LLVM_DEBUG(llvm::dbgs() << "  == emitting op "
                               << ownerOp->getName().getStringRef() << "\n");
-      if (fn(use).wasInterrupted()) break;
+      if (fn(use).wasInterrupted())
+        break;
 
       // If the op is tied we may need to walk down to the results the operand
       // is tied to (multiple results can tie the same operand).
@@ -1036,5 +1069,5 @@
   });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.h
index 8d4772f..bc38918 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.h
@@ -77,7 +77,7 @@
 //
 // TODO(#7389): make this an abstract interface and hide the IREE details.
 class Explorer {
- public:
+public:
   Explorer(Operation *rootOp, TraversalAction defaultAction);
   ~Explorer();
 
@@ -214,9 +214,9 @@
 
   // Walks all of the call ops calling into the given |callableOp|.
   // May be incomplete if there are indirect calls in the program.
-  TraversalResult walkIncomingCalls(
-      CallableOpInterface callableOp,
-      std::function<WalkResult(CallOpInterface)> fn);
+  TraversalResult
+  walkIncomingCalls(CallableOpInterface callableOp,
+                    std::function<WalkResult(CallOpInterface)> fn);
 
   // Walks all return-like (or region terminators to parent) ops in |parentOp|.
   // The operations enumerated will be either ReturnLike or implement
@@ -307,7 +307,7 @@
   // deduplication on the owner of the use.
   TraversalResult walkTransitiveUsers(Value value, OperationWalkFn fn);
 
- private:
+private:
   // Maps callee callable region -> call sites.
   using InverseCallGraph = DenseMap<Region *, SmallVector<CallOpInterface>>;
 
@@ -337,7 +337,7 @@
   ModuleAnalysisManager analysisManager;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_EXPLORER_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_EXPLORER_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.cpp
index 0d17588..8076b70 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.cpp
@@ -14,8 +14,9 @@
                                   llvm::DenseMapInfo<void *>::getEmptyKey(), 0);
 
 // static
-const Position Position::TombstoneKey(
-    ENC_BLOCK, llvm::DenseMapInfo<void *>::getTombstoneKey(), 0);
+const Position
+    Position::TombstoneKey(ENC_BLOCK,
+                           llvm::DenseMapInfo<void *>::getTombstoneKey(), 0);
 
 void Position::print(llvm::raw_ostream &os) const {
   if (*this == Position::EmptyKey) {
@@ -26,22 +27,22 @@
     // Suboptimal printing, but it's not worth instantiating an AsmState.
     // Use the print(os, asmState) version instead of <<.
     switch (enc.getInt()) {
-      case Position::ENC_VALUE:
-        os << "value";
-        break;
-      case Position::ENC_RETURNED_VALUE:
-        os << "returned value";
-        break;
-      case Position::ENC_OPERATION: {
-        auto symbol = dyn_cast<mlir::SymbolOpInterface>(getOperation());
-        os << "op "
-           << (symbol ? symbol.getName()
-                      : getOperation().getName().getStringRef());
-        break;
-      }
-      case Position::ENC_BLOCK:
-        os << "block";
-        break;
+    case Position::ENC_VALUE:
+      os << "value";
+      break;
+    case Position::ENC_RETURNED_VALUE:
+      os << "returned value";
+      break;
+    case Position::ENC_OPERATION: {
+      auto symbol = dyn_cast<mlir::SymbolOpInterface>(getOperation());
+      os << "op "
+         << (symbol ? symbol.getName()
+                    : getOperation().getName().getStringRef());
+      break;
+    }
+    case Position::ENC_BLOCK:
+      os << "block";
+      break;
     }
   }
 }
@@ -53,32 +54,32 @@
     os << "(tombstone)";
   } else {
     switch (enc.getInt()) {
-      case Position::ENC_VALUE: {
-        getValue().printAsOperand(os, asmState);
-        break;
+    case Position::ENC_VALUE: {
+      getValue().printAsOperand(os, asmState);
+      break;
+    }
+    case Position::ENC_RETURNED_VALUE: {
+      auto returnedValue = getReturnedValue();
+      os << returnedValue.first->getName().getStringRef();
+      auto symbol = dyn_cast<mlir::SymbolOpInterface>(returnedValue.first);
+      if (symbol) {
+        os << " @" << symbol.getName();
       }
-      case Position::ENC_RETURNED_VALUE: {
-        auto returnedValue = getReturnedValue();
-        os << returnedValue.first->getName().getStringRef();
-        auto symbol = dyn_cast<mlir::SymbolOpInterface>(returnedValue.first);
-        if (symbol) {
-          os << " @" << symbol.getName();
-        }
-        os << " result " << returnedValue.second;
-        break;
+      os << " result " << returnedValue.second;
+      break;
+    }
+    case Position::ENC_OPERATION: {
+      os << getOperation().getName().getStringRef();
+      auto symbol = dyn_cast<mlir::SymbolOpInterface>(getOperation());
+      if (symbol) {
+        os << " @" << symbol.getName();
       }
-      case Position::ENC_OPERATION: {
-        os << getOperation().getName().getStringRef();
-        auto symbol = dyn_cast<mlir::SymbolOpInterface>(getOperation());
-        if (symbol) {
-          os << " @" << symbol.getName();
-        }
-        break;
-      }
-      case Position::ENC_BLOCK: {
-        getBlock().printAsOperand(os, asmState);
-        break;
-      }
+      break;
+    }
+    case Position::ENC_BLOCK: {
+      getBlock().printAsOperand(os, asmState);
+      break;
+    }
     }
   }
 }
@@ -117,5 +118,5 @@
   return {};
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.h b/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.h
index b86ab5c..3a42e62 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Position.h
@@ -26,7 +26,7 @@
 // This is the MLIR equivalent to the IRPosition used in LLVM (see
 // llvm/Transforms/IPO/Attributor.h).
 class Position {
- public:
+public:
   static const Position EmptyKey;
   static const Position TombstoneKey;
 
@@ -81,7 +81,7 @@
   void print(llvm::raw_ostream &os) const;
   void print(llvm::raw_ostream &os, AsmState &asmState) const;
 
- private:
+private:
   template <typename T, typename Enable>
   friend struct llvm::DenseMapInfo;
   friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os, Position pos);
@@ -99,7 +99,7 @@
       llvm::PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
   static_assert(NumEncodingBits >= 2, "At least two bits are required!");
   llvm::PointerIntPair<void *, NumEncodingBits, char> enc;
-  unsigned ordinal;  // used only with ENC_RETURNED_VALUE
+  unsigned ordinal; // used only with ENC_RETURNED_VALUE
 };
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &os, Position pos);
@@ -136,8 +136,8 @@
 // positions are references to the combined results of the region.
 SmallVector<Position> getReturnedValuePositions(Region &region);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 namespace llvm {
 
@@ -156,6 +156,6 @@
   static bool isEqual(const Position &a, const Position &b) { return a == b; }
 };
 
-}  // end namespace llvm
+} // end namespace llvm
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_POSITION_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_ANALYSIS_POSITION_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
index e789976..13945ea 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
@@ -79,9 +79,9 @@
 struct ConvertInitializerOp
     : public OpConversionPattern<IREE::Util::InitializerOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::InitializerOp initializerOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::InitializerOp initializerOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto &typeConverter = *getTypeConverter();
     rewriter.startRootUpdate(initializerOp);
     if (failed(rewriter.convertRegionTypes(&initializerOp.getBody(),
@@ -97,9 +97,9 @@
 
 struct ConvertFuncOp : public OpConversionPattern<mlir::func::FuncOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::func::FuncOp funcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::FuncOp funcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto &typeConverter = *getTypeConverter();
 
     // Convert the input signature types.
@@ -140,9 +140,9 @@
 
 struct ConvertCallOp : public OpConversionPattern<mlir::func::CallOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::func::CallOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::CallOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Type> resultTypes;
     if (failed(getTypeConverter()->convertTypes(op.getResultTypes(),
                                                 resultTypes))) {
@@ -156,9 +156,9 @@
 
 struct ConvertReturnOp : public OpConversionPattern<mlir::func::ReturnOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::func::ReturnOp returnOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::ReturnOp returnOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(returnOp,
                                                       adaptor.getOperands());
     return success();
@@ -167,9 +167,9 @@
 
 struct ConvertBranchOp : public OpConversionPattern<mlir::cf::BranchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::cf::BranchOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::cf::BranchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<mlir::cf::BranchOp>(op, op.getDest(),
                                                     adaptor.getDestOperands());
     return success();
@@ -179,9 +179,9 @@
 struct ConvertCondBranchOp
     : public OpConversionPattern<mlir::cf::CondBranchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::cf::CondBranchOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::cf::CondBranchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<mlir::cf::CondBranchOp>(
         op, adaptor.getCondition(), op.getTrueDest(),
         adaptor.getTrueDestOperands(), op.getFalseDest(),
@@ -192,9 +192,9 @@
 
 struct ConvertSelectOp : public OpConversionPattern<mlir::arith::SelectOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::arith::SelectOp selectOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::arith::SelectOp selectOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<mlir::arith::SelectOp>(
         selectOp, adaptor.getCondition(), adaptor.getTrueValue(),
         adaptor.getFalseValue());
@@ -204,12 +204,13 @@
 
 struct ConvertIfOp : public OpConversionPattern<scf::IfOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      scf::IfOp ifOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    auto resultTypes = llvm::map_to_vector(
-        ifOp.getResultTypes(),
-        [&](Type type) { return getTypeConverter()->convertType(type); });
+  LogicalResult
+  matchAndRewrite(scf::IfOp ifOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto resultTypes =
+        llvm::map_to_vector(ifOp.getResultTypes(), [&](Type type) {
+          return getTypeConverter()->convertType(type);
+        });
     auto newOp = rewriter.create<scf::IfOp>(ifOp.getLoc(), resultTypes,
                                             adaptor.getCondition(),
                                             ifOp.elseBlock() != nullptr);
@@ -228,15 +229,15 @@
 
 struct ConvertYieldOp : public OpConversionPattern<scf::YieldOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      scf::YieldOp yieldOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(scf::YieldOp yieldOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<scf::YieldOp>(yieldOp, adaptor.getResults());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateGenericStructuralConversionPatterns(
     MLIRContext *context, ConversionTarget &conversionTarget,
@@ -268,5 +269,5 @@
                                                                 context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.h b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.h
index 71763b8..2d2f926 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.h
@@ -16,9 +16,9 @@
 template <typename T>
 struct GenericConvertTypesPattern : public OpConversionPattern<T> {
   using OpConversionPattern<T>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      T op, typename T::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(T op, typename T::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Type> resultTypes;
     for (auto oldType : op.getOperation()->getResultTypes()) {
       SmallVector<Type> newTypes;
@@ -67,7 +67,7 @@
     MLIRContext *context, ConversionTarget &conversionTarget,
     TypeConverter &typeConverter, RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_CONVERSION_CONVERSIONPATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_CONVERSION_CONVERSIONPATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.cpp
index b196a57..6edbc2f 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.cpp
@@ -92,9 +92,9 @@
 template <typename OpTy>
 struct FoldAsNoOp final : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(op, adaptor.getOperands());
     return success();
   }
@@ -105,9 +105,9 @@
 template <typename OpTy>
 struct ElideNoOp final : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
@@ -115,9 +115,9 @@
 
 struct ConvertMemRefGlobalOp : public OpConversionPattern<memref::GlobalOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      memref::GlobalOp globalOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::GlobalOp globalOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(globalOp.getType())) {
       return rewriter.notifyMatchFailure(
           globalOp,
@@ -159,9 +159,9 @@
 struct ConvertMemRefGetGlobalOp
     : public OpConversionPattern<memref::GetGlobalOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      memref::GetGlobalOp getOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::GetGlobalOp getOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(getOp.getResult().getType())) {
       return rewriter.notifyMatchFailure(
           getOp, "only rank-0 and rank-1 memrefs are supported; flatten first");
@@ -174,9 +174,9 @@
 
 struct ConvertMemRefAllocaOp : public OpConversionPattern<memref::AllocaOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      memref::AllocaOp allocaOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::AllocaOp allocaOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = allocaOp.getLoc();
     auto allocationSize = getByteLength(rewriter, loc, allocaOp.getMemref());
     uint64_t alignment = allocaOp.getAlignment().value_or(0);
@@ -189,9 +189,9 @@
 
 struct ConvertMemRefDimOp : public OpConversionPattern<memref::DimOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      memref::DimOp dimOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::DimOp dimOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(dimOp.getSource().getType())) {
       return rewriter.notifyMatchFailure(
           dimOp, "only rank-0 and rank-1 memrefs are supported; flatten first");
@@ -210,9 +210,9 @@
 
 struct ConvertMemRefLoadOp : public OpConversionPattern<memref::LoadOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      memref::LoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::LoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(loadOp.getMemref().getType())) {
       return rewriter.notifyMatchFailure(
           loadOp,
@@ -248,9 +248,9 @@
 
 struct ConvertMemRefStoreOp : public OpConversionPattern<memref::StoreOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      memref::StoreOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::StoreOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isRankZeroOrOneMemRef(storeOp.getMemref().getType())) {
       return rewriter.notifyMatchFailure(
           storeOp,
@@ -289,15 +289,15 @@
     : public OpConversionPattern<memref::ReinterpretCastOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      memref::ReinterpretCastOp castOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(memref::ReinterpretCastOp castOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(castOp, adaptor.getSource());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateMemRefToUtilPatterns(MLIRContext *context,
                                   ConversionTarget &conversionTarget,
@@ -329,5 +329,5 @@
           typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.h b/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.h
index f2b2653..dbfe661 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/Patterns.h
@@ -23,7 +23,7 @@
                                   RewritePatternSet &patterns,
                                   Type convertedBufferType = {});
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_CONVERSION_MEMREFTOUTIL_PATTERN_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_CONVERSION_MEMREFTOUTIL_PATTERN_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.cpp
index 642ca09..07e4340 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.cpp
@@ -182,13 +182,15 @@
   for (auto opArg : llvm::enumerate(closureOp.getClosureOperands())) {
     auto outerValue = opArg.value();
     auto *sourceOp = outerValue.getDefiningOp();
-    if (!sourceOp) continue;  // can't clone block arguments into closures
+    if (!sourceOp)
+      continue; // can't clone block arguments into closures
 
     // We cannot just simply inline and replace all users if this is an
     // argument that can be written; for example, the region might perform
     // work after loading a initial constant from the argument and then
     // write back.
-    if (!closureOp.getOperandAccess(opArg.index()).isReadOnly()) continue;
+    if (!closureOp.getOperandAccess(opArg.index()).isReadOnly())
+      continue;
 
     if (closureOp.canClosureContainOp(sourceOp) &&
         shouldInlineIntoClosure(options, outerValue)) {
@@ -319,7 +321,7 @@
   return success();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.h b/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.h
index 90ec044..fa3785e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/ClosureOpUtils.h
@@ -65,7 +65,7 @@
 // T must implement the IREE::Util::ClosureOpInterface.
 template <typename T>
 class ClosureOptimizationPattern : public OpRewritePattern<T> {
- public:
+public:
   ClosureOptimizationPattern(MLIRContext *context,
                              ClosureOptimizationOptions options = {},
                              PatternBenefit benefit = 1)
@@ -77,13 +77,13 @@
     return optimizeClosureLikeOp(options, closureOp, rewriter);
   }
 
- private:
+private:
   const ClosureOptimizationOptions options;
 };
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_IR_CLOSUREOPUTILS_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_IR_CLOSUREOPUTILS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.cpp
index 20913a5..ca3cb0b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.cpp
@@ -23,7 +23,7 @@
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Util/IR/UtilAttrs.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Util/IR/UtilAttrs.cpp.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -44,7 +44,7 @@
 // Assumes that no more data will be written than is allocated in the provided
 // storage buffer.
 class raw_inplace_ostream : public llvm::raw_pwrite_stream {
- public:
+public:
   explicit raw_inplace_ostream(ArrayRef<char> storage) : storage(storage) {
     SetUnbuffered();
   }
@@ -54,7 +54,7 @@
 
   void reserveExtraSpace(uint64_t extraSize) override {}
 
- private:
+private:
   uint64_t current_pos() const override { return offset; }
 
   void write_impl(const char *ptr, size_t size) override {
@@ -103,32 +103,32 @@
                                      SmallVectorImpl<char> &buffer) {
   buffer.resize(bitWidth / 8);
   switch (bitWidth) {
-    case 8: {
-      uint8_t rawValue = llvm::support::endian::byte_swap<uint8_t>(
-          value.extractBitsAsZExtValue(8, 0), endian);
-      std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
-      return success();
-    }
-    case 16: {
-      uint16_t rawValue = llvm::support::endian::byte_swap<uint16_t>(
-          value.extractBitsAsZExtValue(16, 0), endian);
-      std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
-      return success();
-    }
-    case 32: {
-      uint32_t rawValue = llvm::support::endian::byte_swap<uint32_t>(
-          value.extractBitsAsZExtValue(32, 0), endian);
-      std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
-      return success();
-    }
-    case 64: {
-      uint64_t rawValue = llvm::support::endian::byte_swap<uint64_t>(
-          value.extractBitsAsZExtValue(64, 0), endian);
-      std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
-      return success();
-    }
-    default:
-      return failure();
+  case 8: {
+    uint8_t rawValue = llvm::support::endian::byte_swap<uint8_t>(
+        value.extractBitsAsZExtValue(8, 0), endian);
+    std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
+    return success();
+  }
+  case 16: {
+    uint16_t rawValue = llvm::support::endian::byte_swap<uint16_t>(
+        value.extractBitsAsZExtValue(16, 0), endian);
+    std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
+    return success();
+  }
+  case 32: {
+    uint32_t rawValue = llvm::support::endian::byte_swap<uint32_t>(
+        value.extractBitsAsZExtValue(32, 0), endian);
+    std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
+    return success();
+  }
+  case 64: {
+    uint64_t rawValue = llvm::support::endian::byte_swap<uint64_t>(
+        value.extractBitsAsZExtValue(64, 0), endian);
+    std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
+    return success();
+  }
+  default:
+    return failure();
   }
 }
 
@@ -138,26 +138,26 @@
                                        SmallVectorImpl<char> &buffer) {
   buffer.resize(bitWidth / 8);
   switch (bitWidth) {
-    case 16: {
-      uint16_t rawValue = llvm::support::endian::byte_swap<uint16_t>(
-          value.bitcastToAPInt().extractBitsAsZExtValue(16, 0), endian);
-      std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
-      return success();
-    }
-    case 32: {
-      float rawValue = llvm::support::endian::byte_swap<float>(
-          value.convertToFloat(), endian);
-      std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
-      return success();
-    }
-    case 64: {
-      double rawValue = llvm::support::endian::byte_swap<double>(
-          value.convertToDouble(), endian);
-      std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
-      return success();
-    }
-    default:
-      return failure();
+  case 16: {
+    uint16_t rawValue = llvm::support::endian::byte_swap<uint16_t>(
+        value.bitcastToAPInt().extractBitsAsZExtValue(16, 0), endian);
+    std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
+    return success();
+  }
+  case 32: {
+    float rawValue =
+        llvm::support::endian::byte_swap<float>(value.convertToFloat(), endian);
+    std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
+    return success();
+  }
+  case 64: {
+    double rawValue = llvm::support::endian::byte_swap<double>(
+        value.convertToDouble(), endian);
+    std::memcpy(buffer.data(), &rawValue, sizeof(rawValue));
+    return success();
+  }
+  default:
+    return failure();
   }
 }
 
@@ -209,9 +209,10 @@
 }
 
 template <typename elementType, unsigned numBits = sizeof(elementType) * 8>
-static LogicalResult serializeGenericIntElements(
-    DenseIntElementsAttr attr, llvm::support::endianness endian,
-    llvm::raw_ostream &os) {
+static LogicalResult
+serializeGenericIntElements(DenseIntElementsAttr attr,
+                            llvm::support::endianness endian,
+                            llvm::raw_ostream &os) {
   for (const APInt &value : attr.getValues<APInt>()) {
     elementType rawValue = llvm::support::endian::byte_swap<elementType>(
         value.extractBitsAsZExtValue(numBits, 0), endian);
@@ -220,9 +221,10 @@
   return success();
 }
 
-static LogicalResult serializeGenericF16Elements(
-    DenseFPElementsAttr attr, llvm::support::endianness endian,
-    llvm::raw_ostream &os) {
+static LogicalResult
+serializeGenericF16Elements(DenseFPElementsAttr attr,
+                            llvm::support::endianness endian,
+                            llvm::raw_ostream &os) {
   for (const APFloat &value : attr.getValues<APFloat>()) {
     uint16_t rawValue = llvm::support::endian::byte_swap<uint16_t>(
         value.bitcastToAPInt().extractBitsAsZExtValue(16, 0), endian);
@@ -231,9 +233,10 @@
   return success();
 }
 
-static LogicalResult serializeGenericF32Elements(
-    DenseFPElementsAttr attr, llvm::support::endianness endian,
-    llvm::raw_ostream &os) {
+static LogicalResult
+serializeGenericF32Elements(DenseFPElementsAttr attr,
+                            llvm::support::endianness endian,
+                            llvm::raw_ostream &os) {
   for (const APFloat &value : attr.getValues<APFloat>()) {
     float rawValue =
         llvm::support::endian::byte_swap<float>(value.convertToFloat(), endian);
@@ -242,9 +245,10 @@
   return success();
 }
 
-static LogicalResult serializeGenericF64Elements(
-    DenseFPElementsAttr attr, llvm::support::endianness endian,
-    llvm::raw_ostream &os) {
+static LogicalResult
+serializeGenericF64Elements(DenseFPElementsAttr attr,
+                            llvm::support::endianness endian,
+                            llvm::raw_ostream &os) {
   for (const APFloat &value : attr.getValues<APFloat>()) {
     double rawValue = llvm::support::endian::byte_swap<double>(
         value.convertToDouble(), endian);
@@ -255,42 +259,43 @@
 
 // Performs slow generic serialization of all of the elements in |elementsAttr|.
 // Respects the target |endian| setting, performing byte swaps if required.
-static LogicalResult serializeGenericElementData(
-    DenseElementsAttr elementsAttr, llvm::support::endianness endian,
-    llvm::raw_ostream &os) {
+static LogicalResult
+serializeGenericElementData(DenseElementsAttr elementsAttr,
+                            llvm::support::endianness endian,
+                            llvm::raw_ostream &os) {
   if (auto attr = llvm::dyn_cast<DenseIntElementsAttr>(elementsAttr)) {
     // Don't hoist |bitwidth| given `getElementTypeBitWidth()` asserts if the
     // element type is not integer or floating-point.
     int32_t bitwidth = attr.getType().getElementTypeBitWidth();
     switch (bitwidth) {
-      case 8:
-        return serializeRawData(attr, os);
-      case 16:
-        return serializeGenericIntElements<uint16_t>(attr, endian, os);
-      case 32:
-        return serializeGenericIntElements<uint32_t>(attr, endian, os);
-      case 64:
-        return serializeGenericIntElements<uint64_t>(attr, endian, os);
-      default:
-        return emitError(UnknownLoc::get(elementsAttr.getContext()))
-               << "unhandled integer element bitwidth " << bitwidth
-               << " for type " << elementsAttr.getType();
+    case 8:
+      return serializeRawData(attr, os);
+    case 16:
+      return serializeGenericIntElements<uint16_t>(attr, endian, os);
+    case 32:
+      return serializeGenericIntElements<uint32_t>(attr, endian, os);
+    case 64:
+      return serializeGenericIntElements<uint64_t>(attr, endian, os);
+    default:
+      return emitError(UnknownLoc::get(elementsAttr.getContext()))
+             << "unhandled integer element bitwidth " << bitwidth
+             << " for type " << elementsAttr.getType();
     }
   } else if (auto attr = llvm::dyn_cast<DenseFPElementsAttr>(elementsAttr)) {
     // Don't hoist |bitwidth| given `getElementTypeBitWidth()` asserts if the
     // element type is not integer or floating-point.
     int32_t bitwidth = attr.getType().getElementTypeBitWidth();
     switch (bitwidth) {
-      case 16:
-        return serializeGenericF16Elements(attr, endian, os);
-      case 32:
-        return serializeGenericF32Elements(attr, endian, os);
-      case 64:
-        return serializeGenericF64Elements(attr, endian, os);
-      default:
-        return emitError(UnknownLoc::get(elementsAttr.getContext()))
-               << "unhandled float element bitwidth " << bitwidth
-               << " for type " << elementsAttr.getType();
+    case 16:
+      return serializeGenericF16Elements(attr, endian, os);
+    case 32:
+      return serializeGenericF32Elements(attr, endian, os);
+    case 64:
+      return serializeGenericF64Elements(attr, endian, os);
+    default:
+      return emitError(UnknownLoc::get(elementsAttr.getContext()))
+             << "unhandled float element bitwidth " << bitwidth << " for type "
+             << elementsAttr.getType();
     }
   }
   return emitError(UnknownLoc::get(elementsAttr.getContext()))
@@ -302,16 +307,17 @@
 //===----------------------------------------------------------------------===//
 
 Attribute ByteRangeAttr::parse(AsmParser &p, Type type) {
-  if (failed(p.parseLess())) return {};
+  if (failed(p.parseLess()))
+    return {};
 
   // TODO(benvanik): support the range syntax; the dialect asm parser fights
   // with it though by checking for proper []/() nesting.
 
   // Try first the range style: byte_range<[start..end)>
   bool startInclusive;
-  if (succeeded(p.parseOptionalLSquare())) {  // [...
+  if (succeeded(p.parseOptionalLSquare())) { // [...
     startInclusive = true;
-  } else if (succeeded(p.parseOptionalLParen())) {  // (...
+  } else if (succeeded(p.parseOptionalLParen())) { // (...
     startInclusive = false;
   } else {
     // byte_range<offset, length>
@@ -332,16 +338,17 @@
   }
 
   bool endInclusive;
-  if (succeeded(p.parseOptionalRSquare())) {  // ...]
+  if (succeeded(p.parseOptionalRSquare())) { // ...]
     endInclusive = true;
-  } else if (succeeded(p.parseOptionalRParen())) {  // ...)
+  } else if (succeeded(p.parseOptionalRParen())) { // ...)
     endInclusive = false;
   } else {
     p.emitError(p.getCurrentLocation()) << "expected ] or ) to end range";
     return {};
   }
 
-  if (failed(p.parseGreater())) return {};
+  if (failed(p.parseGreater()))
+    return {};
 
   start = startInclusive ? start : start + 1;
   end = endInclusive ? end : end - 1;
@@ -376,9 +383,9 @@
 }
 
 // static
-LogicalResult CompositeAttr::verify(
-    function_ref<InFlightDiagnostic()> emitError, int64_t totalLength,
-    ArrayAttr valueAttrs) {
+LogicalResult
+CompositeAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                      int64_t totalLength, ArrayAttr valueAttrs) {
   int64_t calculatedLength = 0;
   for (auto valueAttr : valueAttrs) {
     if (auto serializableAttr =
@@ -634,7 +641,7 @@
 
   addAttributes<
 #define GET_ATTRDEF_LIST
-#include "iree/compiler/Dialect/Util/IR/UtilAttrs.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Util/IR/UtilAttrs.cpp.inc" // IWYU pragma: keep
       >();
 
   // NOTE: we only handle dense elements today; sparse will require a separate
@@ -651,7 +658,7 @@
   StringAttr::attachInterface<SerializableStringAttrModel>(context);
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp
index 8aee709..8edb525 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp
@@ -69,7 +69,8 @@
 
   void handleTerminator(Operation *op, Block *newDest) const final {
     auto returnOp = dyn_cast<IREE::Util::InitializerReturnOp>(op);
-    if (!returnOp) return;
+    if (!returnOp)
+      return;
     // util.initialize.return takes no args - just branch to the new block.
     OpBuilder builder(op);
     builder.create<mlir::cf::BranchOp>(op->getLoc(), newDest, ValueRange{});
@@ -108,7 +109,8 @@
                                 PatternRewriter &rewriter) const override {
     auto shapeAwareOp =
         dyn_cast_or_null<ShapeAwareOpInterface>(op.getSource().getDefiningOp());
-    if (!shapeAwareOp) return failure();
+    if (!shapeAwareOp)
+      return failure();
 
     // We only support static dimension indices today (as in general we only
     // support ranked shapes). If we find dynamic indices sneaking in we will
@@ -145,7 +147,7 @@
   results.insert<FoldDimOp<tensor::DimOp>>(getContext());
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.h
index b809ca7..f98190b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.h
@@ -16,29 +16,29 @@
 namespace Util {
 
 class UtilDialect : public Dialect {
- public:
-  explicit UtilDialect(MLIRContext* context);
+public:
+  explicit UtilDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "util"; }
 
-  Attribute parseAttribute(DialectAsmParser& parser, Type type) const override;
-  void printAttribute(Attribute attr, DialectAsmPrinter& p) const override;
+  Attribute parseAttribute(DialectAsmParser &parser, Type type) const override;
+  void printAttribute(Attribute attr, DialectAsmPrinter &p) const override;
 
-  Type parseType(DialectAsmParser& parser) const override;
-  void printType(Type type, DialectAsmPrinter& os) const override;
+  Type parseType(DialectAsmParser &parser) const override;
+  void printType(Type type, DialectAsmPrinter &os) const override;
 
-  Operation* materializeConstant(OpBuilder& builder, Attribute value, Type type,
+  Operation *materializeConstant(OpBuilder &builder, Attribute value, Type type,
                                  Location loc) override;
 
-  void getCanonicalizationPatterns(RewritePatternSet& results) const override;
+  void getCanonicalizationPatterns(RewritePatternSet &results) const override;
 
- private:
+private:
   void registerAttributes();
   void registerTypes();
 };
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_IR_UTILDIALECT_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_IR_UTILDIALECT_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp
index 2b8add1..62de8d5 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp
@@ -51,13 +51,13 @@
         insertSliceOp.getDest());
   }
 
-  ::std::optional<unsigned> getTiedResultOperandIndex(
-      Operation *op, unsigned resultIndex) const {
-    return {1};  // dest
+  ::std::optional<unsigned>
+  getTiedResultOperandIndex(Operation *op, unsigned resultIndex) const {
+    return {1}; // dest
   }
 
   SmallVector<int64_t> getTiedResultOperandIndices(Operation *op) const {
-    return {1};  // dest
+    return {1}; // dest
   }
 };
 
@@ -71,8 +71,8 @@
         linalgOp.getDpsInitOperands()[resultIndex]->get());
   }
 
-  ::std::optional<unsigned> getTiedResultOperandIndex(
-      Operation *op, unsigned resultIndex) const {
+  ::std::optional<unsigned>
+  getTiedResultOperandIndex(Operation *op, unsigned resultIndex) const {
     auto linalgOp = cast<OpTy>(op);
     return {linalgOp.getDpsInitOperands()[resultIndex]->getOperandNumber()};
   }
@@ -115,7 +115,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void registerUtilExternalModels(DialectRegistry &registry) {
   // Must ensure that any dependent dialects are registered.
@@ -150,33 +150,32 @@
 
   // TODO(matthias-springer): Use a helper instead of listing all ops. This is
   // tricky because LinalgExtOps.td includes YieldOp.
-  registry.addExtension(
-      +[](MLIRContext *ctx, LinalgExt::IREELinalgExtDialect *dialect) {
-        LinalgExt::ScatterOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::ScatterOp>>(*ctx);
-        LinalgExt::SortOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::SortOp>>(*ctx);
-        LinalgExt::FftOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::FftOp>>(*ctx);
-        LinalgExt::ScanOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::ScanOp>>(*ctx);
-        LinalgExt::ReverseOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::ReverseOp>>(*ctx);
-        LinalgExt::TopkOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::TopkOp>>(*ctx);
-        LinalgExt::WinogradInputTransformOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::WinogradInputTransformOp>>(*ctx);
-        LinalgExt::WinogradOutputTransformOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::WinogradOutputTransformOp>>(
-            *ctx);
-        LinalgExt::SoftmaxOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::SoftmaxOp>>(*ctx);
-        LinalgExt::AttentionOp::attachInterface<
-            LinalgOpTiedOpInterface<LinalgExt::AttentionOp>>(*ctx);
-      });
+  registry.addExtension(+[](MLIRContext *ctx,
+                            LinalgExt::IREELinalgExtDialect *dialect) {
+    LinalgExt::ScatterOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::ScatterOp>>(*ctx);
+    LinalgExt::SortOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::SortOp>>(*ctx);
+    LinalgExt::FftOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::FftOp>>(*ctx);
+    LinalgExt::ScanOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::ScanOp>>(*ctx);
+    LinalgExt::ReverseOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::ReverseOp>>(*ctx);
+    LinalgExt::TopkOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::TopkOp>>(*ctx);
+    LinalgExt::WinogradInputTransformOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::WinogradInputTransformOp>>(*ctx);
+    LinalgExt::WinogradOutputTransformOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::WinogradOutputTransformOp>>(*ctx);
+    LinalgExt::SoftmaxOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::SoftmaxOp>>(*ctx);
+    LinalgExt::AttentionOp::attachInterface<
+        LinalgOpTiedOpInterface<LinalgExt::AttentionOp>>(*ctx);
+  });
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.h
index c5928cf..316a8e1 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.h
@@ -14,11 +14,11 @@
 namespace IREE {
 namespace Util {
 
-void registerUtilExternalModels(DialectRegistry& registry);
+void registerUtilExternalModels(DialectRegistry &registry);
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_IR_UTILEXTERNALMODELS_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_IR_UTILEXTERNALMODELS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp
index c352e36..fe3954a 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp
@@ -49,13 +49,14 @@
   LogicalResult matchAndRewrite(CastOp castOp,
                                 PatternRewriter &rewriter) const override {
     auto nullOp = dyn_cast_or_null<NullOp>(castOp.getOperand().getDefiningOp());
-    if (!nullOp) return failure();
+    if (!nullOp)
+      return failure();
     rewriter.replaceOpWithNewOp<NullOp>(castOp, castOp.getResult().getType());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void CastOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                          MLIRContext *context) {
@@ -102,7 +103,8 @@
   int64_t value = initialValue;
   for (auto operand : attrOperands) {
     auto intValue = llvm::dyn_cast_if_present<IntegerAttr>(operand);
-    if (!intValue) return {};
+    if (!intValue)
+      return {};
     value = expr(value, intValue.getValue().getSExtValue());
   }
   return IntegerAttr::get(type, value);
@@ -182,7 +184,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void RangeMinOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                              MLIRContext *context) {
@@ -242,7 +244,8 @@
         lengths.push_back(length);
       }
     }
-    if (offsets.size() == op.getOffsets().size()) return failure();
+    if (offsets.size() == op.getOffsets().size())
+      return failure();
 
     // Preserve dynamic ranges.
     Value min;
@@ -300,7 +303,8 @@
                                  op.getLengths().back(), one, rewriter);
       maxValue = rewriter.create<arith::MaxUIOp>(loc, endLhs, endRhs);
     }
-    if (!minValue || !maxValue) return failure();
+    if (!minValue || !maxValue)
+      return failure();
     rewriter.replaceOp(op, {minValue, maxValue});
     return success();
   }
@@ -317,7 +321,8 @@
     for (auto range : llvm::zip_equal(op.getOffsets(), op.getLengths())) {
       ranges.insert(range);
     }
-    if (ranges.size() == op.getOffsets().size()) return failure();
+    if (ranges.size() == op.getOffsets().size())
+      return failure();
 
     // Recreate with the deduplicated ranges.
     SmallVector<Value> offsets;
@@ -334,7 +339,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void RangeExtentsOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
@@ -364,14 +369,15 @@
       matchPattern(alignment, m_ConstantInt(&staticAlignment))) {
     // If this value is itself a multiple of the alignment then we can fold.
     if (staticValue.urem(staticAlignment).isZero()) {
-      return true;  // value % alignment == 0
+      return true; // value % alignment == 0
     }
   }
 
   // If the value is produced by an align op we can check that.
   if (auto sourceAlignOp = value.getDefiningOp<IREE::Util::AlignOp>()) {
     // Check for same exact alignment - even if dynamic.
-    if (sourceAlignOp.getAlignment() == alignment) return true;
+    if (sourceAlignOp.getAlignment() == alignment)
+      return true;
 
     // If the alignments are constant we can compare them inline.
     APInt sourceAlignment;
@@ -380,7 +386,7 @@
                      m_ConstantInt(&sourceAlignment)) &&
         matchPattern(alignment, m_ConstantInt(&selfAlignment))) {
       if (sourceAlignment.uge(selfAlignment)) {
-        return true;  // source alignment is >= our alignment
+        return true; // source alignment is >= our alignment
       }
     }
 
@@ -420,7 +426,8 @@
 OpFoldResult AlignOp::fold(FoldAdaptor operands) {
   // If aligning an already-aligned value then fold if this is provably a
   // no-op. We can check this for equality even with dynamic alignments.
-  if (isAlignedTo(getValue(), getAlignment())) return getValue();
+  if (isAlignedTo(getValue(), getAlignment()))
+    return getValue();
 
   // If values are static we can perform the alignment here.
   APInt staticValue;
@@ -497,7 +504,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void UnfoldableConstantOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -516,7 +523,8 @@
 
   LogicalResult matchAndRewrite(InitializerOp op,
                                 PatternRewriter &rewriter) const override {
-    if (op.getBody().getBlocks().size() != 1) return failure();
+    if (op.getBody().getBlocks().size() != 1)
+      return failure();
     auto &block = op.getBody().front();
     if (block.empty() || isa<InitializerReturnOp>(block.front())) {
       rewriter.eraseOp(op);
@@ -550,13 +558,15 @@
 
       deadOps.push_back(storeOp);
     });
-    if (deadOps.empty()) return failure();
-    for (auto deadOp : deadOps) rewriter.eraseOp(deadOp);
+    if (deadOps.empty())
+      return failure();
+    for (auto deadOp : deadOps)
+      rewriter.eraseOp(deadOp);
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void InitializerOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
@@ -574,7 +584,7 @@
     : public OpRewritePattern<GlobalLoadIndirectOp> {
   using OpRewritePattern::OpRewritePattern;
 
- public:
+public:
   LogicalResult matchAndRewrite(GlobalLoadIndirectOp op,
                                 PatternRewriter &rewriter) const override {
     if (auto addressOp = dyn_cast_or_null<GlobalAddressOpInterface>(
@@ -587,7 +597,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void GlobalLoadIndirectOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -616,7 +626,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void GlobalStoreOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
@@ -630,7 +640,7 @@
     : public OpRewritePattern<GlobalStoreIndirectOp> {
   using OpRewritePattern::OpRewritePattern;
 
- public:
+public:
   LogicalResult matchAndRewrite(GlobalStoreIndirectOp op,
                                 PatternRewriter &rewriter) const override {
     if (auto addressOp = dyn_cast_or_null<GlobalAddressOpInterface>(
@@ -643,7 +653,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void GlobalStoreIndirectOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -680,7 +690,8 @@
   LogicalResult matchAndRewrite(BufferSubspanOp op,
                                 PatternRewriter &rewriter) const override {
     auto parentOp = BufferSubspanOp::findSubspanOp(op.getSource());
-    if (!parentOp) return failure();
+    if (!parentOp)
+      return failure();
     auto fusedLoc = rewriter.getFusedLoc({parentOp.getLoc(), op.getLoc()});
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
         fusedLoc, parentOp.getSourceOffset(), op.getSourceOffset());
@@ -710,7 +721,8 @@
     for (auto &use : llvm::make_early_inc_range(op.getResult().getUses())) {
       auto subrangeOp =
           dyn_cast<IREE::Util::SubrangeOperandOpInterface>(use.getOwner());
-      if (!subrangeOp) continue;
+      if (!subrangeOp)
+        continue;
       didUpdateAny = true;
       rewriter.setInsertionPoint(subrangeOp);
       auto oldRange = subrangeOp.getSubrangeOperand(use.getOperandNumber());
@@ -743,12 +755,14 @@
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(mlir::arith::SelectOp op,
                                 PatternRewriter &rewriter) const override {
-    if (!llvm::isa<IREE::Util::BufferType>(op.getType())) return failure();
+    if (!llvm::isa<IREE::Util::BufferType>(op.getType()))
+      return failure();
     auto trueSubspan = dyn_cast_or_null<IREE::Util::BufferSubspanOp>(
         op.getTrueValue().getDefiningOp());
     auto falseSubspan = dyn_cast_or_null<IREE::Util::BufferSubspanOp>(
         op.getFalseValue().getDefiningOp());
-    if (!trueSubspan || !falseSubspan) return failure();
+    if (!trueSubspan || !falseSubspan)
+      return failure();
     if (trueSubspan.getSource() != falseSubspan.getSource() ||
         trueSubspan.getResultSize() != falseSubspan.getResultSize()) {
       return failure();
@@ -764,7 +778,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BufferSubspanOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -824,7 +838,8 @@
   LogicalResult matchAndRewrite(BufferSizeOp op,
                                 PatternRewriter &rewriter) const override {
     auto selectOp = op.getOperand().getDefiningOp<mlir::arith::SelectOp>();
-    if (!selectOp) return failure();
+    if (!selectOp)
+      return failure();
     auto trueSize = rewriter.createOrFold<IREE::Util::BufferSizeOp>(
         op.getLoc(), selectOp.getTrueValue());
     auto falseSize = rewriter.createOrFold<IREE::Util::BufferSizeOp>(
@@ -835,7 +850,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BufferSizeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -861,7 +876,8 @@
   LogicalResult matchAndRewrite(BufferStorageOp op,
                                 PatternRewriter &rewriter) const override {
     auto subspanOp = BufferSubspanOp::findSubspanOp(op.getOperand());
-    if (!subspanOp) return failure();
+    if (!subspanOp)
+      return failure();
     auto fusedLoc = rewriter.getFusedLoc({subspanOp.getLoc(), op.getLoc()});
     rewriter.setInsertionPointAfter(op);
     auto newOffset = rewriter.createOrFold<arith::AddIOp>(
@@ -880,7 +896,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BufferStorageOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -896,7 +912,7 @@
   return {};
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
index c7ce4e0..94ea831 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
@@ -114,10 +114,11 @@
   if (!typedAttr || typedAttr.getType() != type.getValue()) {
     p << ": ";
     p.printAttribute(type);
-    needsSpace = true;  // subsequent attr value needs a space separator
+    needsSpace = true; // subsequent attr value needs a space separator
   }
   if (attr) {
-    if (needsSpace) p << ' ';
+    if (needsSpace)
+      p << ' ';
     p << "= ";
     p.printAttribute(attr);
   }
@@ -163,10 +164,12 @@
 ParseResult parseTypeAlias(OpAsmParser &parser, TypeAttr &encodingTypeAttr,
                            Type &storageType) {
   Type encodingType;
-  if (failed(parser.parseType(encodingType))) return failure();
+  if (failed(parser.parseType(encodingType)))
+    return failure();
   storageType = encodingType;
   if (succeeded(parser.parseOptionalKeyword("as"))) {
-    if (failed(parser.parseType(storageType))) return failure();
+    if (failed(parser.parseType(storageType)))
+      return failure();
   }
   encodingTypeAttr = TypeAttr::get(encodingType);
   return success();
@@ -185,10 +188,10 @@
 // custom<TypedValueList>(ref($type_value), $values)
 //===----------------------------------------------------------------------===//
 
-ParseResult parseTypedValueList(
-    OpAsmParser &parser, Type type,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
-    SmallVectorImpl<Type> &valueTypes) {
+ParseResult
+parseTypedValueList(OpAsmParser &parser, Type type,
+                    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
+                    SmallVectorImpl<Type> &valueTypes) {
   if (failed(parser.parseOperandList(values, AsmParser::Delimiter::Square))) {
     return failure();
   }
@@ -208,10 +211,10 @@
 //===----------------------------------------------------------------------===//
 // [%offset for %length], [%offset for %length], ...
 
-ParseResult parseRangeList(
-    OpAsmParser &parser,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &offsets,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lengths) {
+ParseResult
+parseRangeList(OpAsmParser &parser,
+               SmallVectorImpl<OpAsmParser::UnresolvedOperand> &offsets,
+               SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lengths) {
   do {
     OpAsmParser::UnresolvedOperand offset;
     OpAsmParser::UnresolvedOperand length;
@@ -265,12 +268,13 @@
 //===----------------------------------------------------------------------===//
 // type{%size0}, type, type{%size1}
 
-ParseResult parseSizeAwareTypeList(
-    OpAsmParser &parser, SmallVectorImpl<Type> &types,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes) {
+ParseResult
+parseSizeAwareTypeList(OpAsmParser &parser, SmallVectorImpl<Type> &types,
+                       SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes) {
   do {
     Type type;
-    if (failed(parser.parseType(type))) return failure();
+    if (failed(parser.parseType(type)))
+      return failure();
     if (llvm::isa<IREE::Util::SizeAwareTypeInterface>(type)) {
       OpAsmParser::UnresolvedOperand size;
       if (failed(parser.parseLBrace()) || failed(parser.parseOperand(size)) ||
@@ -297,11 +301,12 @@
   });
 }
 
-ParseResult parseSizeAwareTypeList(
-    OpAsmParser &parser, SmallVectorImpl<Type> &types0,
-    SmallVectorImpl<Type> &types1,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes) {
-  if (failed(parseSizeAwareTypeList(parser, types0, sizes))) return failure();
+ParseResult
+parseSizeAwareTypeList(OpAsmParser &parser, SmallVectorImpl<Type> &types0,
+                       SmallVectorImpl<Type> &types1,
+                       SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes) {
+  if (failed(parseSizeAwareTypeList(parser, types0, sizes)))
+    return failure();
   types1 = types0;
   return success();
 }
@@ -333,9 +338,11 @@
   int64_t tiedOperandIndex = IREE::Util::TiedOpInterface::kUntiedIndex;
   if (res.has_value() && succeeded(res.value())) {
     tiedOperandIndex = 0;
-    if (failed(parser.parseKeyword("as"))) return failure();
+    if (failed(parser.parseKeyword("as")))
+      return failure();
   }
-  if (failed(parser.parseType(resultType))) return failure();
+  if (failed(parser.parseType(resultType)))
+    return failure();
   if (auto shapedType = llvm::dyn_cast<ShapedType>(resultType)) {
     if (!shapedType.hasStaticShape()) {
       SmallVector<OpAsmParser::UnresolvedOperand> dynamicDims;
@@ -405,12 +412,13 @@
 //===----------------------------------------------------------------------===//
 // (type, type{%dim0, %dim1}, type) -> (type{%dim2}, %operand4)
 
-static ParseResult parseShapedOperandList(
-    OpAsmParser &parser, SmallVectorImpl<Type> &types,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &dims) {
+static ParseResult
+parseShapedOperandList(OpAsmParser &parser, SmallVectorImpl<Type> &types,
+                       SmallVectorImpl<OpAsmParser::UnresolvedOperand> &dims) {
   do {
     Type type;
-    if (failed(parser.parseType(type))) return failure();
+    if (failed(parser.parseType(type)))
+      return failure();
     if (auto shapedType = llvm::dyn_cast<ShapedType>(type)) {
       if (!shapedType.hasStaticShape()) {
         SmallVector<OpAsmParser::UnresolvedOperand> dynamicDims;
@@ -439,9 +447,9 @@
 
 // Finds the operand index in |operands| that |tiedResult| references.
 // Returns TiedOpInterface::kUntiedIndex if no operand is found.
-static int64_t findTiedOperand(
-    OpAsmParser::UnresolvedOperand tiedResult,
-    ArrayRef<OpAsmParser::UnresolvedOperand> operands) {
+static int64_t
+findTiedOperand(OpAsmParser::UnresolvedOperand tiedResult,
+                ArrayRef<OpAsmParser::UnresolvedOperand> operands) {
   int64_t operandIndex = IREE::Util::TiedOpInterface::kUntiedIndex;
   for (int64_t i = 0; i < operands.size(); ++i) {
     if (operands[i].name == tiedResult.name &&
@@ -475,7 +483,8 @@
       }
       if (succeeded(parser.parseOptionalKeyword("as"))) {
         // Type _may_ differ from the operand.
-        if (failed(parser.parseType(type))) return failure();
+        if (failed(parser.parseType(type)))
+          return failure();
       } else {
         // Use the operands type.
         type = operandTypes[tiedOperandIndex];
@@ -557,7 +566,8 @@
       p << "}";
       resultDims = resultDims.drop_front(1);
     }
-    if (i < resultTypes.size() - 1) p << ", ";
+    if (i < resultTypes.size() - 1)
+      p << ", ";
   }
 }
 
@@ -568,14 +578,16 @@
     SmallVectorImpl<Type> &resultTypes,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resultDims,
     ArrayAttr &tiedOperands) {
-  if (failed(parser.parseLParen())) return failure();
+  if (failed(parser.parseLParen()))
+    return failure();
   if (failed(parser.parseOptionalRParen())) {
     if (failed(parseShapedOperandList(parser, operandTypes, operandDims)) ||
         failed(parser.parseRParen())) {
       return failure();
     }
   }
-  if (failed(parser.parseArrow())) return failure();
+  if (failed(parser.parseArrow()))
+    return failure();
   if (succeeded(parser.parseOptionalLParen())) {
     if (succeeded(parser.parseOptionalRParen())) {
       // Empty list/no results `()`.
@@ -628,10 +640,12 @@
     }
   });
   p << ") -> ";
-  if (resultTypes.size() != 1) p << "(";
+  if (resultTypes.size() != 1)
+    p << "(";
   printShapedResultList(p, op, operands, operandTypes, operandDims, resultTypes,
                         resultDims, tiedOperands);
-  if (resultTypes.size() != 1) p << ")";
+  if (resultTypes.size() != 1)
+    p << ")";
 }
 
 //===----------------------------------------------------------------------===//
@@ -682,7 +696,8 @@
       }
       if (succeeded(parser.parseOptionalKeyword("as"))) {
         // Type _may_ differ from the operand.
-        if (failed(parser.parseType(type))) return failure();
+        if (failed(parser.parseType(type)))
+          return failure();
       } else {
         // Use the operands type.
         type = argTypes[tiedOperandIndex];
@@ -736,7 +751,8 @@
         p.printOptionalAttrDict(attrs.getValue());
       }
     }
-    if (i < resultTypes.size() - 1) p << ", ";
+    if (i < resultTypes.size() - 1)
+      p << ", ";
   }
 }
 
@@ -748,7 +764,8 @@
   SmallVector<OpAsmParser::UnresolvedOperand> args;
   SmallVector<Type> argTypes;
   SmallVector<Type> resultTypes;
-  if (failed(parser.parseLParen())) return failure();
+  if (failed(parser.parseLParen()))
+    return failure();
   if (failed(parser.parseOptionalRParen())) {
     if (failed(parseShapedFunctionArgumentList(parser, args, argTypes,
                                                argAttrs)) ||
@@ -802,10 +819,12 @@
   auto resultTypes = functionType.getResults();
   if (!resultTypes.empty()) {
     p << " -> ";
-    if (resultTypes.size() != 1) p << "(";
+    if (resultTypes.size() != 1)
+      p << "(";
     printShapedFunctionResultList(p, op, functionType.getInputs(), resultTypes,
                                   resultAttrs, tiedOperands);
-    if (resultTypes.size() != 1) p << ")";
+    if (resultTypes.size() != 1)
+      p << ")";
   }
 }
 
@@ -873,11 +892,13 @@
   p << " ";
   p.printOptionalAttrDict(op->getAttrs(), /*elidedAttrs=*/{"value"});
 
-  if (op->getAttrs().size() > 1) p << ' ';
+  if (op->getAttrs().size() > 1)
+    p << ' ';
   p << getValue();
 
   // If the value is a symbol reference, print a trailing type.
-  if (llvm::isa<SymbolRefAttr>(getValue())) p << " : " << getType();
+  if (llvm::isa<SymbolRefAttr>(getValue()))
+    p << " : " << getType();
 }
 
 //===----------------------------------------------------------------------===//
@@ -885,7 +906,8 @@
 //===----------------------------------------------------------------------===//
 
 bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
-  if (inputs.size() != 1 || outputs.size() != 1) return false;
+  if (inputs.size() != 1 || outputs.size() != 1)
+    return false;
   Type a = inputs.front(), b = outputs.front();
   if (a == b) {
     // Both types are the same.
@@ -921,13 +943,13 @@
 
 Value CastOp::getTiedResultOperand(Value result) { return getOperand(); }
 
-::std::optional<unsigned> CastOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // operand
+::std::optional<unsigned>
+CastOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // operand
 }
 
 SmallVector<int64_t> CastOp::getTiedResultOperandIndices() {
-  return {0};  // operand
+  return {0}; // operand
 }
 
 //===----------------------------------------------------------------------===//
@@ -936,7 +958,8 @@
 
 std::optional<std::pair<int64_t, int64_t>>
 NumericOptionalNarrowOp::getIntegerRange() {
-  if (!getMinValue() || !getMaxValue()) return {};
+  if (!getMinValue() || !getMaxValue())
+    return {};
   bool signExtend = isSigned();
   // Note: Cannot sign extend 0 bit values.
   int64_t minValue = signExtend && getMinValue()->getBitWidth() > 0
@@ -1291,13 +1314,13 @@
   getResultSizeMutable().assign(operand.length);
 }
 
-::std::optional<unsigned> BufferSubspanOp::getTiedResultOperandIndex(
-    unsigned resultIndex) {
-  return {0};  // source
+::std::optional<unsigned>
+BufferSubspanOp::getTiedResultOperandIndex(unsigned resultIndex) {
+  return {0}; // source
 }
 
 SmallVector<int64_t> BufferSubspanOp::getTiedResultOperandIndices() {
-  return {0};  // source
+  return {0}; // source
 }
 
 // static
@@ -1448,10 +1471,10 @@
   getLengthMutable().assign(operand.length);
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 #define GET_OP_CLASSES
 #include "iree/compiler/Dialect/Util/IR/UtilOps.cpp.inc"
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h
index 40c13c3..009997e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.h
@@ -25,7 +25,7 @@
 #include "mlir/Transforms/DialectConversion.h"
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/Util/IR/UtilOps.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Util/IR/UtilOps.h.inc" // IWYU pragma: export
 
 namespace mlir {
 namespace iree_compiler {
@@ -103,15 +103,15 @@
 //===----------------------------------------------------------------------===//
 // (type{%size0}, type, type{%size1})
 
-ParseResult parseSizeAwareTypeList(
-    OpAsmParser &parser, SmallVectorImpl<Type> &types,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes);
+ParseResult
+parseSizeAwareTypeList(OpAsmParser &parser, SmallVectorImpl<Type> &types,
+                       SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes);
 void printSizeAwareTypeList(OpAsmPrinter &p, Operation *op, TypeRange types,
                             OperandRange sizes);
-ParseResult parseSizeAwareTypeList(
-    OpAsmParser &parser, SmallVectorImpl<Type> &types0,
-    SmallVectorImpl<Type> &types1,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes);
+ParseResult
+parseSizeAwareTypeList(OpAsmParser &parser, SmallVectorImpl<Type> &types0,
+                       SmallVectorImpl<Type> &types1,
+                       SmallVectorImpl<OpAsmParser::UnresolvedOperand> &sizes);
 void printSizeAwareTypeList(OpAsmPrinter &p, Operation *op, TypeRange types0,
                             TypeRange types1, OperandRange sizes);
 
@@ -124,9 +124,9 @@
 ParseResult parseShapedTiedResult(
     OpAsmParser &parser, Type &resultType,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &resultDims);
-inline ParseResult parseShapedTiedResult(
-    OpAsmParser &parser, Type &resultType,
-    OpAsmParser::UnresolvedOperand &resultDim) {
+inline ParseResult
+parseShapedTiedResult(OpAsmParser &parser, Type &resultType,
+                      OpAsmParser::UnresolvedOperand &resultDim) {
   SmallVector<OpAsmParser::UnresolvedOperand, 1> resultDims;
   if (failed(parseShapedTiedResult(parser, resultType, resultDims))) {
     return failure();
@@ -145,9 +145,10 @@
 void printShapedTiedResult(OpAsmPrinter &p, Operation *op, Type resultType,
                            ValueRange resultDims, ArrayAttr tiedOperands);
 
-inline ParseResult parseShapedTiedResult(
-    OpAsmParser &parser, Type &resultType,
-    OpAsmParser::UnresolvedOperand &resultDim, ArrayAttr &tiedOperands) {
+inline ParseResult
+parseShapedTiedResult(OpAsmParser &parser, Type &resultType,
+                      OpAsmParser::UnresolvedOperand &resultDim,
+                      ArrayAttr &tiedOperands) {
   SmallVector<OpAsmParser::UnresolvedOperand> resultDims;
   if (failed(parseShapedTiedResult(parser, resultType, resultDims,
                                    tiedOperands))) {
@@ -207,7 +208,7 @@
                                   ArrayAttr tiedOperands, ArrayAttr argAttrs,
                                   ArrayAttr resultAttrs);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_IR_UTILOPS_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_IR_UTILOPS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTraits.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTraits.h
index 042100e..4319164 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTraits.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTraits.h
@@ -37,9 +37,9 @@
   static LogicalResult verifyTrait(Operation *op) { return success(); }
 };
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace OpTrait
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace OpTrait
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_IR_UTILTRAITS_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_IR_UTILTRAITS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
index b5bbcf6..c719647 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
@@ -146,9 +146,9 @@
 
 // Walks up the ancestors of |sourceBlock| until |targetBlock| is reached.
 // Returns an insertion point in the |targetBlock|.
-static std::pair<Block *, Block::iterator> findCommonBlockInsertionPoint(
-    Block *targetBlock, Block *sourceBlock,
-    Block::iterator sourceInsertionPoint) {
+static std::pair<Block *, Block::iterator>
+findCommonBlockInsertionPoint(Block *targetBlock, Block *sourceBlock,
+                              Block::iterator sourceInsertionPoint) {
   auto *ancestorOp = targetBlock->findAncestorOpInBlock(*sourceInsertionPoint);
   if (ancestorOp) {
     return std::make_pair(ancestorOp->getBlock(), Block::iterator(ancestorOp));
@@ -169,8 +169,10 @@
   }
   if (definingBlock == block) {
     // Defined in the same block; ensure block order.
-    if (llvm::isa<BlockArgument>(value)) return true;
-    if (insertionPoint == block->end()) return true;
+    if (llvm::isa<BlockArgument>(value))
+      return true;
+    if (insertionPoint == block->end())
+      return true;
     if (value.getDefiningOp()->isBeforeInBlock(&*insertionPoint)) {
       return true;
     }
@@ -194,7 +196,7 @@
 bool tryMoveProducerBefore(Value value, Operation *consumerOp) {
   auto *producerOp = value.getDefiningOp();
   if (!producerOp) {
-    return true;  // block arg, ok to use
+    return true; // block arg, ok to use
   }
 
   // Producers and consumers in the same block are easy to check.
@@ -266,8 +268,9 @@
   return success();
 }
 
-LogicalResult detail::verifyGlobalAddressOp(
-    GlobalAddressOpInterface addressOp, SymbolTableCollection &symbolTable) {
+LogicalResult
+detail::verifyGlobalAddressOp(GlobalAddressOpInterface addressOp,
+                              SymbolTableCollection &symbolTable) {
   if (!isa_and_nonnull<IREE::Util::GlobalOpInterface>(
           symbolTable.lookupNearestSymbolFrom(addressOp.getOperation(),
                                               addressOp.getGlobalAttr()))) {
@@ -328,9 +331,9 @@
   return success();
 }
 
-IREE::Util::GlobalOpInterface lookupGlobalOp(
-    Operation *accessorOp, SymbolRefAttr globalRefAttr,
-    SymbolTableCollection &symbolTable) {
+IREE::Util::GlobalOpInterface
+lookupGlobalOp(Operation *accessorOp, SymbolRefAttr globalRefAttr,
+               SymbolTableCollection &symbolTable) {
   return symbolTable.lookupNearestSymbolFrom<IREE::Util::GlobalOpInterface>(
       accessorOp->getParentOp(), globalRefAttr);
 }
@@ -339,21 +342,26 @@
 // IREE::Util::TiedOpInterface
 //===----------------------------------------------------------------------===//
 
-std::optional<unsigned> detail::getTiedResultOperandIndex(
-    Operation *op, unsigned resultIndex) {
+std::optional<unsigned>
+detail::getTiedResultOperandIndex(Operation *op, unsigned resultIndex) {
   auto storageAttr = op->getAttrOfType<ArrayAttr>(
       IREE::Util::TiedOpInterface::getStorageAttrName());
-  if (!storageAttr) return std::nullopt;
+  if (!storageAttr)
+    return std::nullopt;
   auto valueAttrs = storageAttr.getValue();
-  if (valueAttrs.empty()) return std::nullopt;
+  if (valueAttrs.empty())
+    return std::nullopt;
   if (auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(op)) {
     auto indexAndLength = tiedOp.getTiedResultsIndexAndLength();
-    if (resultIndex < indexAndLength.first) return std::nullopt;
+    if (resultIndex < indexAndLength.first)
+      return std::nullopt;
     resultIndex -= indexAndLength.first;
-    if (resultIndex >= indexAndLength.second) return std::nullopt;
+    if (resultIndex >= indexAndLength.second)
+      return std::nullopt;
   }
   int64_t value = llvm::cast<IntegerAttr>(valueAttrs[resultIndex]).getInt();
-  if (value == IREE::Util::TiedOpInterface::kUntiedIndex) return std::nullopt;
+  if (value == IREE::Util::TiedOpInterface::kUntiedIndex)
+    return std::nullopt;
   if (auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(op)) {
     unsigned tiedOperandsOffset = tiedOp.getTiedOperandsIndexAndLength().first;
     return tiedOperandsOffset + static_cast<unsigned>(value);
@@ -377,7 +385,8 @@
     // returned by `getTiedOperandsIndexAndLength`.
     unsigned tiedOperandsOffset = tiedOp.getTiedOperandsIndexAndLength().first;
     for (auto &index : indices) {
-      if (index != TiedOpInterface::kUntiedIndex) index -= tiedOperandsOffset;
+      if (index != TiedOpInterface::kUntiedIndex)
+        index -= tiedOperandsOffset;
     }
   }
 
@@ -390,9 +399,11 @@
   SmallVector<int64_t> indices;
   auto storageAttr =
       op->getAttrOfType<ArrayAttr>(TiedOpInterface::getStorageAttrName());
-  if (!storageAttr) return indices;
+  if (!storageAttr)
+    return indices;
   auto valueAttrs = storageAttr.getValue();
-  if (valueAttrs.empty()) return indices;
+  if (valueAttrs.empty())
+    return indices;
   auto tiedOp = cast<TiedOpInterface>(op);
   auto resultRange = tiedOp.getTiedResultsIndexAndLength();
   unsigned tiedOperandsOffset = tiedOp.getTiedOperandsIndexAndLength().first;
@@ -412,7 +423,8 @@
   while (auto definingOp =
              dyn_cast_or_null<TiedOpInterface>(baseValue.getDefiningOp())) {
     auto tiedValue = definingOp.getTiedResultOperand(baseValue);
-    if (!tiedValue) break;
+    if (!tiedValue)
+      break;
     baseValue = tiedValue;
   }
   return baseValue;
@@ -422,15 +434,18 @@
 bool TiedOpInterface::hasAnyTiedUses(Value value) {
   for (auto &use : value.getUses()) {
     auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(use.getOwner());
-    if (!tiedOp) continue;
-    if (tiedOp.isOperandTied(use.getOperandNumber())) return true;
+    if (!tiedOp)
+      continue;
+    if (tiedOp.isOperandTied(use.getOperandNumber()))
+      return true;
   }
   return false;
 }
 
 bool detail::isOperandTied(Operation *op, unsigned operandIndex) {
   auto tiedOp = dyn_cast<TiedOpInterface>(op);
-  if (!tiedOp) return false;
+  if (!tiedOp)
+    return false;
   auto tiedIndices = tiedOp.getTiedResultOperandIndices();
   for (unsigned i = 0; i < tiedIndices.size(); ++i) {
     if (tiedIndices[i] == operandIndex) {
@@ -443,7 +458,8 @@
 SmallVector<Value> detail::getOperandTiedResults(Operation *op,
                                                  unsigned operandIndex) {
   auto tiedOp = dyn_cast<TiedOpInterface>(op);
-  if (!tiedOp) return {};
+  if (!tiedOp)
+    return {};
   auto resultRange = tiedOp.getTiedResultsIndexAndLength();
   SmallVector<Value> results;
   auto tiedIndices = tiedOp.getTiedResultOperandIndices();
@@ -457,7 +473,8 @@
 
 LogicalResult detail::verifyTiedOp(TiedOpInterface tiedOp) {
   auto tiedOperandIndices = tiedOp.getTiedResultOperandIndices();
-  if (tiedOperandIndices.empty()) return success();
+  if (tiedOperandIndices.empty())
+    return success();
   auto resultRange = tiedOp.getTiedResultsIndexAndLength();
   if (tiedOperandIndices.size() != resultRange.second) {
     return tiedOp.emitError("op results/tied operand indices mismatch");
@@ -489,7 +506,7 @@
   for (auto it : llvm::enumerate(oldTiedOperandIndices)) {
     unsigned resultIndex = it.index();
     if (llvm::is_contained(excludedResultIndices, resultIndex)) {
-      continue;  // result removed
+      continue; // result removed
     }
 
     int64_t tiedOperandIndex = it.value();
@@ -504,7 +521,8 @@
       // Count up the number of removed operands prior to this one.
       unsigned offset = 0;
       for (unsigned i = 0; i < tiedOperandIndex; ++i) {
-        if (i < excludedOperands.size() && excludedOperands[i]) ++offset;
+        if (i < excludedOperands.size() && excludedOperands[i])
+          ++offset;
       }
 
       tiedOperandIndex -= offset;
@@ -528,14 +546,16 @@
   while (!worklist.empty()) {
     auto value = worklist.pop_back_val();
     auto *definingOp = value.getDefiningOp();
-    if (!definingOp) continue;
+    if (!definingOp)
+      continue;
     if (auto sizeAwareOp =
             llvm::dyn_cast<IREE::Util::SizeAwareOpInterface>(definingOp)) {
       return sizeAwareOp.getResultSizeFromValue(value);
     }
     if (auto tiedOp = llvm::dyn_cast<IREE::Util::TiedOpInterface>(definingOp)) {
       auto tiedOperand = tiedOp.getTiedResultOperand(value);
-      if (tiedOperand) worklist.push_back(tiedOperand);
+      if (tiedOperand)
+        worklist.push_back(tiedOperand);
     }
   }
 
@@ -569,13 +589,13 @@
   auto sizeAwareType = llvm::dyn_cast<IREE::Util::SizeAwareTypeInterface>(
       resourceValue.getType());
   if (!sizeAwareType) {
-    return {};  // Not a sized type.
+    return {}; // Not a sized type.
   }
   if (!builder.getInsertionPoint().getNodePtr()->isKnownSentinel()) {
     auto sizeValue = sizeAwareType.findSizeValue(
         resourceValue, builder.getBlock(), builder.getInsertionPoint());
     if (sizeValue) {
-      return sizeValue;  // Found in IR.
+      return sizeValue; // Found in IR.
     }
   }
   // TODO(benvanik): make this cleaner.
@@ -604,12 +624,14 @@
   while (!worklist.empty()) {
     auto workValue = worklist.pop_back_val();
     auto workOp = workValue.getDefiningOp();
-    if (!workOp) continue;
+    if (!workOp)
+      continue;
     if (auto shapeAwareOp = dyn_cast<ShapeAwareOpInterface>(workOp)) {
       return shapeAwareOp.getResultDynamicDimsFromValue(workValue);
     } else if (auto tiedOp = dyn_cast<TiedOpInterface>(workOp)) {
       auto tiedValue = tiedOp.getTiedResultOperand(workValue);
-      if (tiedValue) worklist.push_back(tiedValue);
+      if (tiedValue)
+        worklist.push_back(tiedValue);
     }
   }
   return std::nullopt;
@@ -620,7 +642,8 @@
   // Look up the use-def chain: always safe, as any value we reach dominates
   // {|block|, |insertionPoint|} implicitly.
   auto upwardRange = findDynamicDims(shapedValue);
-  if (upwardRange.has_value()) return upwardRange.value();
+  if (upwardRange.has_value())
+    return upwardRange.value();
 
   // Look down the use-def chain: not safe at some point because we'll move past
   // where {|block|, |insertionPoint|} is dominated. This is often fine for a
@@ -645,10 +668,12 @@
                                    ValueRange dynamicDims) {
   auto value = values[idx];
   auto shapedType = llvm::dyn_cast<ShapedType>(value.getType());
-  if (!shapedType) return ValueRange{};
+  if (!shapedType)
+    return ValueRange{};
 
   // Bail immediately if the shape is static.
-  if (shapedType.hasStaticShape()) return ValueRange{};
+  if (shapedType.hasStaticShape())
+    return ValueRange{};
 
   // Find where the dynamic dims start in the flattened list.
   unsigned offset = 0;
@@ -742,10 +767,10 @@
   return buildShape(op.getLoc(), type, dynamicDims, builder);
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // IREE::Util::UtilDialect
@@ -753,7 +778,7 @@
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_TYPEDEF_CLASSES
-#include "iree/compiler/Dialect/Util/IR/UtilTypes.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Util/IR/UtilTypes.cpp.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -768,11 +793,11 @@
 void UtilDialect::registerTypes() {
   addTypes<
 #define GET_TYPEDEF_LIST
-#include "iree/compiler/Dialect/Util/IR/UtilTypes.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Util/IR/UtilTypes.cpp.inc" // IWYU pragma: keep
       >();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
index c33e5dc..e02750e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
@@ -116,11 +116,11 @@
 LogicalResult verifyGlobalStoreOp(GlobalStoreOpInterface storeOp,
                                   SymbolTableCollection &symbolTable);
 
-}  // namespace detail
+} // namespace detail
 
-IREE::Util::GlobalOpInterface lookupGlobalOp(
-    Operation *accessorOp, SymbolRefAttr globalRefAttr,
-    SymbolTableCollection &symbolTable);
+IREE::Util::GlobalOpInterface
+lookupGlobalOp(Operation *accessorOp, SymbolRefAttr globalRefAttr,
+               SymbolTableCollection &symbolTable);
 
 //===----------------------------------------------------------------------===//
 // Tied operand interface utilities
@@ -137,7 +137,7 @@
 SmallVector<Value> getOperandTiedResults(Operation *op, unsigned operandIndex);
 LogicalResult verifyTiedOp(TiedOpInterface tiedOp);
 
-}  // namespace detail
+} // namespace detail
 
 // Resets or removes the indices in |tiedOperandIndices| based on the given
 // exclusion lists.
@@ -226,23 +226,23 @@
   return type.getIntOrFloatBitWidth();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#include "iree/compiler/Dialect/Util/IR/UtilAttrInterfaces.h.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/Util/IR/UtilOpInterfaces.h.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/Util/IR/UtilTypeInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Util/IR/UtilAttrInterfaces.h.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/Util/IR/UtilOpInterfaces.h.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/Util/IR/UtilTypeInterfaces.h.inc" // IWYU pragma: export
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Util/IR/UtilAttrs.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Util/IR/UtilAttrs.h.inc" // IWYU pragma: keep
 // clang-format on
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_TYPEDEF_CLASSES
-#include "iree/compiler/Dialect/Util/IR/UtilTypes.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Util/IR/UtilTypes.h.inc" // IWYU pragma: keep
 // clang-format on
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_IR_UTILTYPES_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_IR_UTILTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/AnnotateOpOrdinals.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/AnnotateOpOrdinals.cpp
index 6e4e200..cc051b3 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/AnnotateOpOrdinals.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/AnnotateOpOrdinals.cpp
@@ -19,7 +19,7 @@
 
 class AnnotateOpOrdinalsPass
     : public AnnotateOpOrdinalsBase<AnnotateOpOrdinalsPass> {
- public:
+public:
   void runOnOperation() override {
     auto *context = &getContext();
     auto attrName = StringAttr::get(context, "util.ordinal");
@@ -31,13 +31,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createAnnotateOpOrdinalsPass() {
   return std::make_unique<AnnotateOpOrdinalsPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/ApplyPatterns.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/ApplyPatterns.cpp
index 9c05063..ebebeb2 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/ApplyPatterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/ApplyPatterns.cpp
@@ -22,7 +22,7 @@
 namespace Util {
 
 class ApplyPatternsPass : public ApplyPatternsBase<ApplyPatternsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry
         .insert<BuiltinDialect, func::FuncDialect, IREE::Util::UtilDialect>();
@@ -54,7 +54,7 @@
   return std::make_unique<ApplyPatternsPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/CombineInitializers.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/CombineInitializers.cpp
index 70c7add..d295b75 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/CombineInitializers.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/CombineInitializers.cpp
@@ -32,7 +32,7 @@
 
 class CombineInitializersPass
     : public CombineInitializersBase<CombineInitializersPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect>();
   }
@@ -48,7 +48,8 @@
       initializerOps.push_back(initializerOp);
       locs.push_back(initializerOp.getLoc());
     }
-    if (initializerOps.size() <= 1) return;
+    if (initializerOps.size() <= 1)
+      return;
     auto fusedLoc = FusedLoc::get(&getContext(), locs);
 
     // Make the new initializer op in the same location as the last initializer
@@ -76,13 +77,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createCombineInitializersPass() {
   return std::make_unique<CombineInitializersPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/ConvertPrimitiveType.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/ConvertPrimitiveType.cpp
index 6621a01..169870c 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/ConvertPrimitiveType.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/ConvertPrimitiveType.cpp
@@ -41,7 +41,8 @@
                          Location loc) {
   Type eTy = getElementTypeOrSelf(type);
   Type inputETy = getElementTypeOrSelf(inputs[0].getType());
-  if (!llvm::isa<FloatType>(getElementTypeOrSelf(type))) return nullptr;
+  if (!llvm::isa<FloatType>(getElementTypeOrSelf(type)))
+    return nullptr;
 
   if (inputETy.getIntOrFloatBitWidth() > eTy.getIntOrFloatBitWidth()) {
     return builder.create<arith::TruncFOp>(loc, type, inputs[0]);
@@ -58,7 +59,8 @@
                            Location loc) {
   Type eTy = getElementTypeOrSelf(type);
   Type inputETy = getElementTypeOrSelf(inputs[0].getType());
-  if (!llvm::isa<FloatType>(getElementTypeOrSelf(type))) return nullptr;
+  if (!llvm::isa<FloatType>(getElementTypeOrSelf(type)))
+    return nullptr;
   bool isUnsigned = eTy.isUnsignedInteger();
 
   int64_t inBitwidth = inputETy.getIntOrFloatBitWidth();
@@ -85,7 +87,8 @@
   explicit PrimitiveTypeConverter() {
     addConversion([](Type type) { return type; });
     addConversion([&](SourceType type) -> Type {
-      if (!isSourceType(type)) return type;
+      if (!isSourceType(type))
+        return type;
       return getTargetType(type);
     });
     addConversion([&](ComplexType type) {
@@ -142,9 +145,9 @@
   GenericTypeConversionPattern(MLIRContext *context,
                                TypeConverter &typeConverter)
       : ConversionPattern(typeConverter, MatchAnyOpTypeTag(), 0, context) {}
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     // Convert attributes only if this is a constant-like op.
     // This is because some ops use typed attributes for structural information
     // - like linalg ops using i64 for dimension indices - and if we converted
@@ -188,9 +191,9 @@
           typename OperandToResultWidthLegalityRelation>
 struct ConvertTypeSensitiveArithCastOp : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType =
         this->getTypeConverter()->convertType(op.getResult().getType());
     auto operandType =
@@ -252,20 +255,25 @@
         return typeConverter.isLegal(globalOp.getGlobalType());
       } else if (auto funcOp = dyn_cast<func::FuncOp>(op)) {
         for (Type type : funcOp.getFunctionType().getInputs()) {
-          if (!typeConverter.isLegal(type)) return false;
+          if (!typeConverter.isLegal(type))
+            return false;
         }
         for (Type type : funcOp.getFunctionType().getResults()) {
-          if (!typeConverter.isLegal(type)) return false;
+          if (!typeConverter.isLegal(type))
+            return false;
         }
       }
       for (Type type : op->getResultTypes()) {
-        if (!typeConverter.isLegal(type)) return false;
+        if (!typeConverter.isLegal(type))
+          return false;
       }
       for (Type type : op->getOperandTypes()) {
-        if (!typeConverter.isLegal(type)) return false;
+        if (!typeConverter.isLegal(type))
+          return false;
       }
       for (auto &region : op->getRegions()) {
-        if (!typeConverter.isLegal(&region)) return false;
+        if (!typeConverter.isLegal(&region))
+          return false;
       }
       return true;
     });
@@ -279,7 +287,7 @@
 
   Converter typeConverter;
 };
-}  // namespace
+} // namespace
 
 namespace {
 struct DemoteI64ToI32Converter
@@ -292,7 +300,7 @@
 struct DemoteI64ToI32Pass
     : public ConvertTypesPass<DemoteI64ToI32Base<DemoteI64ToI32Pass>,
                               DemoteI64ToI32Converter> {};
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createDemoteI64ToI32Pass() {
   return std::make_unique<DemoteI64ToI32Pass>();
@@ -308,7 +316,7 @@
 struct DemoteF32ToF16Pass
     : public ConvertTypesPass<DemoteF32ToF16Base<DemoteF32ToF16Pass>,
                               DemoteF32ToF16Converter> {};
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createDemoteF32ToF16Pass() {
   return std::make_unique<DemoteF32ToF16Pass>();
@@ -324,7 +332,7 @@
 struct PromoteF16ToF32Pass
     : public ConvertTypesPass<PromoteF16ToF32Base<PromoteF16ToF32Pass>,
                               PromoteF16ToF32Converter> {};
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createPromoteF16ToF32Pass() {
   return std::make_unique<PromoteF16ToF32Pass>();
@@ -340,7 +348,7 @@
 struct PromoteBF16ToF32Pass
     : public ConvertTypesPass<PromoteBF16ToF32Base<PromoteBF16ToF32Pass>,
                               PromoteBF16ToF32Converter> {};
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createPromoteBF16ToF32Pass() {
   return std::make_unique<PromoteBF16ToF32Pass>();
@@ -356,13 +364,13 @@
 struct DemoteF64ToF32Pass
     : public ConvertTypesPass<DemoteF64ToF32Base<DemoteF64ToF32Pass>,
                               DemoteF64ToF32Converter> {};
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createDemoteF64ToF32Pass() {
   return std::make_unique<DemoteF64ToF32Pass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp
index 9b9142e..b8cc3be 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp
@@ -18,7 +18,7 @@
 
 class DropCompilerHintsPass
     : public DropCompilerHintsBase<DropCompilerHintsPass> {
- public:
+public:
   void runOnOperation() override {
     // We can't use patterns and applyPatternsAndFoldGreedily because that
     // automatically does canonicalization.
@@ -33,7 +33,7 @@
   return std::make_unique<DropCompilerHintsPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/FixedPointIterator.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/FixedPointIterator.cpp
index 1c5355a..698a370 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/FixedPointIterator.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/FixedPointIterator.cpp
@@ -27,13 +27,13 @@
 // continue.
 class FixedPointIteratorPass
     : public FixedPointIteratorBase<FixedPointIteratorPass> {
- public:
+public:
   FixedPointIteratorPass() = default;
   FixedPointIteratorPass(const FixedPointIteratorPass &other)
       : FixedPointIteratorBase<FixedPointIteratorPass>(other) {}
   FixedPointIteratorPass(OpPassManager pipeline);
 
- private:
+private:
   LogicalResult initializeOptions(StringRef options) override;
   void getDependentDialects(DialectRegistry &registry) const override;
   void runOnOperation() override;
@@ -56,8 +56,10 @@
 }
 
 LogicalResult FixedPointIteratorPass::initializeOptions(StringRef options) {
-  if (failed(Pass::initializeOptions(options))) return failure();
-  if (pipeline) return success();
+  if (failed(Pass::initializeOptions(options)))
+    return failure();
+  if (pipeline)
+    return success();
 
   // Pipelines are expected to be of the form `<op-name>(<pipeline>)`.
   // TODO: This was lifted from the Inliner pass. We should provide a parse
@@ -114,14 +116,14 @@
   return signalPassFailure();
 }
 
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<void>> createFixedPointIteratorPass(
-    OpPassManager pipeline) {
+std::unique_ptr<OperationPass<void>>
+createFixedPointIteratorPass(OpPassManager pipeline) {
   return std::make_unique<FixedPointIteratorPass>(std::move(pipeline));
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/FoldGlobals.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/FoldGlobals.cpp
index be137e6..1d3980c 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/FoldGlobals.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/FoldGlobals.cpp
@@ -95,28 +95,28 @@
       auto globalName = globalOrder[i];
       auto action = fn(globalMap[globalName]);
       switch (action) {
-        case GlobalAction::PRESERVE: {
-          ++i;
-          break;
+      case GlobalAction::PRESERVE: {
+        ++i;
+        break;
+      }
+      case GlobalAction::UPDATE: {
+        didChange |= true;
+        ++i;
+        break;
+      }
+      case GlobalAction::DELETE: {
+        didChange |= true;
+        auto &global = globalMap[globalName];
+        assert(global.op.isGlobalPrivate() && "can't delete public globals");
+        assert(global.loadOps.empty() && "must not be used");
+        for (auto storeOp : global.storeOps) {
+          storeOp.erase();
         }
-        case GlobalAction::UPDATE: {
-          didChange |= true;
-          ++i;
-          break;
-        }
-        case GlobalAction::DELETE: {
-          didChange |= true;
-          auto &global = globalMap[globalName];
-          assert(global.op.isGlobalPrivate() && "can't delete public globals");
-          assert(global.loadOps.empty() && "must not be used");
-          for (auto storeOp : global.storeOps) {
-            storeOp.erase();
-          }
-          global.op.erase();
-          globalMap.erase(globalName);
-          globalOrder.erase(globalOrder.begin() + i);
-          break;
-        }
+        global.op.erase();
+        globalMap.erase(globalName);
+        globalOrder.erase(globalOrder.begin() + i);
+        break;
+      }
       }
     }
     return didChange;
@@ -140,7 +140,8 @@
 //  util.global @a = 5 : i32
 static bool inlineConstantGlobalStores(GlobalTable &globalTable) {
   return globalTable.forEach([&](Global &global) {
-    if (global.isIndirect) return GlobalAction::PRESERVE;
+    if (global.isIndirect)
+      return GlobalAction::PRESERVE;
 
     // Find the constant value used in all stores.
     // All stores must match the initial value of the global _or_ the global
@@ -164,7 +165,8 @@
         break;
       }
     }
-    if (!constantValue) return GlobalAction::PRESERVE;
+    if (!constantValue)
+      return GlobalAction::PRESERVE;
 
     // Propagate constant into the initial value. Note that there may have been
     // a previous initial value that is being replaced.
@@ -192,7 +194,8 @@
 //  util.global.mutable @chained0 : i32
 static bool renameChainedGlobals(GlobalTable &globalTable) {
   return globalTable.forEach([&](Global &global) {
-    if (!global.isCandidate()) return GlobalAction::PRESERVE;
+    if (!global.isCandidate())
+      return GlobalAction::PRESERVE;
 
     // Find the other symbol this global is chained with by looking for uniform
     // stores. Note that we don't care about initializers.
@@ -213,7 +216,8 @@
         break;
       }
     }
-    if (!aliasName) return GlobalAction::PRESERVE;
+    if (!aliasName)
+      return GlobalAction::PRESERVE;
 
     // Replace all loads from the global with the aliased global.
     auto &aliasGlobal = globalTable.globalMap[aliasName.getValue()];
@@ -245,9 +249,12 @@
 //  util.global @a = 5 : i32
 static bool updateGlobalImmutability(GlobalTable &globalTable) {
   return globalTable.forEach([&](Global &global) {
-    if (!global.isCandidate()) return GlobalAction::PRESERVE;
-    if (!global.storeOps.empty()) return GlobalAction::PRESERVE;
-    if (!global.op.isGlobalMutable()) return GlobalAction::PRESERVE;
+    if (!global.isCandidate())
+      return GlobalAction::PRESERVE;
+    if (!global.storeOps.empty())
+      return GlobalAction::PRESERVE;
+    if (!global.op.isGlobalMutable())
+      return GlobalAction::PRESERVE;
     global.op.setGlobalMutable(false);
     return GlobalAction::UPDATE;
   });
@@ -266,18 +273,24 @@
   }
   // Fallback that asks a dialect to materialize things. This may fail!
   auto *op = attr.getDialect().materializeConstant(builder, attr, type, loc);
-  if (!op) return nullptr;
+  if (!op)
+    return nullptr;
   return op->getResult(0);
 }
 
 // Inlines constant global values that are known to not change.
 static bool inlineConstantGlobalLoads(GlobalTable &globalTable) {
   return globalTable.forEach([&](Global &global) {
-    if (global.isIndirect) return GlobalAction::PRESERVE;
-    if (!global.storeOps.empty()) return GlobalAction::PRESERVE;
-    if (global.op.isGlobalMutable()) return GlobalAction::PRESERVE;
-    if (global.op->hasAttr("noinline")) return GlobalAction::PRESERVE;
-    if (!global.op.getGlobalInitialValue()) return GlobalAction::PRESERVE;
+    if (global.isIndirect)
+      return GlobalAction::PRESERVE;
+    if (!global.storeOps.empty())
+      return GlobalAction::PRESERVE;
+    if (global.op.isGlobalMutable())
+      return GlobalAction::PRESERVE;
+    if (global.op->hasAttr("noinline"))
+      return GlobalAction::PRESERVE;
+    if (!global.op.getGlobalInitialValue())
+      return GlobalAction::PRESERVE;
 
     if (llvm::isa<IREE::Util::ReferenceTypeInterface>(
             global.op.getGlobalType())) {
@@ -315,7 +328,8 @@
 // are discarded.
 static bool eraseUnusedGlobals(GlobalTable &globalTable) {
   return globalTable.forEach([&](Global &global) {
-    if (!global.isCandidate()) return GlobalAction::PRESERVE;
+    if (!global.isCandidate())
+      return GlobalAction::PRESERVE;
     if (global.loadOps.empty()) {
       // No loads; remove entirely.
       return GlobalAction::DELETE;
@@ -332,7 +346,8 @@
   DenseMap<Attribute, StringRef> leaderMap;
   for (auto globalIt : globalTable.globalMap) {
     auto &global = globalIt.getSecond();
-    if (!global.isCandidate()) continue;
+    if (!global.isCandidate())
+      continue;
     if (!global.storeOps.empty()) {
       // Stores - not eligible for deduplication.
       continue;
@@ -350,8 +365,10 @@
 
   bool didChange = false;
   for (auto it = ec.begin(), end = ec.end(); it != end; ++it) {
-    if (!it->isLeader()) continue;  // Ignore non-leader sets.
-    if (++ec.member_begin(it) == ec.member_end()) continue;
+    if (!it->isLeader())
+      continue; // Ignore non-leader sets.
+    if (++ec.member_begin(it) == ec.member_end())
+      continue;
     IREE::Util::GlobalOpInterface baseGlobalOp =
         globalTable.globalMap[it->getData()].op;
 
@@ -374,7 +391,8 @@
         baseGlobalOp.getContext(), baseGlobalOp.getGlobalName());
     for (auto mi = ec.member_begin(it); mi != ec.member_end(); ++mi) {
       Global &global = globalTable.globalMap[*mi];
-      if (global.op == baseGlobalOp) continue;
+      if (global.op == baseGlobalOp)
+        continue;
       for (auto loadOp : global.loadOps) {
         loadOp.setGlobalAttr(baseGlobalNameAttr);
       }
@@ -393,7 +411,7 @@
 }
 
 class FoldGlobalsPass : public FoldGlobalsBase<FoldGlobalsPass> {
- public:
+public:
   explicit FoldGlobalsPass() = default;
   FoldGlobalsPass(const FoldGlobalsPass &pass) {}
 
@@ -464,27 +482,28 @@
         didChange = true;
       }
 
-      if (!didChange) break;
+      if (!didChange)
+        break;
     }
 
     afterFoldingGlobals =
         count(moduleOp.getOps<IREE::Util::GlobalOpInterface>());
   }
 
- private:
+private:
   Statistic beforeFoldingGlobals{this, "global ops before folding",
                                  "Number of util.global ops before folding"};
   Statistic afterFoldingGlobals{this, "global ops after folding",
                                 "Number of util.global ops after folding"};
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createFoldGlobalsPass() {
   return std::make_unique<FoldGlobalsPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/FuseGlobals.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/FuseGlobals.cpp
index 7f04208..86ffc55 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/FuseGlobals.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/FuseGlobals.cpp
@@ -100,7 +100,7 @@
 //  builtin.func @foo(%arg0: i32) {
 //    util.global.store %arg0, @fused : i32
 class FuseGlobalsPass : public FuseGlobalsBase<FuseGlobalsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect>();
   }
@@ -129,7 +129,8 @@
         llvm::dbgs() << ":\n";
       });
       auto *region = callableOp.getCallableRegion();
-      if (!region) continue;
+      if (!region)
+        continue;
       for (auto &block : *region) {
         DenseMap<Value, SmallVector<IREE::Util::GlobalStoreOpInterface>>
             valueStores;
@@ -141,7 +142,8 @@
             storeOp.print(llvm::dbgs(), *asmState);
             llvm::dbgs() << "; candidate=" << global.isCandidate() << "\n";
           });
-          if (!global.isCandidate()) continue;
+          if (!global.isCandidate())
+            continue;
           valueStores[storeOp.getStoredGlobalValue()].push_back(storeOp);
         }
         for (auto valueStore : valueStores) {
@@ -182,7 +184,8 @@
       llvm::BitVector tempBits = correlationBits;
       for (auto ordinal : correlationBits.set_bits()) {
         auto &otherGlobalName = globalTable.globalOrder[ordinal];
-        if (otherGlobalName == globalName) continue;
+        if (otherGlobalName == globalName)
+          continue;
         auto &otherBits = correlationMap[otherGlobalName];
         if (!otherBits.test(global.ordinal)) {
           LLVM_DEBUG(llvm::dbgs() << "Fixup: " << globalName
@@ -230,8 +233,10 @@
     // differ.
     SmallVector<SmallVector<Global *>> fusableSets;
     for (auto it = ec.begin(), end = ec.end(); it != end; ++it) {
-      if (!it->isLeader()) continue;  // Ignore non-leader sets.
-      if (++ec.member_begin(it) == ec.member_end()) continue;  // size 1
+      if (!it->isLeader())
+        continue; // Ignore non-leader sets.
+      if (++ec.member_begin(it) == ec.member_end())
+        continue; // size 1
       DenseMap<Attribute, SmallVector<Global *>> initialValueMap;
       for (auto mi = ec.member_begin(it); mi != ec.member_end(); ++mi) {
         Global &global = globalTable.globalMap[*mi];
@@ -267,7 +272,8 @@
       auto baseGlobalNameAttr = FlatSymbolRefAttr::get(
           baseGlobalOp.getContext(), baseGlobalOp.getGlobalName());
       for (auto *global : fusableSet) {
-        if (global->op == baseGlobalOp) continue;
+        if (global->op == baseGlobalOp)
+          continue;
 
         // Redirect all loads to the new fused global.
         for (auto loadOp : global->loadOps) {
@@ -285,13 +291,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createFuseGlobalsPass() {
   return std::make_unique<FuseGlobalsPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/HoistIntoGlobals.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/HoistIntoGlobals.cpp
index e7af662..1c41f32 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/HoistIntoGlobals.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/HoistIntoGlobals.cpp
@@ -35,7 +35,7 @@
 // can sink globals into the program where it is profitable to reduce
 // working set size.
 class HoistIntoGlobalsPass : public HoistIntoGlobalsBase<HoistIntoGlobalsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registerConstExprDependentDialects(registry);
   }
@@ -116,11 +116,13 @@
     return existingGlobal;
   }
 
-  void cloneProducerTreeInto(
-      OpBuilder &builder, const ConstExprAnalysis::ConstValueInfo *producerInfo,
-      HoistedValueMap &hoistedMap, IRMapping &cloneMapping,
-      const ConstExprAnalysis &constExprs) {
-    if (cloneMapping.contains(producerInfo->constValue)) return;
+  void
+  cloneProducerTreeInto(OpBuilder &builder,
+                        const ConstExprAnalysis::ConstValueInfo *producerInfo,
+                        HoistedValueMap &hoistedMap, IRMapping &cloneMapping,
+                        const ConstExprAnalysis &constExprs) {
+    if (cloneMapping.contains(producerInfo->constValue))
+      return;
 
     // We either have a global associated already or we need to traverse
     // down and materialize producers.
@@ -225,13 +227,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createHoistIntoGlobalsPass() {
   return std::make_unique<HoistIntoGlobalsPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp
index 1b561c8..196b416 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp
@@ -165,7 +165,8 @@
 
   // Walk callee arguments.
   for (auto [i, value] : llvm::enumerate(funcOp.getArguments())) {
-    if (value.use_empty()) analysis.calleeUsedArgs.reset(i);
+    if (value.use_empty())
+      analysis.calleeUsedArgs.reset(i);
   }
 
   // Walk all return sites in the function.
@@ -277,7 +278,8 @@
     // Note that we need to track unused results as an AND such that all callers
     // need to not use them. We'll flip the bits below so that `used = true`.
     for (auto [i, value] : llvm::enumerate(callOp.getResults())) {
-      if (!value.use_empty()) callerUnusedResults.reset(i);
+      if (!value.use_empty())
+        callerUnusedResults.reset(i);
     }
   }
   if (!analysis.callOps.empty()) {
@@ -325,7 +327,8 @@
   // we know all callers will stop passing them.
   for (unsigned i = 0; i < resultCount; ++i) {
     int argIndex = analysis.passthroughResultArgs[i];
-    if (argIndex == kUnassigned) continue;
+    if (argIndex == kUnassigned)
+      continue;
     auto arg = funcOp.getArgument(argIndex);
     bool onlyReturnUsers = true;
     for (auto user : arg.getUsers()) {
@@ -457,12 +460,14 @@
   }
 
   // Early out if no changes.
-  if (deadArgs.none() && deadResults.none()) return false;
+  if (deadArgs.none() && deadResults.none())
+    return false;
 
   // Erase dead results from all return sites.
   funcOp.walk([&](func::ReturnOp returnOp) {
     for (int i = deadResults.size() - 1; i >= 0; --i) {
-      if (deadResults.test(i)) returnOp.getOperandsMutable().erase(i);
+      if (deadResults.test(i))
+        returnOp.getOperandsMutable().erase(i);
     }
   });
 
@@ -542,7 +547,8 @@
   }
 
   // Early out if no changes.
-  if (deadOperands.none() && deadResults.none()) return false;
+  if (deadOperands.none() && deadResults.none())
+    return false;
 
   // Fully replace call op because we may have changed result count.
   auto newCallOp = OpBuilder(callOp).create<func::CallOp>(
@@ -562,7 +568,7 @@
 }
 
 class IPOPass : public IPOBase<IPOPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<arith::ArithDialect>();
     registry.insert<IREE::Util::UtilDialect>();
@@ -597,7 +603,8 @@
     // Use analysis results to mutate functions.
     bool anyChanges = false;
     for (auto &analysis : analysisResults) {
-      if (analysis.isIncomplete) continue;
+      if (analysis.isIncomplete)
+        continue;
       anyChanges = applyFuncChanges(analysis, analysis.funcOp) || anyChanges;
       for (auto callOp : analysis.callOps) {
         anyChanges = applyCallChanges(analysis, callOp) || anyChanges;
@@ -612,7 +619,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createIPOPass() {
   return std::make_unique<IPOPass>();
@@ -620,7 +627,7 @@
 
 static PassRegistration<IPOPass> pass;
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/PassDetail.h b/compiler/src/iree/compiler/Dialect/Util/Transforms/PassDetail.h
index 8a5ae9c..965c272 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/PassDetail.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/PassDetail.h
@@ -17,11 +17,11 @@
 namespace Util {
 
 #define GEN_PASS_CLASSES
-#include "iree/compiler/Dialect/Util/Transforms/Passes.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Util/Transforms/Passes.h.inc" // IWYU pragma: keep
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_UTIL_TRANSFORMS_PASS_DETAIL_H_
+#endif // IREE_COMPILER_DIALECT_UTIL_TRANSFORMS_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.cpp
index 40d0d4e..3451375 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.cpp
@@ -13,15 +13,15 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/Dialect/Util/Transforms/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/Dialect/Util/Transforms/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerTransformPasses() {
   // Generated.
   registerPasses();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.h b/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.h
index 1009279..39c219f 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/Passes.h
@@ -19,8 +19,8 @@
 std::unique_ptr<OperationPass<void>> createApplyPatternsPass();
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createCombineInitializersPass();
 std::unique_ptr<OperationPass<void>> createDropCompilerHintsPass();
-std::unique_ptr<OperationPass<void>> createFixedPointIteratorPass(
-    OpPassManager pipeline);
+std::unique_ptr<OperationPass<void>>
+createFixedPointIteratorPass(OpPassManager pipeline);
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createFoldGlobalsPass();
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createFuseGlobalsPass();
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createHoistIntoGlobalsPass();
@@ -44,9 +44,9 @@
 // Register all Passes
 void registerTransformPasses();
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_IREE_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_DIALECT_IREE_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.cpp
index dd3f715..2ada301 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.cpp
@@ -46,9 +46,11 @@
   using OpInterfaceRewritePattern::OpInterfaceRewritePattern;
   LogicalResult matchAndRewrite(CallableOpInterface op,
                                 PatternRewriter &rewriter) const override {
-    if (!op.getCallableRegion()) return failure();
+    if (!op.getCallableRegion())
+      return failure();
     auto &region = *op.getCallableRegion();
-    if (region.empty()) return failure();
+    if (region.empty())
+      return failure();
 
     // Analyze all branches in the op to compute the information we'll need to
     // analyze across branch sources.
@@ -86,7 +88,7 @@
       }
     }
     if (!hasAnyDupes) {
-      return failure();  // no dupes at all
+      return failure(); // no dupes at all
     }
 
     rewriter.startRootUpdate(op);
@@ -97,9 +99,11 @@
     for (auto &block : llvm::make_range(++region.getBlocks().begin(),
                                         region.getBlocks().end())) {
       unsigned numArgs = block.getNumArguments();
-      if (numArgs == 0) continue;
+      if (numArgs == 0)
+        continue;
       auto blockSources = llvm::ArrayRef(blockSourceMap[&block]);
-      if (blockSources.size() == 0) continue;
+      if (blockSources.size() == 0)
+        continue;
 
       // Which args we'll end up erasing.
       // We need to do the actual removal after we've done the remapping below
@@ -110,15 +114,15 @@
       // See if each block argument is foldable across all block sources.
       // In order to fold we need each source to share some duplicates but note
       // that the sources may not have identical sets.
-      llvm::BitVector sameValues(numArgs);    // reused
-      llvm::BitVector sourceValues(numArgs);  // reused
+      llvm::BitVector sameValues(numArgs);   // reused
+      llvm::BitVector sourceValues(numArgs); // reused
       for (unsigned argIndex = 0; argIndex < numArgs; ++argIndex) {
         // Each bit represents an argument that duplicates the arg at argIndex.
         // We walk all the sources and AND their masks together to get the safe
         // set of duplicate operands.
         // Example for %0: (%a, %b, %a) -> b001
         // Example for %1: (%a, %b, %a) -> b000
-        sameValues.set();  // note reused
+        sameValues.set(); // note reused
         for (auto &blockSource : blockSources) {
           sourceValues.reset();
           for (auto mit = blockSource.duplicates.findLeader(argIndex);
@@ -128,7 +132,7 @@
           sameValues &= sourceValues;
         }
         if (sameValues.none()) {
-          continue;  // arg unused/not duplicated
+          continue; // arg unused/not duplicated
         }
 
         // Remove the base argument from the set so we don't erase it and can
@@ -187,9 +191,11 @@
   using OpInterfaceRewritePattern::OpInterfaceRewritePattern;
   LogicalResult matchAndRewrite(CallableOpInterface op,
                                 PatternRewriter &rewriter) const override {
-    if (!op.getCallableRegion()) return failure();
+    if (!op.getCallableRegion())
+      return failure();
     auto &region = *op.getCallableRegion();
-    if (region.empty()) return failure();
+    if (region.empty())
+      return failure();
     DominanceInfo dominance(op);
 
     // Analyze all branches to build a map of blocks to their sources.
@@ -220,9 +226,11 @@
     for (auto &block : llvm::make_range(++region.getBlocks().begin(),
                                         region.getBlocks().end())) {
       unsigned numArgs = block.getNumArguments();
-      if (numArgs == 0) continue;
+      if (numArgs == 0)
+        continue;
       auto blockSources = llvm::ArrayRef(blockSourceMap[&block]);
-      if (blockSources.size() == 0) continue;
+      if (blockSources.size() == 0)
+        continue;
 
       // Which args we'll end up erasing.
       // We need to do the actual removal after we've done the remapping below
@@ -262,7 +270,8 @@
           uniformValue = nullptr;
           break;
         }
-        if (!uniformValue) continue;
+        if (!uniformValue)
+          continue;
 
         // See if the uniform value dominates this block; if so we can use it.
         if (!uniformValue.getDefiningOp() ||
@@ -273,7 +282,8 @@
           elidedArgs.set(argIndex);
         }
       }
-      if (elidedArgs.none()) continue;
+      if (elidedArgs.none())
+        continue;
 
       // Erase all the block arguments we remapped.
       for (auto &blockSource : blockSources) {
@@ -300,7 +310,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateCommonPatterns(MLIRContext *context, RewritePatternSet &patterns) {
   context->getOrLoadDialect<IREE::Util::UtilDialect>()
@@ -311,7 +321,7 @@
       context);
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.h b/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.h
index 6669e36..6b88ac7 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.h
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/Patterns.h
@@ -19,9 +19,9 @@
 // upstreamed after some more exhaustive investigation.
 void populateCommonPatterns(MLIRContext *context, RewritePatternSet &patterns);
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_IREE_TRANSFORMS_PATTERNS_H_
+#endif // IREE_COMPILER_DIALECT_IREE_TRANSFORMS_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp
index cc84e2e..7930337 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp
@@ -63,7 +63,8 @@
   // Gather all of the resource globals in the root.
   for (auto &region : rootOp->getRegions()) {
     for (auto globalOp : region.getOps<IREE::Util::GlobalOp>()) {
-      if (!isResourceType(globalOp.getType())) continue;
+      if (!isResourceType(globalOp.getType()))
+        continue;
       expandedGlobals[globalOp.getName()].resourceOp = globalOp;
     }
   }
@@ -117,16 +118,17 @@
 // Expands resources in the given |types| list to (resource, size, offset, len).
 // This could be changed to some iterator magic to avoid the alloc.
 static SmallVector<Type> expandTypes(TypeRange types) {
-  if (types.empty()) return {};
+  if (types.empty())
+    return {};
   auto indexType = IndexType::get(types.front().getContext());
   SmallVector<Type> newTypes;
   newTypes.reserve(types.size() * 2);
   for (auto type : types) {
     newTypes.push_back(type);
     if (isResourceType(type)) {
-      newTypes.push_back(indexType);  // resource size
-      newTypes.push_back(indexType);  // subrange offset
-      newTypes.push_back(indexType);  // subrange length
+      newTypes.push_back(indexType); // resource size
+      newTypes.push_back(indexType); // subrange offset
+      newTypes.push_back(indexType); // subrange length
     }
   }
   return newTypes;
@@ -210,19 +212,23 @@
 static void expandRegion(Region &region, bool canModifyEntryBlock,
                          ExpandedGlobalMap &globalMap, IndexSet &indexSet,
                          SubrangeMap subrangeMap) {
-  if (region.empty()) return;
+  if (region.empty())
+    return;
 
   // Update all block arguments.
   auto indexType = IndexType::get(region.getContext());
   for (auto &block : region.getBlocks()) {
-    if (!llvm::any_of(block.getArgumentTypes(), isResourceType)) continue;
-    if (block.isEntryBlock() && !canModifyEntryBlock) continue;
+    if (!llvm::any_of(block.getArgumentTypes(), isResourceType))
+      continue;
+    if (block.isEntryBlock() && !canModifyEntryBlock)
+      continue;
 
     // Insert and build a list of expanded (resource, size, offset) tuples.
     SmallVector<Subrange> expansions;
     for (int i = block.getNumArguments() - 1; i >= 0; --i) {
       auto arg = block.getArgument(i);
-      if (!isResourceType(arg.getType())) continue;
+      if (!isResourceType(arg.getType()))
+        continue;
       Subrange subrange;
       subrange.resource = arg;
       subrange.resourceSize =
@@ -280,8 +286,10 @@
   // Ignore ops that are already in the map (we likely inserted them ourselves
   // earlier).
   auto resultResource = op.getSubrangeResult();
-  if (!resultResource) return;
-  if (subrangeMap.count(resultResource)) return;
+  if (!resultResource)
+    return;
+  if (subrangeMap.count(resultResource))
+    return;
 
   // Get the subrange of the source resource which we should have by way of the
   // other insertions (func/block args, etc).
@@ -289,7 +297,8 @@
   builder.setInsertionPointAfter(op);
   auto sourceSubrange = consumeSubrange(op.getLoc(), op.getSubrangeResource(),
                                         subrangeMap, indexSet, builder);
-  if (op.getSubrangeResource() == sourceSubrange.resource) return;
+  if (op.getSubrangeResource() == sourceSubrange.resource)
+    return;
 
   // Update the subrange in the map by adding the source offset and the local
   // offset from the op. Future ops that consume subranges will reference back
@@ -318,7 +327,8 @@
 static void expandGlobalLoadOp(IREE::Util::GlobalLoadOp op,
                                ExpandedGlobalMap &globalMap, IndexSet &indexSet,
                                SubrangeMap &subrangeMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
   OpBuilder builder(op);
   builder.setInsertionPointAfter(op);
   auto indexType = builder.getIndexType();
@@ -362,7 +372,8 @@
 static void expandGlobalStoreOp(IREE::Util::GlobalStoreOp op,
                                 ExpandedGlobalMap &globalMap,
                                 IndexSet &indexSet, SubrangeMap &subrangeMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
   OpBuilder builder(op);
   builder.setInsertionPointAfter(op);
   auto subrange = consumeSubrange(op.getLoc(), op.getValue(), subrangeMap,
@@ -393,10 +404,12 @@
 // external and resolved later on. We can't modify their signatures.
 static bool isPublicOrExternal(CallableOpInterface callableOp) {
   if (auto symbolOp = dyn_cast<SymbolOpInterface>(callableOp.getOperation())) {
-    if (symbolOp.isPublic()) return true;
+    if (symbolOp.isPublic())
+      return true;
   }
   auto *region = callableOp.getCallableRegion();
-  if (!region || region->empty()) return true;
+  if (!region || region->empty())
+    return true;
   return false;
 }
 
@@ -445,12 +458,14 @@
 //  %2 = stream.resource.subview %r[%ro] : {%rsz} -> {%rl}
 static void expandCallOp(mlir::func::CallOp op, IndexSet &indexSet,
                          SubrangeMap &subrangeMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
 
   // Ignore calls to public/external functions.
   auto calleeOp = SymbolTable::lookupNearestSymbolFrom<CallableOpInterface>(
       op, op.getCalleeAttr());
-  if (isPublicOrExternal(calleeOp)) return;
+  if (isPublicOrExternal(calleeOp))
+    return;
 
   // Build the new call op with expanded operands and results.
   OpBuilder builder(op);
@@ -498,8 +513,10 @@
 //  return %0, %sz, %o, %l
 static void expandReturnOp(mlir::func::ReturnOp op, IndexSet &indexSet,
                            SubrangeMap &subrangeMap) {
-  if (!usesResources(op)) return;
-  if (isPublicOrExternal(op->getParentOfType<mlir::func::FuncOp>())) return;
+  if (!usesResources(op))
+    return;
+  if (isPublicOrExternal(op->getParentOfType<mlir::func::FuncOp>()))
+    return;
   OpBuilder builder(op);
   auto operands = expandOperands(op.getLoc(), op.getOperands(), subrangeMap,
                                  indexSet, builder);
@@ -529,7 +546,8 @@
 
 static void expandCondBranchOp(mlir::cf::CondBranchOp op, IndexSet &indexSet,
                                SubrangeMap &subrangeMap) {
-  if (!usesResources(op)) return;
+  if (!usesResources(op))
+    return;
   OpBuilder builder(op);
   builder.create<mlir::cf::CondBranchOp>(
       op.getLoc(), op.getCondition(), op.getTrueDest(),
@@ -599,7 +617,7 @@
 // until cleanup.
 class PropagateSubrangesPass
     : public PropagateSubrangesBase<PropagateSubrangesPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<mlir::arith::ArithDialect>();
     registry.insert<mlir::func::FuncDialect>();
@@ -621,7 +639,8 @@
       // NOTE: the callable may be empty (like when an extern) - we still want
       // to process it but don't need an IndexSet.
       auto *region = callableOp.getCallableRegion();
-      if (!region || region->empty()) continue;
+      if (!region || region->empty())
+        continue;
       IndexSet indexSet(callableOp.getLoc(),
                         OpBuilder::atBlockBegin(&region->front()));
       SubrangeMap subrangeMap;
@@ -630,13 +649,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createPropagateSubrangesPass() {
   return std::make_unique<PropagateSubrangesPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp
index 6ee6e0f..563425e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp
@@ -51,7 +51,8 @@
     auto ops =
         llvm::to_vector<8>(block.getOps<IREE::Util::GlobalLoadOpInterface>());
     for (auto &op : ops) {
-      if (!immutableGlobals.contains(op.getGlobalName())) continue;
+      if (!immutableGlobals.contains(op.getGlobalName()))
+        continue;
       auto globalRef = llvm::cast<Attribute>(op.getGlobalAttr());
       auto it = loadOps.find(globalRef);
       if (it == loadOps.end()) {
@@ -88,22 +89,25 @@
 
 static void moveOpUpInBlock(Block &block, Operation *op) {
   while (op->getPrevNode()) {
-    if (doesOpBlockMotion(op->getPrevNode())) break;
+    if (doesOpBlockMotion(op->getPrevNode()))
+      break;
     op->moveBefore(op->getPrevNode());
   }
 }
 
 static void moveOpDownInBlock(Block &block, Operation *op) {
   while (op->getNextNode()) {
-    if (doesOpBlockMotion(op->getNextNode())) break;
+    if (doesOpBlockMotion(op->getNextNode()))
+      break;
     op->moveAfter(op->getNextNode());
   }
 }
 
 // Optimizes the load/store ops for each given bucket.
 // Returns true if any op was removed.
-static bool optimizeBuckets(
-    Block &block, std::map<StringRef, SmallVector<Operation *>> &buckets) {
+static bool
+optimizeBuckets(Block &block,
+                std::map<StringRef, SmallVector<Operation *>> &buckets) {
   bool didRemoveAny = false;
   for (auto &bucket : buckets) {
     // First perform basic load-store forwarding and such.
@@ -152,7 +156,8 @@
         didRemoveAny = true;
       }
     }
-    if (ops.empty()) continue;
+    if (ops.empty())
+      continue;
 
     if (auto loadOp =
             dyn_cast<IREE::Util::GlobalLoadOpInterface>(ops.front())) {
@@ -192,8 +197,9 @@
 //     if (tail == store) move store to back
 //
 // Returns true if there were any removals and the block should be reprocessed.
-static bool rearrangeBlockGlobalAccesses(
-    Block &block, DenseSet<StringRef> &immutableGlobals) {
+static bool
+rearrangeBlockGlobalAccesses(Block &block,
+                             DenseSet<StringRef> &immutableGlobals) {
   // Gather sequences of operations that are safe to reorder.
   // Certain ops - like calls/barriers/etc - prevent us from moving any
   // global operations across them.
@@ -203,7 +209,7 @@
   // not be needed but the global count is low and it's nice to not care about
   // op order issues.
   SmallVector<std::map<StringRef, SmallVector<Operation *>>> sequencedBuckets;
-  sequencedBuckets.push_back({});  // Start in a sequence.
+  sequencedBuckets.push_back({}); // Start in a sequence.
   block.walk([&](Operation *op) {
     auto &buckets = sequencedBuckets.back();
     if (auto loadOp = dyn_cast<IREE::Util::GlobalLoadOpInterface>(op)) {
@@ -232,7 +238,7 @@
 
 class SimplifyGlobalAccessesPass
     : public SimplifyGlobalAccessesBase<SimplifyGlobalAccessesPass> {
- public:
+public:
   void runOnOperation() override {
     auto callableOp = getOperation();
     if (!callableOp.getCallableRegion() ||
@@ -285,13 +291,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<void>> createSimplifyGlobalAccessesPass() {
   return std::make_unique<SimplifyGlobalAccessesPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/StripDebugOps.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/StripDebugOps.cpp
index b180bed..86b2a0b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/StripDebugOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/StripDebugOps.cpp
@@ -21,7 +21,7 @@
 namespace {
 
 class StripDebugOpsPass : public StripDebugOpsBase<StripDebugOpsPass> {
- public:
+public:
   void runOnOperation() override {
     getOperation()->walk([](Operation *op) {
       if (isa<mlir::cf::AssertOp>(op) ||
@@ -32,13 +32,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<void>> createStripDebugOpsPass() {
   return std::make_unique<StripDebugOpsPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp
index 7ec64c0..b7707af 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp
@@ -25,7 +25,7 @@
 namespace {
 
 class TestConversionPass : public TestConversionBase<TestConversionPass> {
- public:
+public:
   TestConversionPass() = default;
   TestConversionPass(const TestConversionPass &) {}
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -74,13 +74,13 @@
       llvm::cl::desc("Tests type conversion by widening integers to i32")};
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createTestConversionPass() {
   return std::make_unique<TestConversionPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/TestFloatRangeAnalysis.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/TestFloatRangeAnalysis.cpp
index e4d53b9..03fb88a 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/TestFloatRangeAnalysis.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/TestFloatRangeAnalysis.cpp
@@ -20,7 +20,7 @@
 
 class TestFloatRangeAnalysisPass
     : public TestFloatRangeAnalysisBase<TestFloatRangeAnalysisPass> {
- public:
+public:
   void runOnOperation() override {
     Explorer explorer(getOperation(), TraversalAction::SHALLOW);
     llvm::BumpPtrAllocator allocator;
@@ -47,21 +47,20 @@
 
     // Update.
     for (auto &it : queryOps) {
-      it.first->setAttr(
-          "analysis",
-          StringAttr::get(&getContext(),
-                          it.second->getAsStr(solver.getAsmState())));
+      it.first->setAttr("analysis", StringAttr::get(&getContext(),
+                                                    it.second->getAsStr(
+                                                        solver.getAsmState())));
     }
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<void>> createTestFloatRangeAnalysisPass() {
   return std::make_unique<TestFloatRangeAnalysisPass>();
 }
 
-}  // namespace Util
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Util
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.cpp b/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.cpp
index 8857527..adf0a33 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.cpp
@@ -49,7 +49,8 @@
                           getStrArrayAttr(builder, blockRegStrs));
 
     for (auto &op : block.getOperations()) {
-      if (op.getNumResults() == 0) continue;
+      if (op.getNumResults() == 0)
+        continue;
       SmallVector<std::string, 8> regStrs;
       regStrs.reserve(op.getNumResults());
       for (auto result : op.getResults()) {
@@ -174,8 +175,8 @@
 // all of the following blocks are dominated only by blocks that have come
 // before them in the list. This ensures that we always know all registers for
 // block live-in values as we walk the blocks.
-static SmallVector<Block *, 8> sortBlocksInDominanceOrder(
-    IREE::VM::FuncOp funcOp) {
+static SmallVector<Block *, 8>
+sortBlocksInDominanceOrder(IREE::VM::FuncOp funcOp) {
   if (funcOp.getBlocks().size() == 1) {
     // Dominance info cannot be computed for regions with one block.
     return {&funcOp.getBlocks().front()};
@@ -188,7 +189,8 @@
   }
   llvm::SmallSetVector<Block *, 8> markedBlocks;
   std::function<void(Block *)> visit = [&](Block *block) {
-    if (markedBlocks.count(block) > 0) return;
+    if (markedBlocks.count(block) > 0)
+      return;
     for (auto *childBlock : dominanceInfo.getNode(block)->children()) {
       visit(childBlock->getBlock());
     }
@@ -410,15 +412,18 @@
       SmallVector<FASEdge> outEdges;
       outEdges.reserve(node->outdegree);
       for (auto &edge : edges) {
-        if (edge.sink == node) inEdges.push_back(edge);
-        if (edge.source == node) outEdges.push_back(edge);
+        if (edge.sink == node)
+          inEdges.push_back(edge);
+        if (edge.source == node)
+          outEdges.push_back(edge);
       }
       bool collectInEdges = node->indegree <= node->outdegree;
       bool collectOutEdges = !collectInEdges;
 
       SmallVector<Edge> results;
       for (auto &edge : inEdges) {
-        if (edge.source == node) continue;
+        if (edge.source == node)
+          continue;
         if (collectInEdges) {
           results.push_back({edge.source->id, edge.sink->id});
         }
@@ -428,7 +433,8 @@
         assignBucket(edge.source);
       }
       for (auto &edge : outEdges) {
-        if (edge.sink == node) continue;
+        if (edge.sink == node)
+          continue;
         if (collectOutEdges) {
           results.push_back({edge.source->id, edge.sink->id});
         }
@@ -454,9 +460,11 @@
         ends.erase(ends.begin());
         removeNode(node);
       }
-      if (remainingNodes.empty()) break;
+      if (remainingNodes.empty())
+        break;
       for (ssize_t i = buckets.size() - 1; i >= 0; --i) {
-        if (buckets[i].empty()) continue;
+        if (buckets[i].empty())
+          continue;
         auto *bucket = buckets[i].front();
         buckets[i].erase(buckets[i].begin());
         auto feedbackEdges = removeNode(bucket);
@@ -486,9 +494,11 @@
     llvm::SmallSetVector<NodeID, 8> unmarkedNodes = acyclicNodes;
     llvm::SmallSetVector<NodeID, 8> markedNodes;
     std::function<void(NodeID)> visit = [&](NodeID node) {
-      if (markedNodes.count(node) > 0) return;
+      if (markedNodes.count(node) > 0)
+        return;
       for (auto &edge : acyclicEdges) {
-        if (edge.first != node) continue;
+        if (edge.first != node)
+          continue;
         visit(edge.second);
       }
       markedNodes.insert(node);
@@ -498,7 +508,8 @@
     }
     for (auto node : markedNodes.takeVector()) {
       for (auto &edge : acyclicEdges) {
-        if (edge.first != node) continue;
+        if (edge.first != node)
+          continue;
         result.acyclicEdges.push_back({edge.first, edge.second});
       }
     }
@@ -578,5 +589,5 @@
   return feedbackArcSet.acyclicEdges;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.h b/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.h
index db6f2b6..8dab791 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocation.h
@@ -32,7 +32,7 @@
 // the receiving op/call. This allows reference count increment elision, though
 // the VM is free to ignore this if it so chooses.
 class Register {
- public:
+public:
   static constexpr int kInt32RegisterCount = 0x7FFF;
   static constexpr int kRefRegisterCount = 0x3FFF;
   static constexpr uint16_t kRefTypeBit = 0x8000;
@@ -68,13 +68,8 @@
   }
 
   Register()
-      : null_(1),
-        tombstone_(0),
-        isRef_(0),
-        isMove_(0),
-        byteWidth_(0),
-        reserved_(0),
-        ordinal_(0) {}
+      : null_(1), tombstone_(0), isRef_(0), isMove_(0), byteWidth_(0),
+        reserved_(0), ordinal_(0) {}
 
   // Returns the register without any usage-specific bits set (such as move).
   Register asBaseRegister() const {
@@ -142,19 +137,15 @@
   }
   bool operator!=(const Register &other) const { return !(*this == other); }
 
- private:
+private:
   Register(bool isRef, bool isMove, size_t byteWidth, int ordinal)
-      : null_(0),
-        tombstone_(0),
-        isRef_(isRef),
-        isMove_(isMove),
-        byteWidth_(byteWidth),
-        ordinal_(ordinal) {}
+      : null_(0), tombstone_(0), isRef_(isRef), isMove_(isMove),
+        byteWidth_(byteWidth), ordinal_(ordinal) {}
 
   union {
     struct {
-      uint16_t null_ : 1;  // 1 if the register is indicating an empty value
-      uint16_t tombstone_ : 1;  // 1 if a DenseMap tombstone value
+      uint16_t null_ : 1;      // 1 if the register is indicating an empty value
+      uint16_t tombstone_ : 1; // 1 if a DenseMap tombstone value
       uint16_t isRef_ : 1;
       uint16_t isMove_ : 1;
       uint16_t byteWidth_ : 8;
@@ -169,7 +160,7 @@
 // its children. Once calculated value usages can be mapped to VM register
 // reference bytes.
 class RegisterAllocation {
- public:
+public:
   // Annotates the IR with the register mappings. This is only required if the
   // register mappings are interesting to persist beyond just encoding, such as
   // in tests where we want to compare values.
@@ -207,10 +198,10 @@
   // Remaps branch successor operands to the target block argument registers.
   // Returns a list of source to target register mappings. Source ref registers
   // may have their move bit set.
-  SmallVector<std::pair<Register, Register>, 8> remapSuccessorRegisters(
-      Operation *op, int successorIndex);
+  SmallVector<std::pair<Register, Register>, 8>
+  remapSuccessorRegisters(Operation *op, int successorIndex);
 
- private:
+private:
   int maxI32RegisterOrdinal_ = -1;
   int maxRefRegisterOrdinal_ = -1;
   int scratchI32RegisterCount_ = 0;
@@ -223,8 +214,8 @@
   llvm::DenseMap<Value, Register> map_;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 namespace llvm {
 template <>
@@ -245,6 +236,6 @@
     return lhs == rhs;
   }
 };
-}  // namespace llvm
+} // namespace llvm
 
-#endif  // IREE_COMPILER_DIALECT_VM_ANALYSIS_REGISTERALLOCATION_H_
+#endif // IREE_COMPILER_DIALECT_VM_ANALYSIS_REGISTERALLOCATION_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocationTest.cpp b/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocationTest.cpp
index e2f5e94..8f7ca3e 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocationTest.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Analysis/RegisterAllocationTest.cpp
@@ -15,7 +15,7 @@
 class RegisterAllocationTestPass
     : public PassWrapper<RegisterAllocationTestPass,
                          OperationPass<IREE::VM::FuncOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "test-iree-vm-register-allocation";
   }
@@ -37,10 +37,10 @@
 createRegisterAllocationTestPass() {
   return std::make_unique<RegisterAllocationTestPass>();
 }
-}  // namespace VM
-}  // namespace IREE
+} // namespace VM
+} // namespace IREE
 
 static PassRegistration<RegisterAllocationTestPass> pass;
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Analysis/TestPasses.h b/compiler/src/iree/compiler/Dialect/VM/Analysis/TestPasses.h
index 872160c..c5afcc0 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Analysis/TestPasses.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Analysis/TestPasses.h
@@ -36,9 +36,9 @@
   createRegisterAllocationTestPass();
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_ANALYSIS_TESTPASSES_H_
\ No newline at end of file
+#endif // IREE_COMPILER_DIALECT_VM_ANALYSIS_TESTPASSES_H_
\ No newline at end of file
diff --git a/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.cpp b/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.cpp
index 9e1ab3e..cb7040d 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.cpp
@@ -81,7 +81,7 @@
       }
 
       int equalsIndex = str.find(" =");
-      if (equalsIndex != std::string::npos) {  // heh
+      if (equalsIndex != std::string::npos) { // heh
         auto results = str.substr(0, equalsIndex);
         valueNames.push_back(builder.getStringAttr(results));
       } else {
@@ -154,8 +154,8 @@
   }
 }
 
-LogicalResult ValueLiveness::computeInitialLivenessSets(
-    IREE::VM::FuncOp funcOp) {
+LogicalResult
+ValueLiveness::computeInitialLivenessSets(IREE::VM::FuncOp funcOp) {
   for (auto &block : funcOp.getBlocks()) {
     auto &blockSets = blockLiveness_[&block];
 
@@ -261,11 +261,14 @@
 
     // Handle values entering the block and dying within.
     for (auto value : blockSets.liveIn) {
-      if (blockSets.liveOut.count(value)) continue;
+      if (blockSets.liveOut.count(value))
+        continue;
       Operation *lastUse = &block.front();
       for (auto &use : value.getUses()) {
-        if (use.getOwner()->getBlock() != &block) continue;
-        if (lastUse == use.getOwner()) continue;
+        if (use.getOwner()->getBlock() != &block)
+          continue;
+        if (lastUse == use.getOwner())
+          continue;
         if (lastUse->isBeforeInBlock(use.getOwner())) {
           lastUse = use.getOwner();
         }
@@ -275,12 +278,14 @@
 
     // Handle values defined within the block and not escaping.
     for (auto value : blockSets.defined) {
-      if (blockSets.liveOut.count(value)) continue;
+      if (blockSets.liveOut.count(value))
+        continue;
       Operation *firstUse =
           value.getDefiningOp() ? value.getDefiningOp() : &block.front();
       Operation *lastUse = firstUse;
       for (auto &use : value.getUses()) {
-        if (use.getOwner()->getBlock() != &block) continue;
+        if (use.getOwner()->getBlock() != &block)
+          continue;
         if (lastUse->isBeforeInBlock(use.getOwner())) {
           lastUse = use.getOwner();
         }
@@ -329,5 +334,5 @@
   return false;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.h b/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.h
index 3573fac..23f8b65 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLiveness.h
@@ -23,7 +23,7 @@
 // These live ranges can be queried for information such as whether two values
 // interfere or when a value is no longer live.
 class ValueLiveness {
- public:
+public:
   // Annotates the IR with the liveness information. This is only required if
   // the liveness information (block in/out, intervals, etc) are interesting to
   // persist beyond just encoding, such as in tests where we want to compare
@@ -51,7 +51,7 @@
   // value.
   bool isLastValueUse(Value value, Operation *useOp, int operandIndex);
 
- private:
+private:
   // Produces an op ordering for the entire function.
   // The ordering is only useful for computing bitmap ordinals as the CFG is not
   // sorted in any defined order (don't rely on op A < op B meaning that A is
@@ -97,7 +97,7 @@
   DenseMap<Value, llvm::BitVector> liveRanges_;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_ANALYSIS_VALUELIVENESS_H_
+#endif // IREE_COMPILER_DIALECT_VM_ANALYSIS_VALUELIVENESS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLivenessTest.cpp b/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLivenessTest.cpp
index 7b47ac1..d406000 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLivenessTest.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Analysis/ValueLivenessTest.cpp
@@ -15,7 +15,7 @@
 class ValueLivenessTestPass
     : public PassWrapper<ValueLivenessTestPass,
                          OperationPass<IREE::VM::FuncOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "test-iree-vm-value-liveness";
   }
@@ -36,10 +36,10 @@
 std::unique_ptr<OperationPass<IREE::VM::FuncOp>> createValueLivenessTestPass() {
   return std::make_unique<ValueLivenessTestPass>();
 }
-}  // namespace VM
-}  // namespace IREE
+} // namespace VM
+} // namespace IREE
 
 static PassRegistration<ValueLivenessTestPass> pass;
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionDialectInterface.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionDialectInterface.h
index 2923f33..3f3a495 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionDialectInterface.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionDialectInterface.h
@@ -22,7 +22,7 @@
 // to the VM dialect.
 class VMConversionDialectInterface
     : public DialectInterface::Base<VMConversionDialectInterface> {
- public:
+public:
   VMConversionDialectInterface(Dialect *dialect) : Base(dialect) {}
 
   // Returns a module containing one or more vm.modules with vm.import ops.
@@ -40,10 +40,11 @@
   //
   // |importSymbols| contains all vm.imports that have been queried from all
   // used dialects, not just this dialect.
-  virtual void populateVMConversionPatterns(
-      SymbolTable &importSymbols, RewritePatternSet &patterns,
-      ConversionTarget &conversionTarget,
-      TypeConverter &typeConverter) const = 0;
+  virtual void
+  populateVMConversionPatterns(SymbolTable &importSymbols,
+                               RewritePatternSet &patterns,
+                               ConversionTarget &conversionTarget,
+                               TypeConverter &typeConverter) const = 0;
 
   // Walks all child attributes defined within a custom dialect attribute;
   // returns false on unknown attributes.
@@ -53,16 +54,16 @@
     return success();
   }
 
- protected:
+protected:
   // Parses the vm.import module to be cached by the caller.
   virtual OwningOpRef<mlir::ModuleOp> parseVMImportModule() const = 0;
 
- private:
+private:
   mutable std::once_flag importParseFlag;
   mutable OwningOpRef<mlir::ModuleOp> importModuleRef;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_CONVERSIONDIALECTINTERFACE_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_CONVERSIONDIALECTINTERFACE_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.cpp
index 1828e9d..67915c8 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.cpp
@@ -45,5 +45,5 @@
       +[](mlir::ModuleOp op) { return isTopLevelModule(op); });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.h
index a892664..275ede6 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/ConversionTarget.h
@@ -19,7 +19,7 @@
 // nested module conversion.
 // Conversions targeting the VM dialect should always use this.
 class VMConversionTarget : public ConversionTarget {
- public:
+public:
   // Ensures that a module has double-nesting to allow for module conversion.
   // If the module is already nested then this is a no-op.
   // Returns a pair of (outer module, inner module).
@@ -28,8 +28,8 @@
   //  module { func.func @foo() { ... } }
   // ->
   //  module attributes {vm.toplevel} { module { func.func @foo() { ... } } }
-  static std::pair<mlir::ModuleOp, mlir::ModuleOp> nestModuleForConversion(
-      mlir::ModuleOp outerModuleOp);
+  static std::pair<mlir::ModuleOp, mlir::ModuleOp>
+  nestModuleForConversion(mlir::ModuleOp outerModuleOp);
 
   // Returns whether this is the outer module as setup via
   // nestModuleForConversion. Use for patterns which need to distinguish.
@@ -38,7 +38,7 @@
   VMConversionTarget(MLIRContext *context);
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_CONVERSIONTARGET_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_CONVERSIONTARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.cpp
index 798f6fc..81a659c 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.cpp
@@ -62,7 +62,8 @@
 Value castToImportType(Value value, Type targetType,
                        ConversionPatternRewriter &rewriter) {
   auto sourceType = value.getType();
-  if (sourceType == targetType) return value;
+  if (sourceType == targetType)
+    return value;
   bool sourceIsInteger = llvm::isa<IntegerType>(sourceType);
 
   // Allow bitcast between same width float/int types. This is used for
@@ -118,9 +119,9 @@
   }
 }
 
-std::optional<SmallVector<Value>> rewriteAttrToOperands(
-    Location loc, Attribute attrValue, Type inputType,
-    ConversionPatternRewriter &rewriter) {
+std::optional<SmallVector<Value>>
+rewriteAttrToOperands(Location loc, Attribute attrValue, Type inputType,
+                      ConversionPatternRewriter &rewriter) {
   if (auto intAttr = llvm::dyn_cast<IntegerAttr>(attrValue)) {
     // NOTE: we intentionally go to std.constant ops so that the standard
     // conversions can do their job. If we want to remove the dependency
@@ -147,7 +148,8 @@
     for (auto elementAttr : arrayAttr) {
       auto flattenedValues =
           rewriteAttrToOperands(loc, elementAttr, inputType, rewriter);
-      if (!flattenedValues) return std::nullopt;
+      if (!flattenedValues)
+        return std::nullopt;
       allValues.append(flattenedValues->begin(), flattenedValues->end());
     }
     return allValues;
@@ -171,7 +173,8 @@
       int ordinal = 0;
       LogicalResult walkStatus = conversionInterface->walkAttributeStorage(
           attrValue, [&](Attribute elementAttr) {
-            if (anyFailed) return;
+            if (anyFailed)
+              return;
             auto elementType = tupleTypes[ordinal++];
             auto flattenedValues =
                 rewriteAttrToOperands(loc, elementAttr, elementType, rewriter);
@@ -181,12 +184,14 @@
             }
             allValues.append(flattenedValues->begin(), flattenedValues->end());
           });
-      if (failed(walkStatus)) return std::nullopt;
+      if (failed(walkStatus))
+        return std::nullopt;
     } else {
       // Custom dialect type maps into zero or more input types (ala arrays).
       LogicalResult walkStatus = conversionInterface->walkAttributeStorage(
           attrValue, [&](Attribute elementAttr) {
-            if (anyFailed) return;
+            if (anyFailed)
+              return;
             auto flattenedValues =
                 rewriteAttrToOperands(loc, elementAttr, inputType, rewriter);
             if (!flattenedValues) {
@@ -195,9 +200,11 @@
             }
             allValues.append(flattenedValues->begin(), flattenedValues->end());
           });
-      if (failed(walkStatus)) return std::nullopt;
+      if (failed(walkStatus))
+        return std::nullopt;
     }
-    if (anyFailed) return std::nullopt;
+    if (anyFailed)
+      return std::nullopt;
     return allValues;
   }
 
@@ -205,7 +212,7 @@
   return std::nullopt;
 }
 
-}  // namespace detail
+} // namespace detail
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h
index 9f7dde0..48ff4d7 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h
@@ -32,10 +32,10 @@
 
 namespace detail {
 size_t getSegmentSpanSize(Type spanType);
-std::optional<SmallVector<Value>> rewriteAttrToOperands(
-    Location loc, Attribute attrValue, Type inputType,
-    ConversionPatternRewriter &rewriter);
-}  // namespace detail
+std::optional<SmallVector<Value>>
+rewriteAttrToOperands(Location loc, Attribute attrValue, Type inputType,
+                      ConversionPatternRewriter &rewriter);
+} // namespace detail
 
 // Casts |value| to |targetType| ala static_cast for when the declared type
 // differs from the type provided by the input dialect.
@@ -56,9 +56,10 @@
 // Automatically handles type conversion and special logic for variadic operands
 // and special types (such as ranked shape).
 template <typename T, typename Adaptor = typename T::Adaptor>
-std::optional<SmallVector<Value>> rewriteToCall(
-    T op, Adaptor adaptor, IREE::VM::ImportOp importOp,
-    TypeConverter &typeConverter, ConversionPatternRewriter &rewriter) {
+std::optional<SmallVector<Value>>
+rewriteToCall(T op, Adaptor adaptor, IREE::VM::ImportOp importOp,
+              TypeConverter &typeConverter,
+              ConversionPatternRewriter &rewriter) {
   auto *operation = op.getOperation();
   bool isOpVariadic = importOp.isVariadic();
   OperationState state{
@@ -78,7 +79,8 @@
     if (auto attrValue = op->getAttr(inputName)) {
       auto flattenedAttrs = detail::rewriteAttrToOperands(
           op.getLoc(), attrValue, inputType, rewriter);
-      if (!flattenedAttrs) return std::nullopt;
+      if (!flattenedAttrs)
+        return std::nullopt;
       state.addOperands(*flattenedAttrs);
       if (importOp.isFuncArgumentVariadic(input.index())) {
         segmentSizes.push_back(flattenedAttrs->size() /
@@ -137,7 +139,8 @@
   for (auto [result, targetType] :
        llvm::zip_equal(callOp->getResults(), operation->getResultTypes())) {
     targetType = typeConverter.convertType(targetType);
-    if (!targetType) return std::nullopt;
+    if (!targetType)
+      return std::nullopt;
     results.push_back(castFromImportType(result, targetType, rewriter));
   }
   return results;
@@ -146,7 +149,7 @@
 // Utility for op to vm.call conversion.
 template <typename T, typename Adaptor = typename T::Adaptor>
 class VMImportOpConversion : public OpConversionPattern<T> {
- public:
+public:
   VMImportOpConversion(MLIRContext *context, SymbolTable &importSymbols,
                        TypeConverter &typeConverter, StringRef importName)
       : OpConversionPattern<T>(typeConverter, context) {
@@ -154,21 +157,22 @@
     assert(importOp);
   }
 
-  LogicalResult matchAndRewrite(
-      T op, typename T::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(T op, typename T::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto results = rewriteToCall(op, adaptor, importOp,
                                  *this->getTypeConverter(), rewriter);
-    if (!results.has_value()) return failure();
+    if (!results.has_value())
+      return failure();
     rewriter.replaceOp(op, results.value());
     return success();
   }
 
- protected:
+protected:
   mutable IREE::VM::ImportOp importOp;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_IMPORTUTILS_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_IMPORTUTILS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.cpp
index 831e483..746ec41 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.cpp
@@ -28,23 +28,24 @@
 class UnaryArithmeticOpConversion : public OpConversionPattern<SrcOpTy> {
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(benvanik): support vectors.
-    if (llvm::isa<VectorType>(srcOp.getResult().getType())) return failure();
+    if (llvm::isa<VectorType>(srcOp.getResult().getType()))
+      return failure();
 
     switch (adaptor.getOperand().getType().getIntOrFloatBitWidth()) {
-      case 32:
-        rewriter.replaceOpWithNewOp<Dst32OpTy>(
-            srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
-        break;
-      case 64:
-        rewriter.replaceOpWithNewOp<Dst64OpTy>(
-            srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
-        break;
-      default:
-        assert(false && "invalid target type");
+    case 32:
+      rewriter.replaceOpWithNewOp<Dst32OpTy>(
+          srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
+      break;
+    case 64:
+      rewriter.replaceOpWithNewOp<Dst64OpTy>(
+          srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
+      break;
+    default:
+      assert(false && "invalid target type");
     }
     return success();
   }
@@ -54,31 +55,32 @@
 class BinaryArithmeticOpConversion : public OpConversionPattern<SrcOpTy> {
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(benvanik): support vectors.
-    if (llvm::isa<VectorType>(srcOp.getResult().getType())) return failure();
+    if (llvm::isa<VectorType>(srcOp.getResult().getType()))
+      return failure();
 
     switch (adaptor.getLhs().getType().getIntOrFloatBitWidth()) {
-      case 32:
-        rewriter.replaceOpWithNewOp<Dst32OpTy>(
-            srcOp, adaptor.getLhs().getType(), adaptor.getLhs(),
-            adaptor.getRhs());
-        break;
-      case 64:
-        rewriter.replaceOpWithNewOp<Dst64OpTy>(
-            srcOp, adaptor.getLhs().getType(), adaptor.getLhs(),
-            adaptor.getRhs());
-        break;
-      default:
-        assert(false && "invalid target type");
+    case 32:
+      rewriter.replaceOpWithNewOp<Dst32OpTy>(srcOp, adaptor.getLhs().getType(),
+                                             adaptor.getLhs(),
+                                             adaptor.getRhs());
+      break;
+    case 64:
+      rewriter.replaceOpWithNewOp<Dst64OpTy>(srcOp, adaptor.getLhs().getType(),
+                                             adaptor.getLhs(),
+                                             adaptor.getRhs());
+      break;
+    default:
+      assert(false && "invalid target type");
     }
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateMathToVMPatterns(MLIRContext *context,
                               TypeConverter &typeConverter,
@@ -131,5 +133,5 @@
       typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.h
index 6bc6f9e..11c7329 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/MathToVM/ConvertMathToVM.h
@@ -18,7 +18,7 @@
                               TypeConverter &typeConverter,
                               RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_MATHTOVM_CONVERTMATHTOVM_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_MATHTOVM_CONVERTMATHTOVM_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
index c7b203e..29614f2 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
@@ -31,9 +31,9 @@
 class ModuleOpConversion : public OpConversionPattern<ModuleOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      ModuleOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ModuleOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Do not attempt to convert the top level module.
     // This mechanism can only support rewriting non top-level modules.
     if (VMConversionTarget::isTopLevelModule(srcOp)) {
@@ -63,10 +63,10 @@
 };
 
 // Converts a function signature with the given |signatureConversion| util.
-static FailureOr<FunctionType> convertFuncSignature(
-    func::FuncOp srcOp, TypeConverter &typeConverter,
-    TypeConverter::SignatureConversion &signatureConversion,
-    ConversionPatternRewriter &rewriter) {
+static FailureOr<FunctionType>
+convertFuncSignature(func::FuncOp srcOp, TypeConverter &typeConverter,
+                     TypeConverter::SignatureConversion &signatureConversion,
+                     ConversionPatternRewriter &rewriter) {
   FunctionType srcFuncType = srcOp.getFunctionType();
   for (unsigned i = 0, e = srcFuncType.getNumInputs(); i < e; ++i) {
     if (failed(typeConverter.convertSignatureArg(i, srcFuncType.getInput(i),
@@ -108,18 +108,20 @@
 
 class FuncOpConversion : public OpConversionPattern<func::FuncOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      func::FuncOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(func::FuncOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Handled by import-specific conversion.
-    if (srcOp.isExternal()) return failure();
+    if (srcOp.isExternal())
+      return failure();
 
     // Convert function signature.
     TypeConverter::SignatureConversion signatureConversion(
         srcOp.getNumArguments());
     auto newFuncType = convertFuncSignature(srcOp, *getTypeConverter(),
                                             signatureConversion, rewriter);
-    if (failed(newFuncType)) return failure();
+    if (failed(newFuncType))
+      return failure();
 
     // Create new function with converted argument and result types.
     // Note that attributes are dropped. Consider preserving some if needed.
@@ -181,11 +183,12 @@
 
 class ExternalFuncOpConversion : public OpConversionPattern<func::FuncOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      func::FuncOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(func::FuncOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Handled by internal-specific conversion.
-    if (!srcOp.isExternal()) return failure();
+    if (!srcOp.isExternal())
+      return failure();
 
     // If the user declared an intended signature then we can use that instead
     // of running conversion ourselves. This can be used in cases where the
@@ -205,7 +208,8 @@
           srcOp.getNumArguments());
       auto convertedSignature = convertFuncSignature(
           srcOp, *getTypeConverter(), signatureConversion, rewriter);
-      if (failed(convertedSignature)) return failure();
+      if (failed(convertedSignature))
+        return failure();
       newSignature = *convertedSignature;
     }
 
@@ -235,9 +239,9 @@
 
 class CallOpConversion : public OpConversionPattern<func::CallOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      func::CallOp callOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(func::CallOp callOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Regardless of what the call (or import) does we need to match the
     // result types defined by the type converter. We will insert casts as
     // needed to these types before replacing the op.
@@ -267,10 +271,10 @@
 
   // Converts a call to some function which may be internal or an import.
   // Returns the new converted call results.
-  FailureOr<SmallVector<Value>> convertCallOp(
-      Operation *rootOp, Location loc, StringRef calleeName,
-      ValueRange operands, TypeRange resultTypes,
-      ConversionPatternRewriter &rewriter) const {
+  FailureOr<SmallVector<Value>>
+  convertCallOp(Operation *rootOp, Location loc, StringRef calleeName,
+                ValueRange operands, TypeRange resultTypes,
+                ConversionPatternRewriter &rewriter) const {
     // (Slow) lookup of the target function, which may be an import that we need
     // to perform type conversion for.
     auto calleeOp = SymbolTable::lookupSymbolIn(rootOp, calleeName);
@@ -311,10 +315,11 @@
 
   // Converts a call to an import that may be optional.
   // Returns the new converted call results.
-  FailureOr<SmallVector<Value>> convertImportCallOp(
-      Operation *rootOp, Location loc, StringRef calleeName,
-      ValueRange operands, TypeRange resultTypes, FunctionType importSignature,
-      Operation *calleeOp, ConversionPatternRewriter &rewriter) const {
+  FailureOr<SmallVector<Value>>
+  convertImportCallOp(Operation *rootOp, Location loc, StringRef calleeName,
+                      ValueRange operands, TypeRange resultTypes,
+                      FunctionType importSignature, Operation *calleeOp,
+                      ConversionPatternRewriter &rewriter) const {
     auto fallbackAttr = calleeOp->getAttrOfType<SymbolRefAttr>("vm.fallback");
     return fallbackAttr
                ? convertOptionalImportCallOp(
@@ -367,7 +372,8 @@
     rewriter.setInsertionPointToStart(fallbackBlock);
     auto fallbackResults = convertCallOp(rootOp, loc, fallbackName, operands,
                                          resultTypes, rewriter);
-    if (failed(fallbackResults)) return failure();
+    if (failed(fallbackResults))
+      return failure();
     rewriter.create<IREE::VM::BranchOp>(loc, exitBlock, *fallbackResults);
 
     return exitResults;
@@ -405,9 +411,9 @@
 
 class ReturnOpConversion : public OpConversionPattern<mlir::func::ReturnOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::func::ReturnOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::ReturnOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::ReturnOp>(srcOp,
                                                     adaptor.getOperands());
     return success();
@@ -418,9 +424,9 @@
   TypeConverter &typeConverter;
   ConstantOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
-  LogicalResult matchAndRewrite(
-      arith::ConstantOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::ConstantOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto targetType = typeConverter.convertType(srcOp.getType());
     if (!targetType) {
       return srcOp.emitError() << "could not convert type: " << srcOp.getType()
@@ -432,27 +438,27 @@
         return srcOp.emitRemark() << "unsupported const type for dialect";
       }
       switch (targetType.getIntOrFloatBitWidth()) {
-        case 1:
-        case 32:
-          if (integerAttr.getInt()) {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstI32Op>(
-                srcOp,
-                integerAttr.getType().isInteger(1) ? 1 : integerAttr.getInt());
-          } else {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstI32ZeroOp>(srcOp);
-          }
-          break;
-        case 64:
-          if (integerAttr.getInt()) {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstI64Op>(
-                srcOp, integerAttr.getInt());
-          } else {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstI64ZeroOp>(srcOp);
-          }
-          break;
-        default:
-          return srcOp.emitRemark()
-                 << "unsupported const integer bit width for dialect";
+      case 1:
+      case 32:
+        if (integerAttr.getInt()) {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstI32Op>(
+              srcOp,
+              integerAttr.getType().isInteger(1) ? 1 : integerAttr.getInt());
+        } else {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstI32ZeroOp>(srcOp);
+        }
+        break;
+      case 64:
+        if (integerAttr.getInt()) {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstI64Op>(
+              srcOp, integerAttr.getInt());
+        } else {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstI64ZeroOp>(srcOp);
+        }
+        break;
+      default:
+        return srcOp.emitRemark()
+               << "unsupported const integer bit width for dialect";
       }
     } else if (llvm::isa<FloatType>(targetType)) {
       auto floatAttr = llvm::dyn_cast<FloatAttr>(srcOp.getValue());
@@ -460,23 +466,23 @@
         return srcOp.emitRemark() << "unsupported const type for dialect";
       }
       switch (targetType.getIntOrFloatBitWidth()) {
-        case 32:
-          if (floatAttr.getValue().isZero()) {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstF32ZeroOp>(srcOp);
-          } else {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstF32Op>(srcOp, floatAttr);
-          }
-          break;
-        case 64:
-          if (floatAttr.getValue().isZero()) {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstF64ZeroOp>(srcOp);
-          } else {
-            rewriter.replaceOpWithNewOp<IREE::VM::ConstF64Op>(srcOp, floatAttr);
-          }
-          break;
-        default:
-          return srcOp.emitRemark()
-                 << "unsupported const floating-point bit width for dialect";
+      case 32:
+        if (floatAttr.getValue().isZero()) {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstF32ZeroOp>(srcOp);
+        } else {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstF32Op>(srcOp, floatAttr);
+        }
+        break;
+      case 64:
+        if (floatAttr.getValue().isZero()) {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstF64ZeroOp>(srcOp);
+        } else {
+          rewriter.replaceOpWithNewOp<IREE::VM::ConstF64Op>(srcOp, floatAttr);
+        }
+        break;
+      default:
+        return srcOp.emitRemark()
+               << "unsupported const floating-point bit width for dialect";
       }
     } else {
       return rewriter.notifyMatchFailure(srcOp, "unsupported type");
@@ -487,196 +493,199 @@
 
 struct CmpI32OpConversion : public OpConversionPattern<arith::CmpIOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::CmpIOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!adaptor.getLhs().getType().isInteger(32)) return failure();
+  LogicalResult
+  matchAndRewrite(arith::CmpIOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!adaptor.getLhs().getType().isInteger(32))
+      return failure();
     auto returnType = rewriter.getIntegerType(32);
     switch (srcOp.getPredicate()) {
-      case arith::CmpIPredicate::eq:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpEQI32Op>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ne:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpNEI32Op>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::slt:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI32SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::sle:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI32SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::sgt:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI32SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::sge:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI32SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ult:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ule:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ugt:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::uge:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      default:
-        return failure();
+    case arith::CmpIPredicate::eq:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpEQI32Op>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ne:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpNEI32Op>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::slt:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI32SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::sle:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI32SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::sgt:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI32SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::sge:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI32SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ult:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ule:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ugt:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::uge:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    default:
+      return failure();
     }
   }
 };
 
 struct CmpI64OpConversion : public OpConversionPattern<arith::CmpIOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::CmpIOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!adaptor.getLhs().getType().isInteger(64)) return failure();
+  LogicalResult
+  matchAndRewrite(arith::CmpIOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!adaptor.getLhs().getType().isInteger(64))
+      return failure();
     auto returnType = rewriter.getIntegerType(32);
     switch (srcOp.getPredicate()) {
-      case arith::CmpIPredicate::eq:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpEQI64Op>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ne:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpNEI64Op>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::slt:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI64SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::sle:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI64SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::sgt:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI64SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::sge:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI64SOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ult:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI64UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ule:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI64UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::ugt:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI64UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      case arith::CmpIPredicate::uge:
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI64UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        return success();
-      default:
-        return failure();
+    case arith::CmpIPredicate::eq:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpEQI64Op>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ne:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpNEI64Op>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::slt:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI64SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::sle:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI64SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::sgt:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI64SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::sge:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI64SOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ult:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTI64UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ule:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEI64UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::ugt:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTI64UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    case arith::CmpIPredicate::uge:
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEI64UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      return success();
+    default:
+      return failure();
     }
   }
 };
 
 struct CmpF32OpConversion : public OpConversionPattern<arith::CmpFOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::CmpFOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!adaptor.getLhs().getType().isF32()) return failure();
+  LogicalResult
+  matchAndRewrite(arith::CmpFOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!adaptor.getLhs().getType().isF32())
+      return failure();
     auto returnType = rewriter.getIntegerType(32);
     switch (srcOp.getPredicate()) {
-      case arith::CmpFPredicate::AlwaysFalse:  // 0
-        rewriter.replaceOpWithNewOp<IREE::VM::ConstI32ZeroOp>(srcOp);
-        break;
-      case arith::CmpFPredicate::AlwaysTrue:  // 1
-        rewriter.replaceOpWithNewOp<IREE::VM::ConstI32Op>(srcOp, 1);
-        break;
-      case arith::CmpFPredicate::UNO:  // isnan(lhs) || isnan(rhs)
-        rewriter.replaceOpWithNewOp<IREE::VM::OrI32Op>(
-            srcOp, returnType,
-            rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
-                srcOp.getLoc(), returnType, adaptor.getLhs()),
-            rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
-                srcOp.getLoc(), returnType, adaptor.getRhs()));
-        break;
-      case arith::CmpFPredicate::ORD:  // !(isnan(lhs) || isnan(rhs))
-        rewriter.replaceOpWithNewOp<IREE::VM::XorI32Op>(
-            srcOp, returnType,
-            rewriter.createOrFold<IREE::VM::ConstI32Op>(srcOp.getLoc(), 1),
-            rewriter.createOrFold<IREE::VM::AndI32Op>(
-                srcOp.getLoc(), returnType,
-                rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
-                    srcOp.getLoc(), returnType, adaptor.getLhs()),
-                rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
-                    srcOp.getLoc(), returnType, adaptor.getRhs())));
-        break;
-      case arith::CmpFPredicate::OEQ:  // ordered and equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpEQF32OOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::OGT:  // ordered and greater than
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTF32OOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::OGE:  // ordered and greater than or equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEF32OOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::OLT:  // ordered and less than
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTF32OOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::OLE:  // ordered and less than or equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEF32OOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::ONE:  // ordered and not equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpNEF32OOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::UEQ:  // unordered or equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpEQF32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::UGT:  // unordered or greater than
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTF32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::UGE:  // unordered or greater than or equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEF32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::ULT:  // unordered or less than
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTF32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::ULE:  // unordered or less than or equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEF32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      case arith::CmpFPredicate::UNE:  // unordered or not equal
-        rewriter.replaceOpWithNewOp<IREE::VM::CmpNEF32UOp>(
-            srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
-        break;
-      default:
-        return rewriter.notifyMatchFailure(srcOp,
-                                           "unhandled arith::CmpFPredicate");
+    case arith::CmpFPredicate::AlwaysFalse: // 0
+      rewriter.replaceOpWithNewOp<IREE::VM::ConstI32ZeroOp>(srcOp);
+      break;
+    case arith::CmpFPredicate::AlwaysTrue: // 1
+      rewriter.replaceOpWithNewOp<IREE::VM::ConstI32Op>(srcOp, 1);
+      break;
+    case arith::CmpFPredicate::UNO: // isnan(lhs) || isnan(rhs)
+      rewriter.replaceOpWithNewOp<IREE::VM::OrI32Op>(
+          srcOp, returnType,
+          rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
+              srcOp.getLoc(), returnType, adaptor.getLhs()),
+          rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
+              srcOp.getLoc(), returnType, adaptor.getRhs()));
+      break;
+    case arith::CmpFPredicate::ORD: // !(isnan(lhs) || isnan(rhs))
+      rewriter.replaceOpWithNewOp<IREE::VM::XorI32Op>(
+          srcOp, returnType,
+          rewriter.createOrFold<IREE::VM::ConstI32Op>(srcOp.getLoc(), 1),
+          rewriter.createOrFold<IREE::VM::AndI32Op>(
+              srcOp.getLoc(), returnType,
+              rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
+                  srcOp.getLoc(), returnType, adaptor.getLhs()),
+              rewriter.createOrFold<IREE::VM::CmpNaNF32Op>(
+                  srcOp.getLoc(), returnType, adaptor.getRhs())));
+      break;
+    case arith::CmpFPredicate::OEQ: // ordered and equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpEQF32OOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::OGT: // ordered and greater than
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTF32OOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::OGE: // ordered and greater than or equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEF32OOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::OLT: // ordered and less than
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTF32OOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::OLE: // ordered and less than or equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEF32OOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::ONE: // ordered and not equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpNEF32OOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::UEQ: // unordered or equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpEQF32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::UGT: // unordered or greater than
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTF32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::UGE: // unordered or greater than or equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpGTEF32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::ULT: // unordered or less than
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTF32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::ULE: // unordered or less than or equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpLTEF32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    case arith::CmpFPredicate::UNE: // unordered or not equal
+      rewriter.replaceOpWithNewOp<IREE::VM::CmpNEF32UOp>(
+          srcOp, returnType, adaptor.getLhs(), adaptor.getRhs());
+      break;
+    default:
+      return rewriter.notifyMatchFailure(srcOp,
+                                         "unhandled arith::CmpFPredicate");
     }
     return success();
   }
@@ -685,20 +694,20 @@
 template <typename SrcOpTy, typename Dst32OpTy, typename Dst64OpTy>
 class UnaryArithmeticOpConversion : public OpConversionPattern<SrcOpTy> {
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     switch (adaptor.getOperand().getType().getIntOrFloatBitWidth()) {
-      case 32:
-        rewriter.replaceOpWithNewOp<Dst32OpTy>(
-            srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
-        break;
-      case 64:
-        rewriter.replaceOpWithNewOp<Dst64OpTy>(
-            srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
-        break;
-      default:
-        return rewriter.notifyMatchFailure(srcOp, "unsupported type");
+    case 32:
+      rewriter.replaceOpWithNewOp<Dst32OpTy>(
+          srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
+      break;
+    case 64:
+      rewriter.replaceOpWithNewOp<Dst64OpTy>(
+          srcOp, adaptor.getOperand().getType(), adaptor.getOperand());
+      break;
+    default:
+      return rewriter.notifyMatchFailure(srcOp, "unsupported type");
     }
     return success();
   }
@@ -707,22 +716,22 @@
 template <typename SrcOpTy, typename Dst32OpTy, typename Dst64OpTy>
 class BinaryArithmeticOpConversion : public OpConversionPattern<SrcOpTy> {
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     switch (adaptor.getLhs().getType().getIntOrFloatBitWidth()) {
-      case 32:
-        rewriter.replaceOpWithNewOp<Dst32OpTy>(
-            srcOp, adaptor.getLhs().getType(), adaptor.getLhs(),
-            adaptor.getRhs());
-        break;
-      case 64:
-        rewriter.replaceOpWithNewOp<Dst64OpTy>(
-            srcOp, adaptor.getLhs().getType(), adaptor.getLhs(),
-            adaptor.getRhs());
-        break;
-      default:
-        return rewriter.notifyMatchFailure(srcOp, "unsupported type");
+    case 32:
+      rewriter.replaceOpWithNewOp<Dst32OpTy>(srcOp, adaptor.getLhs().getType(),
+                                             adaptor.getLhs(),
+                                             adaptor.getRhs());
+      break;
+    case 64:
+      rewriter.replaceOpWithNewOp<Dst64OpTy>(srcOp, adaptor.getLhs().getType(),
+                                             adaptor.getLhs(),
+                                             adaptor.getRhs());
+      break;
+    default:
+      return rewriter.notifyMatchFailure(srcOp, "unsupported type");
     }
     return success();
   }
@@ -731,9 +740,9 @@
 template <typename SrcOpTy, typename Dst32OpTy, typename Dst64OpTy>
 class ShiftArithmeticOpConversion : public OpConversionPattern<SrcOpTy> {
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SrcOpTy srcOp, typename SrcOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value amount = adaptor.getRhs();
     if (amount.getType().getIntOrFloatBitWidth() > 32) {
       // Shift amounts are always 32-bit in the VM.
@@ -741,16 +750,16 @@
           srcOp.getLoc(), rewriter.getI32Type(), amount);
     }
     switch (adaptor.getLhs().getType().getIntOrFloatBitWidth()) {
-      case 32:
-        rewriter.replaceOpWithNewOp<Dst32OpTy>(srcOp, rewriter.getI32Type(),
-                                               adaptor.getLhs(), amount);
-        break;
-      case 64:
-        rewriter.replaceOpWithNewOp<Dst64OpTy>(srcOp, rewriter.getI64Type(),
-                                               adaptor.getLhs(), amount);
-        break;
-      default:
-        return rewriter.notifyMatchFailure(srcOp, "unsupported type");
+    case 32:
+      rewriter.replaceOpWithNewOp<Dst32OpTy>(srcOp, rewriter.getI32Type(),
+                                             adaptor.getLhs(), amount);
+      break;
+    case 64:
+      rewriter.replaceOpWithNewOp<Dst64OpTy>(srcOp, rewriter.getI64Type(),
+                                             adaptor.getLhs(), amount);
+      break;
+    default:
+      return rewriter.notifyMatchFailure(srcOp, "unsupported type");
     }
     return success();
   }
@@ -759,9 +768,9 @@
 template <typename StdOp>
 class CastingOpConversion : public OpConversionPattern<StdOp> {
   using OpConversionPattern<StdOp>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      StdOp srcOp, typename StdOp::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(StdOp srcOp, typename StdOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(srcOp, adaptor.getOperands());
     return success();
   }
@@ -770,9 +779,9 @@
 template <typename OpTy, typename ExtOpTy>
 class IndexCastOpConversion : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy srcOp, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy srcOp, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(jpienaar): Audit and fix if needed.
     auto srcType = adaptor.getIn().getType();
     auto dstType =
@@ -792,9 +801,9 @@
 
 class ZeroExtendIOpConversion : public OpConversionPattern<arith::ExtUIOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::ExtUIOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::ExtUIOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.getIn().getType();
     auto dstType = getTypeConverter()->convertType(srcOp.getResult().getType());
     if (srcType.isInteger(1) && dstType.isInteger(32)) {
@@ -826,9 +835,9 @@
 
 class SignExtendIOpConversion : public OpConversionPattern<arith::ExtSIOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::ExtSIOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::ExtSIOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.getIn().getType();
     auto dstType = getTypeConverter()->convertType(srcOp.getResult().getType());
     if (srcType.isInteger(8) && dstType.isInteger(32)) {
@@ -852,9 +861,9 @@
 
 class TruncateIOpConversion : public OpConversionPattern<arith::TruncIOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::TruncIOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::TruncIOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.getIn().getType();
     auto resultType = srcOp.getResult().getType();
     auto dstType = getTypeConverter()->convertType(resultType);
@@ -895,9 +904,9 @@
 template <typename OpTy, typename ExtOpTy, typename CastOpTy>
 class IntToFPOpConversion : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy srcOp, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy srcOp, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.getIn().getType();
     auto dstType = srcOp.getResult().getType();
     if (!dstType.isF32() ||
@@ -922,9 +931,9 @@
 
 class FPToSIOpConversion : public OpConversionPattern<arith::FPToSIOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::FPToSIOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::FPToSIOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.getIn().getType();
     auto dstType = srcOp.getResult().getType();
     auto resultType = getTypeConverter()->convertType(dstType);
@@ -943,9 +952,9 @@
 
 class FPToUIOpConversion : public OpConversionPattern<arith::FPToUIOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::FPToUIOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::FPToUIOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.getIn().getType();
     auto dstType = srcOp.getResult().getType();
     auto resultType = getTypeConverter()->convertType(dstType);
@@ -962,9 +971,9 @@
 
 class BitcastOpConversion : public OpConversionPattern<arith::BitcastOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::BitcastOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::BitcastOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.getIn().getType();
     auto dstType = srcOp.getResult().getType();
     auto resultType =
@@ -990,9 +999,9 @@
 
 class SelectOpConversion : public OpConversionPattern<arith::SelectOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      arith::SelectOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(arith::SelectOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto valueType = adaptor.getTrueValue().getType();
     if (valueType.isInteger(32)) {
       rewriter.replaceOpWithNewOp<IREE::VM::SelectI32Op>(
@@ -1028,9 +1037,9 @@
 
 class AssertOpConversion : public OpConversionPattern<cf::AssertOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      cf::AssertOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(cf::AssertOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto status = rewriter.create<IREE::VM::ConstI32Op>(
         srcOp.getLoc(),
         rewriter.getIntegerAttr(
@@ -1048,9 +1057,9 @@
 
 class BranchOpConversion : public OpConversionPattern<cf::BranchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      cf::BranchOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(cf::BranchOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::BranchOp>(srcOp, srcOp.getDest(),
                                                     adaptor.getOperands());
     return success();
@@ -1059,9 +1068,9 @@
 
 class CondBranchOpConversion : public OpConversionPattern<cf::CondBranchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      cf::CondBranchOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(cf::CondBranchOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Block *trueDest = srcOp.getTrueDest();
     rewriter.replaceOpWithNewOp<IREE::VM::CondBranchOp>(
         srcOp, adaptor.getCondition(), trueDest, adaptor.getTrueDestOperands(),
@@ -1070,7 +1079,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStandardToVMPatterns(MLIRContext *context,
                                   TypeConverter &typeConverter,
@@ -1170,5 +1179,5 @@
                                                                 context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.h
index f4542a8..eec2892 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.h
@@ -18,7 +18,7 @@
                                   TypeConverter &typeConverter,
                                   RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_STANDARDTOVM_CONVERTSTANDARDTOVM_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_STANDARDTOVM_CONVERTSTANDARDTOVM_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVMTest.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVMTest.cpp
index e027c97..508c9e5 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVMTest.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVMTest.cpp
@@ -23,7 +23,7 @@
 class ConvertStandardToVMTestPass
     : public PassWrapper<ConvertStandardToVMTestPass,
                          OperationPass<mlir::ModuleOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertStandardToVMTestPass)
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -65,7 +65,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 namespace IREE {
 namespace VM {
@@ -73,10 +73,10 @@
 createConvertStandardToVMTestPass() {
   return std::make_unique<ConvertStandardToVMTestPass>();
 }
-}  // namespace VM
-}  // namespace IREE
+} // namespace VM
+} // namespace IREE
 
 static PassRegistration<ConvertStandardToVMTestPass> pass;
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.cpp
index 02add2d..b26bc53 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.cpp
@@ -39,7 +39,7 @@
       llvm::cl::cat(vmTargetOptionsCategory));
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.h
index f6f9f6c..29624fa 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/TargetOptions.h
@@ -36,9 +36,9 @@
   using FromFlags = OptionsFromFlags<TargetOptions>;
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_TARGETOPTIONS_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_TARGETOPTIONS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
index b7466aa..b7a129e 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
@@ -121,7 +121,7 @@
          Location loc) -> Value { return inputs.front(); });
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.h
index 56af611..4291706 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.h
@@ -16,18 +16,18 @@
 namespace VM {
 
 class TypeConverter : public mlir::TypeConverter {
- public:
+public:
   explicit TypeConverter(TargetOptions targetOptions);
 
-  const TargetOptions& targetOptions() const { return targetOptions_; }
+  const TargetOptions &targetOptions() const { return targetOptions_; }
 
- private:
+private:
   TargetOptions targetOptions_;
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_TYPECONVERTER_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_TYPECONVERTER_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAlignmentOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAlignmentOps.cpp
index dbcb491..b8bcf13 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAlignmentOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAlignmentOps.cpp
@@ -50,9 +50,9 @@
 
 struct AlignOpConversion : public OpConversionPattern<IREE::Util::AlignOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::AlignOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::AlignOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type valueType = adaptor.getValue().getType();
     if (valueType.isInteger(32)) {
       insertAlignOps<IREE::VM::ConstI32Op, IREE::VM::SubI32Op,
@@ -83,9 +83,9 @@
 struct FixateIndexSizeofConversion
     : public OpConversionPattern<IREE::Util::SizeOfOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::SizeOfOp sizeofOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::SizeOfOp sizeofOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type sizedType = sizeofOp.getSizedType();
     if (llvm::isa<IndexType>(sizedType)) {
       Type converted = getTypeConverter()->convertType(sizedType);
@@ -100,7 +100,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilAlignmentToVMPatterns(MLIRContext *context,
                                        ConversionTarget &conversionTarget,
@@ -113,5 +113,5 @@
                                                                   context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAssignmentOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAssignmentOps.cpp
index 01fc0c2..e897752 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAssignmentOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertAssignmentOps.cpp
@@ -25,9 +25,9 @@
 
 struct SwitchOpConversion : public OpConversionPattern<IREE::Util::SwitchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::SwitchOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::SwitchOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto index = rewriter.createOrFold<IREE::VM::TruncI64I32Op>(
         op.getLoc(), rewriter.getI32Type(), adaptor.getIndex());
     auto type = adaptor.getDefaultValue().getType();
@@ -52,7 +52,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilAssignmentToVMPatterns(MLIRContext *context,
                                         ConversionTarget &conversionTarget,
@@ -63,5 +63,5 @@
   patterns.insert<SwitchOpConversion>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertBufferOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertBufferOps.cpp
index 62ae2e6..8655dc5 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertBufferOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertBufferOps.cpp
@@ -22,13 +22,15 @@
 namespace {
 
 static Value castToI64(Value value, OpBuilder &builder) {
-  if (value.getType().isInteger(64)) return value;
+  if (value.getType().isInteger(64))
+    return value;
   return builder.createOrFold<IREE::VM::ExtI32I64UOp>(
       value.getLoc(), builder.getI64Type(), value);
 }
 
 static Value castToIndex(Value value, OpBuilder &builder) {
-  if (value.getType().isIndex()) return value;
+  if (value.getType().isIndex())
+    return value;
   return builder.createOrFold<arith::IndexCastOp>(
       value.getLoc(), builder.getIndexType(), value);
 }
@@ -36,9 +38,9 @@
 struct BufferConstantOpConversion
     : public OpConversionPattern<IREE::Util::BufferConstantOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferConstantOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferConstantOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto alignmentAttr = op.getAlignmentAttr();
     if (alignmentAttr) {
       alignmentAttr = rewriter.getI64IntegerAttr(alignmentAttr.getInt());
@@ -64,9 +66,9 @@
 struct BufferAllocOpConversion
     : public OpConversionPattern<IREE::Util::BufferAllocOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferAllocOp allocOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferAllocOp allocOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType =
         getTypeConverter()->convertType(allocOp.getResult().getType());
     rewriter.replaceOpWithNewOp<IREE::VM::BufferAllocOp>(
@@ -79,9 +81,9 @@
 struct BufferDeallocOpConversion
     : public OpConversionPattern<IREE::Util::BufferDeallocOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferDeallocOp deallocOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferDeallocOp deallocOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // No-op today. We could make this force a dealloc of the underlying storage
     // or have a vm.hint.reset or something to force a drop of the reference.
     rewriter.eraseOp(deallocOp);
@@ -95,9 +97,9 @@
 struct BufferSliceOpConversion
     : public OpConversionPattern<IREE::Util::BufferSliceOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferSliceOp sliceOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferSliceOp sliceOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType =
         getTypeConverter()->convertType(sliceOp.getResult().getType());
     auto sliceLength = castToI64(adaptor.getResultSize(), rewriter);
@@ -117,9 +119,9 @@
 struct BufferSizeOpConversion
     : public OpConversionPattern<IREE::Util::BufferSizeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferSizeOp sizeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferSizeOp sizeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value size = rewriter.create<IREE::VM::BufferLengthOp>(
         sizeOp.getLoc(), rewriter.getI64Type(), adaptor.getOperand());
     rewriter.replaceOp(sizeOp, castToIndex(size, rewriter));
@@ -130,9 +132,9 @@
 struct BufferCopyOpConversion
     : public OpConversionPattern<IREE::Util::BufferCopyOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferCopyOp copyOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferCopyOp copyOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::BufferCopyOp>(
         copyOp, adaptor.getSource(),
         castToI64(adaptor.getSourceOffset(), rewriter), adaptor.getTarget(),
@@ -145,9 +147,9 @@
 struct BufferCompareOpConversion
     : public OpConversionPattern<IREE::Util::BufferCompareOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferCompareOp compareOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferCompareOp compareOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType =
         getTypeConverter()->convertType(compareOp.getResult().getType());
     rewriter.replaceOpWithNewOp<IREE::VM::BufferCompareOp>(
@@ -161,7 +163,8 @@
 
 static Value unscaleOffset(Location loc, Value offset, int64_t scale,
                            OpBuilder &builder) {
-  if (scale == 1) return offset;
+  if (scale == 1)
+    return offset;
   return builder.createOrFold<IREE::VM::DivI64SOp>(
       loc, offset.getType(), offset,
       builder.create<IREE::VM::ConstI64Op>(loc, scale));
@@ -170,9 +173,9 @@
 struct BufferFillOpConversion
     : public OpConversionPattern<IREE::Util::BufferFillOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferFillOp fillOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferFillOp fillOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto oldType = fillOp.getPattern().getType();
     auto newType = adaptor.getPattern().getType();
     if (llvm::isa<IndexType>(oldType)) {
@@ -221,9 +224,9 @@
 struct BufferLoadOpConversion
     : public OpConversionPattern<IREE::Util::BufferLoadOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferLoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferLoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto oldType = loadOp.getResult().getType();
     auto newType = getTypeConverter()->convertType(oldType);
     if (llvm::isa<IndexType>(oldType)) {
@@ -277,9 +280,9 @@
 struct BufferStoreOpConversion
     : public OpConversionPattern<IREE::Util::BufferStoreOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::BufferStoreOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::BufferStoreOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto oldType = storeOp.getSource().getType();
     auto newType = adaptor.getSource().getType();
     if (llvm::isa<IndexType>(oldType)) {
@@ -315,7 +318,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilBufferToVMPatterns(MLIRContext *context,
                                     ConversionTarget &conversionTarget,
@@ -352,5 +355,5 @@
   patterns.insert<BufferStoreOpConversion>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertGlobalOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertGlobalOps.cpp
index 640c36c..4d70b7f 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertGlobalOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertGlobalOps.cpp
@@ -18,9 +18,9 @@
     : public OpConversionPattern<IREE::Util::InitializerOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::InitializerOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::InitializerOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto newOp = rewriter.create<IREE::VM::InitializerOp>(op.getLoc());
     rewriter.cloneRegionBefore(op.getBody(), newOp.getBody(),
                                newOp.getBody().begin());
@@ -42,22 +42,22 @@
     : public OpConversionPattern<IREE::Util::InitializerReturnOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::InitializerReturnOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::InitializerReturnOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::ReturnOp>(op);
     return success();
   }
 };
 
 class GlobalOpConversion : public OpConversionPattern<IREE::Util::GlobalOp> {
- public:
+public:
   GlobalOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Operation *newOp = nullptr;
     auto convertedType = typeConverter.convertType(op.getType());
     if (llvm::isa<IREE::VM::RefType>(convertedType) ||
@@ -113,37 +113,37 @@
     return success();
   }
 
- private:
+private:
   TypeConverter &typeConverter;
 };
 
 class GlobalAddressOpConversion
     : public OpConversionPattern<IREE::Util::GlobalAddressOp> {
- public:
+public:
   GlobalAddressOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalAddressOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalAddressOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::GlobalAddressOp>(
         op, typeConverter.convertType(op.getType()), op.getGlobal());
     return success();
   }
 
- private:
+private:
   TypeConverter &typeConverter;
 };
 
 class GlobalLoadOpConversion
     : public OpConversionPattern<IREE::Util::GlobalLoadOp> {
- public:
+public:
   GlobalLoadOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalLoadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalLoadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto operandType = op.getType();
     auto convertedType = typeConverter.convertType(operandType);
     if (IREE::VM::RefType::isCompatible(operandType)) {
@@ -167,20 +167,20 @@
     return success();
   }
 
- private:
+private:
   TypeConverter &typeConverter;
 };
 
 class GlobalLoadIndirectOpConversion
     : public OpConversionPattern<IREE::Util::GlobalLoadIndirectOp> {
- public:
+public:
   GlobalLoadIndirectOpConversion(MLIRContext *context,
                                  TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalLoadIndirectOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalLoadIndirectOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto operandType = op.getType();
     auto convertedType = typeConverter.convertType(operandType);
     if (IREE::VM::RefType::isCompatible(operandType)) {
@@ -204,19 +204,19 @@
     return success();
   }
 
- private:
+private:
   TypeConverter &typeConverter;
 };
 
 class GlobalStoreOpConversion
     : public OpConversionPattern<IREE::Util::GlobalStoreOp> {
- public:
+public:
   GlobalStoreOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalStoreOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalStoreOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto operandType = adaptor.getValue().getType();
     if (llvm::isa<IREE::VM::RefType>(operandType)) {
       rewriter.replaceOpWithNewOp<IREE::VM::GlobalStoreRefOp>(
@@ -242,14 +242,14 @@
 
 class GlobalStoreIndirectOpConversion
     : public OpConversionPattern<IREE::Util::GlobalStoreIndirectOp> {
- public:
+public:
   GlobalStoreIndirectOpConversion(MLIRContext *context,
                                   TypeConverter &typeConverter)
       : OpConversionPattern(context) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalStoreIndirectOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalStoreIndirectOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto operandType = adaptor.getValue().getType();
     if (llvm::isa<IREE::VM::RefType>(operandType)) {
       rewriter.replaceOpWithNewOp<IREE::VM::GlobalStoreIndirectRefOp>(
@@ -273,7 +273,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilGlobalToVMPatterns(MLIRContext *context,
                                     ConversionTarget &conversionTarget,
@@ -294,5 +294,5 @@
       context, typeConverter);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertListOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertListOps.cpp
index f09b0e5..419e5dd 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertListOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertListOps.cpp
@@ -22,13 +22,15 @@
 namespace {
 
 static Value castToI32(Value value, OpBuilder &builder) {
-  if (value.getType().isInteger(32)) return value;
+  if (value.getType().isInteger(32))
+    return value;
   return builder.createOrFold<IREE::VM::TruncI64I32Op>(
       value.getLoc(), builder.getI32Type(), value);
 }
 
 static Value castToIndex(Value value, OpBuilder &builder) {
-  if (value.getType().isIndex()) return value;
+  if (value.getType().isIndex())
+    return value;
   return builder.createOrFold<arith::IndexCastOp>(
       value.getLoc(), builder.getIndexType(), value);
 }
@@ -36,9 +38,9 @@
 class ListCreateOpConversion
     : public OpConversionPattern<IREE::Util::ListCreateOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::ListCreateOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::ListCreateOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value initialCapacity = adaptor.getInitialCapacity();
     if (initialCapacity) {
       initialCapacity = castToI32(initialCapacity, rewriter);
@@ -56,9 +58,9 @@
 class ListSizeOpConversion
     : public OpConversionPattern<IREE::Util::ListSizeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::ListSizeOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::ListSizeOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value size = rewriter.create<IREE::VM::ListSizeOp>(
         srcOp.getLoc(), rewriter.getI32Type(), adaptor.getList());
     rewriter.replaceOp(srcOp, castToIndex(size, rewriter));
@@ -69,9 +71,9 @@
 class ListResizeOpConversion
     : public OpConversionPattern<IREE::Util::ListResizeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::ListResizeOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::ListResizeOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::ListResizeOp>(
         srcOp, adaptor.getList(), castToI32(adaptor.getNewSize(), rewriter));
     return success();
@@ -80,9 +82,9 @@
 
 class ListGetOpConversion : public OpConversionPattern<IREE::Util::ListGetOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::ListGetOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::ListGetOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto index = castToI32(adaptor.getIndex(), rewriter);
     auto resultType = typeConverter->convertType(srcOp.getResult().getType());
     if (resultType.isInteger(32)) {
@@ -109,9 +111,9 @@
 
 class ListSetOpConversion : public OpConversionPattern<IREE::Util::ListSetOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::ListSetOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::ListSetOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto index = castToI32(adaptor.getIndex(), rewriter);
     auto valueType = adaptor.getValue().getType();
     if (valueType.isInteger(32)) {
@@ -136,7 +138,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilListToVMPatterns(MLIRContext *context,
                                   ConversionTarget &conversionTarget,
@@ -151,7 +153,8 @@
         } else {
           elementType = typeConverter.convertType(type.getElementType());
         }
-        if (!elementType) return std::nullopt;
+        if (!elementType)
+          return std::nullopt;
         return IREE::VM::RefType::get(IREE::VM::ListType::get(elementType));
       });
 
@@ -165,5 +168,5 @@
           typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStatusOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStatusOps.cpp
index 4972250..6e0bd30 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStatusOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStatusOps.cpp
@@ -13,13 +13,13 @@
 
 class StatusCheckOkOpConversion
     : public OpConversionPattern<IREE::Util::StatusCheckOkOp> {
- public:
+public:
   StatusCheckOkOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context) {}
 
-  LogicalResult matchAndRewrite(
-      IREE::Util::StatusCheckOkOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::StatusCheckOkOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // If status value is non-zero, fail.
     rewriter.replaceOpWithNewOp<IREE::VM::CondFailOp>(
         op, adaptor.getStatus(), op.getMessage().value_or(""));
@@ -35,5 +35,5 @@
   patterns.insert<StatusCheckOkOpConversion>(context, typeConverter);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.cpp
index 1373078..a287ab6 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.cpp
@@ -53,9 +53,9 @@
 
 struct NullOpConversion : public OpConversionPattern<IREE::Util::NullOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::NullOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::NullOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::ConstRefZeroOp>(
         op, getTypeConverter()->convertType(op.getType()));
     return success();
@@ -68,16 +68,16 @@
 
 struct CmpEQOpConversion : public OpConversionPattern<IREE::Util::CmpEQOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::CmpEQOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::CmpEQOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto operandType = adaptor.getLhs().getType();
     if (llvm::isa<IREE::VM::RefType>(operandType)) {
       rewriter.replaceOpWithNewOp<IREE::VM::CmpEQRefOp>(
           op, rewriter.getI32Type(), adaptor.getLhs(), adaptor.getRhs());
       return success();
     }
-    return failure();  // not used for non-ref types currently
+    return failure(); // not used for non-ref types currently
   }
 };
 
@@ -88,9 +88,9 @@
 struct UnreachableOpConversion
     : public OpConversionPattern<IREE::Util::UnreachableOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::UnreachableOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::UnreachableOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::VM::FailOp>(
         srcOp,
         rewriter.createOrFold<IREE::VM::ConstI32Op>(
@@ -101,7 +101,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateUtilToVMPatterns(MLIRContext *context,
                               ConversionTarget &conversionTarget,
@@ -125,5 +125,5 @@
                                  patterns);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.h
index a6cdd40..c277e98 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertUtilToVM.h
@@ -19,7 +19,7 @@
                               TypeConverter &typeConverter,
                               RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_UTILTOVM_CONVERTUTILTOVM_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_UTILTOVM_CONVERTUTILTOVM_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
index 0de77c7..a501279 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
@@ -199,9 +199,9 @@
 }
 
 /// Remove block arguments
-LogicalResult removeBlockArguments(
-    IREE::VM::ModuleOp moduleOp,
-    SmallVector<BlockArgument> &blockArgsToRemove) {
+LogicalResult
+removeBlockArguments(IREE::VM::ModuleOp moduleOp,
+                     SmallVector<BlockArgument> &blockArgsToRemove) {
   for (auto &blockArg : blockArgsToRemove) {
     assert(blockArg.getType().isa<IREE::VM::RefType>());
     assert(blockArg.use_empty());
@@ -247,9 +247,10 @@
          "_import_shim";
 }
 
-std::optional<std::string> buildVariadicFunctionName(
-    IREE::VM::ModuleOp &moduleOp, IREE::VM::ImportOp &importOp,
-    DenseIntElementsAttr segmentSizes) {
+std::optional<std::string>
+buildVariadicFunctionName(IREE::VM::ModuleOp &moduleOp,
+                          IREE::VM::ImportOp &importOp,
+                          DenseIntElementsAttr segmentSizes) {
   auto callingConvention = makeImportCallingConventionString(importOp);
   if (!callingConvention.has_value()) {
     return std::nullopt;
@@ -268,10 +269,11 @@
   return result;
 }
 
-std::optional<Value> createVmTypeDefPtr(
-    ConversionPatternRewriter &rewriter, Location loc,
-    IREE::VM::EmitCTypeConverter &typeConverter, IREE::VM::ModuleOp moduleOp,
-    BlockArgument moduleArg, Type elementType) {
+std::optional<Value>
+createVmTypeDefPtr(ConversionPatternRewriter &rewriter, Location loc,
+                   IREE::VM::EmitCTypeConverter &typeConverter,
+                   IREE::VM::ModuleOp moduleOp, BlockArgument moduleArg,
+                   Type elementType) {
   auto ctx = rewriter.getContext();
 
   // Map from type to enum values of type iree_vm_value_type_t and
@@ -444,11 +446,11 @@
 /// continuation and failure block based on the truthiness of the result
 /// value, i.e. a truthy value branches to the continuation block when
 /// `negateCondition` is false.
-emitc::CallOp failableCall(
-    OpBuilder &builder, Location location, Type type, StringAttr callee,
-    ArrayAttr args, ArrayRef<Value> operands,
-    const std::function<void(emitc::CallOp &)> &failureBlockBuilder,
-    bool negateCondition = false) {
+emitc::CallOp
+failableCall(OpBuilder &builder, Location location, Type type,
+             StringAttr callee, ArrayAttr args, ArrayRef<Value> operands,
+             const std::function<void(emitc::CallOp &)> &failureBlockBuilder,
+             bool negateCondition = false) {
   auto callOp = builder.create<emitc::CallOp>(
       /*location=*/location,
       /*type=*/type,
@@ -1248,16 +1250,16 @@
   using Adaptor = typename SrcOpTy::Adaptor;
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
 
- public:
+public:
   GenericOpConversion(TypeConverter &typeConverter, MLIRContext *context,
                       StringRef funcName)
       : OpConversionPattern<SrcOpTy>(typeConverter, context),
         funcName(funcName) {}
 
- private:
-  LogicalResult matchAndRewrite(
-      SrcOpTy op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+private:
+  LogicalResult
+  matchAndRewrite(SrcOpTy op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = op.getContext();
 
     auto type = op.getOperation()->getResultTypes();
@@ -1296,9 +1298,9 @@
   using Adaptor = typename SrcOpTy::Adaptor;
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      SrcOpTy op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SrcOpTy op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
@@ -1308,9 +1310,9 @@
   using OpConversionPattern<mlir::func::FuncOp>::OpConversionPattern;
   using Adaptor = mlir::func::FuncOp::Adaptor;
 
-  LogicalResult matchAndRewrite(
-      mlir::func::FuncOp funcOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::func::FuncOp funcOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     TypeConverter::SignatureConversion signatureConverter(
         funcOp.getFunctionType().getNumInputs());
     TypeConverter typeConverter;
@@ -1343,9 +1345,9 @@
     SmallVector<Value> callArguments;
   };
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::ExportOp exportOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::ExportOp exportOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = exportOp.getContext();
     auto loc = exportOp.getLoc();
 
@@ -1373,12 +1375,12 @@
         emitc::PointerType::get(emitc::OpaqueType::get(ctx, "void"));
 
     SmallVector<Type> inputTypes = {
-        stackType,        // SHIM_ARGUMENT_STACK
-        flagsType,        // SHIM_ARGUMENT_FLAGS
-        spanType,         // SHIM_ARGUMENT_ARGS_STORAGE
-        spanType,         // SHIM_ARGUMENT_RETS_STORAGE
-        moduleType,       // SHIM_ARGUMENT_MODULE
-        moduleStateType,  // SHIM_ARGUMENT_MODULE_STATE
+        stackType,       // SHIM_ARGUMENT_STACK
+        flagsType,       // SHIM_ARGUMENT_FLAGS
+        spanType,        // SHIM_ARGUMENT_ARGS_STORAGE
+        spanType,        // SHIM_ARGUMENT_RETS_STORAGE
+        moduleType,      // SHIM_ARGUMENT_MODULE
+        moduleStateType, // SHIM_ARGUMENT_MODULE_STATE
     };
 
     auto newFuncType = mlir::FunctionType::get(
@@ -1404,12 +1406,12 @@
                                           newFuncOp.getFunctionBody().end());
 
       // Insert arguments into block.
-      block->addArgument(stackType, loc);        // SHIM_ARGUMENT_STACK
-      block->addArgument(flagsType, loc);        // SHIM_ARGUMENT_FLAGS
-      block->addArgument(spanType, loc);         // SHIM_ARGUMENT_ARGS_STORAGE
-      block->addArgument(spanType, loc);         // SHIM_ARGUMENT_RETS_STORAGE
-      block->addArgument(moduleType, loc);       // SHIM_ARGUMENT_MODULE
-      block->addArgument(moduleStateType, loc);  // SHIM_ARGUMENT_MODULE_STATE
+      block->addArgument(stackType, loc);       // SHIM_ARGUMENT_STACK
+      block->addArgument(flagsType, loc);       // SHIM_ARGUMENT_FLAGS
+      block->addArgument(spanType, loc);        // SHIM_ARGUMENT_ARGS_STORAGE
+      block->addArgument(spanType, loc);        // SHIM_ARGUMENT_RETS_STORAGE
+      block->addArgument(moduleType, loc);      // SHIM_ARGUMENT_MODULE
+      block->addArgument(moduleStateType, loc); // SHIM_ARGUMENT_MODULE_STATE
 
       rewriter.setInsertionPointToStart(block);
 
@@ -1480,9 +1482,10 @@
     return success();
   }
 
-  FailureOr<std::pair<Value, Value>> castModuleAndStateStructs(
-      ConversionPatternRewriter &rewriter, IREE::VM::ExportOp &exportOp,
-      mlir::func::FuncOp &newFuncOp) const {
+  FailureOr<std::pair<Value, Value>>
+  castModuleAndStateStructs(ConversionPatternRewriter &rewriter,
+                            IREE::VM::ExportOp &exportOp,
+                            mlir::func::FuncOp &newFuncOp) const {
     auto ctx = exportOp.getContext();
     auto loc = exportOp.getLoc();
 
@@ -1555,9 +1558,9 @@
 
     // To prevent scoping issues we prefix the struct name with module and
     // function name.
-    auto typedefStruct = [&rewriter, &newFuncOp, &loc](
-                             std::string structName,
-                             ArrayRef<emitc_builders::StructField> fields) {
+    auto typedefStruct = [&rewriter, &newFuncOp,
+                          &loc](std::string structName,
+                                ArrayRef<emitc_builders::StructField> fields) {
       OpBuilder::InsertionGuard guard(rewriter);
       rewriter.setInsertionPoint(newFuncOp.getOperation());
 
@@ -1763,7 +1766,7 @@
 };
 
 class ImportOpConverter {
- public:
+public:
   ImportOpConverter(IREE::VM::EmitCTypeConverter &typeConverter,
                     SmallVector<std::string> &importShims)
       : typeConverter(typeConverter), importShims(importShims) {}
@@ -1796,7 +1799,7 @@
     return success();
   }
 
- private:
+private:
   LogicalResult createVariadicImportShims(IREE::VM::ImportOp &importOp,
                                           OpBuilder &builder) const {
     SetVector<const void *> arities;
@@ -2316,8 +2319,8 @@
     return result;
   }
 
-  SmallVector<IREE::VM::CallVariadicOp> getCallers(
-      IREE::VM::ImportOp &importOp) const {
+  SmallVector<IREE::VM::CallVariadicOp>
+  getCallers(IREE::VM::ImportOp &importOp) const {
     SmallVector<IREE::VM::CallVariadicOp> result;
 
     auto moduleOp =
@@ -2344,9 +2347,9 @@
   using Adaptor = typename CallOpTy::Adaptor;
   using OpConversionPattern<CallOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      CallOpTy op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(CallOpTy op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     mlir::func::FuncOp funcOp =
         lookupSymbolRef<mlir::func::FuncOp>(op.getOperation(), "callee");
     IREE::VM::ImportOp importOp =
@@ -2599,16 +2602,16 @@
   using Adaptor = typename CmpOpTy::Adaptor;
   using OpConversionPattern<CmpOpTy>::OpConversionPattern;
 
- public:
+public:
   CompareRefOpConversion(TypeConverter &typeConverter, MLIRContext *context,
                          StringRef funcName)
       : OpConversionPattern<CmpOpTy>(typeConverter, context),
         funcName(funcName) {}
 
- private:
-  LogicalResult matchAndRewrite(
-      CmpOpTy cmpOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+private:
+  LogicalResult
+  matchAndRewrite(CmpOpTy cmpOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = cmpOp.getContext();
     auto loc = cmpOp.getLoc();
 
@@ -2668,9 +2671,9 @@
   using Adaptor = IREE::VM::CmpNZRefOp::Adaptor;
   using OpConversionPattern<IREE::VM::CmpNZRefOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::CmpNZRefOp cmpOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::CmpNZRefOp cmpOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = cmpOp.getContext();
     auto loc = cmpOp.getLoc();
 
@@ -2715,9 +2718,9 @@
   using Adaptor = typename ConstOpTy::Adaptor;
   using OpConversionPattern<ConstOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      ConstOpTy constOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ConstOpTy constOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<emitc::ConstantOp>(constOp, constOp.getType(),
                                                    constOp.getValue());
     return success();
@@ -2729,9 +2732,9 @@
   using Adaptor = typename ConstZeroOpTy::Adaptor;
   using OpConversionPattern<ConstZeroOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      ConstZeroOpTy constZeroOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ConstZeroOpTy constZeroOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto type = constZeroOp.getType();
 
     Attribute value = rewriter.getZeroAttr(type);
@@ -2746,9 +2749,9 @@
   using Adaptor = IREE::VM::ConstRefZeroOp::Adaptor;
   using OpConversionPattern<IREE::VM::ConstRefZeroOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::ConstRefZeroOp constRefZeroOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::ConstRefZeroOp constRefZeroOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = constRefZeroOp.getLoc();
 
     IREE::VM::EmitCTypeConverter *typeConverter =
@@ -2774,9 +2777,9 @@
   using Adaptor = IREE::VM::ConstRefRodataOp::Adaptor;
   using OpConversionPattern<IREE::VM::ConstRefRodataOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::ConstRefRodataOp constRefRodataOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::ConstRefRodataOp constRefRodataOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = constRefRodataOp.getContext();
     auto loc = constRefRodataOp.getLoc();
 
@@ -2847,9 +2850,9 @@
   using Adaptor = IREE::VM::BranchOp::Adaptor;
   using OpConversionPattern<IREE::VM::BranchOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::BranchOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::BranchOp op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
 
     assert(op.getOperands().size() == adaptor.getOperands().size());
@@ -2956,9 +2959,9 @@
   using Adaptor = IREE::VM::CondBranchOp::Adaptor;
   using OpConversionPattern<IREE::VM::CondBranchOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::CondBranchOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::CondBranchOp op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
 
     assert(op.getOperands().size() == adaptor.getOperands().size());
@@ -3035,9 +3038,9 @@
   using Adaptor = IREE::VM::ReturnOp::Adaptor;
   using OpConversionPattern<IREE::VM::ReturnOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::ReturnOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::ReturnOp op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = op.getContext();
     auto loc = op.getLoc();
 
@@ -3098,14 +3101,14 @@
   using Adaptor = IREE::VM::ImportResolvedOp::Adaptor;
   using OpConversionPattern<IREE::VM::ImportResolvedOp>::OpConversionPattern;
 
- public:
+public:
   ImportResolvedOpConversion(TypeConverter &typeConverter, MLIRContext *context)
       : OpConversionPattern(typeConverter, context) {}
 
- private:
-  LogicalResult matchAndRewrite(
-      IREE::VM::ImportResolvedOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+private:
+  LogicalResult
+  matchAndRewrite(IREE::VM::ImportResolvedOp op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = op.getContext();
     auto loc = op.getLoc();
 
@@ -3167,9 +3170,9 @@
   using Adaptor = IREE::VM::FailOp::Adaptor;
   using OpConversionPattern<IREE::VM::FailOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::FailOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::FailOp op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = op.getContext();
     auto loc = op.getLoc();
 
@@ -3264,16 +3267,16 @@
   using Adaptor = typename LoadOpTy::Adaptor;
   using OpConversionPattern<LoadOpTy>::OpConversionPattern;
 
- public:
+public:
   GlobalLoadOpConversion(TypeConverter &typeConverter, MLIRContext *context,
                          StringRef funcName)
       : OpConversionPattern<LoadOpTy>(typeConverter, context),
         funcName(funcName) {}
 
- private:
-  LogicalResult matchAndRewrite(
-      LoadOpTy loadOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+private:
+  LogicalResult
+  matchAndRewrite(LoadOpTy loadOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = loadOp.getContext();
     auto loc = loadOp.getLoc();
 
@@ -3319,9 +3322,9 @@
   using Adaptor = typename LoadStoreOpTy::Adaptor;
   using OpConversionPattern<LoadStoreOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      LoadStoreOpTy op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(LoadStoreOpTy op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (isa<IREE::VM::GlobalLoadRefOp>(op)) {
       return rewriteOp(op.getOperation(), adaptor, rewriter, true);
     } else if (isa<IREE::VM::GlobalStoreRefOp>(op)) {
@@ -3435,16 +3438,16 @@
   using Adaptor = typename StoreOpTy::Adaptor;
   using OpConversionPattern<StoreOpTy>::OpConversionPattern;
 
- public:
+public:
   GlobalStoreOpConversion(TypeConverter &typeConverter, MLIRContext *context,
                           StringRef funcName)
       : OpConversionPattern<StoreOpTy>(typeConverter, context),
         funcName(funcName) {}
 
- private:
-  LogicalResult matchAndRewrite(
-      StoreOpTy storeOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+private:
+  LogicalResult
+  matchAndRewrite(StoreOpTy storeOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = storeOp.getContext();
     auto loc = storeOp.getLoc();
 
@@ -3494,19 +3497,18 @@
   using Adaptor = typename SrcOpTy::Adaptor;
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
 
- public:
+public:
   ContainerOpConversion(TypeConverter &typeConverter, MLIRContext *context,
                         StringRef funcName, DenseSet<size_t> refArgumentIndices,
                         bool failable)
       : OpConversionPattern<SrcOpTy>(typeConverter, context),
-        funcName(funcName),
-        refArgumentIndices(refArgumentIndices),
+        funcName(funcName), refArgumentIndices(refArgumentIndices),
         failable(failable) {}
 
- private:
-  LogicalResult matchAndRewrite(
-      SrcOpTy op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+private:
+  LogicalResult
+  matchAndRewrite(SrcOpTy op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = op.getContext();
     auto loc = op.getLoc();
 
@@ -3655,9 +3657,9 @@
     std::string constructor;
   };
 
-  LogicalResult matchAndRewrite(
-      SrcOpTy op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SrcOpTy op, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = op.getContext();
     auto loc = op.getLoc();
 
@@ -3761,10 +3763,10 @@
     return std::nullopt;
   }
 
-  std::optional<SmallVector<Value>> getOperands(
-      IREE::VM::ListAllocOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter, Type elementType, Value containerPtr,
-      Value allocator) const {
+  std::optional<SmallVector<Value>>
+  getOperands(IREE::VM::ListAllocOp op, Adaptor adaptor,
+              ConversionPatternRewriter &rewriter, Type elementType,
+              Value containerPtr, Value allocator) const {
     SmallVector<Value> result;
 
     IREE::VM::EmitCTypeConverter *typeConverter =
@@ -3792,10 +3794,10 @@
     return result;
   }
 
-  std::optional<SmallVector<Value>> getOperands(
-      IREE::VM::BufferAllocOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter, Type elementType, Value containerPtr,
-      Value allocator) const {
+  std::optional<SmallVector<Value>>
+  getOperands(IREE::VM::BufferAllocOp op, Adaptor adaptor,
+              ConversionPatternRewriter &rewriter, Type elementType,
+              Value containerPtr, Value allocator) const {
     auto ctx = op.getContext();
     auto loc = op.getLoc();
 
@@ -3824,10 +3826,10 @@
     return result;
   }
 
-  std::optional<SmallVector<Value>> getOperands(
-      IREE::VM::BufferCloneOp op, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter, Type elementType, Value containerPtr,
-      Value allocator) const {
+  std::optional<SmallVector<Value>>
+  getOperands(IREE::VM::BufferCloneOp op, Adaptor adaptor,
+              ConversionPatternRewriter &rewriter, Type elementType,
+              Value containerPtr, Value allocator) const {
     auto ctx = op.getContext();
     auto loc = op.getLoc();
 
@@ -3884,9 +3886,9 @@
   using Adaptor = typename GetOpTy::Adaptor;
   using OpConversionPattern<GetOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      GetOpTy getOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(GetOpTy getOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = getOp.getContext();
     auto loc = getOp.getLoc();
 
@@ -3963,9 +3965,9 @@
   using Adaptor = IREE::VM::ListGetRefOp::Adaptor;
   using OpConversionPattern<IREE::VM::ListGetRefOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::ListGetRefOp getOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::ListGetRefOp getOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = getOp.getContext();
     auto loc = getOp.getLoc();
 
@@ -4118,9 +4120,9 @@
   using Adaptor = typename SetOpTy::Adaptor;
   using OpConversionPattern<SetOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      SetOpTy setOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(SetOpTy setOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = setOp.getContext();
     auto loc = setOp.getLoc();
 
@@ -4183,9 +4185,9 @@
   using Adaptor = IREE::VM::ListSetRefOp::Adaptor;
   using OpConversionPattern<IREE::VM::ListSetRefOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      IREE::VM::ListSetRefOp setOp, Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VM::ListSetRefOp setOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ctx = setOp.getContext();
     auto loc = setOp.getLoc();
 
@@ -4232,7 +4234,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 void populateVMToEmitCPatterns(ConversionTarget &conversionTarget,
                                IREE::VM::EmitCTypeConverter &typeConverter,
@@ -4704,7 +4706,7 @@
 class ConvertVMToEmitCPass
     : public PassWrapper<ConvertVMToEmitCPass,
                          OperationPass<IREE::VM::ModuleOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertVMToEmitCPass)
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -4809,17 +4811,17 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<IREE::VM::ModuleOp>>
 createConvertVMToEmitCPass() {
   return std::make_unique<ConvertVMToEmitCPass>();
 }
 
-}  // namespace VM
-}  // namespace IREE
+} // namespace VM
+} // namespace IREE
 
 static PassRegistration<IREE::VM::ConvertVMToEmitCPass> pass;
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
index 45b678f..b7e455c 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
@@ -24,10 +24,10 @@
 
 std::unique_ptr<OperationPass<IREE::VM::ModuleOp>> createConvertVMToEmitCPass();
 
-}  // namespace VM
-}  // namespace IREE
+} // namespace VM
+} // namespace IREE
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_CONVERTVMTOEMITC_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_CONVERTVMTOEMITC_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.cpp
index 8f270fa..afe04d1 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.cpp
@@ -16,7 +16,7 @@
 class DropExcludedExportsPass
     : public PassWrapper<DropExcludedExportsPass,
                          OperationPass<IREE::VM::ModuleOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "iree-vm-drop-excluded-exports";
   }
@@ -48,7 +48,7 @@
 
 static PassRegistration<DropExcludedExportsPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.h
index 42648fe..b35b1cc 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.h
@@ -19,9 +19,9 @@
 std::unique_ptr<OperationPass<IREE::VM::ModuleOp>>
 createDropExcludedExportsPass();
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_DROPEXCLUDEDEXPORTS_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_DROPEXCLUDEDEXPORTS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.cpp
index eb9642b..014b1d4 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.cpp
@@ -16,64 +16,64 @@
 namespace {
 std::string mapUnaryOperator(UnaryOperator op) {
   switch (op) {
-    case UnaryOperator::PLUS:
-      return "+";
-    case UnaryOperator::MINUS:
-      return "-";
-    case UnaryOperator::BITWISE_NOT:
-      return "~";
-    case UnaryOperator::LOGICAL_NOT:
-      return "!";
-    default:
-      llvm_unreachable("unsupported unary operator");
-      return "XXX";
+  case UnaryOperator::PLUS:
+    return "+";
+  case UnaryOperator::MINUS:
+    return "-";
+  case UnaryOperator::BITWISE_NOT:
+    return "~";
+  case UnaryOperator::LOGICAL_NOT:
+    return "!";
+  default:
+    llvm_unreachable("unsupported unary operator");
+    return "XXX";
   }
 }
 
 std::string mapBinaryOperator(BinaryOperator op) {
   switch (op) {
-    case BinaryOperator::ADDITION:
-      return "+";
-    case BinaryOperator::SUBTRACTION:
-      return "-";
-    case BinaryOperator::PRODUCT:
-      return "*";
-    case BinaryOperator::DIVISION:
-      return "/";
-    case BinaryOperator::REMAINDER:
-      return "%";
-    case BinaryOperator::BITWISE_AND:
-      return "&";
-    case BinaryOperator::BITWISE_OR:
-      return "|";
-    case BinaryOperator::BITWISE_XOR:
-      return "^";
-    case BinaryOperator::BITWISE_LEFT_SHIFT:
-      return "<<";
-    case BinaryOperator::BITWISE_RIGHT_SHIFT:
-      return ">>";
-    case BinaryOperator::LOGICAL_AND:
-      return "&&";
-    case BinaryOperator::LOGICAL_OR:
-      return "||";
-    case BinaryOperator::EQUAL_TO:
-      return "==";
-    case BinaryOperator::NOT_EQUAL_TO:
-      return "!=";
-    case BinaryOperator::LESS_THAN:
-      return "<";
-    case BinaryOperator::GREATER_THAN:
-      return ">";
-    case BinaryOperator::LESS_THAN_OR_EQUAL:
-      return "<=";
-    case BinaryOperator::GREATER_THAN_OR_EQUAL:
-      return ">=";
-    default:
-      llvm_unreachable("unsupported binary operator");
-      return "XXX";
+  case BinaryOperator::ADDITION:
+    return "+";
+  case BinaryOperator::SUBTRACTION:
+    return "-";
+  case BinaryOperator::PRODUCT:
+    return "*";
+  case BinaryOperator::DIVISION:
+    return "/";
+  case BinaryOperator::REMAINDER:
+    return "%";
+  case BinaryOperator::BITWISE_AND:
+    return "&";
+  case BinaryOperator::BITWISE_OR:
+    return "|";
+  case BinaryOperator::BITWISE_XOR:
+    return "^";
+  case BinaryOperator::BITWISE_LEFT_SHIFT:
+    return "<<";
+  case BinaryOperator::BITWISE_RIGHT_SHIFT:
+    return ">>";
+  case BinaryOperator::LOGICAL_AND:
+    return "&&";
+  case BinaryOperator::LOGICAL_OR:
+    return "||";
+  case BinaryOperator::EQUAL_TO:
+    return "==";
+  case BinaryOperator::NOT_EQUAL_TO:
+    return "!=";
+  case BinaryOperator::LESS_THAN:
+    return "<";
+  case BinaryOperator::GREATER_THAN:
+    return ">";
+  case BinaryOperator::LESS_THAN_OR_EQUAL:
+    return "<=";
+  case BinaryOperator::GREATER_THAN_OR_EQUAL:
+    return ">=";
+  default:
+    llvm_unreachable("unsupported binary operator");
+    return "XXX";
   }
 }
-}  // namespace
+} // namespace
 
 Value unaryOperator(OpBuilder builder, Location location, UnaryOperator op,
                     Value operand, Type resultType) {
@@ -414,6 +414,6 @@
       /*operands=*/ArrayRef<Value>{operand});
 }
 
-}  // namespace emitc_builders
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace emitc_builders
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.h
index fb076bb..620ddf1 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCBuilders.h
@@ -122,8 +122,8 @@
 
 void ireeVmRefRelease(OpBuilder builder, Location location, Value operand);
 
-}  // namespace emitc_builders
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace emitc_builders
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_EMITCBUILDERS_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_EMITCBUILDERS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.cpp
index fba5c69..956e7a9 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.cpp
@@ -82,16 +82,16 @@
   if (auto iType = llvm::dyn_cast<IntegerType>(type)) {
     std::string typeLiteral;
     switch (iType.getWidth()) {
-      case 32: {
-        typeLiteral = "int32_t";
-        break;
-      }
-      case 64: {
-        typeLiteral = "int64_t";
-        break;
-      }
-      default:
-        return {};
+    case 32: {
+      typeLiteral = "int32_t";
+      break;
+    }
+    case 64: {
+      typeLiteral = "int64_t";
+      break;
+    }
+    default:
+      return {};
     }
     return emitc::OpaqueType::get(type.getContext(), typeLiteral);
   }
@@ -99,16 +99,16 @@
   if (auto fType = llvm::dyn_cast<FloatType>(type)) {
     std::string typeLiteral;
     switch (fType.getWidth()) {
-      case 32: {
-        typeLiteral = "float";
-        break;
-      }
-      case 64: {
-        typeLiteral = "double";
-        break;
-      }
-      default:
-        return {};
+    case 32: {
+      typeLiteral = "float";
+      break;
+    }
+    case 64: {
+      typeLiteral = "double";
+      break;
+    }
+    default:
+      return {};
     }
     return emitc::OpaqueType::get(type.getContext(), typeLiteral);
   }
@@ -166,7 +166,7 @@
   return applyOp.getResult();
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.h
index bfb88f9..948e61b 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/EmitCTypeConverter.h
@@ -20,14 +20,14 @@
 namespace VM {
 
 class EmitCTypeConverter : public mlir::TypeConverter {
- public:
+public:
   EmitCTypeConverter();
-  FailureOr<std::reference_wrapper<VMAnalysis>> lookupAnalysis(
-      mlir::func::FuncOp &funcOp) {
+  FailureOr<std::reference_wrapper<VMAnalysis>>
+  lookupAnalysis(mlir::func::FuncOp &funcOp) {
     return lookupAnalysis(funcOp.getOperation());
   }
-  FailureOr<std::reference_wrapper<VMAnalysis>> lookupAnalysis(
-      IREE::VM::FuncOp &funcOp) {
+  FailureOr<std::reference_wrapper<VMAnalysis>>
+  lookupAnalysis(IREE::VM::FuncOp &funcOp) {
     return lookupAnalysis(funcOp.getOperation());
   }
   std::optional<Value> materializeRef(Value ref);
@@ -54,14 +54,14 @@
   VMAnalysisCache analysisCache;
   std::vector<TypeDef> typeTable;
 
- private:
+private:
   llvm::DenseMap<Type, int> typeOrdinalMap;
   FailureOr<std::reference_wrapper<VMAnalysis>> lookupAnalysis(Operation *op);
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_EMITCTYPECONVERTER_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_EMITCTYPECONVERTER_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/VMAnalysis.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/VMAnalysis.h
index 14349d0..ba393a7 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/VMAnalysis.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/VMAnalysis.h
@@ -16,7 +16,7 @@
 namespace iree_compiler {
 
 struct VMAnalysis {
- public:
+public:
   VMAnalysis() = default;
   VMAnalysis(IREE::VM::FuncOp &funcOp) {
     Operation *op = funcOp.getOperation();
@@ -70,7 +70,7 @@
 
   DenseMap<int64_t, Operation *> &localRefs() { return refs; }
 
- private:
+private:
   RegisterAllocation registerAllocation;
   ValueLiveness valueLiveness;
   DenseMap<int64_t, Operation *> refs;
@@ -79,7 +79,7 @@
 
 using VMAnalysisCache = DenseMap<Operation *, VMAnalysis>;
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_VMANALYSIS_H_
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_VMANALYSIS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.cpp b/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.cpp
index 80cea64..ca53f8e 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.cpp
@@ -22,7 +22,7 @@
 namespace IREE {
 namespace VM {
 
-#include "iree/compiler/Dialect/VM/IR/VMOpInterfaces.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMOpInterfaces.cpp.inc" // IWYU pragma: keep
 
 // Fallback asm printer for ops that do not define their own. See op-specific
 // printers in the op implementations.
@@ -141,7 +141,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 VMDialect::VMDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<VMDialect>()),
@@ -152,7 +152,7 @@
 
 #define GET_OP_LIST
   addOperations<
-#include "iree/compiler/Dialect/VM/IR/VMOps.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMOps.cpp.inc" // IWYU pragma: keep
       >();
 }
 
@@ -262,7 +262,8 @@
 Operation *VMDialect::materializeConstant(OpBuilder &builder, Attribute value,
                                           Type type, Location loc) {
   auto typedValue = dyn_cast<TypedAttr>(value);
-  if (!typedValue) return nullptr;
+  if (!typedValue)
+    return nullptr;
 
   if (ConstI32Op::isBuildableWith(typedValue, type)) {
     auto convertedValue = ConstI32Op::convertConstValue(typedValue);
@@ -298,7 +299,7 @@
   return nullptr;
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.h b/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.h
index a709444..738232e 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.h
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMDialect.h
@@ -24,10 +24,10 @@
 namespace IREE {
 namespace VM {
 
-#include "iree/compiler/Dialect/VM/IR/VMOpInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/VM/IR/VMOpInterfaces.h.inc" // IWYU pragma: export
 
 class VMDialect : public Dialect {
- public:
+public:
   explicit VMDialect(MLIRContext *context);
   ~VMDialect() override;
   static StringRef getDialectNamespace() { return "vm"; }
@@ -51,7 +51,7 @@
   void *getRegisteredInterfaceForOp(mlir::TypeID interface,
                                     mlir::OperationName opName) override;
 
- private:
+private:
   /// Register the attributes of this dialect.
   void registerAttributes();
   /// Register the types of this dialect.
@@ -61,9 +61,9 @@
   VMOpAsmInterface *fallbackOpAsmInterface;
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_IR_VMDIALECT_H_
+#endif // IREE_COMPILER_DIALECT_VM_IR_VMDIALECT_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMFuncEncoder.h b/compiler/src/iree/compiler/Dialect/VM/IR/VMFuncEncoder.h
index a5209d7..9c61414 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMFuncEncoder.h
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMFuncEncoder.h
@@ -17,7 +17,7 @@
 // This base manages source map construction and vm.func walking while
 // subclasses provide actual emission.
 class VMFuncEncoder {
- public:
+public:
   virtual ~VMFuncEncoder() = default;
 
   // Begins encoding the contents of a block.
@@ -74,7 +74,7 @@
   virtual LogicalResult encodeResults(Operation::result_range values) = 0;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_IR_VMFUNCENCODER_H_
+#endif // IREE_COMPILER_DIALECT_VM_IR_VMFUNCENCODER_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp b/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
index 1d7992c..1c92e1f 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
@@ -47,13 +47,14 @@
   } else if (llvm::isa<RankedTensorType, VectorType>(type)) {
     auto vtType = llvm::cast<ShapedType>(type);
     auto element = oneOfType(vtType.getElementType());
-    if (!element) return {};
+    if (!element)
+      return {};
     return DenseElementsAttr::get(vtType, element);
   }
   return {};
 }
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // Structural ops
@@ -66,7 +67,8 @@
   using OpRewritePattern::OpRewritePattern;
   LogicalResult matchAndRewrite(InitializerOp op,
                                 PatternRewriter &rewriter) const override {
-    if (op.getBody().getBlocks().size() != 1) return failure();
+    if (op.getBody().getBlocks().size() != 1)
+      return failure();
     auto &block = op.getBody().front();
     if (block.empty() || isa<ReturnOp>(block.front())) {
       rewriter.eraseOp(op);
@@ -86,10 +88,12 @@
                                 PatternRewriter &rewriter) const override {
     SmallVector<Operation *> deadOps;
     op.walk([&](Operation *op) {
-      if (!isGlobalStoreOp(op)) return;
+      if (!isGlobalStoreOp(op))
+        return;
       auto value = op->getOperand(0);
       Attribute valueAttr;
-      if (!matchPattern(value, m_Constant(&valueAttr))) return;
+      if (!matchPattern(value, m_Constant(&valueAttr)))
+        return;
       auto globalRefAttr = op->getAttrOfType<SymbolRefAttr>("global");
       assert(globalRefAttr);
       auto globalOp =
@@ -99,8 +103,10 @@
           globalOp, [&]() { globalOp.setGlobalInitialValue(valueAttr); });
       deadOps.push_back(op);
     });
-    if (deadOps.empty()) return failure();
-    for (auto deadOp : deadOps) rewriter.eraseOp(deadOp);
+    if (deadOps.empty())
+      return failure();
+    for (auto deadOp : deadOps)
+      rewriter.eraseOp(deadOp);
     return success();
   }
 
@@ -114,7 +120,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void InitializerOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
@@ -134,12 +140,15 @@
   using OpRewritePattern<T>::OpRewritePattern;
   LogicalResult matchAndRewrite(T op,
                                 PatternRewriter &rewriter) const override {
-    if (!op.getInitialValue().has_value()) return failure();
+    if (!op.getInitialValue().has_value())
+      return failure();
     if (auto value = llvm::dyn_cast<IntegerAttr>(op.getInitialValueAttr())) {
-      if (value.getValue() != 0) return failure();
+      if (value.getValue() != 0)
+        return failure();
     } else if (auto value =
                    llvm::dyn_cast<FloatAttr>(op.getInitialValueAttr())) {
-      if (value.getValue().isNonZero()) return failure();
+      if (value.getValue().isNonZero())
+        return failure();
     }
     auto visibility = op.getVisibility();
     auto newOp = rewriter.replaceOpWithNewOp<T>(
@@ -150,7 +159,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void GlobalI32Op::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
@@ -204,7 +213,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void GlobalLoadIndirectI32Op::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -258,7 +267,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void GlobalStoreIndirectI32Op::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
@@ -317,7 +326,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 OpFoldResult ConstI32Op::fold(FoldAdaptor operands) { return getValue(); }
 
@@ -482,7 +491,8 @@
                  llvm::dyn_cast_if_present<SplatElementsAttr>(rawOperand)) {
     auto elementResult = constFoldUnaryOp<AttrElementT>(
         {operand.getSplatValue<Attribute>()}, calculate);
-    if (!elementResult) return {};
+    if (!elementResult)
+      return {};
     return DenseElementsAttr::get(operand.getType(), elementResult);
   } else if (auto operand =
                  llvm::dyn_cast_if_present<ElementsAttr>(rawOperand)) {
@@ -496,15 +506,17 @@
 
 /// Performs const folding `calculate` with element-wise behavior on the given
 /// attribute in `operands` and returns the result if possible.
-static Attribute constFoldFloatUnaryOp(
-    Attribute rawOperand, const std::function<APFloat(APFloat)> &calculate) {
+static Attribute
+constFoldFloatUnaryOp(Attribute rawOperand,
+                      const std::function<APFloat(APFloat)> &calculate) {
   if (auto operand = llvm::dyn_cast_if_present<FloatAttr>(rawOperand)) {
     return FloatAttr::get(operand.getType(), calculate(operand.getValue()));
   } else if (auto operand =
                  llvm::dyn_cast_if_present<SplatElementsAttr>(rawOperand)) {
     auto elementResult =
         constFoldFloatUnaryOp({operand.getSplatValue<Attribute>()}, calculate);
-    if (!elementResult) return {};
+    if (!elementResult)
+      return {};
     return DenseElementsAttr::get(operand.getType(), elementResult);
   } else if (auto operand =
                  llvm::dyn_cast_if_present<ElementsAttr>(rawOperand)) {
@@ -528,28 +540,33 @@
                                    const CalculationT &calculate) {
   if (auto lhs = llvm::dyn_cast_if_present<AttrElementT>(rawLhs)) {
     auto rhs = llvm::dyn_cast_if_present<AttrElementT>(rawRhs);
-    if (!rhs) return {};
+    if (!rhs)
+      return {};
     return AttrElementT::get(lhs.getType(),
                              calculate(lhs.getValue(), rhs.getValue()));
   } else if (auto lhs = llvm::dyn_cast_if_present<SplatElementsAttr>(rawLhs)) {
     // TODO(benvanik): handle splat/otherwise.
     auto rhs = llvm::dyn_cast_if_present<SplatElementsAttr>(rawRhs);
-    if (!rhs || lhs.getType() != rhs.getType()) return {};
+    if (!rhs || lhs.getType() != rhs.getType())
+      return {};
     auto elementResult = constFoldBinaryOp<AttrElementT>(
         lhs.getSplatValue<Attribute>(), rhs.getSplatValue<Attribute>(),
         calculate);
-    if (!elementResult) return {};
+    if (!elementResult)
+      return {};
     return DenseElementsAttr::get(lhs.getType(), elementResult);
   } else if (auto lhs = llvm::dyn_cast_if_present<ElementsAttr>(rawLhs)) {
     auto rhs = llvm::dyn_cast_if_present<ElementsAttr>(rawRhs);
-    if (!rhs || lhs.getType() != rhs.getType()) return {};
+    if (!rhs || lhs.getType() != rhs.getType())
+      return {};
     auto lhsIt = lhs.getValues<AttrElementT>().begin();
     auto rhsIt = rhs.getValues<AttrElementT>().begin();
     SmallVector<Attribute> resultAttrs(lhs.getNumElements());
     for (int64_t i = 0; i < lhs.getNumElements(); ++i) {
       resultAttrs[i] =
           constFoldBinaryOp<AttrElementT>(*lhsIt, *rhsIt, calculate);
-      if (!resultAttrs[i]) return {};
+      if (!resultAttrs[i])
+        return {};
       ++lhsIt;
       ++rhsIt;
     }
@@ -585,7 +602,8 @@
     auto elementResult = constFoldTernaryOp<AttrElementT>(
         a.getSplatValue<Attribute>(), b.getSplatValue<Attribute>(),
         c.getSplatValue<Attribute>(), calculate);
-    if (!elementResult) return {};
+    if (!elementResult)
+      return {};
     return DenseElementsAttr::get(a.getType(), elementResult);
   } else if (auto a = llvm::dyn_cast_if_present<ElementsAttr>(rawA)) {
     auto b = llvm::dyn_cast_if_present<ElementsAttr>(rawB);
@@ -600,7 +618,8 @@
     for (int64_t i = 0; i < a.getNumElements(); ++i) {
       resultAttrs[i] =
           constFoldTernaryOp<AttrElementT>(*aIt, *bIt, *cIt, calculate);
-      if (!resultAttrs[i]) return {};
+      if (!resultAttrs[i])
+        return {};
       ++aIt;
       ++bIt;
       ++cIt;
@@ -653,11 +672,15 @@
     return op.getLhs();
   }
   if (auto subOp = dyn_cast_or_null<SUB>(op.getLhs().getDefiningOp())) {
-    if (subOp.getLhs() == op.getRhs()) return subOp.getRhs();
-    if (subOp.getRhs() == op.getRhs()) return subOp.getLhs();
+    if (subOp.getLhs() == op.getRhs())
+      return subOp.getRhs();
+    if (subOp.getRhs() == op.getRhs())
+      return subOp.getLhs();
   } else if (auto subOp = dyn_cast_or_null<SUB>(op.getRhs().getDefiningOp())) {
-    if (subOp.getLhs() == op.getLhs()) return subOp.getRhs();
-    if (subOp.getRhs() == op.getLhs()) return subOp.getLhs();
+    if (subOp.getLhs() == op.getLhs())
+      return subOp.getRhs();
+    if (subOp.getRhs() == op.getLhs())
+      return subOp.getLhs();
   }
   return constFoldBinaryOp<AttrElementT>(
       lhs, rhs,
@@ -695,11 +718,15 @@
     return op.getLhs();
   }
   if (auto addOp = dyn_cast_or_null<ADD>(op.getLhs().getDefiningOp())) {
-    if (addOp.getLhs() == op.getRhs()) return addOp.getRhs();
-    if (addOp.getRhs() == op.getRhs()) return addOp.getLhs();
+    if (addOp.getLhs() == op.getRhs())
+      return addOp.getRhs();
+    if (addOp.getRhs() == op.getRhs())
+      return addOp.getLhs();
   } else if (auto addOp = dyn_cast_or_null<ADD>(op.getRhs().getDefiningOp())) {
-    if (addOp.getLhs() == op.getLhs()) return addOp.getRhs();
-    if (addOp.getRhs() == op.getLhs()) return addOp.getLhs();
+    if (addOp.getLhs() == op.getLhs())
+      return addOp.getRhs();
+    if (addOp.getRhs() == op.getLhs())
+      return addOp.getLhs();
   }
   return constFoldBinaryOp<AttrElementT>(
       lhs, rhs,
@@ -744,7 +771,8 @@
   LogicalResult matchAndRewrite(T op,
                                 PatternRewriter &rewriter) const override {
     AttrElementT c1, c2;
-    if (!matchPattern(op.getRhs(), m_Constant(&c1))) return failure();
+    if (!matchPattern(op.getRhs(), m_Constant(&c1)))
+      return failure();
     if (auto mulOp = dyn_cast_or_null<T>(op.getLhs().getDefiningOp())) {
       if (matchPattern(mulOp.getRhs(), m_Constant(&c2))) {
         auto c = rewriter.createOrFold<CONST_OP>(
@@ -959,7 +987,8 @@
 }
 
 OpFoldResult MinI32SOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::smin(lhs, rhs);
@@ -967,7 +996,8 @@
 }
 
 OpFoldResult MinI64SOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::smin(lhs, rhs);
@@ -975,7 +1005,8 @@
 }
 
 OpFoldResult MinI32UOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::umin(lhs, rhs);
@@ -983,7 +1014,8 @@
 }
 
 OpFoldResult MinI64UOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::umin(lhs, rhs);
@@ -991,7 +1023,8 @@
 }
 
 OpFoldResult MaxI32SOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::smax(lhs, rhs);
@@ -999,7 +1032,8 @@
 }
 
 OpFoldResult MaxI64SOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::smax(lhs, rhs);
@@ -1007,7 +1041,8 @@
 }
 
 OpFoldResult MaxI32UOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::umax(lhs, rhs);
@@ -1015,7 +1050,8 @@
 }
 
 OpFoldResult MaxI64UOp::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<IntegerAttr>(operands.getLhs(), operands.getRhs(),
                                         [](const APInt &lhs, const APInt &rhs) {
                                           return llvm::APIntOps::umax(lhs, rhs);
@@ -1245,14 +1281,16 @@
 }
 
 OpFoldResult MaxF32Op::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<FloatAttr>(
       operands.getLhs(), operands.getRhs(),
       [](const APFloat &a, const APFloat &b) { return llvm::maxnum(a, b); });
 }
 
 OpFoldResult MaxF64Op::fold(FoldAdaptor operands) {
-  if (getLhs() == getRhs()) return getLhs();
+  if (getLhs() == getRhs())
+    return getLhs();
   return constFoldBinaryOp<FloatAttr>(
       operands.getLhs(), operands.getRhs(),
       [](const APFloat &a, const APFloat &b) { return llvm::maxnum(a, b); });
@@ -1576,7 +1614,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TruncI64I8Op::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -1681,17 +1719,19 @@
                                 PatternRewriter &rewriter) const override {
     auto zeroOp =
         dyn_cast_or_null<ConstRefZeroOp>(castOp.getOperand().getDefiningOp());
-    if (!zeroOp) return failure();
+    if (!zeroOp)
+      return failure();
     rewriter.replaceOpWithNewOp<ConstRefZeroOp>(castOp,
                                                 castOp.getResult().getType());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 OpFoldResult CastAnyRefOp::fold(FoldAdaptor operands) {
-  if (getOperand().getType() == getResult().getType()) return getOperand();
+  if (getOperand().getType() == getResult().getType())
+    return getOperand();
   if (auto castOp =
           dyn_cast_or_null<CastRefAnyOp>(getOperand().getDefiningOp())) {
     if (castOp.getOperand().getType() == getResult().getType()) {
@@ -1707,7 +1747,8 @@
 }
 
 OpFoldResult CastRefAnyOp::fold(FoldAdaptor operands) {
-  if (getOperand().getType() == getResult().getType()) return getOperand();
+  if (getOperand().getType() == getResult().getType())
+    return getOperand();
   if (auto castOp =
           dyn_cast_or_null<CastAnyRefOp>(getOperand().getDefiningOp())) {
     if (castOp.getOperand().getType() == getResult().getType()) {
@@ -1763,7 +1804,8 @@
                                       const CalculationT &calculate) {
   if (auto lhs = llvm::dyn_cast_if_present<AttrElementT>(rawLhs)) {
     auto rhs = llvm::dyn_cast_if_present<AttrElementT>(rawRhs);
-    if (!rhs) return {};
+    if (!rhs)
+      return {};
     auto boolType = IntegerType::get(lhs.getContext(), 32);
     return AttrElementT::get(boolType,
                              calculate(lhs.getValue(), rhs.getValue()));
@@ -1799,7 +1841,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 template <typename T>
 static OpFoldResult foldCmpEQOp(T op, Attribute lhs, Attribute rhs) {
@@ -1863,7 +1905,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmpNEI32Op::getCanonicalizationPatterns(RewritePatternSet &results,
                                              MLIRContext *context) {
@@ -1943,7 +1985,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 template <typename T>
 static OpFoldResult foldCmpLTESOp(T op, Attribute lhs, Attribute rhs) {
@@ -2019,7 +2061,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 template <typename T>
 static OpFoldResult foldCmpGTSOp(T op, Attribute lhs, Attribute rhs) {
@@ -2099,7 +2141,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 template <typename T>
 static OpFoldResult foldCmpGTESOp(T op, Attribute lhs, Attribute rhs) {
@@ -2188,30 +2230,35 @@
                                        const CalculationT &calculate) {
   if (auto lhs = llvm::dyn_cast_if_present<AttrElementT>(rawLhs)) {
     auto rhs = llvm::dyn_cast_if_present<AttrElementT>(rawRhs);
-    if (!rhs) return {};
+    if (!rhs)
+      return {};
     return IntegerAttr::get(IntegerType::get(lhs.getContext(), 32),
                             calculate(lhs.getValue(), rhs.getValue()));
   } else if (auto lhs = llvm::dyn_cast_if_present<SplatElementsAttr>(rawLhs)) {
     // TODO(benvanik): handle splat/otherwise.
     auto rhs = llvm::dyn_cast_if_present<SplatElementsAttr>(rawRhs);
-    if (!rhs || lhs.getType() != rhs.getType()) return {};
+    if (!rhs || lhs.getType() != rhs.getType())
+      return {};
     auto elementResult = constFoldBinaryCmpFOp<AttrElementT>(
         lhs.getSplatValue<Attribute>(), rhs.getSplatValue<Attribute>(),
         calculate);
-    if (!elementResult) return {};
+    if (!elementResult)
+      return {};
     auto resultType = lhs.getType().clone(
         std::nullopt, IntegerType::get(lhs.getContext(), 32));
     return DenseElementsAttr::get(resultType, elementResult);
   } else if (auto lhs = llvm::dyn_cast_if_present<ElementsAttr>(rawLhs)) {
     auto rhs = llvm::dyn_cast_if_present<ElementsAttr>(rawRhs);
-    if (!rhs || lhs.getType() != rhs.getType()) return {};
+    if (!rhs || lhs.getType() != rhs.getType())
+      return {};
     auto lhsIt = lhs.getValues<AttrElementT>().begin();
     auto rhsIt = rhs.getValues<AttrElementT>().begin();
     SmallVector<Attribute> resultAttrs(lhs.getNumElements());
     for (int64_t i = 0; i < lhs.getNumElements(); ++i) {
       resultAttrs[i] =
           constFoldBinaryCmpFOp<AttrElementT>(*lhsIt, *rhsIt, calculate);
-      if (!resultAttrs[i]) return {};
+      if (!resultAttrs[i])
+        return {};
       ++lhsIt;
       ++rhsIt;
     }
@@ -2349,7 +2396,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 template <typename T>
 static OpFoldResult foldCmpEQNearOp(T op, Attribute lhs, Attribute rhs) {
@@ -2660,7 +2707,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 OpFoldResult CmpNZF32OOp::fold(FoldAdaptor operands) {
   return constFoldUnaryCmpOp<FloatAttr>(
@@ -2768,7 +2815,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmpEQRefOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                              MLIRContext *context) {
@@ -2799,7 +2846,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CmpNERefOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                              MLIRContext *context) {
@@ -2836,18 +2883,22 @@
     return failure();
   }
   // Check that the successor only contains a unconditional branch.
-  if (std::next(successor->begin()) != successor->end()) return failure();
+  if (std::next(successor->begin()) != successor->end())
+    return failure();
   // Check that the terminator is an unconditional branch.
   BranchOp successorBranch = dyn_cast<BranchOp>(successor->getTerminator());
-  if (!successorBranch) return failure();
+  if (!successorBranch)
+    return failure();
   // Check that the arguments are only used within the terminator.
   for (BlockArgument arg : successor->getArguments()) {
     for (Operation *user : arg.getUsers())
-      if (user != successorBranch) return failure();
+      if (user != successorBranch)
+        return failure();
   }
   // Don't try to collapse branches to infinite loops.
   Block *successorDest = successorBranch.getDest();
-  if (successorDest == successor) return failure();
+  if (successorDest == successor)
+    return failure();
 
   // Update the operands to the successor. If the branch parent has no
   // arguments, we can use the branch operands directly.
@@ -2924,7 +2975,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BranchOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
@@ -3002,7 +3053,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CondBranchOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
@@ -3047,7 +3098,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CallOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                          MLIRContext *context) {
@@ -3074,7 +3125,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CallVariadicOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
@@ -3113,7 +3164,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CondFailOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                              MLIRContext *context) {
@@ -3166,7 +3217,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void CheckEQOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                             MLIRContext *context) {
@@ -3206,13 +3257,14 @@
                                 PatternRewriter &rewriter) const override {
     auto importOp = SymbolTable::lookupNearestSymbolFrom<IREE::VM::ImportOp>(
         op, op.getImportAttr());
-    if (!importOp || importOp.getIsOptional()) return failure();
+    if (!importOp || importOp.getIsOptional())
+      return failure();
     rewriter.replaceOpWithNewOp<IREE::VM::ConstI32Op>(op, 1);
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void ImportResolvedOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                    MLIRContext *context) {
@@ -3271,7 +3323,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void TraceOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                           MLIRContext *context) {
@@ -3294,7 +3346,7 @@
                  SimplifyConstCondBreakPred>(context);
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp
index c66df05..b9154cc 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp
@@ -58,7 +58,7 @@
   setNameFn(result, os.str());
 }
 
-}  // namespace
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // Structural ops
@@ -262,7 +262,7 @@
       if (failed(parser.parseColonType(operandType))) {
         return parser.emitError(operandLoc) << "invalid operand";
       }
-      operandName = operand.name.substr(1);  // consume `%`
+      operandName = operand.name.substr(1); // consume `%`
     } else {
       if (failed(parser.parseType(operandType))) {
         return parser.emitError(operandLoc) << "invalid operand";
@@ -496,7 +496,7 @@
     return false;
   }
   if (llvm::isa<UnitAttr>(value)) {
-    return SZ == 32;  // Conditions/bools are always i32
+    return SZ == 32; // Conditions/bools are always i32
   } else if (auto intAttr = llvm::dyn_cast<IntegerAttr>(value)) {
     return intAttr.getType().isInteger(SZ);
   } else if (auto elementsAttr = llvm::dyn_cast<ElementsAttr>(value)) {
@@ -521,7 +521,8 @@
   } else if (auto elementsAttr = llvm::dyn_cast<ElementsAttr>(value)) {
     elementType = elementsAttr.getShapedType().getElementType();
   }
-  if (!elementType) return false;
+  if (!elementType)
+    return false;
   return elementType.getIntOrFloatBitWidth() == SZ;
 }
 
@@ -556,15 +557,15 @@
 
 static FloatType getFloatType(int bitwidth, MLIRContext *context) {
   switch (bitwidth) {
-    case 16:
-      return FloatType::getF16(context);
-    case 32:
-      return FloatType::getF32(context);
-    case 64:
-      return FloatType::getF64(context);
-    default:
-      assert(false && "unhandled floating point type");
-      return {};
+  case 16:
+    return FloatType::getF16(context);
+  case 32:
+    return FloatType::getF32(context);
+  case 64:
+    return FloatType::getF64(context);
+  default:
+    assert(false && "unhandled floating point type");
+    return {};
   }
 }
 
@@ -780,7 +781,8 @@
   llvm::raw_string_ostream os(result);
   bool lastUnderscore = true;
   for (char c : unsafeIdentifier) {
-    if (!llvm::isPrint(c)) continue;
+    if (!llvm::isPrint(c))
+      continue;
     if (llvm::isAlnum(c)) {
       os << llvm::toLower(c);
       lastUnderscore = false;
@@ -1216,7 +1218,8 @@
               }
               p << tupleOperands;
               p << ')';
-              if (i < segmentSize - 1) p << ", ";
+              if (i < segmentSize - 1)
+                p << ", ";
             }
           } else {
             SmallVector<Value> segmentOperands;
@@ -1369,15 +1372,15 @@
   return SuccessorOperands(getDestOperandsMutable());
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // TableGen definitions (intentionally last)
 //===----------------------------------------------------------------------===//
 
-#include "iree/compiler/Dialect/VM/IR/VMOpEncoder.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMOpEncoder.cpp.inc" // IWYU pragma: keep
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/VM/IR/VMOps.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMOps.cpp.inc" // IWYU pragma: keep
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.h b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.h
index 02300cf..b59abae 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.h
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.h
@@ -31,12 +31,12 @@
 /// Generic method for verifying VM fail ops.
 LogicalResult verifyFailOp(Operation *op, Value statusVal);
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/VM/IR/VMOps.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/VM/IR/VMOps.h.inc" // IWYU pragma: export
 
-#endif  // IREE_COMPILER_DIALECT_VM_IR_VMOPS_H_
+#endif // IREE_COMPILER_DIALECT_VM_IR_VMOPS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h b/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h
index f30ca05..ba02726 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h
@@ -16,7 +16,7 @@
 
 template <typename ConcreteType>
 class DebugOnly : public OpTrait::TraitBase<ConcreteType, DebugOnly> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) {
     // TODO(benvanik): verify debug-only.
     return success();
@@ -25,7 +25,7 @@
 
 template <typename ConcreteType>
 class FullBarrier : public OpTrait::TraitBase<ConcreteType, FullBarrier> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) {
     // TODO(benvanik): verify full barrier.
     return success();
@@ -34,7 +34,7 @@
 
 template <typename ConcreteType>
 class PseudoOp : public OpTrait::TraitBase<ConcreteType, PseudoOp> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) {
     // TODO(benvanik): verify pseudo op (not serializable?).
     return success();
@@ -43,7 +43,7 @@
 
 template <typename ConcreteType>
 class AssignmentOp : public OpTrait::TraitBase<ConcreteType, AssignmentOp> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) {
     if (op->getNumOperands() != op->getNumResults()) {
       return op->emitOpError()
@@ -55,7 +55,7 @@
 
 template <typename ConcreteType>
 class ExtF32 : public OpTrait::TraitBase<ConcreteType, ExtF32> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) {
     // TODO(benvanik): verify f32 ext is supported.
     return success();
@@ -64,16 +64,16 @@
 
 template <typename ConcreteType>
 class ExtF64 : public OpTrait::TraitBase<ConcreteType, ExtF64> {
- public:
+public:
   static LogicalResult verifyTrait(Operation *op) {
     // TODO(benvanik): verify f64 ext is supported.
     return success();
   }
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace OpTrait
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace OpTrait
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_IR_VMTRAITS_H_
+#endif // IREE_COMPILER_DIALECT_VM_IR_VMTRAITS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.cpp b/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.cpp
index 1afaec9..2a98f2f 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.cpp
@@ -15,8 +15,8 @@
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/VM/IR/VMAttrs.cpp.inc"  // IWYU pragma: keep
-#include "iree/compiler/Dialect/VM/IR/VMEnums.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMAttrs.cpp.inc" // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMEnums.cpp.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -46,7 +46,7 @@
   Type elementType;
 };
 
-}  // namespace detail
+} // namespace detail
 
 // static
 bool ListType::isCompatible(Type type) {
@@ -101,7 +101,7 @@
   Type objectType;
 };
 
-}  // namespace detail
+} // namespace detail
 
 // static
 bool RefType::isCompatible(Type type) {
@@ -139,7 +139,7 @@
 void VMDialect::registerAttributes() {
   addAttributes<
 #define GET_ATTRDEF_LIST
-#include "iree/compiler/Dialect/VM/IR/VMAttrs.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMAttrs.cpp.inc" // IWYU pragma: keep
       >();
 }
 void VMDialect::registerTypes() {
@@ -156,7 +156,8 @@
   Attribute genAttr;
   OptionalParseResult parseResult =
       generatedAttributeParser(parser, &mnemonic, type, genAttr);
-  if (parseResult.has_value()) return genAttr;
+  if (parseResult.has_value())
+    return genAttr;
   parser.emitError(parser.getNameLoc())
       << "unknown HAL attribute: " << mnemonic;
   return {};
@@ -170,7 +171,7 @@
   });
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.h b/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.h
index 85417ee..9962df8 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.h
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMTypes.h
@@ -18,8 +18,8 @@
 
 // clang-format off: must be included after all LLVM/MLIR headers.
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/VM/IR/VMAttrs.h.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/VM/IR/VMEnums.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VM/IR/VMAttrs.h.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/VM/IR/VMEnums.h.inc" // IWYU pragma: keep
 // clang-format on
 
 namespace mlir {
@@ -30,18 +30,18 @@
 namespace detail {
 struct ListTypeStorage;
 struct RefTypeStorage;
-}  // namespace detail
+} // namespace detail
 
 /// A byte buffer.
 class BufferType : public Type::TypeBase<BufferType, Type, TypeStorage> {
- public:
+public:
   using Base::Base;
 };
 
 /// A list containing an optional element type.
 class ListType
     : public Type::TypeBase<ListType, Type, detail::ListTypeStorage> {
- public:
+public:
   using Base::Base;
 
   /// Returns true if the given type can be wrapped in a list.
@@ -71,14 +71,14 @@
 
 /// An opaque ref object that comes from an external source.
 class OpaqueType : public Type::TypeBase<OpaqueType, Type, TypeStorage> {
- public:
+public:
   using Base::Base;
 };
 
 /// A ref<T> containing a reference to a ref-object-compatible type.
 /// This models an iree_vm_ref_t intrusive reference counted object.
 class RefType : public Type::TypeBase<RefType, Type, detail::RefTypeStorage> {
- public:
+public:
   using Base::Base;
 
   /// Returns true if the given type can be wrapped in a ref ptr.
@@ -106,9 +106,9 @@
   Type getObjectType();
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_IR_VMTYPES_H_
+#endif // IREE_COMPILER_DIALECT_VM_IR_VMTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.cpp
index 8f2d630..b147ad2 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.cpp
@@ -134,12 +134,12 @@
 using llvm::support::ulittle16_t;
 using llvm::support::ulittle32_t;
 using llvm::support::ulittle64_t;
-}  // namespace
+} // namespace
 
 LLVM_PACKED_START
 
 struct ZIPLocalFileHeader {
-  ulittle32_t signature;  // 0x04034B50
+  ulittle32_t signature; // 0x04034B50
   ulittle16_t versionToExtract;
   ulittle16_t generalPurposeFlag;
   ulittle16_t compressionMethod;
@@ -156,7 +156,7 @@
 static_assert(sizeof(ZIPLocalFileHeader) == 30, "bad packing");
 
 struct ZIP64DataDescriptor {
-  ulittle32_t signature;  // 0x08074B50
+  ulittle32_t signature; // 0x08074B50
   ulittle32_t crc32;
   ulittle64_t compressedSize;
   ulittle64_t uncompressedSize;
@@ -177,7 +177,7 @@
 static_assert(sizeof(ZIP64LocalExtraField) == 20, "bad packing");
 
 struct ZIPCentralDirectoryRecord {
-  ulittle32_t signature;  // 0x02014B50
+  ulittle32_t signature; // 0x02014B50
   ulittle16_t versionMadeBy;
   ulittle16_t versionToExtract;
   ulittle16_t generalPurposeFlags;
@@ -209,7 +209,7 @@
 static_assert(sizeof(ZIP64CentralExtraField) == 28, "bad packing");
 
 struct ZIPEndOfCentralDirectoryRecord {
-  ulittle32_t signature;  // 0x06054B50
+  ulittle32_t signature; // 0x06054B50
   ulittle16_t diskNumber;
   ulittle16_t startDiskNumber;
   ulittle16_t entriesOnDisk;
@@ -222,7 +222,7 @@
 static_assert(sizeof(ZIPEndOfCentralDirectoryRecord) == 22, "bad packing");
 
 struct ZIPEndOfCentralDirectoryRecord64 {
-  ulittle32_t signature;  // 0x06064B50
+  ulittle32_t signature; // 0x06064B50
   ulittle64_t sizeOfEOCD64Minus12;
   ulittle16_t versionMadeBy;
   ulittle16_t versionRequired;
@@ -237,7 +237,7 @@
 static_assert(sizeof(ZIPEndOfCentralDirectoryRecord64) == 56, "bad packing");
 
 struct ZIPEndOfCentralDirectoryLocator64 {
-  ulittle32_t signature;  // 0x07064B50
+  ulittle32_t signature; // 0x07064B50
   ulittle32_t recordDiskNumber;
   ulittle64_t recordOffset;
   ulittle32_t diskCount;
@@ -287,15 +287,15 @@
   // Append local file header.
   ZIPLocalFileHeader fileHeader;
   fileHeader.signature = 0x04034B50u;
-  fileHeader.versionToExtract = 0x2Du;  // 4.5 (for zip64)
+  fileHeader.versionToExtract = 0x2Du; // 4.5 (for zip64)
   fileHeader.generalPurposeFlag = 0;
-  fileHeader.compressionMethod = 0;  // COMP_STORED
+  fileHeader.compressionMethod = 0; // COMP_STORED
   // https://docs.microsoft.com/en-us/windows/win32/api/oleauto/nf-oleauto-dosdatetimetovarianttime
   fileHeader.lastModifiedTime = 0u;
-  fileHeader.lastModifiedDate = 0x21;  // 1980-01-01
+  fileHeader.lastModifiedDate = 0x21; // 1980-01-01
   fileHeader.crc32 = crc32;
-  fileHeader.compressedSize = 0xFFFFFFFFu;    // in extra field
-  fileHeader.uncompressedSize = 0xFFFFFFFFu;  // in extra field
+  fileHeader.compressedSize = 0xFFFFFFFFu;   // in extra field
+  fileHeader.uncompressedSize = 0xFFFFFFFFu; // in extra field
   fileHeader.fileNameLength = static_cast<uint16_t>(fileName.size());
   fileHeader.extraFieldLength = sizeof(ZIP64LocalExtraField) +
                                 sizeof(ZIPExtraFieldHeader) + interiorPadding;
@@ -311,7 +311,7 @@
   // poorly written that they only ever look at the last field present for
   // getting the size. Have I mentioned how terrible of a format ZIP is?
   ZIPExtraFieldHeader paddingExtra;
-  paddingExtra.id = 0xFECAu;  // 'CAFE'; in the user prefix range
+  paddingExtra.id = 0xFECAu; // 'CAFE'; in the user prefix range
   paddingExtra.size = static_cast<uint16_t>(interiorPadding);
   os.write(reinterpret_cast<char *>(&paddingExtra), sizeof(paddingExtra));
   os.write_zeros(interiorPadding);
@@ -338,12 +338,12 @@
 
 // Computes an Adler32 CRC and sends the data into the void.
 class null_crc32_ostream : public llvm::raw_ostream {
- public:
+public:
   explicit null_crc32_ostream(uint32_t &crc32) : crc32(crc32) {
     SetUnbuffered();
   }
 
- private:
+private:
   void write_impl(const char *Ptr, size_t Size) override {
     crc32 = llvm::crc32(
         crc32, ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), Size));
@@ -357,10 +357,11 @@
 // appendZIPFile implementation used when |os| is a stream without random
 // access (like stdout). This requires us to serialize the file twice in order
 // to compute the total length and CRC32.
-static std::optional<ZIPFileRef> appendZIPFileToStream(
-    std::string fileName, uint64_t filePadding, uint64_t fileLength,
-    std::function<LogicalResult(llvm::raw_ostream &os)> write,
-    llvm::raw_ostream &os) {
+static std::optional<ZIPFileRef>
+appendZIPFileToStream(std::string fileName, uint64_t filePadding,
+                      uint64_t fileLength,
+                      std::function<LogicalResult(llvm::raw_ostream &os)> write,
+                      llvm::raw_ostream &os) {
   // Compute the Adler32 CRC as required in the local file header (and later the
   // central directory). Since we only have an unseekable raw_ostream we can't
   // go patch the header after we stream out the file and instead have to stream
@@ -396,13 +397,13 @@
 
 // Computes an Adler32 CRC and passes the data along to an underlying ostream.
 class crc32_ostream : public llvm::raw_ostream {
- public:
+public:
   explicit crc32_ostream(llvm::raw_ostream &impl, uint32_t &crc32)
       : impl(impl), crc32(crc32) {
     SetUnbuffered();
   }
 
- private:
+private:
   void write_impl(const char *Ptr, size_t Size) override {
     crc32 = llvm::crc32(
         crc32, ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), Size));
@@ -416,10 +417,11 @@
 // appendZIPFile implementation used when |os| is a file with random access.
 // This allows us to write the header and backpatch the CRC computed while while
 // serializing the file contents.
-static std::optional<ZIPFileRef> appendZIPFileToFD(
-    std::string fileName, uint64_t filePadding, uint64_t fileLength,
-    std::function<LogicalResult(llvm::raw_ostream &os)> write,
-    llvm::raw_fd_ostream &os) {
+static std::optional<ZIPFileRef>
+appendZIPFileToFD(std::string fileName, uint64_t filePadding,
+                  uint64_t fileLength,
+                  std::function<LogicalResult(llvm::raw_ostream &os)> write,
+                  llvm::raw_fd_ostream &os) {
   // Write the ZIP header and padding up to the start of the file.
   // We write a dummy CRC we'll patch up after we compute it while serializing
   // the file contents.
@@ -450,10 +452,10 @@
 // Appends a file wrapped in a ZIP header and data descriptor.
 // |write| is used to stream the file contents to |os| while also capturing its
 // CRC as required for the central directory.
-static std::optional<ZIPFileRef> appendZIPFile(
-    std::string fileName, uint64_t filePadding, uint64_t fileLength,
-    std::function<LogicalResult(llvm::raw_ostream &os)> write,
-    llvm::raw_ostream &os) {
+static std::optional<ZIPFileRef>
+appendZIPFile(std::string fileName, uint64_t filePadding, uint64_t fileLength,
+              std::function<LogicalResult(llvm::raw_ostream &os)> write,
+              llvm::raw_ostream &os) {
   if (os.get_kind() == llvm::raw_ostream::OStreamKind::OK_FDStream) {
     auto &osFD = static_cast<llvm::raw_fd_ostream &>(os);
     if (osFD.supportsSeeking()) {
@@ -483,15 +485,15 @@
     ZIPCentralDirectoryRecord cdr;
     cdr.signature = 0x02014B50u;
     cdr.versionMadeBy = 0x031E;
-    cdr.versionToExtract = 0x2Du;  // 4.5 (for zip64)
+    cdr.versionToExtract = 0x2Du; // 4.5 (for zip64)
     cdr.generalPurposeFlags = 0;
-    cdr.compressionMethod = 0;  // COMP_STORED
+    cdr.compressionMethod = 0; // COMP_STORED
     // https://docs.microsoft.com/en-us/windows/win32/api/oleauto/nf-oleauto-dosdatetimetovarianttime
     cdr.lastModifiedTime = 0u;
-    cdr.lastModifiedDate = 0x21;  // 1980-01-01
+    cdr.lastModifiedDate = 0x21; // 1980-01-01
     cdr.crc32 = fileRef.crc32;
-    cdr.compressedSize = 0xFFFFFFFFu;    // in extra field
-    cdr.uncompressedSize = 0xFFFFFFFFu;  // in extra field
+    cdr.compressedSize = 0xFFFFFFFFu;   // in extra field
+    cdr.uncompressedSize = 0xFFFFFFFFu; // in extra field
     cdr.fileNameLength = static_cast<uint16_t>(fileRef.fileName.size());
     cdr.extraFieldLength =
         static_cast<uint16_t>(sizeof(ZIP64CentralExtraField));
@@ -523,7 +525,7 @@
   endOfCDR64.signature = 0x06064B50u;
   endOfCDR64.sizeOfEOCD64Minus12 = sizeof(endOfCDR64) - 12;
   endOfCDR64.versionMadeBy = 0x002Du;
-  endOfCDR64.versionRequired = 0x002Du;  // 4.5 (for zip64)
+  endOfCDR64.versionRequired = 0x002Du; // 4.5 (for zip64)
   endOfCDR64.diskNumber = 0;
   endOfCDR64.startDiskNumber = 0;
   endOfCDR64.entriesOnDisk = static_cast<uint64_t>(fileRefs.size());
@@ -671,7 +673,8 @@
           return success();
         },
         os);
-    if (!zipFile.has_value()) return failure();
+    if (!zipFile.has_value())
+      return failure();
     fileRefs.push_back(*zipFile);
   }
 
@@ -682,7 +685,7 @@
   return success();
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h
index b445ae9..cc39351 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h
@@ -27,7 +27,7 @@
 //  - FlatBuffer is written
 //  - embedded files are flushed
 class ArchiveWriter {
- public:
+public:
   struct File {
     // Name of the file when exposed to users; informational only.
     std::string fileName;
@@ -48,9 +48,9 @@
   // Declares an embedded file in the archive and reserves a location for it.
   // The relative offset returned will be stable despite the variable-length
   // FlatBuffer header as it is relative to the header and not the archive 0.
-  virtual File declareFile(
-      std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
-      std::function<LogicalResult(llvm::raw_ostream &os)> write) = 0;
+  virtual File
+  declareFile(std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+              std::function<LogicalResult(llvm::raw_ostream &os)> write) = 0;
 
   // Writes an in-memory FlatBuffer to the archive as the header and flushes
   // all archive contents.
@@ -63,7 +63,7 @@
 // Archive structure:
 //   {json text}
 class JSONArchiveWriter : public ArchiveWriter {
- public:
+public:
   explicit JSONArchiveWriter(Location loc, llvm::raw_ostream &os);
   ~JSONArchiveWriter() override;
   bool supportsFiles() override { return false; }
@@ -72,7 +72,7 @@
       std::function<LogicalResult(llvm::raw_ostream &os)> write) override;
   LogicalResult flush(FlatbufferBuilder &fbb) override;
 
- private:
+private:
   Location loc;
   llvm::raw_ostream &os;
 };
@@ -90,7 +90,7 @@
 //   [declared file 1]
 //   ...
 class FlatArchiveWriter : public ArchiveWriter {
- public:
+public:
   explicit FlatArchiveWriter(Location loc, llvm::raw_ostream &os);
   ~FlatArchiveWriter() override;
   bool supportsFiles() override { return true; }
@@ -99,10 +99,10 @@
       std::function<LogicalResult(llvm::raw_ostream &os)> write) override;
   LogicalResult flush(FlatbufferBuilder &fbb) override;
 
- private:
+private:
   Location loc;
   llvm::raw_ostream &os;
-  uint64_t tailFileOffset = 0;  // unpadded
+  uint64_t tailFileOffset = 0; // unpadded
   SmallVector<File> files;
 };
 
@@ -125,7 +125,7 @@
 //  - [zip central directory]
 //    [zip locators]
 class ZIPArchiveWriter : public ArchiveWriter {
- public:
+public:
   explicit ZIPArchiveWriter(Location loc, llvm::raw_ostream &os);
   ~ZIPArchiveWriter() override;
   bool supportsFiles() override { return true; }
@@ -134,16 +134,16 @@
       std::function<LogicalResult(llvm::raw_ostream &os)> write) override;
   LogicalResult flush(FlatbufferBuilder &fbb) override;
 
- private:
+private:
   Location loc;
   llvm::raw_ostream &os;
-  uint64_t tailFileOffset = 0;  // unpadded
+  uint64_t tailFileOffset = 0; // unpadded
   SmallVector<File> files;
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_ARCHIVE_WRITER_H_
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_ARCHIVE_WRITER_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp
index 20930b3..2ffe492 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp
@@ -25,7 +25,7 @@
 // representation. Always generate this from source in tooling and never check
 // in any emitted files!
 class V0BytecodeEncoder : public BytecodeEncoder {
- public:
+public:
   V0BytecodeEncoder(llvm::DenseMap<Type, int> *typeTable,
                     RegisterAllocation *registerAllocation)
       : typeTable_(typeTable), registerAllocation_(registerAllocation) {}
@@ -107,39 +107,39 @@
       uint64_t limitedValue =
           integerAttr.getValue().extractBitsAsZExtValue(bitWidth, 0);
       switch (bitWidth) {
-        case 8:
-          return writeUint8(static_cast<uint8_t>(limitedValue));
-        case 16:
-          return writeUint16(static_cast<uint16_t>(limitedValue));
-        case 32:
-          return writeUint32(static_cast<uint32_t>(limitedValue));
-        case 64:
-          return writeUint64(static_cast<uint64_t>(limitedValue));
-        default:
-          return currentOp_->emitOpError()
-                 << "attribute of bitwidth " << bitWidth << " not supported";
+      case 8:
+        return writeUint8(static_cast<uint8_t>(limitedValue));
+      case 16:
+        return writeUint16(static_cast<uint16_t>(limitedValue));
+      case 32:
+        return writeUint32(static_cast<uint32_t>(limitedValue));
+      case 64:
+        return writeUint64(static_cast<uint64_t>(limitedValue));
+      default:
+        return currentOp_->emitOpError()
+               << "attribute of bitwidth " << bitWidth << " not supported";
       }
     } else if (auto floatAttr = llvm::dyn_cast<FloatAttr>(attr)) {
       switch (bitWidth) {
-        case 32: {
-          union {
-            float f32;
-            uint32_t u32;
-          } value;
-          value.f32 = floatAttr.getValue().convertToFloat();
-          return writeUint32(value.u32);
-        }
-        case 64: {
-          union {
-            double f64;
-            uint64_t u64;
-          } value;
-          value.f64 = floatAttr.getValue().convertToDouble();
-          return writeUint64(value.u64);
-        }
-        default:
-          return currentOp_->emitOpError()
-                 << "attribute of bitwidth " << bitWidth << " not supported";
+      case 32: {
+        union {
+          float f32;
+          uint32_t u32;
+        } value;
+        value.f32 = floatAttr.getValue().convertToFloat();
+        return writeUint32(value.u32);
+      }
+      case 64: {
+        union {
+          double f64;
+          uint64_t u64;
+        } value;
+        value.f64 = floatAttr.getValue().convertToDouble();
+        return writeUint64(value.u64);
+      }
+      default:
+        return currentOp_->emitOpError()
+               << "attribute of bitwidth " << bitWidth << " not supported";
       }
     } else {
       return currentOp_->emitOpError()
@@ -270,13 +270,15 @@
   LogicalResult ensureAlignment(size_t alignment) {
     size_t paddedSize = (bytecode_.size() + (alignment - 1)) & ~(alignment - 1);
     size_t padding = paddedSize - bytecode_.size();
-    if (padding == 0) return success();
+    if (padding == 0)
+      return success();
     static const uint8_t kZeros[32] = {0};
-    if (padding > sizeof(kZeros)) return failure();
+    if (padding > sizeof(kZeros))
+      return failure();
     return writeBytes(kZeros, padding);
   }
 
- private:
+private:
   // TODO(benvanik): replace this with something not using an ever-expanding
   // vector. I'm sure LLVM has something.
 
@@ -340,7 +342,7 @@
   std::vector<std::pair<Block *, size_t>> blockOffsetFixups_;
 };
 
-}  // namespace
+} // namespace
 
 // static
 std::optional<EncodedBytecodeFunction> BytecodeEncoder::encodeFunction(
@@ -425,7 +427,7 @@
   return result;
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h
index 5ac4157..4fec085 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h
@@ -35,7 +35,7 @@
 
 // Abstract encoder used for function bytecode encoding.
 class BytecodeEncoder : public VMFuncEncoder {
- public:
+public:
   // Matches IREE_VM_BYTECODE_VERSION_MAJOR.
   static constexpr uint32_t kVersionMajor = 15;
   // Matches IREE_VM_BYTECODE_VERSION_MINOR.
@@ -44,17 +44,17 @@
 
   // Encodes a vm.func to bytecode and returns the result.
   // Returns None on failure.
-  static std::optional<EncodedBytecodeFunction> encodeFunction(
-      IREE::VM::FuncOp funcOp, llvm::DenseMap<Type, int> &typeTable,
-      SymbolTable &symbolTable, DebugDatabaseBuilder &debugDatabase);
+  static std::optional<EncodedBytecodeFunction>
+  encodeFunction(IREE::VM::FuncOp funcOp, llvm::DenseMap<Type, int> &typeTable,
+                 SymbolTable &symbolTable, DebugDatabaseBuilder &debugDatabase);
 
   BytecodeEncoder() = default;
   ~BytecodeEncoder() = default;
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_BYTECODEENCODER_H_
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_BYTECODEENCODER_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
index 745fd2d..544bf11 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
@@ -75,7 +75,7 @@
   std::optional<ArchiveWriter::File> archiveFile;
 };
 
-}  // namespace
+} // namespace
 
 // Gets a file extension based on the given |mimeType| that can be used to help
 // applications guess the file type of embedded data.
@@ -96,9 +96,9 @@
 // Serializes a constant attribute to the FlatBuffer as a binary blob.
 // Returns the size in bytes of the serialized value and the FlatBuffers offset
 // to the uint8 vec containing the data.
-static flatbuffers_uint8_vec_ref_t serializeEmbeddedData(
-    Location loc, Attribute valueAttr, uint64_t alignment, uint64_t totalSize,
-    FlatbufferBuilder &fbb) {
+static flatbuffers_uint8_vec_ref_t
+serializeEmbeddedData(Location loc, Attribute valueAttr, uint64_t alignment,
+                      uint64_t totalSize, FlatbufferBuilder &fbb) {
   flatcc_builder_start_vector(fbb, 1, alignment, FLATBUFFERS_COUNT_MAX(1));
 
   if (totalSize > SIZE_MAX) {
@@ -130,9 +130,9 @@
 // Canonicalizes the module to its final form prior to emission.
 // This verifies that we only have ops we can serialize and performs any of the
 // required transformations (such as debug op stripping).
-static LogicalResult canonicalizeModule(
-    IREE::VM::BytecodeTargetOptions bytecodeOptions,
-    IREE::VM::ModuleOp moduleOp) {
+static LogicalResult
+canonicalizeModule(IREE::VM::BytecodeTargetOptions bytecodeOptions,
+                   IREE::VM::ModuleOp moduleOp) {
   RewritePatternSet patterns(moduleOp.getContext());
   ConversionTarget target(*moduleOp.getContext());
   target.addLegalDialect<IREE::VM::VMDialect>();
@@ -217,23 +217,27 @@
 }
 
 // Returns a serialized function signature.
-static iree_vm_FunctionSignatureDef_ref_t makeImportFunctionSignatureDef(
-    IREE::VM::ImportOp importOp, llvm::DenseMap<Type, int> &typeTable,
-    FlatbufferBuilder &fbb) {
+static iree_vm_FunctionSignatureDef_ref_t
+makeImportFunctionSignatureDef(IREE::VM::ImportOp importOp,
+                               llvm::DenseMap<Type, int> &typeTable,
+                               FlatbufferBuilder &fbb) {
   // Generate the signature calling convention string based on types.
   auto cconv = makeImportCallingConventionString(importOp);
-  if (!cconv.has_value()) return {};
+  if (!cconv.has_value())
+    return {};
   return createFunctionSignatureDef(importOp.getFunctionType(), typeTable,
                                     cconv.value(), /*attrsRef=*/0, fbb);
 }
 
 // Returns a serialized function signature.
-static iree_vm_FunctionSignatureDef_ref_t makeFunctionSignatureDef(
-    IREE::VM::FuncOp funcOp, llvm::DenseMap<Type, int> &typeTable,
-    FlatbufferBuilder &fbb) {
+static iree_vm_FunctionSignatureDef_ref_t
+makeFunctionSignatureDef(IREE::VM::FuncOp funcOp,
+                         llvm::DenseMap<Type, int> &typeTable,
+                         FlatbufferBuilder &fbb) {
   // Generate the signature calling convention string based on types.
   auto cconv = makeCallingConventionString(funcOp);
-  if (!cconv.has_value()) return {};
+  if (!cconv.has_value())
+    return {};
 
   // Reflection attributes.
   iree_vm_AttrDef_vec_ref_t attrsRef = 0;
@@ -242,7 +246,8 @@
     for (auto attr : attrs) {
       auto key = attr.getName().strref();
       auto value = llvm::dyn_cast<StringAttr>(attr.getValue());
-      if (!value || key.empty()) continue;
+      if (!value || key.empty())
+        continue;
       // NOTE: if we actually want to keep these we should dedupe them (as the
       // keys and likely several of the values are shared across all functions).
       auto valueRef = fbb.createString(value.getValue());
@@ -258,12 +263,14 @@
 }
 
 // Returns a serialized function signature.
-static iree_vm_FunctionSignatureDef_ref_t makeInternalFunctionSignatureDef(
-    IREE::VM::FuncOp funcOp, llvm::DenseMap<Type, int> &typeTable,
-    FlatbufferBuilder &fbb) {
+static iree_vm_FunctionSignatureDef_ref_t
+makeInternalFunctionSignatureDef(IREE::VM::FuncOp funcOp,
+                                 llvm::DenseMap<Type, int> &typeTable,
+                                 FlatbufferBuilder &fbb) {
   // Generate the signature calling convention string based on types.
   auto cconv = makeCallingConventionString(funcOp);
-  if (!cconv.has_value()) return {};
+  if (!cconv.has_value())
+    return {};
   return createFunctionSignatureDef(funcOp.getFunctionType(), typeTable,
                                     cconv.value(), /*attrsRef=*/0, fbb);
 }
@@ -292,11 +299,12 @@
 // has been packed into the top-level table. This results in a messier function
 // here during serialization but a much more trivial (and cache-friendly)
 // representation at runtime.
-static LogicalResult buildFlatBufferModule(
-    IREE::VM::TargetOptions vmOptions,
-    IREE::VM::BytecodeTargetOptions bytecodeOptions,
-    IREE::VM::ModuleOp moduleOp, MutableArrayRef<RodataRef> rodataRefs,
-    FlatbufferBuilder &fbb) {
+static LogicalResult
+buildFlatBufferModule(IREE::VM::TargetOptions vmOptions,
+                      IREE::VM::BytecodeTargetOptions bytecodeOptions,
+                      IREE::VM::ModuleOp moduleOp,
+                      MutableArrayRef<RodataRef> rodataRefs,
+                      FlatbufferBuilder &fbb) {
   // Start the buffer so that we can begin recording data prior to the root
   // table (which we do at the very end). This does not change the layout of the
   // file and is only used to prime the flatcc builder.
@@ -411,7 +419,8 @@
       flatbuffers_uint8_vec_ref_t embeddedRef = serializeEmbeddedData(
           rodataRef.rodataOp.getLoc(), rodataRef.rodataOp.getValue(),
           rodataRef.alignment, rodataRef.totalSize, fbb);
-      if (!embeddedRef) return failure();
+      if (!embeddedRef)
+        return failure();
       iree_vm_RodataSegmentDef_start(fbb);
       iree_vm_RodataSegmentDef_embedded_data_add(fbb, embeddedRef);
       rodataSegmentRefs.push_back(iree_vm_RodataSegmentDef_end(fbb));
@@ -518,8 +527,10 @@
   // so that we can multi-version. For now the moduleRequirements will be the OR
   // of all functions.
   iree_vm_FeatureBits_enum_t allowedFeatures = 0;
-  if (vmOptions.f32Extension) allowedFeatures |= iree_vm_FeatureBits_EXT_F32;
-  if (vmOptions.f64Extension) allowedFeatures |= iree_vm_FeatureBits_EXT_F64;
+  if (vmOptions.f32Extension)
+    allowedFeatures |= iree_vm_FeatureBits_EXT_F32;
+  if (vmOptions.f64Extension)
+    allowedFeatures |= iree_vm_FeatureBits_EXT_F64;
   if ((moduleRequirements & allowedFeatures) != moduleRequirements) {
     return moduleOp.emitError()
            << "module uses features not allowed by flags (requires "
@@ -550,10 +561,11 @@
   return success();
 }
 
-LogicalResult translateModuleToBytecode(
-    IREE::VM::ModuleOp moduleOp, IREE::VM::TargetOptions vmOptions,
-    IREE::VM::BytecodeTargetOptions bytecodeOptions,
-    llvm::raw_ostream &output) {
+LogicalResult
+translateModuleToBytecode(IREE::VM::ModuleOp moduleOp,
+                          IREE::VM::TargetOptions vmOptions,
+                          IREE::VM::BytecodeTargetOptions bytecodeOptions,
+                          llvm::raw_ostream &output) {
   moduleOp.getContext()->getOrLoadDialect<IREE::Util::UtilDialect>();
 
   if (failed(canonicalizeModule(bytecodeOptions, moduleOp))) {
@@ -676,10 +688,11 @@
   return success();
 }
 
-LogicalResult translateModuleToBytecode(
-    mlir::ModuleOp outerModuleOp, IREE::VM::TargetOptions vmOptions,
-    IREE::VM::BytecodeTargetOptions bytecodeOptions,
-    llvm::raw_ostream &output) {
+LogicalResult
+translateModuleToBytecode(mlir::ModuleOp outerModuleOp,
+                          IREE::VM::TargetOptions vmOptions,
+                          IREE::VM::BytecodeTargetOptions bytecodeOptions,
+                          llvm::raw_ostream &output) {
   auto moduleOps = outerModuleOp.getOps<IREE::VM::ModuleOp>();
   if (moduleOps.empty()) {
     return outerModuleOp.emitError()
@@ -731,7 +744,7 @@
           "(only applies to binary targets)"));
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h
index aeed3c1..35a4dc2 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h
@@ -73,9 +73,9 @@
     mlir::ModuleOp outerModuleOp, IREE::VM::TargetOptions vmOptions,
     IREE::VM::BytecodeTargetOptions bytecodeOptions, llvm::raw_ostream &output);
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_BYTECODEMODULETARGET_H_
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_BYTECODEMODULETARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.cpp
index 9acc6f2..e7a2db4 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.cpp
@@ -37,7 +37,8 @@
   // Inserts a string into the location table string subtable if needed.
   flatbuffers_string_ref_t insert(StringRef value) {
     auto it = strings.find(value);
-    if (it != strings.end()) return it->second;
+    if (it != strings.end())
+      return it->second;
     auto stringRef = fbb.createString(value);
     strings[value] = stringRef;
     return stringRef;
@@ -47,7 +48,8 @@
   // Returns the ordinal of the location in the table.
   int32_t insert(Location baseLoc) {
     auto it = map.find(baseLoc);
-    if (it != map.end()) return it->second;
+    if (it != map.end())
+      return it->second;
     auto locationRef =
         llvm::TypeSwitch<Location, iree_vm_LocationTypeDef_union_ref_t>(baseLoc)
             .Case([&](CallSiteLoc loc) {
@@ -102,9 +104,10 @@
   }
 };
 
-iree_vm_DebugDatabaseDef_ref_t DebugDatabaseBuilder::build(
-    FlatbufferBuilder &fbb) {
-  if (functionSourceMaps.empty()) return 0;
+iree_vm_DebugDatabaseDef_ref_t
+DebugDatabaseBuilder::build(FlatbufferBuilder &fbb) {
+  if (functionSourceMaps.empty())
+    return 0;
 
   LocationTable locationTable(fbb);
 
@@ -137,7 +140,7 @@
   return iree_vm_DebugDatabaseDef_end(fbb);
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.h b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.h
index 55bbc04..47d6363 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/DebugDatabaseBuilder.h
@@ -29,7 +29,7 @@
 };
 
 class DebugDatabaseBuilder {
- public:
+public:
   // Appends a function source map entry to the debug database.
   void addFunctionSourceMap(IREE::VM::FuncOp funcOp,
                             FunctionSourceMap sourceMap);
@@ -37,14 +37,14 @@
   // Finishes construction of the debug database and emits it to the FlatBuffer.
   iree_vm_DebugDatabaseDef_ref_t build(FlatbufferBuilder &fbb);
 
- private:
+private:
   // Function source maps ordered by function ordinal.
   SmallVector<FunctionSourceMap> functionSourceMaps;
 };
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_DEBUGDATABASEBUILDER_H_
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_DEBUGDATABASEBUILDER_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp
index 35879f8..c7750a3 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp
@@ -25,7 +25,7 @@
       });
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
index cd9b31e..2645d33 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
@@ -38,11 +38,12 @@
          << std::string(77, '=') << "\n";
 }
 
-static LogicalResult printFunctionDeclaration(
-    mlir::func::FuncOp funcOp, llvm::raw_ostream &output,
-    mlir::emitc::CppEmitter &emitter) {
+static LogicalResult
+printFunctionDeclaration(mlir::func::FuncOp funcOp, llvm::raw_ostream &output,
+                         mlir::emitc::CppEmitter &emitter) {
   Operation *op = funcOp.getOperation();
-  if (op->hasAttr("emitc.static")) output << "static ";
+  if (op->hasAttr("emitc.static"))
+    output << "static ";
 
   if (failed(emitter.emitTypes(funcOp.getLoc(),
                                funcOp.getFunctionType().getResults())))
@@ -53,9 +54,11 @@
 
   bool error = false;
   llvm::interleaveComma(funcOp.getArguments(), output, [&](BlockArgument arg) {
-    if (failed(emitter.emitType(funcOp.getLoc(), arg.getType()))) error = true;
+    if (failed(emitter.emitType(funcOp.getLoc(), arg.getType())))
+      error = true;
   });
-  if (error) return failure();
+  if (error)
+    return failure();
   output << ");\n";
 
   return success();
@@ -81,7 +84,8 @@
         rodataOp.getAlignment()
             ? static_cast<size_t>(rodataOp.getAlignment().value())
             : 0;
-    if (alignment == 0) alignment = kDefaultRodataAlignment;
+    if (alignment == 0)
+      alignment = kDefaultRodataAlignment;
 
     std::string bufferName =
         moduleOp.getName().str() + "_" + rodataOp.getName().str();
@@ -297,8 +301,9 @@
 }
 
 /// Adapted from BytecodeModuleTarget and extended by C specific passes
-static LogicalResult canonicalizeModule(
-    IREE::VM::ModuleOp moduleOp, IREE::VM::CTargetOptions targetOptions) {
+static LogicalResult
+canonicalizeModule(IREE::VM::ModuleOp moduleOp,
+                   IREE::VM::CTargetOptions targetOptions) {
   RewritePatternSet patterns(moduleOp.getContext());
   ConversionTarget target(*moduleOp.getContext());
   target.addLegalDialect<IREE::VM::VMDialect>();
@@ -416,7 +421,8 @@
   mlir::emitc::CppEmitter emitter(output, /*declareVariablesAtTop=*/true);
   for (auto funcOp : moduleOp.getOps<mlir::func::FuncOp>()) {
     Operation *op = funcOp.getOperation();
-    if (!op->hasAttr("vm.module.constructor")) continue;
+    if (!op->hasAttr("vm.module.constructor"))
+      continue;
     if (failed(printFunctionDeclaration(funcOp, output, emitter)))
       return failure();
   }
@@ -457,7 +463,8 @@
 
   for (auto funcOp : moduleOp.getOps<mlir::func::FuncOp>()) {
     Operation *op = funcOp.getOperation();
-    if (op->hasAttr("vm.module.constructor")) continue;
+    if (op->hasAttr("vm.module.constructor"))
+      continue;
     if (failed(printFunctionDeclaration(funcOp, output, emitter)))
       return failure();
   }
@@ -469,9 +476,12 @@
     // TODO(simon-camp): Clean up. We generate calls to a macro that defines a
     // struct. As we declare all variables at the start of the function, the
     // macro call cannot be inlined into the function.
-    if (!isa<mlir::func::FuncOp, emitc::CallOp>(op)) continue;
-    if (op.hasAttr("vm.emit_at_end")) continue;
-    if (op.hasAttr("emitc.static")) output << "static ";
+    if (!isa<mlir::func::FuncOp, emitc::CallOp>(op))
+      continue;
+    if (op.hasAttr("vm.emit_at_end"))
+      continue;
+    if (op.hasAttr("emitc.static"))
+      output << "static ";
     if (failed(emitter.emitOperation(op,
                                      /*trailingSemicolon=*/false)))
       return failure();
@@ -487,8 +497,10 @@
   // Emit code for functions marked with `vm.emit_at_end`.
   for (auto funcOp : moduleOp.getOps<mlir::func::FuncOp>()) {
     Operation *op = funcOp.getOperation();
-    if (!op->hasAttr("vm.emit_at_end")) continue;
-    if (op->hasAttr("emitc.static")) output << "static ";
+    if (!op->hasAttr("vm.emit_at_end"))
+      continue;
+    if (op->hasAttr("emitc.static"))
+      output << "static ";
     if (failed(emitter.emitOperation(*funcOp.getOperation(),
                                      /*trailingSemicolon=*/false)))
       return failure();
@@ -509,7 +521,7 @@
   return translateModuleToC(*moduleOps.begin(), targetOptions, output);
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.h b/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.h
index 5eafef9..059b111 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.h
@@ -47,9 +47,9 @@
                                  CTargetOptions targetOptions,
                                  llvm::raw_ostream &output);
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TARGET_C_CMODULETARGET_H_
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_C_CMODULETARGET_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.cpp
index 65086c6..fcec91f 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.cpp
@@ -44,7 +44,7 @@
   return targetOptions;
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.h b/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.h
index 6f54cc4..d136932 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationFlags.h
@@ -18,9 +18,9 @@
 // --iree-vm-c-* flags.
 CTargetOptions getCTargetOptionsFromFlags();
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TARGET_C_TRANSLATIONFLAGS_H_
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_C_TRANSLATIONFLAGS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationRegistration.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationRegistration.cpp
index 463fd92..6b4abe0 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationRegistration.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/C/TranslationRegistration.cpp
@@ -22,7 +22,7 @@
       });
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/init_targets.h b/compiler/src/iree/compiler/Dialect/VM/Target/init_targets.h
index 795414d..289196d 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/init_targets.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/init_targets.h
@@ -16,9 +16,9 @@
 void registerToVMBytecodeTranslation();
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
 void registerToCTranslation();
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
-}  // namespace VM
-}  // namespace IREE
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
+} // namespace VM
+} // namespace IREE
 
 // This function should be called before creating any MLIRContext if one
 // expects all the possible target backends to be available. Custom tools can
@@ -30,14 +30,14 @@
 
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
     IREE::VM::registerToCTranslation();
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
 
     return true;
   }();
   (void)init_once;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TARGET_INIT_TARGETS_H_
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_INIT_TARGETS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpEncoderGen.cpp b/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpEncoderGen.cpp
index e16310c..94456c9 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpEncoderGen.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpEncoderGen.cpp
@@ -36,9 +36,11 @@
 
   auto defs = recordKeeper.getAllDerivedDefinitions("VM_Op");
   for (const auto *def : defs) {
-    if (def->isValueUnset("encoding")) continue;
+    if (def->isValueUnset("encoding"))
+      continue;
     auto encodingExprs = def->getValueAsListOfDefs("encoding");
-    if (encodingExprs.empty()) continue;
+    if (encodingExprs.empty())
+      continue;
 
     Operator op(def);
     tblgen::NamespaceEmitter emitter(os, op.getDialect());
@@ -90,13 +92,13 @@
   return false;
 }
 
-static GenRegistration genVMOpEncoderDefs(
-    "gen-iree-vm-op-encoder-defs",
-    "Generates IREE VM operation encoder definitions (.cpp)",
-    [](const llvm::RecordKeeper &records, raw_ostream &os) {
-      return emitEncodeFnDefs(records, os);
-    });
+static GenRegistration
+    genVMOpEncoderDefs("gen-iree-vm-op-encoder-defs",
+                       "Generates IREE VM operation encoder definitions (.cpp)",
+                       [](const llvm::RecordKeeper &records, raw_ostream &os) {
+                         return emitEncodeFnDefs(records, os);
+                       });
 
-}  // namespace
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp b/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp
index 3def421..aaee3b4 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp
@@ -80,6 +80,6 @@
       return emitOpTableDefs(records, os);
     });
 
-}  // namespace
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/Conversion.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/Conversion.cpp
index 87f4622..cca462e 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/Conversion.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/Conversion.cpp
@@ -40,7 +40,7 @@
 // We should add native VM ops for supporting them.
 template <typename OpTy, arith::CmpIPredicate pred>
 struct MaxMinIOpConverter : public OpRewritePattern<OpTy> {
- public:
+public:
   using OpRewritePattern<OpTy>::OpRewritePattern;
   LogicalResult matchAndRewrite(OpTy op,
                                 PatternRewriter &rewriter) const final {
@@ -72,9 +72,11 @@
       // Generic dialect lookup.
       dialect = op->getDialect();
     }
-    if (!dialect) return;
+    if (!dialect)
+      return;
     auto *dialectInterface = dialect->getRegisteredInterface<T>();
-    if (!dialectInterface) return;
+    if (!dialectInterface)
+      return;
     resultSet.insert(dialectInterface);
   });
 
@@ -89,12 +91,12 @@
   return results;
 }
 
-}  // namespace
+} // namespace
 
 // Runs conversion with registered input dialects.
 class ConversionPass
     : public PassWrapper<ConversionPass, OperationPass<mlir::ModuleOp>> {
- public:
+public:
   explicit ConversionPass(TargetOptions targetOptions)
       : targetOptions_(targetOptions) {}
 
@@ -111,7 +113,8 @@
   }
 
   void runOnOperation() override {
-    if (getOperation().getBody()->empty()) return;
+    if (getOperation().getBody()->empty())
+      return;
 
     auto *context = &getContext();
     VMConversionTarget conversionTarget(context);
@@ -175,12 +178,12 @@
     }
   }
 
- private:
+private:
   TargetOptions targetOptions_;
 };
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createConversionPass(
-    TargetOptions targetOptions) {
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createConversionPass(TargetOptions targetOptions) {
   return std::make_unique<ConversionPass>(targetOptions);
 }
 
@@ -191,7 +194,7 @@
       return std::make_unique<ConversionPass>(options);
     });
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/DeduplicateRodata.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/DeduplicateRodata.cpp
index 2b99760..0cbfdfd 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/DeduplicateRodata.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/DeduplicateRodata.cpp
@@ -24,7 +24,7 @@
 class DeduplicateRodataPass
     : public PassWrapper<DeduplicateRodataPass,
                          OperationPass<IREE::VM::ModuleOp>> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::VM::VMDialect>();
   }
@@ -101,7 +101,7 @@
 
 static PassRegistration<DeduplicateRodataPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/DropEmptyModuleInitializers.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/DropEmptyModuleInitializers.cpp
index 47b4195..a58a2a4 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/DropEmptyModuleInitializers.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/DropEmptyModuleInitializers.cpp
@@ -27,12 +27,12 @@
           &funcOp.front().front() == funcOp.front().getTerminator());
 }
 
-}  // namespace
+} // namespace
 
 class DropEmptyModuleInitializersPass
     : public PassWrapper<DropEmptyModuleInitializersPass,
                          OperationPass<IREE::VM::ModuleOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "iree-vm-drop-empty-module-initializers";
   }
@@ -55,7 +55,8 @@
     auto initFuncOp = symbolTable.lookup<IREE::VM::FuncOp>("__init");
     if (initFuncOp && isFuncEmpty(initFuncOp)) {
       auto exportOp = exportOps[initFuncOp.getName()];
-      if (exportOp) exportOp.erase();
+      if (exportOp)
+        exportOp.erase();
       initFuncOp.erase();
     }
 
@@ -63,7 +64,8 @@
     auto deinitFuncOp = symbolTable.lookup<IREE::VM::FuncOp>("__deinit");
     if (deinitFuncOp && isFuncEmpty(deinitFuncOp)) {
       auto exportOp = exportOps[deinitFuncOp.getName()];
-      if (exportOp) exportOp.erase();
+      if (exportOp)
+        exportOp.erase();
       deinitFuncOp.erase();
     }
   }
@@ -76,7 +78,7 @@
 
 static PassRegistration<DropEmptyModuleInitializersPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/GlobalInitialization.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/GlobalInitialization.cpp
index 51d5bcc..532ee70 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/GlobalInitialization.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/GlobalInitialization.cpp
@@ -29,9 +29,9 @@
 // The returned op builder will be set at an insertion point where new
 // operations can be added that are guaranteed to execute in the CFG. The
 // caller must insert a return op at the insertion point when done.
-static std::tuple<IREE::VM::FuncOp, OpBuilder> appendOrCreateInitFuncOp(
-    IREE::VM::ModuleOp moduleOp, StringRef name, SymbolTable &symbolTable,
-    OpBuilder &moduleBuilder) {
+static std::tuple<IREE::VM::FuncOp, OpBuilder>
+appendOrCreateInitFuncOp(IREE::VM::ModuleOp moduleOp, StringRef name,
+                         SymbolTable &symbolTable, OpBuilder &moduleBuilder) {
   IREE::VM::FuncOp funcOp = symbolTable.lookup<IREE::VM::FuncOp>(name);
   OpBuilder funcBuilder(moduleOp.getContext());
   if (!funcOp) {
@@ -92,7 +92,8 @@
   SmallVector<Operation *> deadOps;
   for (auto &op : moduleOp->getRegion(0).front()) {
     auto globalOp = dyn_cast<IREE::Util::GlobalOpInterface>(op);
-    if (!globalOp) continue;
+    if (!globalOp)
+      continue;
     if (!cast<SymbolOpInterface>(op).isPrivate()) {
       // May be used outside the module; treat as used and mutable.
       globalOp.setGlobalMutable(true);
@@ -126,7 +127,7 @@
   }
 }
 
-}  // namespace
+} // namespace
 
 // Finds all global variables and moves their inital values/initializer calls
 // into a single function. Relies on the inliner to later make the uber function
@@ -143,7 +144,7 @@
 class GlobalInitializationPass
     : public PassWrapper<GlobalInitializationPass,
                          OperationPass<IREE::VM::ModuleOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "iree-vm-global-initialization";
   }
@@ -222,9 +223,10 @@
     exportFuncIfNeeded(moduleOp, deinitFuncOp);
   }
 
- private:
-  LogicalResult appendPrimitiveInitialization(
-      IREE::Util::GlobalOpInterface globalOp, OpBuilder &builder) {
+private:
+  LogicalResult
+  appendPrimitiveInitialization(IREE::Util::GlobalOpInterface globalOp,
+                                OpBuilder &builder) {
     auto initialValue = globalOp.getGlobalInitialValue();
     Value value = {};
     if (initialValue) {
@@ -256,14 +258,14 @@
         return {success(), {}};
       }
       switch (integerAttr.getType().getIntOrFloatBitWidth()) {
-        case 32:
-          return {success(),
-                  builder.createOrFold<IREE::VM::ConstI32Op>(loc, integerAttr)};
-        case 64:
-          return {success(),
-                  builder.createOrFold<IREE::VM::ConstI64Op>(loc, integerAttr)};
-        default:
-          return {failure(), {}};
+      case 32:
+        return {success(),
+                builder.createOrFold<IREE::VM::ConstI32Op>(loc, integerAttr)};
+      case 64:
+        return {success(),
+                builder.createOrFold<IREE::VM::ConstI64Op>(loc, integerAttr)};
+      default:
+        return {failure(), {}};
       }
     } else if (auto floatAttr = llvm::dyn_cast<FloatAttr>(value)) {
       if (floatAttr.getValue().isZero()) {
@@ -271,14 +273,14 @@
         return {success(), {}};
       }
       switch (floatAttr.getType().getIntOrFloatBitWidth()) {
-        case 32:
-          return {success(),
-                  builder.createOrFold<IREE::VM::ConstF32Op>(loc, floatAttr)};
-        case 64:
-          return {success(),
-                  builder.createOrFold<IREE::VM::ConstF64Op>(loc, floatAttr)};
-        default:
-          return {failure(), {}};
+      case 32:
+        return {success(),
+                builder.createOrFold<IREE::VM::ConstF32Op>(loc, floatAttr)};
+      case 64:
+        return {success(),
+                builder.createOrFold<IREE::VM::ConstF64Op>(loc, floatAttr)};
+      default:
+        return {failure(), {}};
       }
     }
     return {failure(), {}};
@@ -289,25 +291,25 @@
                                      Value value, OpBuilder &builder) {
     if (auto integerType = llvm::dyn_cast<IntegerType>(value.getType())) {
       switch (integerType.getIntOrFloatBitWidth()) {
-        case 32:
-          builder.create<IREE::VM::GlobalStoreI32Op>(loc, value, symName);
-          return success();
-        case 64:
-          builder.create<IREE::VM::GlobalStoreI64Op>(loc, value, symName);
-          return success();
-        default:
-          return failure();
+      case 32:
+        builder.create<IREE::VM::GlobalStoreI32Op>(loc, value, symName);
+        return success();
+      case 64:
+        builder.create<IREE::VM::GlobalStoreI64Op>(loc, value, symName);
+        return success();
+      default:
+        return failure();
       }
     } else if (auto floatType = llvm::dyn_cast<FloatType>(value.getType())) {
       switch (floatType.getIntOrFloatBitWidth()) {
-        case 32:
-          builder.create<IREE::VM::GlobalStoreF32Op>(loc, value, symName);
-          return success();
-        case 64:
-          builder.create<IREE::VM::GlobalStoreF64Op>(loc, value, symName);
-          return success();
-        default:
-          return failure();
+      case 32:
+        builder.create<IREE::VM::GlobalStoreF32Op>(loc, value, symName);
+        return success();
+      case 64:
+        builder.create<IREE::VM::GlobalStoreF64Op>(loc, value, symName);
+        return success();
+      default:
+        return failure();
       }
     }
     return failure();
@@ -341,7 +343,7 @@
 
 static PassRegistration<GlobalInitializationPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/HoistInlinedRodata.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/HoistInlinedRodata.cpp
index 2fcf88c..9bdbdf7 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/HoistInlinedRodata.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/HoistInlinedRodata.cpp
@@ -24,7 +24,7 @@
 class HoistInlinedRodataPass
     : public PassWrapper<HoistInlinedRodataPass,
                          OperationPass<IREE::VM::ModuleOp>> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::VM::VMDialect>();
   }
@@ -71,7 +71,7 @@
     }
   }
 
- private:
+private:
   Operation *findParentContainer(IREE::VM::RodataInlineOp inlineOp) {
     if (auto parentOp = inlineOp->getParentOfType<IREE::VM::InitializerOp>()) {
       return parentOp;
@@ -111,7 +111,7 @@
 
 static PassRegistration<HoistInlinedRodataPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/OrdinalAllocation.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/OrdinalAllocation.cpp
index f71deb9..3868644 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/OrdinalAllocation.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/OrdinalAllocation.cpp
@@ -42,7 +42,7 @@
 // clustered together to make use of paging in memory mapped files.
 class OrdinalAllocationPass
     : public PassWrapper<OrdinalAllocationPass, OperationPass<ModuleOp>> {
- public:
+public:
   StringRef getArgument() const override {
     return "iree-vm-ordinal-allocation";
   }
@@ -94,7 +94,8 @@
     int globalBytes = 0;
     for (auto sizeGlobalOps : llvm::enumerate(primitiveGlobalOps)) {
       size_t storageSize = sizeGlobalOps.index();
-      if (sizeGlobalOps.value().empty()) continue;
+      if (sizeGlobalOps.value().empty())
+        continue;
       nextGlobalBytesOrdinal =
           llvm::alignTo(nextGlobalBytesOrdinal, storageSize);
       for (auto &globalOp : sizeGlobalOps.value()) {
@@ -140,7 +141,7 @@
 
 static PassRegistration<OrdinalAllocationPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp
index fd68b19..5b16de6 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp
@@ -138,7 +138,7 @@
       });
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.h b/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.h
index 8870c9d..1d9fe3f 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.h
@@ -44,8 +44,8 @@
 //===----------------------------------------------------------------------===//
 
 // Converts from various dialects (standard, HAL, etc) to the VM dialect.
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createConversionPass(
-    TargetOptions targetOptions);
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createConversionPass(TargetOptions targetOptions);
 
 //===----------------------------------------------------------------------===//
 // Module layout
@@ -118,9 +118,9 @@
   createConvertStandardToVMTestPass();
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_DIALECT_VM_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/ResolveRodataLoads.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/ResolveRodataLoads.cpp
index c38838e..2c02834 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/ResolveRodataLoads.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/ResolveRodataLoads.cpp
@@ -30,8 +30,9 @@
 
 // Returns the vm.rodata that is stored into the global.
 // Returns nullptr if the rodata values stored differ across multiple stores.
-static IREE::VM::RodataOp findUniformlyStoredRodata(
-    Explorer &explorer, const Explorer::GlobalInfo *globalInfo) {
+static IREE::VM::RodataOp
+findUniformlyStoredRodata(Explorer &explorer,
+                          const Explorer::GlobalInfo *globalInfo) {
   // This will be the first op found; we'll use it to lookup the rodata.
   IREE::VM::RodataOp uniformRodataOp;
   for (auto storeOp : globalInfo->getStores()) {
@@ -65,9 +66,11 @@
                                 const Explorer::GlobalInfo *globalInfo,
                                 DenseSet<Operation *> &deadOps) {
   // Ignore indirect/unanalyzable globals.
-  if (globalInfo->isIndirect) return;
+  if (globalInfo->isIndirect)
+    return;
   // Ignore mutable globals, as they could be changed to various values.
-  if (globalInfo->op.isGlobalMutable()) return;
+  if (globalInfo->op.isGlobalMutable())
+    return;
 
   // If there are no stores to the global then it's always null.
   if (globalInfo->getStores().empty()) {
@@ -86,7 +89,8 @@
   // the program (there may be multiple initializers or control flow that
   // determines the stored value).
   auto rodataOp = findUniformlyStoredRodata(explorer, globalInfo);
-  if (!rodataOp) return;
+  if (!rodataOp)
+    return;
 
   // All stores to the global are of the same rodata.
   // Replace all of the loads with direct references to the rodata and then
@@ -109,7 +113,7 @@
 class ResolveRodataLoadsPass
     : public PassWrapper<ResolveRodataLoadsPass,
                          OperationPass<IREE::VM::ModuleOp>> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::VM::VMDialect>();
   }
@@ -144,7 +148,8 @@
     });
 
     // Erase all ops after we're done iterating them.
-    for (auto *deadOp : deadOps) deadOp->erase();
+    for (auto *deadOp : deadOps)
+      deadOp->erase();
   }
 };
 
@@ -155,7 +160,7 @@
 
 static PassRegistration<ResolveRodataLoadsPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/SinkDefiningOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/SinkDefiningOps.cpp
index f8a83a9..d455fe4 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/SinkDefiningOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/SinkDefiningOps.cpp
@@ -25,7 +25,7 @@
 
 class SinkDefiningOpsPass
     : public PassWrapper<SinkDefiningOpsPass, OperationPass<ModuleOp>> {
- public:
+public:
   StringRef getArgument() const override { return "iree-vm-sink-defining-ops"; }
 
   StringRef getDescription() const override {
@@ -90,7 +90,7 @@
 
 static PassRegistration<SinkDefiningOpsPass> pass;
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.cpp b/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.cpp
index ee5b618..f3466b7 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.cpp
@@ -31,23 +31,23 @@
     return success();
   } else if (auto integerType = llvm::dyn_cast<IntegerType>(type)) {
     switch (integerType.getIntOrFloatBitWidth()) {
-      default:
-      case 32:
-        s.push_back('i');
-        return success();
-      case 64:
-        s.push_back('I');
-        return success();
+    default:
+    case 32:
+      s.push_back('i');
+      return success();
+    case 64:
+      s.push_back('I');
+      return success();
     }
   } else if (auto floatType = llvm::dyn_cast<FloatType>(type)) {
     switch (floatType.getIntOrFloatBitWidth()) {
-      default:
-      case 32:
-        s.push_back('f');
-        return success();
-      case 64:
-        s.push_back('F');
-        return success();
+    default:
+    case 32:
+      s.push_back('f');
+      return success();
+    case 64:
+      s.push_back('F');
+      return success();
     }
   } else if (auto tupleType = llvm::dyn_cast<TupleType>(type)) {
     // Flatten tuple (so tuple<i32, i64> -> `...iI...`).
@@ -74,11 +74,11 @@
   return result;
 }
 
-std::optional<std::string> makeImportCallingConventionString(
-    IREE::VM::ImportOp importOp) {
+std::optional<std::string>
+makeImportCallingConventionString(IREE::VM::ImportOp importOp) {
   auto functionType = importOp.getFunctionType();
   if (functionType.getNumInputs() == 0 && functionType.getNumResults() == 0) {
-    return std::string("0v_v");  // Valid but empty.
+    return std::string("0v_v"); // Valid but empty.
   }
 
   SmallVector<char, 8> s = {'0'};
@@ -113,11 +113,11 @@
   return std::string(s.data(), s.size());
 }
 
-std::optional<std::string> makeCallingConventionString(
-    IREE::VM::FuncOp funcOp) {
+std::optional<std::string>
+makeCallingConventionString(IREE::VM::FuncOp funcOp) {
   auto functionType = funcOp.getFunctionType();
   if (functionType.getNumInputs() == 0 && functionType.getNumResults() == 0) {
-    return std::string("0v_v");  // Valid but empty.
+    return std::string("0v_v"); // Valid but empty.
   }
 
   SmallVector<char, 8> s = {'0'};
@@ -145,7 +145,7 @@
   return std::string(s.data(), s.size());
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.h b/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.h
index 1d17eca..36da1a5 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Utils/CallingConvention.h
@@ -20,16 +20,16 @@
 // This differs from makeCallingConventionString in that it supports variadic
 // arguments. Ideally we'd combine the two, but we only have this additional
 // metadata on IREE::VM::ImportOp.
-std::optional<std::string> makeImportCallingConventionString(
-    IREE::VM::ImportOp importOp);
+std::optional<std::string>
+makeImportCallingConventionString(IREE::VM::ImportOp importOp);
 
 // Generates a string encoding the function type for defining the
 // FunctionSignatureDef::calling_convention field for internal/export functions.
 std::optional<std::string> makeCallingConventionString(IREE::VM::FuncOp funcOp);
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_UTILS_CALLINGCONVENTION_H_
+#endif // IREE_COMPILER_DIALECT_VM_UTILS_CALLINGCONVENTION_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.cpp b/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.cpp
index be96ad4..f8feb74 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.cpp
@@ -89,34 +89,34 @@
 
   if (auto attr = elementsAttr.dyn_cast<DenseIntElementsAttr>()) {
     switch (bitwidth) {
-      case 8:
-        serializeConstantI8Array(attr, alignment, dst);
-        break;
-      case 16:
-        serializeConstantI16Array(attr, alignment, dst);
-        break;
-      case 32:
-        serializeConstantI32Array(attr, alignment, dst);
-        break;
-      case 64:
-        serializeConstantI64Array(attr, alignment, dst);
-        break;
-      default:
-        return emitError(loc) << "unhandled element bitwidth " << bitwidth;
+    case 8:
+      serializeConstantI8Array(attr, alignment, dst);
+      break;
+    case 16:
+      serializeConstantI16Array(attr, alignment, dst);
+      break;
+    case 32:
+      serializeConstantI32Array(attr, alignment, dst);
+      break;
+    case 64:
+      serializeConstantI64Array(attr, alignment, dst);
+      break;
+    default:
+      return emitError(loc) << "unhandled element bitwidth " << bitwidth;
     }
   } else if (auto attr = elementsAttr.dyn_cast<DenseFPElementsAttr>()) {
     switch (bitwidth) {
-      case 16:
-        serializeConstantF16Array(attr, alignment, dst);
-        break;
-      case 32:
-        serializeConstantF32Array(attr, alignment, dst);
-        break;
-      case 64:
-        serializeConstantF64Array(attr, alignment, dst);
-        break;
-      default:
-        return emitError(loc) << "unhandled element bitwidth " << bitwidth;
+    case 16:
+      serializeConstantF16Array(attr, alignment, dst);
+      break;
+    case 32:
+      serializeConstantF32Array(attr, alignment, dst);
+      break;
+    case 64:
+      serializeConstantF64Array(attr, alignment, dst);
+      break;
+    default:
+      return emitError(loc) << "unhandled element bitwidth " << bitwidth;
     }
   } else {
     return emitError(loc) << "unimplemented attribute encoding: "
@@ -126,7 +126,7 @@
   return success();
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.h b/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.h
index ac8521c..453dfd8 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Utils/ConstantEncoding.h
@@ -19,9 +19,9 @@
 LogicalResult serializeConstantArray(Location loc, ElementsAttr elementsAttr,
                                      size_t alignment, uint8_t *dst);
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_UTILS_CONSTANTENCODING_H_
+#endif // IREE_COMPILER_DIALECT_VM_UTILS_CONSTANTENCODING_H_
diff --git a/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.cpp b/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.cpp
index e873572..60d7027 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.cpp
@@ -20,7 +20,8 @@
     if (auto refPtrType = llvm::dyn_cast<IREE::VM::RefType>(type)) {
       type = refPtrType.getObjectType();
     }
-    if (typeMap.count(type)) return;
+    if (typeMap.count(type))
+      return;
     std::string str;
     llvm::raw_string_ostream sstream(str);
     type.print(sstream);
@@ -33,8 +34,10 @@
   };
   for (auto funcOp : moduleOp.getBlock().getOps<IREE::VM::FuncOp>()) {
     funcOp.walk([&](Operation *op) {
-      for (auto type : op->getOperandTypes()) tryInsertType(type);
-      for (auto type : op->getResultTypes()) tryInsertType(type);
+      for (auto type : op->getOperandTypes())
+        tryInsertType(type);
+      for (auto type : op->getResultTypes())
+        tryInsertType(type);
     });
   }
 
@@ -56,7 +59,7 @@
   return table;
 }
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.h b/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.h
index 837289c..e915c3d 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Utils/TypeTable.h
@@ -26,9 +26,9 @@
 // the vector to the type represented by the type ordinal.
 std::vector<TypeDef> buildTypeTable(IREE::VM::ModuleOp moduleOp);
 
-}  // namespace VM
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VM_UTILS_TYPETABLE_H_
+#endif // IREE_COMPILER_DIALECT_VM_UTILS_TYPETABLE_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp
index 7406f7d..c6cf39a 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp
@@ -74,12 +74,12 @@
   }
 
   auto bufferType = IREE::Util::BufferType::get(funcOp.getContext());
-  auto bindingsType = IREE::Util::ListType::get(bufferType);  // of i8
+  auto bindingsType = IREE::Util::ListType::get(bufferType); // of i8
   auto i32Type = IntegerType::get(funcOp.getContext(), 32);
   auto newType = FunctionType::get(funcOp.getContext(),
                                    {
-                                       /*local_memory=*/bufferType,  // of i8
-                                       /*constants=*/bufferType,     // of i32
+                                       /*local_memory=*/bufferType, // of i8
+                                       /*constants=*/bufferType,    // of i32
                                        /*bindings=*/bindingsType,
                                        /*workgroup_id_x=*/i32Type,
                                        /*workgroup_id_y=*/i32Type,
@@ -107,9 +107,9 @@
 struct ConvertHALInterfaceWorkgroupIDOp
     : public OpConversionPattern<IREE::HAL::InterfaceWorkgroupIDOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceWorkgroupIDOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceWorkgroupIDOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     uint64_t dim = op.getDimension().getZExtValue();
     if (dim >= 3) {
       return op.emitOpError() << "out of bounds workgroup ID dimension";
@@ -131,9 +131,9 @@
 struct ConvertHALInterfaceWorkgroupSizeOp
     : public OpConversionPattern<IREE::HAL::InterfaceWorkgroupSizeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceWorkgroupSizeOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceWorkgroupSizeOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     uint64_t dim = op.getDimension().getZExtValue();
     if (dim >= 3) {
       return op.emitOpError() << "out of bounds workgroup size dimension";
@@ -155,9 +155,9 @@
 struct ConvertHALInterfaceWorkgroupCountOp
     : public OpConversionPattern<IREE::HAL::InterfaceWorkgroupCountOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceWorkgroupCountOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceWorkgroupCountOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     uint64_t dim = op.getDimension().getZExtValue();
     if (dim >= 3) {
       return op.emitOpError() << "out of bounds workgroup count dimension";
@@ -178,9 +178,9 @@
 struct ConvertHALInterfaceConstantLoadOp
     : public OpConversionPattern<IREE::HAL::InterfaceConstantLoadOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceConstantLoadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceConstantLoadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Find the vmvx.interface argument to the function.
     auto constantsArg = op->getParentOfType<mlir::func::FuncOp>().getArgument(
         kEntryArgConstants);
@@ -207,9 +207,10 @@
 struct ConvertGetRawInterfaceBindingBufferOp
     : public OpConversionPattern<IREE::VMVX::GetRawInterfaceBindingBufferOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::VMVX::GetRawInterfaceBindingBufferOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::VMVX::GetRawInterfaceBindingBufferOp op,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Find the vmvx.interface argument to the function.
     auto bindingsArg = op->getParentOfType<mlir::func::FuncOp>().getArgument(
         kEntryArgBindings);
@@ -239,9 +240,9 @@
 struct ConvertHALInterfaceBindingSubspanOp
     : public OpConversionPattern<IREE::HAL::InterfaceBindingSubspanOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::InterfaceBindingSubspanOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::InterfaceBindingSubspanOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Find the vmvx.interface argument to the function.
     auto bindingsArg = op->getParentOfType<mlir::func::FuncOp>().getArgument(
         kEntryArgBindings);
@@ -298,7 +299,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateHALToVMVXPatterns(MLIRContext *context,
                                ConversionTarget &conversionTarget,
@@ -316,5 +317,5 @@
   patterns.insert<ConvertHALInterfaceBindingSubspanOp>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.h b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.h
index c83d1d3..57e6886 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.h
@@ -26,7 +26,7 @@
                                RewritePatternSet &patterns,
                                TypeConverter &typeConverter);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_CONVERSION_HALTOVMVX_CONVERTHALTOVMVX_H_
+#endif // IREE_COMPILER_DIALECT_VMVX_CONVERSION_HALTOVMVX_CONVERTHALTOVMVX_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.cpp
index e4f79be..cfc7607 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.cpp
@@ -33,9 +33,9 @@
 template <typename OpTy>
 struct FoldAsNoOp final : public OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(op, adaptor.getOperands());
     return success();
   }
@@ -46,9 +46,9 @@
 struct RemoveIdentityConversionCast final
     : public OpConversionPattern<UnrealizedConversionCastOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      UnrealizedConversionCastOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(UnrealizedConversionCastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (op->getNumOperands() == 1 && op->getNumResults() == 1 &&
         adaptor.getOperands().front().getType() ==
             op->getResultTypes().front()) {
@@ -60,7 +60,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStandardToVMVXPatterns(MLIRContext *context,
                                     RewritePatternSet &patterns,
@@ -72,5 +72,5 @@
   patterns.insert<RemoveIdentityConversionCast>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.h b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.h
index e083a12..9526ae0 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/StandardToVMVX/ConvertStandardToVMVX.h
@@ -18,8 +18,8 @@
                                     RewritePatternSet &patterns,
                                     TypeConverter &typeConverter);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_CONVERSION_STANDARDTOVMVX_CONVERTSTANDARDTOVMVX_H_
-        // // NOLINT
+#endif // IREE_COMPILER_DIALECT_VMVX_CONVERSION_STANDARDTOVMVX_CONVERTSTANDARDTOVMVX_H_
+       // // NOLINT
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.cpp
index 2e32ed9..15927df 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.cpp
@@ -31,13 +31,13 @@
 // as part of lowering (i.e. tagging or metadata ops that are unrepresentable
 // in the VM dialect).
 class EraseNonVMOp : public ConversionPattern {
- public:
+public:
   EraseNonVMOp(StringRef rootName, MLIRContext *ctx)
       : ConversionPattern(rootName, 0, ctx) {}
 
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
@@ -49,16 +49,15 @@
 // it.
 template <typename T>
 class VMVXImportOpConversion : public OpConversionPattern<T> {
- public:
+public:
   VMVXImportOpConversion(MLIRContext *context, SymbolTable &importSymbols,
                          TypeConverter &typeConverter)
-      : OpConversionPattern<T>(context),
-        importSymbols(importSymbols),
+      : OpConversionPattern<T>(context), importSymbols(importSymbols),
         typeConverter(typeConverter) {}
 
-  LogicalResult matchAndRewrite(
-      T op, typename T::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(T op, typename T::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     std::string importFqName = getImportFqName(op);
     auto importOp =
         importSymbols.template lookup<IREE::VM::ImportOp>(importFqName);
@@ -68,16 +67,17 @@
       return failure();
     }
     auto results = emitCall(op, adaptor, importOp, rewriter);
-    if (!results.has_value()) return failure();
+    if (!results.has_value())
+      return failure();
     rewriter.replaceOp(op, results.value());
     return success();
   }
 
- protected:
+protected:
   virtual std::string getImportFqName(T op) const = 0;
-  virtual std::optional<SmallVector<Value>> emitCall(
-      T op, typename T::Adaptor adaptor, IREE::VM::ImportOp importOp,
-      ConversionPatternRewriter &rewriter) const {
+  virtual std::optional<SmallVector<Value>>
+  emitCall(T op, typename T::Adaptor adaptor, IREE::VM::ImportOp importOp,
+           ConversionPatternRewriter &rewriter) const {
     return rewriteToCall(op, adaptor, importOp, typeConverter, rewriter);
   }
 
@@ -112,13 +112,13 @@
     return typePrefix + std::to_string(bitWidth);
   }
 
- private:
+private:
   SymbolTable &importSymbols;
   TypeConverter &typeConverter;
 };
 
 class BinaryOpConversion : public VMVXImportOpConversion<IREE::VMVX::BinaryOp> {
- public:
+public:
   using VMVXImportOpConversion::VMVXImportOpConversion;
 
   std::string getImportFqName(IREE::VMVX::BinaryOp op) const override {
@@ -135,7 +135,7 @@
 
 // Converts the vmvx.copy op to an appropriate typed import.
 class CopyOpConversion : public VMVXImportOpConversion<IREE::VMVX::CopyOp> {
- public:
+public:
   using VMVXImportOpConversion::VMVXImportOpConversion;
 
   std::string getImportFqName(IREE::VMVX::CopyOp op) const override {
@@ -150,7 +150,7 @@
 
 // Converts the vmvx.fill2d op to an appropriate typed import.
 class Fill2DOpConversion : public VMVXImportOpConversion<IREE::VMVX::Fill2DOp> {
- public:
+public:
   using VMVXImportOpConversion::VMVXImportOpConversion;
 
   std::string getImportFqName(IREE::VMVX::Fill2DOp op) const override {
@@ -161,7 +161,7 @@
 };
 
 class UnaryOpConversion : public VMVXImportOpConversion<IREE::VMVX::UnaryOp> {
- public:
+public:
   using VMVXImportOpConversion::VMVXImportOpConversion;
 
   std::string getImportFqName(IREE::VMVX::UnaryOp op) const override {
@@ -176,7 +176,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateVMVXToVMPatterns(MLIRContext *context,
                               ConversionTarget &conversionTarget,
@@ -187,5 +187,5 @@
                   UnaryOpConversion>(context, importSymbols, typeConverter);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.h b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.h
index db3a103..8b2102a 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/VMVXToVM/ConvertVMVXToVM.h
@@ -21,7 +21,7 @@
                               SymbolTable &importSymbols,
                               RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_CONVERSION_VMVXTOVM_CONVERTVMVXTOVM_H_
+#endif // IREE_COMPILER_DIALECT_VMVX_CONVERSION_VMVXTOVM_CONVERTVMVXTOVM_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.cpp b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.cpp
index e112caa..02af659 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.cpp
@@ -24,7 +24,7 @@
 namespace {
 
 class VMVXToVMConversionInterface : public VMConversionDialectInterface {
- public:
+public:
   using VMConversionDialectInterface::VMConversionDialectInterface;
 
   OwningOpRef<mlir::ModuleOp> parseVMImportModule() const override {
@@ -34,17 +34,18 @@
         getDialect()->getContext());
   }
 
-  void populateVMConversionPatterns(
-      SymbolTable &importSymbols, RewritePatternSet &patterns,
-      ConversionTarget &conversionTarget,
-      TypeConverter &typeConverter) const override {
+  void
+  populateVMConversionPatterns(SymbolTable &importSymbols,
+                               RewritePatternSet &patterns,
+                               ConversionTarget &conversionTarget,
+                               TypeConverter &typeConverter) const override {
     conversionTarget.addIllegalDialect<IREE::VMVX::VMVXDialect>();
     populateVMVXToVMPatterns(getDialect()->getContext(), conversionTarget,
                              typeConverter, importSymbols, patterns);
   }
 };
 
-}  // namespace
+} // namespace
 
 VMVXDialect::VMVXDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<VMVXDialect>()) {
@@ -56,7 +57,7 @@
       >();
 }
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.h b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.h
index aac7ac0..d315fde 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXDialect.h
@@ -16,14 +16,14 @@
 namespace VMVX {
 
 class VMVXDialect : public Dialect {
- public:
+public:
   explicit VMVXDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "vmvx"; }
 };
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_IR_VMVXDIALECT_H_
+#endif // IREE_COMPILER_DIALECT_VMVX_IR_VMVXDIALECT_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.cpp b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.cpp
index a3b3285..772c31f 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.cpp
@@ -19,10 +19,10 @@
 namespace mlir {
 namespace iree_compiler {
 namespace IREE {
-namespace VMVX {}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+namespace VMVX {} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // TableGen definitions (intentionally last)
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.h b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.h
index 418b1b1..8083aef 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.h
@@ -21,6 +21,6 @@
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Dialect/VMVX/IR/VMVXOps.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/VMVX/IR/VMVXOps.h.inc" // IWYU pragma: keep
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_IR_VMVXOPS_H_
+#endif // IREE_COMPILER_DIALECT_VMVX_IR_VMVXOPS_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.cpp b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.cpp
index 15eda3c..bd56ee4 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.cpp
@@ -18,7 +18,7 @@
 
 #include "iree/compiler/Dialect/VMVX/IR/VMVXOpInterfaces.cpp.inc"
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.h b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.h
index 21e1751..3fef579 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXTypes.h
@@ -21,8 +21,8 @@
 #include "mlir/Support/LLVM.h"
 
 // clang-format off: must be included after all LLVM/MLIR headers.
-#include "iree/compiler/Dialect/VMVX/IR/VMVXEnums.h.inc"  // IWYU pragma: export
-#include "iree/compiler/Dialect/VMVX/IR/VMVXOpInterfaces.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/VMVX/IR/VMVXEnums.h.inc" // IWYU pragma: export
+#include "iree/compiler/Dialect/VMVX/IR/VMVXOpInterfaces.h.inc" // IWYU pragma: export
 // clang-format on
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_IR_VMVXTYPES_H_
+#endif // IREE_COMPILER_DIALECT_VMVX_IR_VMVXTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Conversion.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Conversion.cpp
index 9b658dc..cc59908 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Conversion.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Conversion.cpp
@@ -38,7 +38,7 @@
 
 // Runs conversion with registered input dialects.
 class ConversionPass : public ConversionBase<ConversionPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect, IREE::HAL::HALDialect,
                     IREE::VM::VMDialect, IREE::VMVX::VMVXDialect,
@@ -101,13 +101,13 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<mlir::ModuleOp>> createConversionPass() {
   return std::make_unique<ConversionPass>();
 }
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/MaterializeConstants.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/MaterializeConstants.cpp
index 8d50867..539e9e8 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/MaterializeConstants.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/MaterializeConstants.cpp
@@ -29,7 +29,7 @@
 
 class MaterializeConstantsPass
     : public MaterializeConstantsBase<MaterializeConstantsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect, IREE::HAL::HALDialect,
                     arith::ArithDialect, func::FuncDialect>();
@@ -51,7 +51,8 @@
     });
 
     // No constants found; omit the constant block entirely.
-    if (allLoadOps.empty()) return;
+    if (allLoadOps.empty())
+      return;
 
     // Create global ops for each constant and replace the HAL ops so they load
     // from them. Each global will track what constant key it represents for
@@ -135,7 +136,7 @@
   return std::make_unique<MaterializeConstantsPass>();
 }
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/PassDetail.h b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/PassDetail.h
index 81f4d5c..2095506 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/PassDetail.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/PassDetail.h
@@ -19,9 +19,9 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/Dialect/VMVX/Transforms/Passes.h.inc"
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_TRANSFORMS_PASS_DETAIL_H_
+#endif // IREE_COMPILER_DIALECT_VMVX_TRANSFORMS_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp
index cabc64a..86232ce 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp
@@ -91,8 +91,8 @@
   nestedModulePM.addPass(createCanonicalizerPass());
 }
 
-static void buildLoopOptimizationVMVXTransformPassPipeline(
-    OpPassManager &passManager) {
+static void
+buildLoopOptimizationVMVXTransformPassPipeline(OpPassManager &passManager) {
   OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
 
   nestedModulePM.addNestedPass<func::FuncOp>(createLowerAffinePass());
@@ -132,7 +132,7 @@
 namespace {
 #define GEN_PASS_REGISTRATION
 #include "iree/compiler/Dialect/VMVX/Transforms/Passes.h.inc"
-}  // namespace
+} // namespace
 
 void registerVMVXPasses() {
   // Generated.
@@ -146,7 +146,7 @@
       });
 }
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.h b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.h
index 5d345ab..ca67756 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.h
@@ -55,9 +55,9 @@
 
 void registerVMVXPasses();
 
-}  // namespace VMVX
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace VMVX
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VMVX_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_DIALECT_VMVX_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp
index 77c36ec..45b2395 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp
@@ -31,7 +31,7 @@
   SmallVector<OpFoldResult> sizes;
   SmallVector<OpFoldResult> strides;
 };
-}  // namespace
+} // namespace
 
 /// Returns an AffineMap for an add or a mul.
 static AffineMap getAddMap(MLIRContext *context) {
@@ -75,8 +75,9 @@
 
 /// Returns the strides based on the sizes assuming that the `memref`
 /// has default layout, i.e. it is not a result of a subview.
-static SmallVector<OpFoldResult> getStridesFromSizes(
-    RewriterBase &rewriter, Location loc, ArrayRef<OpFoldResult> sizes) {
+static SmallVector<OpFoldResult>
+getStridesFromSizes(RewriterBase &rewriter, Location loc,
+                    ArrayRef<OpFoldResult> sizes) {
   if (sizes.size() == 0) {
     return {};
   }
@@ -121,8 +122,9 @@
   return resultDescriptor;
 }
 
-static FailureOr<DescriptorInfo> resolveBufferDescriptorForAllocation(
-    memref::AllocaOp alloca, RewriterBase &rewriter, Location loc) {
+static FailureOr<DescriptorInfo>
+resolveBufferDescriptorForAllocation(memref::AllocaOp alloca,
+                                     RewriterBase &rewriter, Location loc) {
   DescriptorInfo resultDescriptor;
 
   // Replace the op with values:
@@ -153,8 +155,9 @@
   return resultDescriptor;
 }
 
-static FailureOr<DescriptorInfo> resolveBufferDescriptorForGetGlobalOp(
-    memref::GetGlobalOp global, RewriterBase &rewriter, Location loc) {
+static FailureOr<DescriptorInfo>
+resolveBufferDescriptorForGetGlobalOp(memref::GetGlobalOp global,
+                                      RewriterBase &rewriter, Location loc) {
   IndexSet indexSet(loc, rewriter);
   DescriptorInfo resultDescriptor;
 
@@ -187,9 +190,10 @@
 
 /// Replaces the offsets, sizes and strides based on values provided
 /// by `DescriptorInfo` object.
-static void replaceOffsetSizesAndStridesWith(
-    RewriterBase &rewriter, GetBufferDescriptorOp op,
-    const DescriptorInfo &resultDescriptor) {
+static void
+replaceOffsetSizesAndStridesWith(RewriterBase &rewriter,
+                                 GetBufferDescriptorOp op,
+                                 const DescriptorInfo &resultDescriptor) {
   int rank = resultDescriptor.sizes.size();
   assert(rank == resultDescriptor.strides.size() &&
          "expected number of sizes and strides to match");
@@ -223,7 +227,8 @@
   LogicalResult matchAndRewrite(GetBufferDescriptorOp op,
                                 PatternRewriter &rewriter) const override {
     auto subview = op.getSource().template getDefiningOp<memref::SubViewOp>();
-    if (!subview) return failure();
+    if (!subview)
+      return failure();
     auto loc = op.getLoc();
     IndexSet indexSet(loc, rewriter);
 
@@ -258,7 +263,8 @@
     llvm::SmallBitVector droppedDims = subview.getDroppedDims();
     int targetIndex = 0;
     for (int i = 0; i < sourceRank; ++i) {
-      if (droppedDims.test(i)) continue;
+      if (droppedDims.test(i))
+        continue;
       rewriter.replaceAllUsesWith(
           op.getSizes()[targetIndex],
           getValueOrCreateConstantIndexOp(rewriter, loc,
@@ -288,7 +294,8 @@
     auto binding =
         op.getSource()
             .template getDefiningOp<IREE::HAL::InterfaceBindingSubspanOp>();
-    if (!binding) return failure();
+    if (!binding)
+      return failure();
 
     auto loc = op.getLoc();
     FailureOr<DescriptorInfo> resultDescriptor =
@@ -316,9 +323,10 @@
 
 /// Function to handle replacement of base pointer of buffer
 /// descriptors.
-static Value getBaseBufferReplacementForDescriptor(
-    GetBufferDescriptorOp descriptorOp, RewriterBase &rewriter, Location loc,
-    Value source) {
+static Value
+getBaseBufferReplacementForDescriptor(GetBufferDescriptorOp descriptorOp,
+                                      RewriterBase &rewriter, Location loc,
+                                      Value source) {
   return rewriter
       .create<UnrealizedConversionCastOp>(
           loc, descriptorOp.getBaseBuffer().getType(), source)
@@ -332,7 +340,8 @@
   LogicalResult matchAndRewrite(GetBufferDescriptorOp op,
                                 PatternRewriter &rewriter) const override {
     auto alloca = op.getSource().template getDefiningOp<memref::AllocaOp>();
-    if (!alloca) return failure();
+    if (!alloca)
+      return failure();
     auto memRefType = llvm::cast<MemRefType>(alloca.getResult().getType());
     if (!memRefType.getLayout().isIdentity()) {
       return rewriter.notifyMatchFailure(op, "not identity allocation");
@@ -365,7 +374,8 @@
   LogicalResult matchAndRewrite(GetBufferDescriptorOp op,
                                 PatternRewriter &rewriter) const override {
     auto global = op.getSource().template getDefiningOp<memref::GetGlobalOp>();
-    if (!global) return failure();
+    if (!global)
+      return failure();
     auto memRefType = llvm::cast<MemRefType>(global.getResult().getType());
     if (!memRefType.getLayout().isIdentity()) {
       return rewriter.notifyMatchFailure(op, "not identity allocation");
@@ -397,7 +407,7 @@
 
 class ResolveBufferDescriptorsPass
     : public ResolveBufferDescriptorsBase<ResolveBufferDescriptorsPass> {
- public:
+public:
   ResolveBufferDescriptorsPass() = default;
   ResolveBufferDescriptorsPass(const ResolveBufferDescriptorsPass &) {}
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -440,10 +450,10 @@
       llvm::cl::init(false)};
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<mlir::OperationPass<>> createResolveBufferDescriptorsPass() {
   return std::make_unique<ResolveBufferDescriptorsPass>();
 }
 
-}  // namespace mlir::iree_compiler::IREE::VMVX
+} // namespace mlir::iree_compiler::IREE::VMVX
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.cpp b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.cpp
index 9fe4170..4939355 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.cpp
@@ -20,7 +20,7 @@
 #include "mlir/IR/Location.h"
 
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.cpp.inc" // IWYU pragma: keep
 
 namespace mlir {
 namespace iree_compiler {
@@ -40,12 +40,8 @@
                             Attribute extensions, spirv::Vendor vendorID,
                             spirv::DeviceType deviceType, uint32_t deviceID,
                             Attribute capabilities)
-      : version(version),
-        revision(revision),
-        extensions(extensions),
-        capabilities(capabilities),
-        vendorID(vendorID),
-        deviceType(deviceType),
+      : version(version), revision(revision), extensions(extensions),
+        capabilities(capabilities), vendorID(vendorID), deviceType(deviceType),
         deviceID(deviceID) {}
 
   bool operator==(const KeyTy &key) const {
@@ -53,8 +49,8 @@
                                   deviceType, deviceID, capabilities);
   }
 
-  static TargetEnvAttributeStorage *construct(
-      AttributeStorageAllocator &allocator, const KeyTy &key) {
+  static TargetEnvAttributeStorage *
+  construct(AttributeStorageAllocator &allocator, const KeyTy &key) {
     return new (allocator.allocate<TargetEnvAttributeStorage>())
         TargetEnvAttributeStorage(std::get<0>(key), std::get<1>(key),
                                   std::get<2>(key), std::get<3>(key),
@@ -70,7 +66,7 @@
   spirv::DeviceType deviceType;
   uint32_t deviceID;
 };
-}  // namespace detail
+} // namespace detail
 
 TargetEnvAttr TargetEnvAttr::get(Vulkan::Version version, uint32_t revision,
                                  ArrayRef<Extension> extensions,
@@ -139,11 +135,12 @@
   return llvm::cast<CapabilitiesAttr>(getImpl()->capabilities);
 }
 
-LogicalResult TargetEnvAttr::verify(
-    function_ref<InFlightDiagnostic()> emitError, IntegerAttr version,
-    IntegerAttr revision, ArrayAttr extensions, spirv::Vendor /*vendorID*/,
-    spirv::DeviceType /*deviceType*/, uint32_t /*deviceID*/,
-    CapabilitiesAttr capabilities) {
+LogicalResult
+TargetEnvAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                      IntegerAttr version, IntegerAttr revision,
+                      ArrayAttr extensions, spirv::Vendor /*vendorID*/,
+                      spirv::DeviceType /*deviceType*/, uint32_t /*deviceID*/,
+                      CapabilitiesAttr capabilities) {
   if (!version.getType().isInteger(32))
     return emitError() << "expected 32-bit integer for version";
 
@@ -164,10 +161,12 @@
 ParseResult parseKeywordList(
     DialectAsmParser &parser,
     function_ref<LogicalResult(llvm::SMLoc, StringRef)> processKeyword) {
-  if (parser.parseLSquare()) return failure();
+  if (parser.parseLSquare())
+    return failure();
 
   // Special case for empty list.
-  if (succeeded(parser.parseOptionalRSquare())) return success();
+  if (succeeded(parser.parseOptionalRSquare()))
+    return success();
 
   // Keep parsing the keyword and an optional comma following it. If the comma
   // is successfully parsed, then we have more keywords to parse.
@@ -178,14 +177,16 @@
       return failure();
   } while (succeeded(parser.parseOptionalComma()));
 
-  if (parser.parseRSquare()) return failure();
+  if (parser.parseRSquare())
+    return failure();
 
   return success();
 }
 
 /// Parses a TargetEnvAttr.
 Attribute parseTargetAttr(DialectAsmParser &parser) {
-  if (parser.parseLess()) return {};
+  if (parser.parseLess())
+    return {};
 
   Builder &builder = parser.getBuilder();
 
@@ -193,7 +194,8 @@
   {
     auto loc = parser.getCurrentLocation();
     StringRef version;
-    if (parser.parseKeyword(&version) || parser.parseComma()) return {};
+    if (parser.parseKeyword(&version) || parser.parseComma())
+      return {};
 
     if (auto versionSymbol = symbolizeVersion(version)) {
       versionAttr =
@@ -246,7 +248,8 @@
   {
     auto loc = parser.getCurrentLocation();
     StringRef vendorStr;
-    if (parser.parseKeyword(&vendorStr)) return {};
+    if (parser.parseKeyword(&vendorStr))
+      return {};
     if (auto vendorSymbol = spirv::symbolizeVendor(vendorStr)) {
       vendorID = *vendorSymbol;
     } else {
@@ -255,7 +258,8 @@
 
     loc = parser.getCurrentLocation();
     StringRef deviceTypeStr;
-    if (parser.parseColon() || parser.parseKeyword(&deviceTypeStr)) return {};
+    if (parser.parseColon() || parser.parseKeyword(&deviceTypeStr))
+      return {};
     if (auto deviceTypeSymbol = spirv::symbolizeDeviceType(deviceTypeStr)) {
       deviceType = *deviceTypeSymbol;
     } else {
@@ -264,21 +268,25 @@
 
     loc = parser.getCurrentLocation();
     if (succeeded(parser.parseOptionalColon())) {
-      if (parser.parseInteger(deviceID)) return {};
+      if (parser.parseInteger(deviceID))
+        return {};
     }
 
-    if (parser.parseComma()) return {};
+    if (parser.parseComma())
+      return {};
   }
 
   CapabilitiesAttr capabilities;
-  if (parser.parseAttribute(capabilities)) return {};
+  if (parser.parseAttribute(capabilities))
+    return {};
 
-  if (parser.parseGreater()) return {};
+  if (parser.parseGreater())
+    return {};
 
   return TargetEnvAttr::get(versionAttr, revisionAttr, extensionsAttr, vendorID,
                             deviceType, deviceID, capabilities);
 }
-}  // anonymous namespace
+} // anonymous namespace
 
 Attribute VulkanDialect::parseAttribute(DialectAsmParser &parser,
                                         Type type) const {
@@ -294,11 +302,13 @@
   OptionalParseResult result =
       generatedAttributeParser(parser, &attrKind, type, attr);
   if (result.has_value()) {
-    if (failed(result.value())) return {};
+    if (failed(result.value()))
+      return {};
     return attr;
   }
 
-  if (attrKind == TargetEnvAttr::getKindName()) return parseTargetAttr(parser);
+  if (attrKind == TargetEnvAttr::getKindName())
+    return parseTargetAttr(parser);
 
   parser.emitError(parser.getNameLoc(), "unknown Vulkan attriubte kind: ")
       << attrKind;
@@ -325,11 +335,12 @@
   }
   printer << ", " << targetEnv.getCapabilitiesAttr() << ">";
 }
-}  // anonymous namespace
+} // anonymous namespace
 
 void VulkanDialect::printAttribute(Attribute attr,
                                    DialectAsmPrinter &printer) const {
-  if (succeeded(generatedAttributePrinter(attr, printer))) return;
+  if (succeeded(generatedAttributePrinter(attr, printer)))
+    return;
 
   if (auto targetEnv = llvm::dyn_cast<TargetEnvAttr>(attr))
     return print(targetEnv, printer);
@@ -348,7 +359,7 @@
                 >();
 }
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.h b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.h
index af4a473..f08c6e1 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.h
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.h
@@ -12,7 +12,7 @@
 #include "mlir/IR/BuiltinAttributes.h"
 
 #define GET_ATTRDEF_CLASSES
-#include "iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Vulkan/IR/VulkanAttributes.h.inc" // IWYU pragma: export
 
 namespace mlir {
 namespace iree_compiler {
@@ -21,14 +21,14 @@
 
 namespace detail {
 struct TargetEnvAttributeStorage;
-}  // namespace detail
+} // namespace detail
 
 /// An attribute that specifies the target version, supported extensions, and
 /// resource limits. These information describles a Vulkan target environment.
 class TargetEnvAttr
     : public Attribute::AttrBase<TargetEnvAttr, Attribute,
                                  detail::TargetEnvAttributeStorage> {
- public:
+public:
   using Base::Base;
 
   /// Gets a TargetEnvAttr instance.
@@ -85,9 +85,9 @@
                               CapabilitiesAttr capabilities);
 };
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VULKAN_IR_VULKANATTRIBUTES_H_
+#endif // IREE_COMPILER_DIALECT_VULKAN_IR_VULKANATTRIBUTES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.cpp b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.cpp
index 7740f7f..6bfdac7 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.cpp
@@ -18,7 +18,7 @@
   registerAttributes();
 }
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.h b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.h
index 1801f6f..7d0a65e 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.h
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanDialect.h
@@ -15,7 +15,7 @@
 namespace Vulkan {
 
 class VulkanDialect : public Dialect {
- public:
+public:
   explicit VulkanDialect(MLIRContext *context);
 
   static StringRef getDialectNamespace() { return "vk"; }
@@ -30,14 +30,14 @@
   /// Prints an attribute registered to this dialect.
   void printAttribute(Attribute, DialectAsmPrinter &printer) const override;
 
- private:
+private:
   /// Register the attributes of this dialect.
   void registerAttributes();
 };
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VULKAN_IR_VULKANDIALECT_H_
+#endif // IREE_COMPILER_DIALECT_VULKAN_IR_VULKANDIALECT_H_
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.cpp b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.cpp
index 7b15e3a..fc67767 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.cpp
@@ -6,8 +6,8 @@
 
 #include "iree/compiler/Dialect/Vulkan/IR/VulkanTypes.h"
 
-#include "llvm/ADT/StringExtras.h"  // IWYU pragma: keep
+#include "llvm/ADT/StringExtras.h" // IWYU pragma: keep
 
 // clang-format off: must be included after all LLVM/MLIR headers.
-#include "iree/compiler/Dialect/Vulkan/IR/VulkanEnums.cpp.inc"  // IWYU pragma: keep
+#include "iree/compiler/Dialect/Vulkan/IR/VulkanEnums.cpp.inc" // IWYU pragma: keep
 // clang-format on
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.h b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.h
index 8d8a862..2422a85 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.h
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/IR/VulkanTypes.h
@@ -14,7 +14,7 @@
 #include "mlir/IR/BuiltinTypes.h"
 
 // clang-format off: must be included after all LLVM/MLIR headers.
-#include "iree/compiler/Dialect/Vulkan/IR/VulkanEnums.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Dialect/Vulkan/IR/VulkanEnums.h.inc" // IWYU pragma: export
 // clang-format on
 
-#endif  // IREE_COMPILER_DIALECT_VULKAN_IR_VULKANTYPES_H_
+#endif // IREE_COMPILER_DIALECT_VULKAN_IR_VULKANTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.cpp b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.cpp
index d3da43f..c455621 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.cpp
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.cpp
@@ -26,18 +26,18 @@
                                             Extension::VK_KHR_spirv_1_4));
 
   switch (vkTargetEnv.getVersion()) {
-    case Version::V_1_0:
-      // Vulkan 1.0 only supports SPIR-V 1.0 by default.
-      return has14Ext ? spirv::Version::V_1_4 : spirv::Version::V_1_0;
-    case Version::V_1_1:
-      // Vulkan 1.1 supports up to SPIR-V 1.3 by default.
-      return has14Ext ? spirv::Version::V_1_4 : spirv::Version::V_1_3;
-    case Version::V_1_2:
-      // Vulkan 1.1 supports up to SPIR-V 1.5 by default.
-      return spirv::Version::V_1_5;
-    case Version::V_1_3:
-      // Vulkan 1.1 supports up to SPIR-V 1.6 by default.
-      return spirv::Version::V_1_6;
+  case Version::V_1_0:
+    // Vulkan 1.0 only supports SPIR-V 1.0 by default.
+    return has14Ext ? spirv::Version::V_1_4 : spirv::Version::V_1_0;
+  case Version::V_1_1:
+    // Vulkan 1.1 supports up to SPIR-V 1.3 by default.
+    return has14Ext ? spirv::Version::V_1_4 : spirv::Version::V_1_3;
+  case Version::V_1_2:
+    // Vulkan 1.2 supports up to SPIR-V 1.5 by default.
+    return spirv::Version::V_1_5;
+  case Version::V_1_3:
+    // Vulkan 1.3 supports up to SPIR-V 1.6 by default.
+    return spirv::Version::V_1_6;
   }
   return spirv::Version::V_1_0;
 }
@@ -50,34 +50,34 @@
 
   for (Extension ext : vkTargetEnv.getExtensions()) {
     switch (ext) {
-      case Extension::VK_KHR_16bit_storage:
-        extensions.push_back(spirv::Extension::SPV_KHR_16bit_storage);
-        break;
-      case Extension::VK_KHR_8bit_storage:
-        extensions.push_back(spirv::Extension::SPV_KHR_8bit_storage);
-        break;
-      case Extension::VK_KHR_shader_float16_int8:
-        // This extension allows using certain SPIR-V capabilities.
-        break;
-      case Extension::VK_KHR_shader_integer_dot_product:
-        extensions.push_back(spirv::Extension::SPV_KHR_integer_dot_product);
-        break;
-      case Extension::VK_KHR_spirv_1_4:
-        // This extension only affects SPIR-V version.
-        break;
-      case Extension::VK_KHR_storage_buffer_storage_class:
-        extensions.push_back(
-            spirv::Extension::SPV_KHR_storage_buffer_storage_class);
-        break;
-      case Extension::VK_KHR_variable_pointers:
-        extensions.push_back(spirv::Extension::SPV_KHR_variable_pointers);
-        break;
-      case Extension::VK_EXT_subgroup_size_control:
-        // This extension allows specifying min/max subgroup size.
-        break;
-      case Extension::VK_NV_cooperative_matrix:
-        extensions.push_back(spirv::Extension::SPV_NV_cooperative_matrix);
-        break;
+    case Extension::VK_KHR_16bit_storage:
+      extensions.push_back(spirv::Extension::SPV_KHR_16bit_storage);
+      break;
+    case Extension::VK_KHR_8bit_storage:
+      extensions.push_back(spirv::Extension::SPV_KHR_8bit_storage);
+      break;
+    case Extension::VK_KHR_shader_float16_int8:
+      // This extension allows using certain SPIR-V capabilities.
+      break;
+    case Extension::VK_KHR_shader_integer_dot_product:
+      extensions.push_back(spirv::Extension::SPV_KHR_integer_dot_product);
+      break;
+    case Extension::VK_KHR_spirv_1_4:
+      // This extension only affects SPIR-V version.
+      break;
+    case Extension::VK_KHR_storage_buffer_storage_class:
+      extensions.push_back(
+          spirv::Extension::SPV_KHR_storage_buffer_storage_class);
+      break;
+    case Extension::VK_KHR_variable_pointers:
+      extensions.push_back(spirv::Extension::SPV_KHR_variable_pointers);
+      break;
+    case Extension::VK_EXT_subgroup_size_control:
+      // This extension allows specifying min/max subgroup size.
+      break;
+    case Extension::VK_NV_cooperative_matrix:
+      extensions.push_back(spirv::Extension::SPV_NV_cooperative_matrix);
+      break;
     }
   }
 }
@@ -94,8 +94,8 @@
 
   auto vkCapabilities = vkTargetEnv.getCapabilitiesAttr();
 
-#define MAP_PRIMITIVE_TYPE(type)        \
-  if (vkCapabilities.getShader##type()) \
+#define MAP_PRIMITIVE_TYPE(type)                                               \
+  if (vkCapabilities.getShader##type())                                        \
   capabilities.push_back(spirv::Capability::type)
 
   MAP_PRIMITIVE_TYPE(Float64);
@@ -105,8 +105,8 @@
   MAP_PRIMITIVE_TYPE(Int8);
 #undef MAP_PRIMITIVE_TYPE
 
-#define MAP_8_16_BIT_STORAGE(vkFeature, spvCap) \
-  if (vkCapabilities.vkFeature())               \
+#define MAP_8_16_BIT_STORAGE(vkFeature, spvCap)                                \
+  if (vkCapabilities.vkFeature())                                              \
   capabilities.push_back(spirv::Capability::spvCap)
 
   MAP_8_16_BIT_STORAGE(getStorageBuffer16BitAccess, StorageBuffer16BitAccess);
@@ -120,9 +120,9 @@
 
   auto subgroupFeatures = vkCapabilities.getSubgroupFeatures().getValue();
 
-#define MAP_SUBGROUP_FEATURE(featureBit)                  \
-  if ((subgroupFeatures & SubgroupFeature::featureBit) == \
-      SubgroupFeature::featureBit)                        \
+#define MAP_SUBGROUP_FEATURE(featureBit)                                       \
+  if ((subgroupFeatures & SubgroupFeature::featureBit) ==                      \
+      SubgroupFeature::featureBit)                                             \
   capabilities.push_back(spirv::Capability::GroupNonUniform##featureBit)
 
   if ((subgroupFeatures & SubgroupFeature::Basic) == SubgroupFeature::Basic) {
@@ -161,8 +161,8 @@
 
 /// Gets the corresponding SPIR-V resource limits for the given Vulkan target
 /// environment.
-spirv::ResourceLimitsAttr convertResourceLimits(
-    Vulkan::TargetEnvAttr vkTargetEnv) {
+spirv::ResourceLimitsAttr
+convertResourceLimits(Vulkan::TargetEnvAttr vkTargetEnv) {
   MLIRContext *context = vkTargetEnv.getContext();
   Builder builder(context);
   auto vkCapabilities = vkTargetEnv.getCapabilitiesAttr();
@@ -188,7 +188,7 @@
       vkCapabilities.getMinSubgroupSize(), vkCapabilities.getMaxSubgroupSize(),
       ArrayAttr::get(context, spvAttrs));
 }
-}  // anonymous namespace
+} // anonymous namespace
 
 Vulkan::TargetEnvAttr getTargetEnvForTriple(MLIRContext *context,
                                             llvm::StringRef triple) {
@@ -213,7 +213,7 @@
       vkTargetEnv.getDeviceType(), vkTargetEnv.getDeviceID());
 }
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.h b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.h
index bf2270d..0500ab8 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.h
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetEnvironment.h
@@ -16,7 +16,7 @@
 namespace Vulkan {
 
 /// Returns the Vulkan target environment attribute for the given GPU triple.
-Vulkan::TargetEnvAttr getTargetEnvForTriple(MLIRContext* context,
+Vulkan::TargetEnvAttr getTargetEnvForTriple(MLIRContext *context,
                                             llvm::StringRef triple);
 
 /// Converts the given Vulkan target environment into the corresponding SPIR-V
@@ -34,9 +34,9 @@
 /// domains.
 spirv::TargetEnvAttr convertTargetEnv(Vulkan::TargetEnvAttr vkTargetEnv);
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VULKAN_UTILS_TARGETENVIRONMENT_H_
+#endif // IREE_COMPILER_DIALECT_VULKAN_UTILS_TARGETENVIRONMENT_H_
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.cpp b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.cpp
index 1e708e0..070ffbd 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.cpp
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.cpp
@@ -28,59 +28,59 @@
 /// Returns the GPU vendor for the given target `triple`.
 spirv::Vendor getVendor(const TargetTriple &triple) {
   switch (triple.getArch()) {
-    case TargetTripleArch::Unknown:
-      return spirv::Vendor::Unknown;
-    case TargetTripleArch::AMD_RDNAv1:
-    case TargetTripleArch::AMD_RDNAv2:
-    case TargetTripleArch::AMD_RDNAv3:
-      return spirv::Vendor::AMD;
-    case TargetTripleArch::ARM_Valhall:
-      return spirv::Vendor::ARM;
-    case TargetTripleArch::Apple_M1:
-      return spirv::Vendor::Apple;
-    case TargetTripleArch::Intel_Arc:
-      return spirv::Vendor::Intel;
-    case TargetTripleArch::NV_Turing:
-    case TargetTripleArch::NV_Ampere:
-    case TargetTripleArch::NV_Pascal:
-      return spirv::Vendor::NVIDIA;
-    case TargetTripleArch::QC_Adreno:
-      return spirv::Vendor::Qualcomm;
-    case TargetTripleArch::CPU:
-      switch (triple.getProduct()) {
-        case TargetTripleProduct::SwiftShader:
-          return spirv::Vendor::SwiftShader;
-        default:
-          return spirv::Vendor::Unknown;
-      }
+  case TargetTripleArch::Unknown:
+    return spirv::Vendor::Unknown;
+  case TargetTripleArch::AMD_RDNAv1:
+  case TargetTripleArch::AMD_RDNAv2:
+  case TargetTripleArch::AMD_RDNAv3:
+    return spirv::Vendor::AMD;
+  case TargetTripleArch::ARM_Valhall:
+    return spirv::Vendor::ARM;
+  case TargetTripleArch::Apple_M1:
+    return spirv::Vendor::Apple;
+  case TargetTripleArch::Intel_Arc:
+    return spirv::Vendor::Intel;
+  case TargetTripleArch::NV_Turing:
+  case TargetTripleArch::NV_Ampere:
+  case TargetTripleArch::NV_Pascal:
+    return spirv::Vendor::NVIDIA;
+  case TargetTripleArch::QC_Adreno:
+    return spirv::Vendor::Qualcomm;
+  case TargetTripleArch::CPU:
+    switch (triple.getProduct()) {
+    case TargetTripleProduct::SwiftShader:
+      return spirv::Vendor::SwiftShader;
     default:
-      assert(false && "unhandled vendor");
       return spirv::Vendor::Unknown;
+    }
+  default:
+    assert(false && "unhandled vendor");
+    return spirv::Vendor::Unknown;
   }
 }
 
 /// Returns the GPU device type for the given target `triple`.
 spirv::DeviceType getDeviceType(const TargetTriple &triple) {
   switch (triple.getArch()) {
-    case TargetTripleArch::Unknown:
-      return spirv::DeviceType::Unknown;
-    case TargetTripleArch::CPU:
-      return spirv::DeviceType::CPU;
-    case TargetTripleArch::AMD_RDNAv1:
-    case TargetTripleArch::AMD_RDNAv2:
-    case TargetTripleArch::AMD_RDNAv3:
-    case TargetTripleArch::NV_Turing:
-    case TargetTripleArch::NV_Ampere:
-    case TargetTripleArch::NV_Pascal:
-    case TargetTripleArch::Intel_Arc:
-      return spirv::DeviceType::DiscreteGPU;
-    case TargetTripleArch::Apple_M1:
-    case TargetTripleArch::ARM_Valhall:
-    case TargetTripleArch::QC_Adreno:
-      return spirv::DeviceType::IntegratedGPU;
-    default:
-      assert(false && "unhandled device type");
-      return spirv::DeviceType::Unknown;
+  case TargetTripleArch::Unknown:
+    return spirv::DeviceType::Unknown;
+  case TargetTripleArch::CPU:
+    return spirv::DeviceType::CPU;
+  case TargetTripleArch::AMD_RDNAv1:
+  case TargetTripleArch::AMD_RDNAv2:
+  case TargetTripleArch::AMD_RDNAv3:
+  case TargetTripleArch::NV_Turing:
+  case TargetTripleArch::NV_Ampere:
+  case TargetTripleArch::NV_Pascal:
+  case TargetTripleArch::Intel_Arc:
+    return spirv::DeviceType::DiscreteGPU;
+  case TargetTripleArch::Apple_M1:
+  case TargetTripleArch::ARM_Valhall:
+  case TargetTripleArch::QC_Adreno:
+    return spirv::DeviceType::IntegratedGPU;
+  default:
+    assert(false && "unhandled device type");
+    return spirv::DeviceType::Unknown;
   }
 }
 
@@ -119,48 +119,48 @@
                    llvm::SmallVectorImpl<Extension> &extensions) {
   // Mobile GPUs need to take Android version into consideration.
   switch (triple.getArch()) {
-    case TargetTripleArch::Apple_M1: {
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=14673
-      const Extension list[] = {
-          Extension::VK_KHR_16bit_storage,
-          Extension::VK_KHR_8bit_storage,
-          Extension::VK_KHR_shader_float16_int8,
-          Extension::VK_KHR_storage_buffer_storage_class,
-          Extension::VK_KHR_variable_pointers,
-      };
-      return append_range(extensions, list);
+  case TargetTripleArch::Apple_M1: {
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=14673
+    const Extension list[] = {
+        Extension::VK_KHR_16bit_storage,
+        Extension::VK_KHR_8bit_storage,
+        Extension::VK_KHR_shader_float16_int8,
+        Extension::VK_KHR_storage_buffer_storage_class,
+        Extension::VK_KHR_variable_pointers,
+    };
+    return append_range(extensions, list);
+  }
+  case TargetTripleArch::ARM_Valhall: {
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10312
+    const Extension list[] = {
+        Extension::VK_KHR_16bit_storage,
+        Extension::VK_KHR_8bit_storage,
+        Extension::VK_KHR_shader_float16_int8,
+        Extension::VK_KHR_shader_integer_dot_product,
+        Extension::VK_KHR_spirv_1_4,
+        Extension::VK_KHR_storage_buffer_storage_class,
+        Extension::VK_KHR_variable_pointers,
+    };
+    return append_range(extensions, list);
+  }
+  case TargetTripleArch::QC_Adreno: {
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10983 (11)
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=16312 (12)
+    const Extension list[] = {
+        Extension::VK_KHR_16bit_storage,
+        Extension::VK_KHR_shader_float16_int8,
+        Extension::VK_KHR_spirv_1_4,
+        Extension::VK_KHR_storage_buffer_storage_class,
+        Extension::VK_KHR_variable_pointers,
+    };
+    append_range(extensions, list);
+    if (triple.getOS() == TargetTripleOS::Android31) {
+      extensions.push_back(Extension::VK_KHR_8bit_storage);
     }
-    case TargetTripleArch::ARM_Valhall: {
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10312
-      const Extension list[] = {
-          Extension::VK_KHR_16bit_storage,
-          Extension::VK_KHR_8bit_storage,
-          Extension::VK_KHR_shader_float16_int8,
-          Extension::VK_KHR_shader_integer_dot_product,
-          Extension::VK_KHR_spirv_1_4,
-          Extension::VK_KHR_storage_buffer_storage_class,
-          Extension::VK_KHR_variable_pointers,
-      };
-      return append_range(extensions, list);
-    }
-    case TargetTripleArch::QC_Adreno: {
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10983 (11)
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=16312 (12)
-      const Extension list[] = {
-          Extension::VK_KHR_16bit_storage,
-          Extension::VK_KHR_shader_float16_int8,
-          Extension::VK_KHR_spirv_1_4,
-          Extension::VK_KHR_storage_buffer_storage_class,
-          Extension::VK_KHR_variable_pointers,
-      };
-      append_range(extensions, list);
-      if (triple.getOS() == TargetTripleOS::Android31) {
-        extensions.push_back(Extension::VK_KHR_8bit_storage);
-      }
-      return;
-    }
-    default:
-      break;
+    return;
+  }
+  default:
+    break;
   }
 
   // SwiftShader is very limited regarding functionalities.
@@ -234,245 +234,243 @@
   Builder builder(context);
 
   switch (triple.getArch()) {
-    case TargetTripleArch::AMD_RDNAv3: {
-      auto i8t = builder.getIntegerType(8);
-      auto i32t = builder.getIntegerType(32);
-      auto f16t = builder.getF16Type();
-      auto f32t = builder.getF32Type();
-      auto scope = ScopeNVAttr::get(context, ScopeNV::Subgroup);
+  case TargetTripleArch::AMD_RDNAv3: {
+    auto i8t = builder.getIntegerType(8);
+    auto i32t = builder.getIntegerType(32);
+    auto f16t = builder.getF16Type();
+    auto f32t = builder.getF32Type();
+    auto scope = ScopeNVAttr::get(context, ScopeNV::Subgroup);
 
-      coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
-          context,
-          /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/i8t,
-          /*bType=*/i8t, /*cType=*/i32t, /*resultType=*/i32t, scope));
-      coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
-          context,
-          /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
-          /*bType=*/f16t, /*cType=*/f16t, /*resultType=*/f16t, scope));
-      coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
-          context,
-          /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
-          /*bType=*/f16t, /*cType=*/f32t, /*resultType=*/f32t, scope));
+    coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
+        context,
+        /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/i8t,
+        /*bType=*/i8t, /*cType=*/i32t, /*resultType=*/i32t, scope));
+    coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
+        context,
+        /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
+        /*bType=*/f16t, /*cType=*/f16t, /*resultType=*/f16t, scope));
+    coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
+        context,
+        /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
+        /*bType=*/f16t, /*cType=*/f32t, /*resultType=*/f32t, scope));
+  }
+    LLVM_FALLTHROUGH;
+  case TargetTripleArch::AMD_RDNAv1:
+  case TargetTripleArch::AMD_RDNAv2:
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10906
+    maxComputeSharedMemorySize = 65536;
+    maxComputeWorkGroupInvocations = 1024;
+    maxComputeWorkGroupSize = {1024, 1024, 1024};
+
+    subgroupSize = 64, minSubgroupSize = 32, maxSubgroupSize = 64;
+    subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
+                       SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
+                       SubgroupFeature::Shuffle |
+                       SubgroupFeature::ShuffleRelative |
+                       SubgroupFeature::Clustered | SubgroupFeature::Quad;
+
+    shaderFloat16 = shaderFloat64 = true;
+    shaderInt8 = shaderInt16 = shaderInt64 = true;
+
+    shaderIntegerDotProduct = true;
+
+    storageBuffer16BitAccess = storagePushConstant16 = true;
+    uniformAndStorageBuffer16BitAccess = true;
+    storageBuffer8BitAccess = true, storagePushConstant8 = true;
+    uniformAndStorageBuffer8BitAccess = true;
+
+    variablePointers = variablePointersStorageBuffer = true;
+    break;
+  case TargetTripleArch::Apple_M1:
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=14673
+    maxComputeSharedMemorySize = 32768;
+    maxComputeWorkGroupInvocations = 1024;
+    maxComputeWorkGroupSize = {1024, 1024, 1024};
+
+    subgroupSize = 32;
+    subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
+                       SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
+                       SubgroupFeature::Shuffle |
+                       SubgroupFeature::ShuffleRelative | SubgroupFeature::Quad;
+
+    shaderFloat16 = true;
+    shaderFloat64 = false;
+    shaderInt8 = shaderInt16 = shaderInt64 = true;
+
+    storageBuffer16BitAccess = storagePushConstant16 = true;
+    uniformAndStorageBuffer16BitAccess = true;
+    storageBuffer8BitAccess = true, storagePushConstant8 = true;
+    uniformAndStorageBuffer8BitAccess = true;
+
+    variablePointers = variablePointersStorageBuffer = true;
+    break;
+  case TargetTripleArch::ARM_Valhall:
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10312 (11)
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=15142 (12)
+    maxComputeSharedMemorySize = 32768;
+    maxComputeWorkGroupInvocations = 512;
+    maxComputeWorkGroupSize = {512, 512, 512};
+
+    subgroupSize = 16;
+    subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
+                       SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
+                       SubgroupFeature::Clustered | SubgroupFeature::Quad;
+
+    if (triple.getOS() == TargetTripleOS::Android31) {
+      subgroupFeatures = subgroupFeatures | SubgroupFeature::Shuffle |
+                         SubgroupFeature::ShuffleRelative;
     }
-      LLVM_FALLTHROUGH;
-    case TargetTripleArch::AMD_RDNAv1:
-    case TargetTripleArch::AMD_RDNAv2:
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10906
-      maxComputeSharedMemorySize = 65536;
-      maxComputeWorkGroupInvocations = 1024;
-      maxComputeWorkGroupSize = {1024, 1024, 1024};
 
-      subgroupSize = 64, minSubgroupSize = 32, maxSubgroupSize = 64;
+    shaderFloat16 = shaderInt8 = shaderInt16 = true;
+
+    shaderIntegerDotProduct = true;
+
+    storageBuffer16BitAccess = storagePushConstant16 = true;
+    uniformAndStorageBuffer16BitAccess = true;
+    storageBuffer8BitAccess = true, storagePushConstant8 = true;
+    uniformAndStorageBuffer8BitAccess = true;
+
+    variablePointers = variablePointersStorageBuffer = true;
+    break;
+  case TargetTripleArch::CPU:
+    if (triple.getProduct() == TargetTripleProduct::SwiftShader) {
+      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=11023
+      maxComputeSharedMemorySize = 16384;
+
+      subgroupSize = 4;
       subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
                          SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
                          SubgroupFeature::Shuffle |
-                         SubgroupFeature::ShuffleRelative |
-                         SubgroupFeature::Clustered | SubgroupFeature::Quad;
+                         SubgroupFeature::ShuffleRelative;
+    }
+    break;
+  case TargetTripleArch::NV_Turing:
+  case TargetTripleArch::NV_Ampere: {
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=11252
+    maxComputeSharedMemorySize = 49152;
+    maxComputeWorkGroupInvocations = 1024;
+    maxComputeWorkGroupSize = {1024, 1024, 64};
 
-      shaderFloat16 = shaderFloat64 = true;
-      shaderInt8 = shaderInt16 = shaderInt64 = true;
+    subgroupSize = 32, minSubgroupSize = 32, maxSubgroupSize = 32;
+    subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
+                       SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
+                       SubgroupFeature::Shuffle |
+                       SubgroupFeature::ShuffleRelative |
+                       SubgroupFeature::Clustered | SubgroupFeature::Quad;
 
-      shaderIntegerDotProduct = true;
+    shaderFloat16 = shaderFloat64 = true;
+    shaderInt8 = shaderInt16 = shaderInt64 = true;
 
-      storageBuffer16BitAccess = storagePushConstant16 = true;
-      uniformAndStorageBuffer16BitAccess = true;
-      storageBuffer8BitAccess = true, storagePushConstant8 = true;
-      uniformAndStorageBuffer8BitAccess = true;
+    shaderIntegerDotProduct = true;
 
-      variablePointers = variablePointersStorageBuffer = true;
-      break;
-    case TargetTripleArch::Apple_M1:
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=14673
-      maxComputeSharedMemorySize = 32768;
-      maxComputeWorkGroupInvocations = 1024;
-      maxComputeWorkGroupSize = {1024, 1024, 1024};
+    storageBuffer16BitAccess = storagePushConstant16 = true;
+    uniformAndStorageBuffer16BitAccess = true;
+    storageBuffer8BitAccess = true, storagePushConstant8 = true;
+    uniformAndStorageBuffer8BitAccess = true;
 
-      subgroupSize = 32;
-      subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
-                         SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
-                         SubgroupFeature::Shuffle |
-                         SubgroupFeature::ShuffleRelative |
-                         SubgroupFeature::Quad;
+    variablePointers = variablePointersStorageBuffer = true;
 
-      shaderFloat16 = true;
-      shaderFloat64 = false;
-      shaderInt8 = shaderInt16 = shaderInt64 = true;
+    auto i8t = builder.getIntegerType(8);
+    auto i32t = builder.getIntegerType(32);
+    auto f16t = builder.getF16Type();
+    auto f32t = builder.getF32Type();
+    auto scope = ScopeNVAttr::get(context, ScopeNV::Subgroup);
 
-      storageBuffer16BitAccess = storagePushConstant16 = true;
-      uniformAndStorageBuffer16BitAccess = true;
-      storageBuffer8BitAccess = true, storagePushConstant8 = true;
-      uniformAndStorageBuffer8BitAccess = true;
+    coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
+        context,
+        /*mSize=*/8, /*nSize=*/8, /*kSize=*/32, /*aType=*/i8t,
+        /*bType=*/i8t, /*cType=*/i32t, /*resultType=*/i32t, scope));
+    coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
+        context,
+        /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
+        /*bType=*/f16t, /*cType=*/f16t, /*resultType=*/f16t, scope));
+    coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
+        context,
+        /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
+        /*bType=*/f16t, /*cType=*/f32t, /*resultType=*/f32t, scope));
+  } break;
+  case TargetTripleArch::NV_Pascal:
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=17937
+    maxComputeSharedMemorySize = 49152;
+    maxComputeWorkGroupInvocations = 1536;
+    maxComputeWorkGroupSize = {1536, 1024, 64};
 
-      variablePointers = variablePointersStorageBuffer = true;
-      break;
-    case TargetTripleArch::ARM_Valhall:
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10312 (11)
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=15142 (12)
-      maxComputeSharedMemorySize = 32768;
-      maxComputeWorkGroupInvocations = 512;
-      maxComputeWorkGroupSize = {512, 512, 512};
+    subgroupSize = 32, minSubgroupSize = 32, maxSubgroupSize = 32;
+    subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
+                       SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
+                       SubgroupFeature::Shuffle |
+                       SubgroupFeature::ShuffleRelative |
+                       SubgroupFeature::Clustered | SubgroupFeature::Quad;
 
-      subgroupSize = 16;
-      subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
-                         SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
-                         SubgroupFeature::Clustered | SubgroupFeature::Quad;
+    shaderFloat16 = shaderFloat64 = true;
+    shaderInt8 = shaderInt16 = shaderInt64 = true;
 
-      if (triple.getOS() == TargetTripleOS::Android31) {
-        subgroupFeatures = subgroupFeatures | SubgroupFeature::Shuffle |
-                           SubgroupFeature::ShuffleRelative;
-      }
+    shaderIntegerDotProduct = true;
 
-      shaderFloat16 = shaderInt8 = shaderInt16 = true;
+    storageBuffer16BitAccess = storagePushConstant16 = true;
+    uniformAndStorageBuffer16BitAccess = true;
+    storageBuffer8BitAccess = true, storagePushConstant8 = true;
+    uniformAndStorageBuffer8BitAccess = true;
 
-      shaderIntegerDotProduct = true;
+    variablePointers = variablePointersStorageBuffer = true;
+    break;
+  case TargetTripleArch::QC_Adreno:
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10983 (11)
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=16312 (12)
+    maxComputeSharedMemorySize = 32768;
+    maxComputeWorkGroupInvocations = 1024;
+    maxComputeWorkGroupSize = {1024, 1024, 64};
 
-      storageBuffer16BitAccess = storagePushConstant16 = true;
-      uniformAndStorageBuffer16BitAccess = true;
-      storageBuffer8BitAccess = true, storagePushConstant8 = true;
-      uniformAndStorageBuffer8BitAccess = true;
+    subgroupSize = 64;
+    subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
+                       SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
+                       SubgroupFeature::Shuffle |
+                       SubgroupFeature::ShuffleRelative | SubgroupFeature::Quad;
 
-      variablePointers = variablePointersStorageBuffer = true;
-      break;
-    case TargetTripleArch::CPU:
-      if (triple.getProduct() == TargetTripleProduct::SwiftShader) {
-        // Example: https://vulkan.gpuinfo.org/displayreport.php?id=11023
-        maxComputeSharedMemorySize = 16384;
+    shaderFloat16 = shaderInt8 = shaderInt16 = true;
 
-        subgroupSize = 4;
-        subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
-                           SubgroupFeature::Arithmetic |
-                           SubgroupFeature::Ballot | SubgroupFeature::Shuffle |
-                           SubgroupFeature::ShuffleRelative;
-      }
-      break;
-    case TargetTripleArch::NV_Turing:
-    case TargetTripleArch::NV_Ampere: {
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=11252
-      maxComputeSharedMemorySize = 49152;
-      maxComputeWorkGroupInvocations = 1024;
-      maxComputeWorkGroupSize = {1024, 1024, 64};
+    storageBuffer16BitAccess = true;
+    if (triple.getOS() == TargetTripleOS::Android31) {
+      storageBuffer8BitAccess = true;
+    }
 
-      subgroupSize = 32, minSubgroupSize = 32, maxSubgroupSize = 32;
-      subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
-                         SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
-                         SubgroupFeature::Shuffle |
-                         SubgroupFeature::ShuffleRelative |
-                         SubgroupFeature::Clustered | SubgroupFeature::Quad;
+    variablePointers = variablePointersStorageBuffer = true;
+    break;
+  case TargetTripleArch::Intel_Arc:
+    // Example: https://vulkan.gpuinfo.org/displayreport.php?id=19818
+    maxComputeSharedMemorySize = 32768;
+    maxComputeWorkGroupInvocations = 1024;
+    maxComputeWorkGroupSize = {1024, 1024, 64};
 
-      shaderFloat16 = shaderFloat64 = true;
-      shaderInt8 = shaderInt16 = shaderInt64 = true;
+    subgroupSize = 32, minSubgroupSize = 8, maxSubgroupSize = 32;
+    subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
+                       SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
+                       SubgroupFeature::Shuffle |
+                       SubgroupFeature::ShuffleRelative |
+                       SubgroupFeature::Clustered | SubgroupFeature::Quad;
 
-      shaderIntegerDotProduct = true;
+    shaderFloat16 = true;
+    shaderFloat64 = false;
+    shaderInt8 = shaderInt16 = true;
+    shaderInt64 = false;
 
-      storageBuffer16BitAccess = storagePushConstant16 = true;
-      uniformAndStorageBuffer16BitAccess = true;
-      storageBuffer8BitAccess = true, storagePushConstant8 = true;
-      uniformAndStorageBuffer8BitAccess = true;
+    shaderIntegerDotProduct = true;
 
-      variablePointers = variablePointersStorageBuffer = true;
+    storageBuffer16BitAccess = storagePushConstant16 = true;
+    uniformAndStorageBuffer16BitAccess = true;
+    storageBuffer8BitAccess = true, storagePushConstant8 = true;
+    uniformAndStorageBuffer8BitAccess = true;
 
-      auto i8t = builder.getIntegerType(8);
-      auto i32t = builder.getIntegerType(32);
-      auto f16t = builder.getF16Type();
-      auto f32t = builder.getF32Type();
-      auto scope = ScopeNVAttr::get(context, ScopeNV::Subgroup);
-
-      coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
-          context,
-          /*mSize=*/8, /*nSize=*/8, /*kSize=*/32, /*aType=*/i8t,
-          /*bType=*/i8t, /*cType=*/i32t, /*resultType=*/i32t, scope));
-      coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
-          context,
-          /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
-          /*bType=*/f16t, /*cType=*/f16t, /*resultType=*/f16t, scope));
-      coopmatCases.push_back(CooperativeMatrixPropertiesNVAttr::get(
-          context,
-          /*mSize=*/16, /*nSize=*/16, /*kSize=*/16, /*aType=*/f16t,
-          /*bType=*/f16t, /*cType=*/f32t, /*resultType=*/f32t, scope));
-    } break;
-    case TargetTripleArch::NV_Pascal:
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=17937
-      maxComputeSharedMemorySize = 49152;
-      maxComputeWorkGroupInvocations = 1536;
-      maxComputeWorkGroupSize = {1536, 1024, 64};
-
-      subgroupSize = 32, minSubgroupSize = 32, maxSubgroupSize = 32;
-      subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
-                         SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
-                         SubgroupFeature::Shuffle |
-                         SubgroupFeature::ShuffleRelative |
-                         SubgroupFeature::Clustered | SubgroupFeature::Quad;
-
-      shaderFloat16 = shaderFloat64 = true;
-      shaderInt8 = shaderInt16 = shaderInt64 = true;
-
-      shaderIntegerDotProduct = true;
-
-      storageBuffer16BitAccess = storagePushConstant16 = true;
-      uniformAndStorageBuffer16BitAccess = true;
-      storageBuffer8BitAccess = true, storagePushConstant8 = true;
-      uniformAndStorageBuffer8BitAccess = true;
-
-      variablePointers = variablePointersStorageBuffer = true;
-      break;
-    case TargetTripleArch::QC_Adreno:
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=10983 (11)
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=16312 (12)
-      maxComputeSharedMemorySize = 32768;
-      maxComputeWorkGroupInvocations = 1024;
-      maxComputeWorkGroupSize = {1024, 1024, 64};
-
-      subgroupSize = 64;
-      subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
-                         SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
-                         SubgroupFeature::Shuffle |
-                         SubgroupFeature::ShuffleRelative |
-                         SubgroupFeature::Quad;
-
-      shaderFloat16 = shaderInt8 = shaderInt16 = true;
-
-      storageBuffer16BitAccess = true;
-      if (triple.getOS() == TargetTripleOS::Android31) {
-        storageBuffer8BitAccess = true;
-      }
-
-      variablePointers = variablePointersStorageBuffer = true;
-      break;
-    case TargetTripleArch::Intel_Arc:
-      // Example: https://vulkan.gpuinfo.org/displayreport.php?id=19818
-      maxComputeSharedMemorySize = 32768;
-      maxComputeWorkGroupInvocations = 1024;
-      maxComputeWorkGroupSize = {1024, 1024, 64};
-
-      subgroupSize = 32, minSubgroupSize = 8, maxSubgroupSize = 32;
-      subgroupFeatures = SubgroupFeature::Basic | SubgroupFeature::Vote |
-                         SubgroupFeature::Arithmetic | SubgroupFeature::Ballot |
-                         SubgroupFeature::Shuffle |
-                         SubgroupFeature::ShuffleRelative |
-                         SubgroupFeature::Clustered | SubgroupFeature::Quad;
-
-      shaderFloat16 = true;
-      shaderFloat64 = false;
-      shaderInt8 = shaderInt16 = true;
-      shaderInt64 = false;
-
-      shaderIntegerDotProduct = true;
-
-      storageBuffer16BitAccess = storagePushConstant16 = true;
-      uniformAndStorageBuffer16BitAccess = true;
-      storageBuffer8BitAccess = true, storagePushConstant8 = true;
-      uniformAndStorageBuffer8BitAccess = true;
-
-      variablePointers = variablePointersStorageBuffer = true;
-      break;
-    case TargetTripleArch::Unknown:
-      // Use the largest subgroup size we can find across various vendors.
-      subgroupSize = 64;
-      // The following capabilities have 90%+ device coverage (Vulkan 1.1+)
-      // from https://vulkan.gpuinfo.org/listfeaturesextensions.php.
-      variablePointers = variablePointersStorageBuffer = false;
-      // Use Vulkan default for others.
-      break;
+    variablePointers = variablePointersStorageBuffer = true;
+    break;
+  case TargetTripleArch::Unknown:
+    // Use the largest subgroup size we can find across various vendors.
+    subgroupSize = 64;
+    // The following capabilities have 90%+ device coverage (Vulkan 1.1+)
+    // from https://vulkan.gpuinfo.org/listfeaturesextensions.php.
+    variablePointers = variablePointersStorageBuffer = false;
+    // Use Vulkan default for others.
+    break;
   }
 
   auto getBoolAttr = [context](bool value) -> UnitAttr {
@@ -495,7 +493,7 @@
       getBoolAttr(variablePointersStorageBuffer), getBoolAttr(variablePointers),
       builder.getArrayAttr(coopmatCases));
 }
-}  // namespace
+} // namespace
 
 TargetTriple TargetTriple::get(const char *triple) {
   llvm::SmallVector<llvm::StringRef, 3> fragments;
@@ -535,7 +533,7 @@
                             getCapabilities(*this, context));
 }
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.h b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.h
index 4171a86..d6e699f 100644
--- a/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.h
+++ b/compiler/src/iree/compiler/Dialect/Vulkan/Utils/TargetTriple.h
@@ -45,7 +45,7 @@
 /// We need a field for product to differentiate the cases where the
 /// architecture is unknown or ambiguous.
 class TargetTriple {
- public:
+public:
   static TargetTriple get(const char *triple);
 
   TargetTriple(TargetTripleArch, TargetTripleProduct, TargetTripleOS);
@@ -59,15 +59,15 @@
 
   TargetEnvAttr getTargetEnv(MLIRContext *context) const;
 
- private:
+private:
   TargetTripleArch arch;
   TargetTripleProduct product;
   TargetTripleOS os;
 };
 
-}  // namespace Vulkan
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Vulkan
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_DIALECT_VULKAN_UTILS_TARGETTRIPLE_H_
+#endif // IREE_COMPILER_DIALECT_VULKAN_UTILS_TARGETTRIPLE_H_
diff --git a/compiler/src/iree/compiler/InputConversion/Common/AutoInputConversionPipeline.cpp b/compiler/src/iree/compiler/InputConversion/Common/AutoInputConversionPipeline.cpp
index be7a9b7..4f0766f 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/AutoInputConversionPipeline.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/AutoInputConversionPipeline.cpp
@@ -17,24 +17,24 @@
 #ifdef IREE_HAVE_STABLEHLO_INPUT
 #include "iree/compiler/InputConversion/StableHLO/Passes.h"
 #include "stablehlo/dialect/StablehloOps.h"
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TOSA_INPUT
 #include "iree/compiler/InputConversion/TOSA/Passes.h"
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
 #include "iree/compiler/InputConversion/TMTensor/Passes.h"
 #include "torch-mlir-dialects/Dialect/TMTensor/IR/TMTensorDialect.h"
-#endif  // IREE_HAVE_TORCH_INPUT
+#endif // IREE_HAVE_TORCH_INPUT
 
 namespace mlir::iree_compiler {
 namespace {
 struct AutoInputConversionPipelinePass final
     : AutoInputConversionPipelineBase<AutoInputConversionPipelinePass> {
   AutoInputConversionPipelinePass(
-      const AutoInputConversionPipelineOptions& inputOptions)
+      const AutoInputConversionPipelineOptions &inputOptions)
       : options(inputOptions) {}
   void runOnOperation() override;
-  void getDependentDialects(DialectRegistry& registry) const override;
+  void getDependentDialects(DialectRegistry &registry) const override;
 
   AutoInputConversionPipelineOptions options;
 };
@@ -53,7 +53,7 @@
   bool hasTmTensor = false;
 };
 
-static void populateHloFeatures(Operation* op, InputFeatures& features) {
+static void populateHloFeatures(Operation *op, InputFeatures &features) {
   if (features.hasTuples) {
     return;
   }
@@ -89,11 +89,11 @@
   }
 }
 
-static void populateFeatures(Operation* op, const Dialect* stablehloDialect,
-                             const Dialect* tmTensorDialect,
-                             const Dialect* tosaDialect,
-                             InputFeatures& features) {
-  Dialect* d = op->getDialect();
+static void populateFeatures(Operation *op, const Dialect *stablehloDialect,
+                             const Dialect *tmTensorDialect,
+                             const Dialect *tosaDialect,
+                             InputFeatures &features) {
+  Dialect *d = op->getDialect();
   if (d == stablehloDialect) {
     features.hasStableHLO = true;
     return populateHloFeatures(op, features);
@@ -110,17 +110,17 @@
 
 void AutoInputConversionPipelinePass::runOnOperation() {
   ModuleOp module = getOperation();
-  MLIRContext* ctxt = &getContext();
+  MLIRContext *ctxt = &getContext();
 
   InputFeatures features;
-  const Dialect* stablehloDialect = ctxt->getLoadedDialect("stablehlo");
-  const Dialect* tosaDialect = ctxt->getLoadedDialect("tosa");
-  const Dialect* tmTensorDialect = ctxt->getLoadedDialect("tm_tensor");
+  const Dialect *stablehloDialect = ctxt->getLoadedDialect("stablehlo");
+  const Dialect *tosaDialect = ctxt->getLoadedDialect("tosa");
+  const Dialect *tmTensorDialect = ctxt->getLoadedDialect("tm_tensor");
   if (!stablehloDialect && !tosaDialect && !tmTensorDialect) {
     return;
   }
 
-  auto res = module.walk([&](Operation* op) {
+  auto res = module.walk([&](Operation *op) {
     populateFeatures(op, stablehloDialect, tmTensorDialect, tosaDialect,
                      features);
     if (features.hasStableHLO && features.hasTOSA) {
@@ -158,18 +158,18 @@
       stablehlo::buildStableHLOInputConversionPassPipeline(pm, options);
     }
   }
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TOSA_INPUT
   if (features.hasTOSA) {
     buildTOSAInputConversionPassPipeline(pm);
   }
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
   if (features.hasTmTensor) {
     pm.addNestedPass<func::FuncOp>(
         TMTensor::createConvertTMTensorToLinalgExtPass());
   }
-#endif  // IREE_HAVE_TORCH_INPUT
+#endif // IREE_HAVE_TORCH_INPUT
 
   if (failed(runPipeline(pm, module))) {
     signalPassFailure();
@@ -177,7 +177,7 @@
 }
 
 void AutoInputConversionPipelinePass::getDependentDialects(
-    DialectRegistry& registry) const {
+    DialectRegistry &registry) const {
   // Register dialects from all possible pipelines, as we do not statically know
   // which pipeline will be selected, while dialect registration happens before
   // we run any detection on the input.
@@ -185,7 +185,7 @@
   // TODO(kuhar): Find a better registration mechanism so that we do not have to
   // build pipelines just to query dialects and discard them immediately after.
   auto appendPipelineDialects =
-      [&registry](function_ref<void(OpPassManager&)> buildFn) {
+      [&registry](function_ref<void(OpPassManager &)> buildFn) {
         OpPassManager pm;
         buildFn(pm);
         pm.getDependentDialects(registry);
@@ -193,8 +193,8 @@
 
 #ifdef IREE_HAVE_STABLEHLO_INPUT
   auto appendStablehloPipelineDialects =
-      [&registry](function_ref<void(OpPassManager&,
-                                    const stablehlo::StableHloOptions& options)>
+      [&registry](function_ref<void(OpPassManager &,
+                                    const stablehlo::StableHloOptions &options)>
                       buildFn) {
         const stablehlo::StableHloOptions options;
         OpPassManager pm;
@@ -206,20 +206,20 @@
       stablehlo::buildStableHLOInputConversionPassPipeline);
   appendStablehloPipelineDialects(
       stablehlo::buildStableHLOXLAInputConversionPassPipeline);
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 
 #ifdef IREE_HAVE_TOSA_INPUT
   appendPipelineDialects(buildTOSAInputConversionPassPipeline);
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 
 #ifdef IREE_HAVE_TORCH_INPUT
-  appendPipelineDialects([](OpPassManager& pm) {
+  appendPipelineDialects([](OpPassManager &pm) {
     pm.addNestedPass<func::FuncOp>(
         TMTensor::createConvertTMTensorToLinalgExtPass());
   });
-#endif  // IREE_HAVE_TORCH_INPUT
+#endif // IREE_HAVE_TORCH_INPUT
 }
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>>
 createAutoInputConversionPipelinePass() {
@@ -228,8 +228,8 @@
 }
 
 std::unique_ptr<OperationPass<ModuleOp>> createAutoInputConversionPipelinePass(
-    const AutoInputConversionPipelineOptions& options) {
+    const AutoInputConversionPipelineOptions &options) {
   return std::make_unique<AutoInputConversionPipelinePass>(options);
 }
 
-}  // namespace mlir::iree_compiler
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
index 5c8c5e5..51d57ca 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
@@ -49,22 +49,22 @@
 };
 
 class IREETypeConverter : public TypeConverter {
- public:
+public:
   IREETypeConverter();
 };
 
 // Generic 1:1 conversion pattern which effectively just renames an op.
 // It does not support regions or ops with successors.
 class OneToOneConverionPattern : public ConversionPattern {
- public:
+public:
   OneToOneConverionPattern(TypeConverter &converter, StringRef srcName,
                            StringRef targetName, MLIRContext *context,
                            PatternBenefit benefit)
       : ConversionPattern(converter, srcName, benefit, context),
         targetName(targetName) {}
-  LogicalResult matchAndRewrite(
-      Operation *srcOp, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *srcOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Type> resultTypes;
     if (failed(typeConverter->convertTypes(srcOp->getResultTypes(),
                                            resultTypes))) {
@@ -79,7 +79,7 @@
     return success();
   }
 
- private:
+private:
   StringRef targetName;
 };
 
@@ -87,12 +87,13 @@
     : public OpConversionPattern<IREE::Input::BufferViewToTensorOp> {
   using OpConversionPattern<
       IREE::Input::BufferViewToTensorOp>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Input::BufferViewToTensorOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Input::BufferViewToTensorOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     TensorType resultType = llvm::dyn_cast_if_present<TensorType>(
         typeConverter->convertType(srcOp.getTarget().getType()));
-    if (!resultType) return failure();
+    if (!resultType)
+      return failure();
     if (adaptor.getTargetDims().empty() && !resultType.hasStaticShape()) {
       // For the input dialect, we allow ops that don't have their dims
       // specified and we reify them here with the specific builder that does
@@ -115,12 +116,13 @@
     : public OpConversionPattern<IREE::Input::TensorToBufferViewOp> {
   using OpConversionPattern<
       IREE::Input::TensorToBufferViewOp>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Input::TensorToBufferViewOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Input::TensorToBufferViewOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type resultType = typeConverter->convertType(srcOp.getTarget().getType());
     auto sourceType = llvm::dyn_cast<TensorType>(adaptor.getSource().getType());
-    if (!resultType || !sourceType) return failure();
+    if (!resultType || !sourceType)
+      return failure();
     if (adaptor.getSourceDims().empty() && !sourceType.hasStaticShape()) {
       // For the input dialect, we allow ops that don't have their dims
       // specified and we reify them here with the specific builder that does
@@ -142,9 +144,9 @@
 
 class BuiltinFuncOpPattern : public OpConversionPattern<func::FuncOp> {
   using OpConversionPattern<func::FuncOp>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      func::FuncOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(func::FuncOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     FunctionType srcFuncType = srcOp.getFunctionType();
     TypeConverter::SignatureConversion signatureConversion(
         srcOp.getNumArguments());
@@ -201,11 +203,12 @@
 
 class GlobalOpPattern : public OpConversionPattern<IREE::Input::GlobalOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Input::GlobalOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Input::GlobalOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type newType = typeConverter->convertType(srcOp.getType());
-    if (!newType) return failure();
+    if (!newType)
+      return failure();
     auto globalOp = rewriter.replaceOpWithNewOp<IREE::Util::GlobalOp>(
         srcOp, srcOp.getName(), srcOp.getIsMutable(), newType,
         srcOp.getInitialValue());
@@ -228,13 +231,13 @@
 
 // Matches any op and generically converts types. Matches with benefit 0.
 class GenericTypeConvert : public ConversionPattern {
- public:
+public:
   GenericTypeConvert(TypeConverter &converter, MLIRContext *context,
                      PatternBenefit benefit)
       : ConversionPattern(converter, MatchAnyOpTypeTag(), benefit, context) {}
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     llvm::SmallVector<NamedAttribute> newAttr;
     llvm::append_range(newAttr, op->getAttrs());
     llvm::SmallVector<Type> newResults;
@@ -255,7 +258,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 IREETypeConverter::IREETypeConverter() {
   addConversion([](Type t) { return t; });
@@ -264,12 +267,14 @@
   });
   addConversion([=](IREE::Input::ListType t) -> IREE::Util::ListType {
     auto subType = convertType(t.getElementType());
-    if (!subType) return nullptr;
+    if (!subType)
+      return nullptr;
     return IREE::Util::ListType::get(subType);
   });
   addConversion([=](IREE::Input::PtrType t) -> IREE::Util::PtrType {
     auto subType = convertType(t.getTargetType());
-    if (!subType) return nullptr;
+    if (!subType)
+      return nullptr;
     return IREE::Util::PtrType::get(subType);
   });
   addConversion([](IREE::Input::VariantType t) {
@@ -292,24 +297,29 @@
   };
   auto isLegallyTypedOp = [&](Operation *op) -> bool {
     for (Type type : op->getResultTypes()) {
-      if (isIllegalType(type)) return false;
+      if (isIllegalType(type))
+        return false;
     }
     for (Type type : op->getOperandTypes()) {
-      if (isIllegalType(type)) return false;
+      if (isIllegalType(type))
+        return false;
     }
     return true;
   };
 
   target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp funcOp) {
     for (Type type : funcOp.getFunctionType().getInputs()) {
-      if (isIllegalType(type)) return false;
+      if (isIllegalType(type))
+        return false;
     }
     for (Type type : funcOp.getFunctionType().getResults()) {
-      if (isIllegalType(type)) return false;
+      if (isIllegalType(type))
+        return false;
     }
     for (Block &block : funcOp.getFunctionBody()) {
       for (Type type : block.getArgumentTypes()) {
-        if (isIllegalType(type)) return false;
+        if (isIllegalType(type))
+          return false;
       }
     }
     return true;
@@ -327,9 +337,9 @@
                                              specific_benefit);
   patterns.insert<GlobalOpPattern>(typeConverter, &getContext(), 0);
 
-#define ONE_TO_ONE(SrcOpTy, TargetOpTy)           \
-  patterns.insert<OneToOneConverionPattern>(      \
-      typeConverter, SrcOpTy::getOperationName(), \
+#define ONE_TO_ONE(SrcOpTy, TargetOpTy)                                        \
+  patterns.insert<OneToOneConverionPattern>(                                   \
+      typeConverter, SrcOpTy::getOperationName(),                              \
       TargetOpTy::getOperationName(), &getContext(), specific_benefit)
 
   ONE_TO_ONE(IREE::Input::BufferViewRankOp, IREE::HAL::BufferViewRankOp);
@@ -364,5 +374,5 @@
   return std::make_unique<IREEImportPublicPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp b/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp
index 9cbfcd1..31dbce3 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp
@@ -36,22 +36,22 @@
 };
 
 class IREETypeConverter : public TypeConverter {
- public:
+public:
   IREETypeConverter();
 };
 
 // Generic 1:1 conversion pattern which effectively just renames an op.
 // It does not support regions or ops with successors.
 class OneToOneConversionPattern : public ConversionPattern {
- public:
+public:
   OneToOneConversionPattern(TypeConverter &converter, StringRef srcName,
                             StringRef targetName, MLIRContext *context,
                             PatternBenefit benefit)
       : ConversionPattern(converter, srcName, benefit, context),
         targetName(targetName) {}
-  LogicalResult matchAndRewrite(
-      Operation *srcOp, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *srcOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Type> resultTypes;
     if (failed(typeConverter->convertTypes(srcOp->getResultTypes(),
                                            resultTypes))) {
@@ -66,7 +66,7 @@
     return success();
   }
 
- private:
+private:
   StringRef targetName;
 };
 
@@ -80,7 +80,7 @@
 
 class MLProgramGlobalOpPattern
     : public OpConversionPattern<ml_program::GlobalOp> {
- public:
+public:
   MLProgramGlobalOpPattern(TypeConverter &typeConverter, MLIRContext *context,
                            PatternBenefit benefit,
                            SmallVector<GlobalComponents> &externGlobals)
@@ -88,11 +88,12 @@
                                                   benefit),
         externGlobals(externGlobals) {}
 
-  LogicalResult matchAndRewrite(
-      ml_program::GlobalOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ml_program::GlobalOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type newType = typeConverter->convertType(srcOp.getType());
-    if (!newType) return failure();
+    if (!newType)
+      return failure();
 
     std::map<StringRef, ml_program::ExternAttr> externs;
 
@@ -110,10 +111,12 @@
         srcOp, srcOp.getName(), isMutable | isExtern, newType, srcOpTypedAttr);
     globalOp.setVisibility(SymbolTable::Visibility::Private);
 
-    if (isExtern) externGlobals.emplace_back(srcOp.getName(), newType);
+    if (isExtern)
+      externGlobals.emplace_back(srcOp.getName(), newType);
 
     // No more work needed if not public global.
-    if (visibility != SymbolTable::Visibility::Public) return success();
+    if (visibility != SymbolTable::Visibility::Public)
+      return success();
 
     ModuleOp module = srcOp->getParentOfType<ModuleOp>();
 
@@ -133,9 +136,12 @@
       StringRef s = format;
       // Verify only single replacement of 0th index.
       s = s.drop_until([](char c) { return c == '{'; });
-      if (s.empty() || !s.consume_front("{")) return failure();
-      if (!s.consume_front("0")) return failure();
-      if (!s.consume_front("}")) return failure();
+      if (s.empty() || !s.consume_front("{"))
+        return failure();
+      if (!s.consume_front("0"))
+        return failure();
+      if (!s.consume_front("}"))
+        return failure();
       s = s.drop_until([](char c) { return c == '{'; });
       return success(s.empty());
     };
@@ -147,14 +153,16 @@
         v ? llvm::dyn_cast_if_present<StringAttr>(v.get("get")) : nullptr;
     {
       const std::string getFormat = get ? get.str() : "global${0}$get";
-      if (failed(verifyFormat(getFormat))) return failure();
+      if (failed(verifyFormat(getFormat)))
+        return failure();
       getterName = llvm::formatv(getFormat.c_str(), globalOp.getSymName());
     }
     auto set =
         v ? llvm::dyn_cast_if_present<StringAttr>(v.get("set")) : nullptr;
     {
       const std::string setFormat = set ? set.str() : "global${0}$set";
-      if (failed(verifyFormat(setFormat))) return failure();
+      if (failed(verifyFormat(setFormat)))
+        return failure();
       setterName = llvm::formatv(setFormat.c_str(), globalOp.getSymName());
     }
 
@@ -190,8 +198,9 @@
   SmallVector<GlobalComponents> &externGlobals;
 };
 
-LogicalResult createExternInitFunction(
-    ModuleOp module, SmallVector<GlobalComponents> &externGlobals) {
+LogicalResult
+createExternInitFunction(ModuleOp module,
+                         SmallVector<GlobalComponents> &externGlobals) {
   std::sort(externGlobals.begin(), externGlobals.end(),
             [](const GlobalComponents &lhs, const GlobalComponents &rhs) {
               return lhs.name < rhs.name;
@@ -219,7 +228,7 @@
   return success();
 }
 
-}  // namespace
+} // namespace
 
 IREETypeConverter::IREETypeConverter() {
   addConversion([](Type t) { return t; });
@@ -239,9 +248,9 @@
                                             externGlobals);
 
   PatternBenefit specific_benefit = 100;
-#define ONE_TO_ONE(SrcOpTy, TargetOpTy)           \
-  patterns.insert<OneToOneConversionPattern>(     \
-      typeConverter, SrcOpTy::getOperationName(), \
+#define ONE_TO_ONE(SrcOpTy, TargetOpTy)                                        \
+  patterns.insert<OneToOneConversionPattern>(                                  \
+      typeConverter, SrcOpTy::getOperationName(),                              \
       TargetOpTy::getOperationName(), &context, specific_benefit)
 
   ONE_TO_ONE(ml_program::GlobalLoadOp, IREE::Util::GlobalLoadOp);
@@ -260,5 +269,5 @@
   return std::make_unique<ImportMLProgramPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/PassDetail.h b/compiler/src/iree/compiler/InputConversion/Common/PassDetail.h
index 5dfb26f..226e20a 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/PassDetail.h
+++ b/compiler/src/iree/compiler/InputConversion/Common/PassDetail.h
@@ -17,7 +17,7 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/InputConversion/Common/Passes.h.inc"
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_COMMON_PASSDETAIL_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_COMMON_PASSDETAIL_H_
diff --git a/compiler/src/iree/compiler/InputConversion/Common/Passes.cpp b/compiler/src/iree/compiler/InputConversion/Common/Passes.cpp
index 83a26bc..4d2ad8d 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/Passes.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/Passes.cpp
@@ -16,8 +16,8 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/InputConversion/Common/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/InputConversion/Common/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void buildCommonInputConversionPassPipeline(OpPassManager &passManager) {
   passManager.addPass(createIREEImportPublicPass());
@@ -37,5 +37,5 @@
       });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/Passes.h b/compiler/src/iree/compiler/InputConversion/Common/Passes.h
index 797a29c..edbfcb7 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/Passes.h
+++ b/compiler/src/iree/compiler/InputConversion/Common/Passes.h
@@ -33,7 +33,7 @@
 std::unique_ptr<OperationPass<ModuleOp>>
 createAutoInputConversionPipelinePass();
 std::unique_ptr<OperationPass<ModuleOp>> createAutoInputConversionPipelinePass(
-    const AutoInputConversionPipelineOptions& options);
+    const AutoInputConversionPipelineOptions &options);
 std::unique_ptr<OperationPass<ModuleOp>> createIREEImportPublicPass();
 std::unique_ptr<OperationPass<ModuleOp>> createImportMLProgramPass();
 std::unique_ptr<OperationPass<func::FuncOp>>
@@ -49,7 +49,7 @@
 
 void registerCommonInputConversionPasses();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_COMMON_PASSES_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_COMMON_PASSES_H_
diff --git a/compiler/src/iree/compiler/InputConversion/Common/QuantizedConvToConv.cpp b/compiler/src/iree/compiler/InputConversion/Common/QuantizedConvToConv.cpp
index 4cd7cef..4416ce2 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/QuantizedConvToConv.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/QuantizedConvToConv.cpp
@@ -353,12 +353,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createLinalgQuantizedConvToConvPass() {
   return std::make_unique<LinalgQuantizedConvToConvPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/QuantizedMatmulToMatmul.cpp b/compiler/src/iree/compiler/InputConversion/Common/QuantizedMatmulToMatmul.cpp
index 4b5f6c6..d91942e 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/QuantizedMatmulToMatmul.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/QuantizedMatmulToMatmul.cpp
@@ -169,12 +169,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createLinalgQuantizedMatmulToMatmulPass() {
   return std::make_unique<LinalgQuantizedMatmulToMatmulPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/SanitizeModuleNames.cpp b/compiler/src/iree/compiler/InputConversion/Common/SanitizeModuleNames.cpp
index fec3905..ad6116f 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/SanitizeModuleNames.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/SanitizeModuleNames.cpp
@@ -28,18 +28,19 @@
 
     auto moduleOp = getOperation();
     auto optionalName = moduleOp.getName();
-    if (!optionalName.has_value()) return;
+    if (!optionalName.has_value())
+      return;
     auto name = optionalName.value();
 
     moduleOp.setName(sanitizeSymbolName(name));
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> createSanitizeModuleNamesPass() {
   return std::make_unique<SanitizeModuleNamesPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/TopLevelSCFToCFG.cpp b/compiler/src/iree/compiler/InputConversion/Common/TopLevelSCFToCFG.cpp
index e01a3f6..264382d 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/TopLevelSCFToCFG.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/TopLevelSCFToCFG.cpp
@@ -23,7 +23,7 @@
   void runOnOperation() override;
 };
 
-}  // namespace
+} // namespace
 
 void TopLevelSCFToCFGPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
@@ -50,5 +50,5 @@
   return std::make_unique<TopLevelSCFToCFGPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/Utils.cpp b/compiler/src/iree/compiler/InputConversion/Common/Utils.cpp
index 99304e1..83c663f 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/Utils.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/Utils.cpp
@@ -30,7 +30,8 @@
   llvm::SmallVector<int64_t> staticSizes;
   SmallVector<Value> dynSizes;
   for (int i = 0, s = is_reduction.size(); i < s; i++) {
-    if (is_reduction[i]) continue;
+    if (is_reduction[i])
+      continue;
 
     staticSizes.push_back(ty.getDimSize(i));
     if (ty.isDynamicDim(i)) {
@@ -83,5 +84,5 @@
       .getResult(0);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/Common/Utils.h b/compiler/src/iree/compiler/InputConversion/Common/Utils.h
index 8f4623f..aa0862d 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/Utils.h
+++ b/compiler/src/iree/compiler/InputConversion/Common/Utils.h
@@ -13,5 +13,5 @@
 Value sumReduceDimensionSubset(ImplicitLocOpBuilder &rewriter, Value val,
                                Type accETy, ArrayRef<bool> is_reduction);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/ConvertCollectives.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/ConvertCollectives.cpp
index f1f62d6..7f15ead 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/ConvertCollectives.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/ConvertCollectives.cpp
@@ -112,8 +112,8 @@
   return std::nullopt;
 }
 
-static IREE::Flow::CollectiveElementTypeAttr getCollectiveElementTypeAttr(
-    MLIRContext *context, RankedTensorType type) {
+static IREE::Flow::CollectiveElementTypeAttr
+getCollectiveElementTypeAttr(MLIRContext *context, RankedTensorType type) {
   std::optional<IREE::Flow::CollectiveElementType> collectiveElemType =
       convertToFlowCollectiveElementType(type.getElementType());
   if (!collectiveElemType) {
@@ -156,7 +156,8 @@
                                                     OpBuilder &builder) {
   IndexSet indexSet(loc, builder);
   Value noColor = indexSet.get(-1);
-  if (!groups) return std::make_pair(noColor, noColor);
+  if (!groups)
+    return std::make_pair(noColor, noColor);
 
   auto groupsType = llvm::cast<RankedTensorType>(groups.getType());
   assert(groupsType.getRank() == 2);
@@ -202,9 +203,9 @@
   return std::make_pair(color, key);
 }
 
-static DenseIntElementsAttr convertToRankGroupsByCrossReplica(
-    DenseIntElementsAttr replicaGroups, int32_t numPartitions,
-    OpBuilder &builder) {
+static DenseIntElementsAttr
+convertToRankGroupsByCrossReplica(DenseIntElementsAttr replicaGroups,
+                                  int32_t numPartitions, OpBuilder &builder) {
   if (numPartitions <= 1) {
     // Treat as a single partition.
     return replicaGroups;
@@ -236,9 +237,9 @@
   return DenseIntElementsAttr::get(type, newValues);
 }
 
-static DenseIntElementsAttr convertToRankGroupsByCrossPartition(
-    DenseIntElementsAttr partitionGroups, int32_t numReplicas,
-    OpBuilder &builder) {
+static DenseIntElementsAttr
+convertToRankGroupsByCrossPartition(DenseIntElementsAttr partitionGroups,
+                                    int32_t numReplicas, OpBuilder &builder) {
   if (numReplicas <= 1) {
     // Treat as a single replica.
     return partitionGroups;
@@ -344,8 +345,9 @@
 // |                    |    > 0    | true               | FlattenedIds             |
 // +--------------------+-----------+--------------------+--------------------------+
 // clang-format on
-static CollectiveOpGroupMode getCollectiveOpGroupMode(
-    int64_t channelId, std::optional<bool> useGlobalDeviceIds) {
+static CollectiveOpGroupMode
+getCollectiveOpGroupMode(int64_t channelId,
+                         std::optional<bool> useGlobalDeviceIds) {
   if (channelId <= 0) {
     assert(!useGlobalDeviceIds.has_value() || !*useGlobalDeviceIds);
     return CollectiveOpGroupMode::CrossReplica;
@@ -368,7 +370,8 @@
     DenseIntElementsAttr replicaGroups, std::optional<bool> useGlobalDeviceIds,
     OpBuilder &builder) {
   // Set numPartitions to 1 if not set by the user.
-  if (numPartitions == -1) numPartitions = 1;
+  if (numPartitions == -1)
+    numPartitions = 1;
 
   // Base channel that may be split by the group info.
   Value baseChannel =
@@ -457,9 +460,9 @@
   using OpConversionPattern<
       mlir::stablehlo::PartitionIdOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::PartitionIdOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::PartitionIdOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
     // PartitionId = rank % numPartitions
     auto moduleOp = op->getParentOfType<ModuleOp>();
@@ -477,7 +480,7 @@
       value = rewriter.create<arith::RemUIOp>(loc, rank, cst);
     }
     auto resultType =
-        llvm::cast<RankedTensorType>(op.getType());  // tensor<ui32>
+        llvm::cast<RankedTensorType>(op.getType()); // tensor<ui32>
     auto elemType = resultType.getElementType();
     // index -> ui32
     auto rankElem = rewriter.create<arith::IndexCastUIOp>(loc, elemType, value);
@@ -494,9 +497,9 @@
     : public OpConversionPattern<mlir::stablehlo::ReplicaIdOp> {
   using OpConversionPattern<mlir::stablehlo::ReplicaIdOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReplicaIdOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReplicaIdOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
     auto channel = rewriter.create<IREE::Flow::ChannelDefaultOp>(
         loc, /*group=*/StringAttr{});
@@ -512,7 +515,7 @@
     }
 
     auto resultType =
-        llvm::cast<RankedTensorType>(op.getType());  // tensor<ui32>
+        llvm::cast<RankedTensorType>(op.getType()); // tensor<ui32>
     auto elemType = resultType.getElementType();
     // index -> ui32
     auto rankElem = rewriter.create<arith::IndexCastUIOp>(loc, elemType, rank);
@@ -528,9 +531,9 @@
     : OpConversionPattern<mlir::stablehlo::AllGatherOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::AllGatherOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::AllGatherOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (checkCollectiveAttrs(op, rewriter).failed()) {
       return failure();
     }
@@ -587,9 +590,9 @@
     : OpConversionPattern<mlir::stablehlo::AllReduceOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::AllReduceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::AllReduceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (checkCollectiveAttrs(op, rewriter).failed()) {
       return failure();
     }
@@ -717,9 +720,9 @@
     : OpConversionPattern<mlir::stablehlo::AllToAllOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::AllToAllOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::AllToAllOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
 
     auto moduleOp = op->getParentOfType<ModuleOp>();
@@ -781,9 +784,9 @@
     : OpConversionPattern<mlir::stablehlo::ReduceScatterOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReduceScatterOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReduceScatterOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (checkCollectiveAttrs(op, rewriter).failed()) {
       return failure();
     }
@@ -890,9 +893,9 @@
   using OpConversionPattern<
       mlir::stablehlo::CollectivePermuteOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::CollectivePermuteOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::CollectivePermuteOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
 
     auto moduleOp = op->getParentOfType<ModuleOp>();
@@ -909,7 +912,8 @@
     int64_t numParticipants = mode == CollectiveOpGroupMode::CrossReplica
                                   ? numReplicas
                                   : numPartitions;
-    if (numParticipants == -1) numParticipants = 1;
+    if (numParticipants == -1)
+      numParticipants = 1;
     SmallVector<Attribute> replicaGroups;
     for (int64_t i = 0; i < numParticipants; ++i) {
       replicaGroups.push_back(rewriter.getI64IntegerAttr(i));
@@ -980,7 +984,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStableHloCollectivesConversionPatterns(
     MLIRContext *context, TypeConverter &typeConverter,
@@ -992,4 +996,4 @@
                                                               context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
index 9734046..f2985ce 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
@@ -48,50 +48,52 @@
 static std::optional<mlir::stablehlo::ComparisonDirection>
 toStableHloComparisonDirection(mlir::chlo::ComparisonDirection value) {
   switch (value) {
-    case mlir::chlo::ComparisonDirection::EQ:
-      return mlir::stablehlo::ComparisonDirection::EQ;
-    case mlir::chlo::ComparisonDirection::NE:
-      return mlir::stablehlo::ComparisonDirection::NE;
-    case mlir::chlo::ComparisonDirection::GE:
-      return mlir::stablehlo::ComparisonDirection::GE;
-    case mlir::chlo::ComparisonDirection::GT:
-      return mlir::stablehlo::ComparisonDirection::GT;
-    case mlir::chlo::ComparisonDirection::LE:
-      return mlir::stablehlo::ComparisonDirection::LE;
-    case mlir::chlo::ComparisonDirection::LT:
-      return mlir::stablehlo::ComparisonDirection::LT;
+  case mlir::chlo::ComparisonDirection::EQ:
+    return mlir::stablehlo::ComparisonDirection::EQ;
+  case mlir::chlo::ComparisonDirection::NE:
+    return mlir::stablehlo::ComparisonDirection::NE;
+  case mlir::chlo::ComparisonDirection::GE:
+    return mlir::stablehlo::ComparisonDirection::GE;
+  case mlir::chlo::ComparisonDirection::GT:
+    return mlir::stablehlo::ComparisonDirection::GT;
+  case mlir::chlo::ComparisonDirection::LE:
+    return mlir::stablehlo::ComparisonDirection::LE;
+  case mlir::chlo::ComparisonDirection::LT:
+    return mlir::stablehlo::ComparisonDirection::LT;
   }
   return {};
 }
 
-static std::optional<mlir::stablehlo::ComparisonType> toStableHloComparisonType(
-    mlir::chlo::ComparisonType value) {
+static std::optional<mlir::stablehlo::ComparisonType>
+toStableHloComparisonType(mlir::chlo::ComparisonType value) {
   switch (value) {
-    case mlir::chlo::ComparisonType::NOTYPE:
-      return mlir::stablehlo::ComparisonType::NOTYPE;
-    case mlir::chlo::ComparisonType::FLOAT:
-      return mlir::stablehlo::ComparisonType::FLOAT;
-    case mlir::chlo::ComparisonType::TOTALORDER:
-      return mlir::stablehlo::ComparisonType::TOTALORDER;
-    case mlir::chlo::ComparisonType::SIGNED:
-      return mlir::stablehlo::ComparisonType::SIGNED;
-    case mlir::chlo::ComparisonType::UNSIGNED:
-      return mlir::stablehlo::ComparisonType::UNSIGNED;
+  case mlir::chlo::ComparisonType::NOTYPE:
+    return mlir::stablehlo::ComparisonType::NOTYPE;
+  case mlir::chlo::ComparisonType::FLOAT:
+    return mlir::stablehlo::ComparisonType::FLOAT;
+  case mlir::chlo::ComparisonType::TOTALORDER:
+    return mlir::stablehlo::ComparisonType::TOTALORDER;
+  case mlir::chlo::ComparisonType::SIGNED:
+    return mlir::stablehlo::ComparisonType::SIGNED;
+  case mlir::chlo::ComparisonType::UNSIGNED:
+    return mlir::stablehlo::ComparisonType::UNSIGNED;
   }
   return {};
 }
 
 struct HloCompareAdaptor {
-  static mlir::stablehlo::CompareOp createOp(
-      mlir::chlo::BroadcastCompareOp fromOp, Type resultType,
-      ValueRange broadcastedOperands, OpBuilder &builder) {
+  static mlir::stablehlo::CompareOp
+  createOp(mlir::chlo::BroadcastCompareOp fromOp, Type resultType,
+           ValueRange broadcastedOperands, OpBuilder &builder) {
     auto chloDirection = fromOp.getComparisonDirection();
     auto hloDirection = toStableHloComparisonDirection(chloDirection);
-    if (!hloDirection) return nullptr;
+    if (!hloDirection)
+      return nullptr;
     auto chloType =
         fromOp.getCompareType().value_or(mlir::chlo::ComparisonType::NOTYPE);
     auto hloType = toStableHloComparisonType(chloType);
-    if (!hloType) return nullptr;
+    if (!hloType)
+      return nullptr;
     auto hloTypeAttr = fromOp.getCompareType()
                            ? mlir::stablehlo::ComparisonTypeAttr::get(
                                  builder.getContext(), *hloType)
@@ -150,8 +152,10 @@
                              Value val) {
   Type ty = getElementTypeOrSelf(val.getType());
   auto getAttr = [&]() -> Attribute {
-    if (isa<IntegerType>(ty)) return b.getIntegerAttr(ty, constant);
-    if (isa<FloatType>(ty)) return b.getFloatAttr(ty, constant);
+    if (isa<IntegerType>(ty))
+      return b.getIntegerAttr(ty, constant);
+    if (isa<FloatType>(ty))
+      return b.getFloatAttr(ty, constant);
     if (auto complexTy = dyn_cast<ComplexType>(ty)) {
       return complex::NumberAttr::get(complexTy, constant, 0);
     }
@@ -193,16 +197,18 @@
     : OpConversionPattern<ChloOpTy> {
   using OpConversionPattern<ChloOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      ChloOpTy op, typename ChloOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ChloOpTy op, typename ChloOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only rewrite for statically determinable non-broadcasting cases.
     auto lhsType = dyn_cast<RankedTensorType>(adaptor.getLhs().getType());
     auto rhsType = dyn_cast<RankedTensorType>(adaptor.getRhs().getType());
-    if (!lhsType || !rhsType) return failure();
+    if (!lhsType || !rhsType)
+      return failure();
 
     // Requires rank broadcast.
-    if (lhsType.getRank() != rhsType.getRank()) return failure();
+    if (lhsType.getRank() != rhsType.getRank())
+      return failure();
 
     // Any dynamic dimension may require broadcasting and requires more
     // analysis.
@@ -238,16 +244,17 @@
     : OpConversionPattern<ChloOpTy> {
   using OpConversionPattern<ChloOpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      ChloOpTy op, typename ChloOpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ChloOpTy op, typename ChloOpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only support ranked operands.
     Value lhs = adaptor.getLhs();
     Value rhs = adaptor.getRhs();
     auto lhsType = dyn_cast<RankedTensorType>(lhs.getType());
     auto rhsType = dyn_cast<RankedTensorType>(rhs.getType());
     auto resultType = dyn_cast<RankedTensorType>(op.getResult().getType());
-    if (!lhsType || !rhsType || !resultType) return failure();
+    if (!lhsType || !rhsType || !resultType)
+      return failure();
 
     // Check for "numpy"-style rank broadcast.
     auto broadcastDimensions = op.getBroadcastDimensions();
@@ -322,19 +329,20 @@
     : OpConversionPattern<mlir::chlo::ConstantLikeOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::ConstantLikeOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::ConstantLikeOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultTy = cast<ShapedType>(op.getType());
 
     // Unranked uses are not supported.
-    if (!resultTy.hasRank()) return failure();
+    if (!resultTy.hasRank())
+      return failure();
 
     // Lower to HLO constant if statically shaped.
     if (resultTy.hasStaticShape()) {
       auto complexAttr = dyn_cast<mlir::complex::NumberAttr>(op.getValue());
-      auto attr = DenseElementsAttr::get(
-          resultTy, complexAttr ? complexAttr : op.getValue());
+      auto attr = DenseElementsAttr::get(resultTy, complexAttr ? complexAttr
+                                                               : op.getValue());
       rewriter.replaceOpWithNewOp<mlir::stablehlo::ConstantOp>(op, attr);
       return success();
     }
@@ -354,9 +362,9 @@
     : OpConversionPattern<mlir::chlo::BroadcastSelectOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::BroadcastSelectOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::BroadcastSelectOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only support ranked operands.
     Value pred = adaptor.getPred();
     Value onTrue = adaptor.getOnTrue();
@@ -474,18 +482,19 @@
 struct ConvertConstantOp final : OpConversionPattern<mlir::chlo::ConstantOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::ConstantOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::ConstantOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<mlir::stablehlo::ConstantOp>(op, op.getValue());
     return success();
   }
 };
 
 template <typename FTy>
-static Value materializeChebyshevPolynomialApproximation(
-    ConversionPatternRewriter &rewriter, Location loc, Value x,
-    ArrayRef<FTy> coefficients) {
+static Value
+materializeChebyshevPolynomialApproximation(ConversionPatternRewriter &rewriter,
+                                            Location loc, Value x,
+                                            ArrayRef<FTy> coefficients) {
   Value b0 = getConstantLike(rewriter, loc, 0.0, x);
   Value b1 = getConstantLike(rewriter, loc, 0.0, x);
   Value b2 = getConstantLike(rewriter, loc, 0.0, x);
@@ -563,8 +572,9 @@
                                                   kI1eCoeffsB);
 }
 
-static Value materializeBesselI1eApproximationF64(
-    ConversionPatternRewriter &rewriter, Location loc, ValueRange args) {
+static Value
+materializeBesselI1eApproximationF64(ConversionPatternRewriter &rewriter,
+                                     Location loc, ValueRange args) {
   Value x = args.front();
   assert(cast<ShapedType>(x.getType()).getElementType().isF64() &&
          "expect f64 element type");
@@ -639,9 +649,9 @@
 struct ConvertBesselI1eOp final : OpConversionPattern<mlir::chlo::BesselI1eOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::BesselI1eOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::BesselI1eOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     Value x = adaptor.getOperand();
     Type ty = cast<ShapedType>(x.getType()).getElementType();
@@ -667,10 +677,12 @@
 };
 
 template <typename FTy>
-static Value materializePolynomialApproximation(
-    ConversionPatternRewriter &rewriter, Location loc, Value x,
-    ArrayRef<FTy> coefficients) {
-  if (coefficients.empty()) return getConstantLike(rewriter, loc, 0.0, x);
+static Value
+materializePolynomialApproximation(ConversionPatternRewriter &rewriter,
+                                   Location loc, Value x,
+                                   ArrayRef<FTy> coefficients) {
+  if (coefficients.empty())
+    return getConstantLike(rewriter, loc, 0.0, x);
 
   Value poly = getConstantLike(rewriter, loc, coefficients[0], x);
   for (size_t i = 1, e = coefficients.size(); i < e; ++i) {
@@ -825,8 +837,9 @@
                                                     erfcBasedApprox);
 }
 
-static Value materializeErfcApproximationF64(
-    ConversionPatternRewriter &rewriter, Location loc, ValueRange args) {
+static Value
+materializeErfcApproximationF64(ConversionPatternRewriter &rewriter,
+                                Location loc, ValueRange args) {
   Value x = args.front();
   assert(x.getType().cast<ShapedType>().getElementType().isF64() &&
          "expect f64 element type");
@@ -980,8 +993,9 @@
                                                    erf, ubErf);
 }
 
-static Value materializeErfcApproximationF32(
-    ConversionPatternRewriter &rewriter, Location loc, ValueRange args) {
+static Value
+materializeErfcApproximationF32(ConversionPatternRewriter &rewriter,
+                                Location loc, ValueRange args) {
   Value x = args.front();
   assert(x.getType().cast<ShapedType>().getElementType().isF32() &&
          "expect f32 element type");
@@ -1010,9 +1024,9 @@
 struct ConvertErfOp final : OpConversionPattern<mlir::chlo::ErfOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::ErfOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::ErfOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     Value x = adaptor.getOperand();
     Type ty = cast<ShapedType>(x.getType()).getElementType();
@@ -1038,9 +1052,9 @@
 struct ConvertErfcOp final : OpConversionPattern<mlir::chlo::ErfcOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::ErfcOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::ErfcOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     Value x = adaptor.getOperand();
     Type ty = cast<ShapedType>(x.getType()).getElementType();
@@ -1239,18 +1253,18 @@
 struct ConvertErfInvOp final : OpConversionPattern<mlir::chlo::ErfInvOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::ErfInvOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::ErfInvOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     if (op.getResult().getType().getElementType().isF64()) {
       rewriter.replaceOp(op, erfInv64(rewriter, loc, adaptor.getOperands()));
       return success();
     }
     FloatType minPrecisionTy = rewriter.getF32Type();
-    rewriter.replaceOp(
-        op, materializeWithUpcast(rewriter, loc, adaptor.getOperands(),
-                                  minPrecisionTy, &erfInv32));
+    rewriter.replaceOp(op, materializeWithUpcast(rewriter, loc,
+                                                 adaptor.getOperands(),
+                                                 minPrecisionTy, &erfInv32));
     return success();
   }
 };
@@ -1262,7 +1276,7 @@
 // [7, 9] seemed to be the least sensitive to the quality of the log function.
 // In particular, [5, 7] is the only choice where -1.5e-5 <= lgamma(2) <= 1.5e-5
 // for a particularly inaccurate log function.
-constexpr double kLanczosGamma = 7;  // aka g
+constexpr double kLanczosGamma = 7; // aka g
 constexpr double kBaseLanczosCoeff = 0.99999999999980993227684700473478;
 constexpr std::array<double, 8> kLanczosCoefficients = {
     676.520368121885098567009190444019, -1259.13921672240287047156078755283,
@@ -1442,9 +1456,9 @@
 struct ConvertCoshOp final : OpConversionPattern<mlir::chlo::CoshOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::CoshOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::CoshOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(
         op, materializeWithUpcast(rewriter, op.getLoc(), adaptor.getOperands(),
                                   rewriter.getF32Type(),
@@ -1773,9 +1787,9 @@
 struct ConvertLgammaOp final : OpConversionPattern<mlir::chlo::LgammaOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::LgammaOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::LgammaOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     FloatType minPrecisionTy = rewriter.getF32Type();
     rewriter.replaceOp(
         op, materializeWithUpcast(rewriter, op.getLoc(), adaptor.getOperands(),
@@ -1787,9 +1801,9 @@
 struct ConvertDigammaOp final : OpConversionPattern<mlir::chlo::DigammaOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::DigammaOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::DigammaOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     FloatType minPrecisionTy = rewriter.getF32Type();
     rewriter.replaceOp(
         op, materializeWithUpcast(rewriter, op.getLoc(), adaptor.getOperands(),
@@ -1890,9 +1904,9 @@
 struct ConvertNextAfterOp final : OpConversionPattern<mlir::chlo::NextAfterOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::NextAfterOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::NextAfterOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(
         op, materializeNextAfter(rewriter, op.getLoc(), adaptor.getOperands()));
     return success();
@@ -1902,9 +1916,9 @@
 struct ConvertPolygammaOp final : OpConversionPattern<mlir::chlo::PolygammaOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::PolygammaOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::PolygammaOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     FloatType minPrecisionTy = rewriter.getF32Type();
     rewriter.replaceOp(
@@ -1924,8 +1938,9 @@
 // +/-89.4159851, due to rounding error when computing x +/- log(1/2).  The
 // correct answer of 3.40281961e+38 (0x7f7fffec) is very close to max-float, so
 // we deem this acceptable.
-static Value materializeSinhApproximationForLargeX(
-    ConversionPatternRewriter &rewriter, Location loc, ValueRange operands) {
+static Value
+materializeSinhApproximationForLargeX(ConversionPatternRewriter &rewriter,
+                                      Location loc, ValueRange operands) {
   mlir::chlo::SinhOp::Adaptor transformed(operands);
   Value x = transformed.getOperand();
 
@@ -1977,9 +1992,9 @@
 struct ConvertSinhOp final : OpConversionPattern<mlir::chlo::SinhOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::SinhOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::SinhOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value x = adaptor.getOperand();
     if (cast<ShapedType>(x.getType()).getElementType().isa<ComplexType>()) {
       rewriter.replaceOp(op, materializeSinhApproximationForLargeX(
@@ -2031,11 +2046,12 @@
 struct ConvertTopKOp final : OpConversionPattern<mlir::chlo::TopKOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::TopKOp op, OpAdaptor /*adaptor*/,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::TopKOp op, OpAdaptor /*adaptor*/,
+                  ConversionPatternRewriter &rewriter) const override {
     auto operandType = dyn_cast<RankedTensorType>(op.getOperand().getType());
-    if (!operandType) return failure();
+    if (!operandType)
+      return failure();
     int64_t operandRank = operandType.getRank();
     int64_t lastDimIndex = operandRank - 1;
     int64_t lastDimSize = operandType.getDimSize(lastDimIndex);
@@ -2146,9 +2162,9 @@
 struct ConvertZetaOp final : OpConversionPattern<mlir::chlo::ZetaOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::chlo::ZetaOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::chlo::ZetaOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     FloatType minPrecisionTy = rewriter.getF32Type();
     rewriter.replaceOp(
@@ -2206,11 +2222,11 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 namespace {
 #include "iree/compiler/InputConversion/StableHLO/CHLODecompositionPatterns.h.inc"
-}  // end anonymous namespace
+} // end anonymous namespace
 
 namespace {
 static void populateBroadcastingPatterns(MLIRContext *context,
@@ -2235,11 +2251,11 @@
                 ConvertLgammaOp, ConvertNextAfterOp, ConvertPolygammaOp,
                 ConvertSinhOp, ConvertTopKOp, ConvertZetaOp>(context);
 }
-}  // namespace
+} // namespace
 
 void populateLegalizeChloPatterns(MLIRContext *context,
                                   RewritePatternSet *patterns) {
   populateBroadcastingPatterns(context, patterns);
   populateDecompositionPatterns(context, patterns);
 }
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeControlFlow.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeControlFlow.cpp
index c9eaefc..6328f7c 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeControlFlow.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeControlFlow.cpp
@@ -65,7 +65,8 @@
 std::optional<ScfForBounds> extractForBounds(mlir::stablehlo::WhileOp op) {
   Block &cond = op.getCond().front();
   Block &body = op.getBody().front();
-  if (cond.getOperations().size() != 2) return std::nullopt;
+  if (cond.getOperations().size() != 2)
+    return std::nullopt;
 
   auto matchBbArg = [](Value v, Block &block) -> std::optional<unsigned> {
     if (!isa<BlockArgument>(v) || v.getParentBlock() != &block)
@@ -86,7 +87,8 @@
   }
 
   std::optional<unsigned> iterArg = matchBbArg(compare.getLhs(), cond);
-  if (!iterArg) return std::nullopt;
+  if (!iterArg)
+    return std::nullopt;
 
   auto add = dyn_cast_or_null<mlir::stablehlo::AddOp>(
       body.getTerminator()->getOperand(*iterArg).getDefiningOp());
@@ -107,9 +109,9 @@
 struct WhileOpPattern final : OpConversionPattern<mlir::stablehlo::WhileOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::WhileOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::WhileOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
 
     if (std::optional<ScfForBounds> bounds = extractForBounds(op)) {
@@ -164,9 +166,9 @@
 struct IfOpPattern final : OpConversionPattern<mlir::stablehlo::IfOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::IfOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::IfOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto scfIf = rewriter.create<scf::IfOp>(
         op.getLoc(), op.getResultTypes(),
         extractTensorValue(rewriter, adaptor.getPred()),
@@ -225,9 +227,9 @@
     return scfIf;
   }
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::CaseOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::CaseOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Inline the op if there is only a default block.
     if (op.getBranches().size() == 1) {
       Block &block = op.getBranches().front().front();
@@ -270,11 +272,11 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 void populateLegalizeControlFlowPatterns(MLIRContext *context,
                                          RewritePatternSet *patterns) {
   patterns->add<WhileOpPattern, IfOpPattern, CaseOpPattern>(context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeShapeComputations.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeShapeComputations.cpp
index 564d23f..6902b22 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeShapeComputations.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeShapeComputations.cpp
@@ -47,7 +47,8 @@
 
   LogicalResult matchAndRewrite(OpTy op,
                                 PatternRewriter &rewriter) const final {
-    if (!opIsShapeComputation(op)) return failure();
+    if (!opIsShapeComputation(op))
+      return failure();
 
     auto resultTy = cast<ShapedType>(op.getType());
 
@@ -84,7 +85,8 @@
 
   LogicalResult matchAndRewrite(mlir::stablehlo::ConcatenateOp op,
                                 PatternRewriter &rewriter) const override {
-    if (!opIsShapeComputation(op)) return failure();
+    if (!opIsShapeComputation(op))
+      return failure();
 
     Location loc = op.getLoc();
     auto resultTy = cast<ShapedType>(op.getType());
@@ -140,12 +142,14 @@
                                 PatternRewriter &rewriter) const override {
     Value operand = op.getOperand();
     auto shapedTy = cast<ShapedType>(operand.getType());
-    if (!shapedTy.hasRank() || shapedTy.getRank() > 1) return failure();
+    if (!shapedTy.hasRank() || shapedTy.getRank() > 1)
+      return failure();
 
     auto resultTy = cast<ShapedType>(op.getType());
 
     auto fromElements = op.getOperand().getDefiningOp<tensor::FromElementsOp>();
-    if (!fromElements) return failure();
+    if (!fromElements)
+      return failure();
 
     rewriter.replaceOpWithNewOp<tensor::FromElementsOp>(
         op, resultTy, fromElements.getOperands());
@@ -171,7 +175,7 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 void populateLegalizeShapeComputationPatterns(MLIRContext *context,
                                               RewritePatternSet *patterns) {
@@ -194,4 +198,4 @@
       context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.cpp
index e3aba95..df900a4 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.cpp
@@ -21,27 +21,27 @@
 
 namespace mlir::iree_compiler::stablehlo {
 namespace {
-bool hasIntegralShapeType(Operation* op) {
+bool hasIntegralShapeType(Operation *op) {
   auto stp = llvm::dyn_cast<ShapedType>(op->getOperand(0).getType());
   return stp && stp.getElementType().isIntOrIndex();
 }
 
-}  // namespace
+} // namespace
 
-SmallVector<utils::IteratorType, 3> getParallelAndReductionIterators(
-    unsigned nLoops, unsigned nReduction) {
+SmallVector<utils::IteratorType, 3>
+getParallelAndReductionIterators(unsigned nLoops, unsigned nReduction) {
   SmallVector<utils::IteratorType, 3> res(nLoops - nReduction,
                                           utils::IteratorType::parallel);
   res.append(nReduction, utils::IteratorType::reduction);
   return res;
 }
 
-SmallVector<utils::IteratorType, 3> getNParallelLoopsAttrs(
-    unsigned nParallelLoops) {
+SmallVector<utils::IteratorType, 3>
+getNParallelLoopsAttrs(unsigned nParallelLoops) {
   return getParallelAndReductionIterators(nParallelLoops, 0);
 }
 
-Value getEmptySparseTensor(OpBuilder& b, Location loc, ShapedType type,
+Value getEmptySparseTensor(OpBuilder &b, Location loc, ShapedType type,
                            ArrayRef<Value> dynSizes) {
   return b.create<bufferization::AllocTensorOp>(
       loc, llvm::cast<TensorType>(type), dynSizes,
@@ -49,15 +49,15 @@
       /*memory_space=*/IntegerAttr());
 }
 
-Value getEmptyTensor(OpBuilder& b, Location loc, ShapedType type,
+Value getEmptyTensor(OpBuilder &b, Location loc, ShapedType type,
                      ArrayRef<Value> dynSizes) {
   return b.create<tensor::EmptyOp>(
       loc, type.getShape(), type.getElementType(), dynSizes,
       llvm::cast<RankedTensorType>(type).getEncoding());
 }
 
-Value getEmptyTensorFor(OpBuilder& b, Location loc, ShapedType resultType,
-                        Operation* op, ValueRange operands) {
+Value getEmptyTensorFor(OpBuilder &b, Location loc, ShapedType resultType,
+                        Operation *op, ValueRange operands) {
   bool isSparse = sparse_tensor::getSparseTensorEncoding(resultType) != nullptr;
   // Collect the sizes for a ranked tensor to be passed as parameter to a
   // new tensor initialization operation. This operation only needs the
@@ -70,8 +70,9 @@
     (void)shapeSource.reifyReturnTypeShapes(b, operands, reifiedShapes);
     assert(reifiedShapes.size() == 1 && "Expected one reified result");
     // Construct sizes for the required dimensions.
-    for (const auto& en : llvm::enumerate(resultType.getShape())) {
-      if (en.value() != ShapedType::kDynamic) continue;
+    for (const auto &en : llvm::enumerate(resultType.getShape())) {
+      if (en.value() != ShapedType::kDynamic)
+        continue;
       sizes.push_back(b.create<tensor::ExtractOp>(
           loc, reifiedShapes[0],
           ValueRange{b.create<arith::ConstantIndexOp>(loc, en.index())}));
@@ -81,22 +82,23 @@
                   : getEmptyTensor(b, loc, resultType, sizes);
 }
 
-Value coerceTensorShape(OpBuilder& builder, Location loc,
+Value coerceTensorShape(OpBuilder &builder, Location loc,
                         TypedValue<ShapedType> value, ShapedType targetType) {
   return builder.createOrFold<tensor::CastOp>(
       loc, targetType.cloneWith(std::nullopt, value.getType().getElementType()),
       value);
 }
 
-LogicalResult verifyHloOpBufferOrTensorSemantics(Operation* op) {
+LogicalResult verifyHloOpBufferOrTensorSemantics(Operation *op) {
   auto isRankedTensor = [](Value val) {
     return isa<RankedTensorType>(val.getType());
   };
-  if (!llvm::all_of(op->getOperands(), isRankedTensor)) return failure();
+  if (!llvm::all_of(op->getOperands(), isRankedTensor))
+    return failure();
   return success(llvm::all_of(op->getResults(), isRankedTensor));
 }
 
-Value fillTensorWithZeros(OpBuilder& builder, Location loc, Value tensor) {
+Value fillTensorWithZeros(OpBuilder &builder, Location loc, Value tensor) {
   auto type = cast<ShapedType>(tensor.getType());
   Value zero;
   // Complex numbers are a special case.
@@ -111,8 +113,8 @@
   return builder.create<linalg::FillOp>(loc, zero, tensor).result();
 }
 
-Value preSparsify(Operation* op, llvm::SmallVector<Value, 2>& values, Type rtp,
-                  OpBuilder* b) {
+Value preSparsify(Operation *op, llvm::SmallVector<Value, 2> &values, Type rtp,
+                  OpBuilder *b) {
   // Apply for semi-ring operations that lower to elaborate code
   // (any sign-op, or an integral abs-op).
   // TODO(peiming, ajcbik): these all can potentially be optimized by applying
@@ -129,7 +131,7 @@
     Location loc = op->getLoc();
     auto semiring = b->create<sparse_tensor::UnaryOp>(loc, rtp, values[0]);
     Type itp = values[0].getType();
-    Block* present = b->createBlock(&semiring.getPresentRegion(), {}, itp, loc);
+    Block *present = b->createBlock(&semiring.getPresentRegion(), {}, itp, loc);
     b->setInsertionPointToStart(&semiring.getPresentRegion().front());
     values[0] = present->getArgument(0);
     return semiring;
@@ -137,7 +139,7 @@
   return Value();
 }
 
-Value postSparsify(Operation* op, Value semiring, Value result, OpBuilder* b) {
+Value postSparsify(Operation *op, Value semiring, Value result, OpBuilder *b) {
   if (semiring) {
     b->create<sparse_tensor::YieldOp>(op->getLoc(), result);
     b->setInsertionPointAfter(semiring.getDefiningOp());
@@ -146,25 +148,25 @@
   return result;
 }
 
-bool allOperandsAreScalarTensors(Operation* op) {
+bool allOperandsAreScalarTensors(Operation *op) {
   return llvm::all_of(op->getOperands(), [](Value operand) {
     auto operandTy = llvm::dyn_cast<ShapedType>(operand.getType());
     return operandTy && operandTy.getRank() == 0;
   });
 }
 
-bool isInBodyOfLinalgOps(Operation* op) {
-  auto* parentOp = op->getParentRegion()->getParentOp();
+bool isInBodyOfLinalgOps(Operation *op) {
+  auto *parentOp = op->getParentRegion()->getParentOp();
   return parentOp->getDialect() ==
          parentOp->getContext()->getLoadedDialect<linalg::LinalgDialect>();
 }
 
 SmallVector<int64_t> extract1DVector(DenseIntElementsAttr elements) {
   SmallVector<int64_t> ret;
-  for (const APInt& element : elements) {
+  for (const APInt &element : elements) {
     ret.push_back(element.getLimitedValue());
   }
   return ret;
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.h b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.h
index cee4489..bf5ea2d 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeToLinalgUtils.h
@@ -42,38 +42,38 @@
 /// Returns an ArrayAttr that contains `nLoops` attributes. All the attributes
 /// are "parallel" except the last `nReduction` elements, where are "reduction"
 /// attributes.
-SmallVector<utils::IteratorType, 3> getParallelAndReductionIterators(
-    unsigned nLoops, unsigned nReduction);
+SmallVector<utils::IteratorType, 3>
+getParallelAndReductionIterators(unsigned nLoops, unsigned nReduction);
 
 /// Returns an ArrayAttr that contains `nParallelLoops` "parallel" attributes.
-SmallVector<utils::IteratorType, 3> getNParallelLoopsAttrs(
-    unsigned nParallelLoops);
+SmallVector<utils::IteratorType, 3>
+getNParallelLoopsAttrs(unsigned nParallelLoops);
 
 /// Generates an init sparse tensor.
-Value getEmptySparseTensor(OpBuilder& b, Location loc, ShapedType type,
+Value getEmptySparseTensor(OpBuilder &b, Location loc, ShapedType type,
                            ArrayRef<Value> dynSizes);
 
 /// Generates a tensor.empty op.
-Value getEmptyTensor(OpBuilder& b, Location loc, ShapedType type,
+Value getEmptyTensor(OpBuilder &b, Location loc, ShapedType type,
                      ArrayRef<Value> dynSizes);
 
 /// Generates an empty tensor for the result of the operation, which could be a
 /// dense tensor or a sparse tensor.
-Value getEmptyTensorFor(OpBuilder& b, Location loc, ShapedType resultType,
-                        Operation* op, ValueRange operands);
+Value getEmptyTensorFor(OpBuilder &b, Location loc, ShapedType resultType,
+                        Operation *op, ValueRange operands);
 
 /// Ensures a tensor has the same shape (not including the element type) as
 /// another.
-Value coerceTensorShape(OpBuilder& builder, Location loc,
+Value coerceTensorShape(OpBuilder &builder, Location loc,
                         TypedValue<ShapedType> value, ShapedType targetType);
 
 /// Verifies |op|'s semantics by checking if all operands and results have
 /// ranged tensor types.
-LogicalResult verifyHloOpBufferOrTensorSemantics(Operation* op);
+LogicalResult verifyHloOpBufferOrTensorSemantics(Operation *op);
 
 /// Fills |tensor| with a zero constant of the matching type. Returns the new
 /// value.
-Value fillTensorWithZeros(OpBuilder& builder, Location loc, Value tensor);
+Value fillTensorWithZeros(OpBuilder &builder, Location loc, Value tensor);
 
 /// Sparsifies a (block of) operation(s) that cannot be handled directly
 /// by the sparse compiler but has well-known semi-ring semantics.
@@ -88,17 +88,17 @@
 ///     }
 ///     absent={}
 ///   linalg.yield %result
-Value preSparsify(Operation* op, llvm::SmallVector<Value, 2>& values, Type rtp,
-                  OpBuilder* b);
+Value preSparsify(Operation *op, llvm::SmallVector<Value, 2> &values, Type rtp,
+                  OpBuilder *b);
 
 /// Finalizes sparse semi-ring construction.
-Value postSparsify(Operation* op, Value semiring, Value result, OpBuilder* b);
+Value postSparsify(Operation *op, Value semiring, Value result, OpBuilder *b);
 
 /// Returns true if all operands are tensors with rank 0.
-bool allOperandsAreScalarTensors(Operation* op);
+bool allOperandsAreScalarTensors(Operation *op);
 
 /// Returns true if parent op is linalg.
-bool isInBodyOfLinalgOps(Operation* op);
+bool isInBodyOfLinalgOps(Operation *op);
 
 /// Extracts integer values from the attribute |elements|.
 SmallVector<int64_t> extract1DVector(DenseIntElementsAttr elements);
@@ -108,6 +108,6 @@
   return attr.isSplat() && attr.getSplatValue<uint64_t>() == value;
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_LEGALIZE_TO_LINALG_UTILS_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_LEGALIZE_TO_LINALG_UTILS_H_
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/MapStableHLOToScalarOp.h b/compiler/src/iree/compiler/InputConversion/StableHLO/MapStableHLOToScalarOp.h
index fb25d72..563cb2e 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/MapStableHLOToScalarOp.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/MapStableHLOToScalarOp.h
@@ -187,7 +187,7 @@
 struct MapStableHloOpToScalarOpImpl {
   Value operator()(Location /*loc*/, ArrayRef<Type> /*ResultTypes*/,
                    ArrayRef<Type> /*argTypes*/, ValueRange /*args*/,
-                   OpBuilder* /*b*/) {
+                   OpBuilder * /*b*/) {
     return nullptr;
   }
 };
@@ -195,7 +195,7 @@
 template <typename StdScalarOp>
 struct MapStableHloOpToScalarOpImpl<StdScalarOp> {
   Value operator()(Location loc, ArrayRef<Type> resultTypes,
-                   ArrayRef<Type> /*argTypes*/, ValueRange args, OpBuilder* b) {
+                   ArrayRef<Type> /*argTypes*/, ValueRange args, OpBuilder *b) {
     return b->template create<StdScalarOp>(loc, resultTypes, args,
                                            std::nullopt);
   }
@@ -204,7 +204,7 @@
 template <typename SupportedType, typename StdScalarOp, typename... Args>
 struct MapStableHloOpToScalarOpImpl<SupportedType, StdScalarOp, Args...> {
   Value operator()(Location loc, ArrayRef<Type> resultTypes,
-                   ArrayRef<Type> argTypes, ValueRange args, OpBuilder* b) {
+                   ArrayRef<Type> argTypes, ValueRange args, OpBuilder *b) {
     Type elementType = getElementTypeOrSelf(argTypes.front());
     if (SupportedType{}(elementType)) {
       return b->template create<StdScalarOp>(loc, resultTypes, args,
@@ -218,7 +218,7 @@
 template <typename SupportedType, typename... Args>
 struct MapStableHloOpToScalarOpImpl<SupportedType, void, Args...> {
   Value operator()(Location loc, ArrayRef<Type> resultTypes,
-                   ArrayRef<Type> argTypes, ValueRange args, OpBuilder* b) {
+                   ArrayRef<Type> argTypes, ValueRange args, OpBuilder *b) {
     return MapStableHloOpToScalarOpImpl<Args...>{}(loc, resultTypes, argTypes,
                                                    args, b);
   }
@@ -265,7 +265,7 @@
 template <typename StableHloOpTy>
 inline Value mapStableHloOpToStdScalarOp(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    typename StableHloOpTy::Adaptor adaptor, OpBuilder* b) {
+    typename StableHloOpTy::Adaptor adaptor, OpBuilder *b) {
   using ScalarIOpOrVoid = typename MapableIf<ScalarIOp, StableHloOpTy>::type;
   using ScalarUOpOrVoid = typename MapableIf<ScalarUOp, StableHloOpTy>::type;
   using ScalarFOpOrVoid = typename MapableIf<ScalarFOp, StableHloOpTy>::type;
@@ -280,7 +280,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::AbsOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::AbsOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::AbsOp::Adaptor adaptor, OpBuilder *b) {
   Type elementType = getElementTypeOrSelf(argTypes.front());
   if (elementType.isa<FloatType>()) {
     return MapStableHloOpToScalarOpImpl<IsFloatType, ::mlir::math::AbsFOp>{}(
@@ -306,7 +306,7 @@
 }
 
 // Return a constant for v of type t, splat if t is a vector type.
-inline Value getConstantOrSplat(OpBuilder* b, Location loc, Type t,
+inline Value getConstantOrSplat(OpBuilder *b, Location loc, Type t,
                                 Attribute v) {
   if (VectorType vecType = t.dyn_cast<VectorType>()) {
     v = SplatElementsAttr::get(vecType, v);
@@ -315,8 +315,8 @@
 }
 
 template <typename PredicateType>
-inline std::optional<PredicateType> getCmpPredicate(
-    stablehlo::ComparisonDirection, bool) {
+inline std::optional<PredicateType>
+getCmpPredicate(stablehlo::ComparisonDirection, bool) {
   return std::nullopt;
 }
 
@@ -357,7 +357,7 @@
 
 inline Value cmpComplex(Location loc, Value lhs, Value rhs,
                         stablehlo::ComparisonDirection comparisonDirection,
-                        OpBuilder* b) {
+                        OpBuilder *b) {
   auto complexType = lhs.getType().cast<ComplexType>();
   if (complexType.getElementType().isa<FloatType>()) {
     if (comparisonDirection == stablehlo::ComparisonDirection::EQ) {
@@ -398,11 +398,11 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::CompareOp>(
     Location loc, ArrayRef<Type> /*resultTypes*/, ArrayRef<Type> argTypes,
-    stablehlo::CompareOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::CompareOp::Adaptor adaptor, OpBuilder *b) {
   stablehlo::ComparisonDirection comparisonDirection =
       adaptor.getComparisonDirection();
-  const auto& lhs = adaptor.getLhs();
-  const auto& rhs = adaptor.getRhs();
+  const auto &lhs = adaptor.getLhs();
+  const auto &rhs = adaptor.getRhs();
   Type elementType = getElementTypeOrSelf(argTypes.front());
   if (elementType.isa<IntegerType>()) {
     bool isUnsigned = IsUnsignedIntegerType{}(elementType);
@@ -464,7 +464,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::ReducePrecisionOp>(
     Location loc, ArrayRef<Type> /*resultTypes*/, ArrayRef<Type> argTypes,
-    stablehlo::ReducePrecisionOp::Adaptor adaptor, OpBuilder* builder) {
+    stablehlo::ReducePrecisionOp::Adaptor adaptor, OpBuilder *builder) {
   using llvm::APInt;
   mlir::ImplicitLocOpBuilder b(loc, *builder);
 
@@ -488,7 +488,7 @@
   APInt expBitsMask(nbits, 1);
   expBitsMask = ((expBitsMask << srcExponentBits) - 1) << srcMantissaBits;
 
-  auto createConstant = [&](const APInt& v) {
+  auto createConstant = [&](const APInt &v) {
     return b.create<arith::ConstantIntOp>(v.getZExtValue(), intType)
         .getResult();
   };
@@ -588,7 +588,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::ComplexOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::ComplexOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::ComplexOp::Adaptor adaptor, OpBuilder *b) {
   return MapStableHloOpToScalarOpImpl<complex::CreateOp>{}(
       loc, resultTypes, argTypes, adaptor.getOperands(), b);
 }
@@ -596,7 +596,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::MaxOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::MaxOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::MaxOp::Adaptor adaptor, OpBuilder *b) {
   ValueRange operands = adaptor.getOperands();
   Value lhs = operands.front();
   Type complexTy = lhs.getType();
@@ -620,7 +620,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::MinOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::MinOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::MinOp::Adaptor adaptor, OpBuilder *b) {
   ValueRange operands = adaptor.getOperands();
   Value lhs = operands.front();
   Type complexTy = lhs.getType();
@@ -644,7 +644,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::RealOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::RealOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::RealOp::Adaptor adaptor, OpBuilder *b) {
   if (!adaptor.getOperand().getType().isa<ComplexType>())
     return adaptor.getOperand();
   return MapStableHloOpToScalarOpImpl<complex::ReOp>{}(
@@ -654,7 +654,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::ImagOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::ImagOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::ImagOp::Adaptor adaptor, OpBuilder *b) {
   if (!adaptor.getOperand().getType().isa<ComplexType>())
     return b->create<arith::ConstantOp>(
         loc, b->getZeroAttr(adaptor.getOperand().getType()));
@@ -667,7 +667,7 @@
 inline Value mapConvertOpToStdScalarOp(Location loc, ArrayRef<Type> targetTypes,
                                        ArrayRef<Type> resultTypes,
                                        ArrayRef<Type> argTypes, ValueRange args,
-                                       OpBuilder* b) {
+                                       OpBuilder *b) {
   assert(targetTypes.size() == 1 && "ConvertOp should return a single result");
   assert(resultTypes.size() == 1 && "ConvertOp should return a single result");
   assert(argTypes.size() == 1 && "ConvertOp should take a single argument");
@@ -807,7 +807,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::BitcastConvertOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::BitcastConvertOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::BitcastConvertOp::Adaptor adaptor, OpBuilder *b) {
   Type argType = getElementTypeOrSelf(argTypes.front());
   Type resultType = getElementTypeOrSelf(resultTypes.front());
 
@@ -821,11 +821,11 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::DotOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::DotOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::DotOp::Adaptor adaptor, OpBuilder *b) {
   // Dot Op converter from lhlo to affine only accepts float and integer types.
-  const auto& lhs = adaptor.getOperands()[0];
-  const auto& rhs = adaptor.getOperands()[1];
-  const auto& result = adaptor.getOperands()[2];
+  const auto &lhs = adaptor.getOperands()[0];
+  const auto &rhs = adaptor.getOperands()[1];
+  const auto &result = adaptor.getOperands()[2];
   Type elementType = lhs.getType();
   if (elementType.isa<FloatType>()) {
     Value floatMul =
@@ -848,7 +848,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::IsFiniteOp>(
     Location loc, ArrayRef<Type> /*ResultTypes*/, ArrayRef<Type> /*argTypes*/,
-    stablehlo::IsFiniteOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::IsFiniteOp::Adaptor adaptor, OpBuilder *b) {
   if (adaptor.getX().getType().isa<FloatType>()) {
     auto posInf = APFloat::getInf(
         adaptor.getX().getType().cast<FloatType>().getFloatSemantics());
@@ -868,7 +868,7 @@
   static Value map(Location /*loc*/,
                    stablehlo::ComparisonDirection /*comparison_direction*/,
                    ArrayRef<Type> /*ResultTypes*/, ArrayRef<Type> /*argTypes*/,
-                   ValueRange /*args*/, OpBuilder* /*b*/) {
+                   ValueRange /*args*/, OpBuilder * /*b*/) {
     return nullptr;
   }
 };
@@ -882,7 +882,7 @@
   static Value map(Location loc,
                    stablehlo::ComparisonDirection comparisonDirection,
                    ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-                   ValueRange args, OpBuilder* b) {
+                   ValueRange args, OpBuilder *b) {
     Type elementType = getElementTypeOrSelf(argTypes.front());
     if (elementType.isa<SupportedType>()) {
       auto predicate = getCmpPredicate<Predicate>(
@@ -900,7 +900,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::ClampOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::ClampOp::Adaptor op, OpBuilder* b) {
+    stablehlo::ClampOp::Adaptor op, OpBuilder *b) {
   // clamp(lb, x, ub) = min(max(lb, x), ub)
   Value maxLbX = mapStableHloOpToStdScalarOp<stablehlo::MaxOp>(
       loc, resultTypes, argTypes, ValueRange{op.getMin(), op.getOperand()}, b);
@@ -909,13 +909,13 @@
 }
 
 template <typename U, typename S>
-inline Value makeSafeIntDiv(ImplicitLocOpBuilder& lb, Type originalType,
+inline Value makeSafeIntDiv(ImplicitLocOpBuilder &lb, Type originalType,
                             Value lhs, Value rhs, Value returnedOnZero,
                             Value returnedOnSignedOverflow) {
   Type type = lhs.getType();
   auto elementType = getElementTypeOrSelf(type).cast<IntegerType>();
   Value zero = lb.create<arith::ConstantOp>(lb.getZeroAttr(type));
-  auto makeConstant = [&](const APInt& i) {
+  auto makeConstant = [&](const APInt &i) {
     return getConstantOrSplat(&lb, lb.getLoc(), type,
                               lb.getIntegerAttr(elementType, i));
   };
@@ -949,7 +949,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::DivOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::DivOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::DivOp::Adaptor adaptor, OpBuilder *b) {
   Type originalType = getElementTypeOrSelf(argTypes.front());
   if (originalType.isa<ComplexType, FloatType>()) {
     return MapStableHloOpToScalarOpImpl<IsFloatType, arith::DivFOp,
@@ -964,7 +964,7 @@
   ImplicitLocOpBuilder lb(loc, *b);
   Type type = adaptor.getLhs().getType();
   auto elementType = getElementTypeOrSelf(type).cast<IntegerType>();
-  auto makeConstant = [&](const APInt& i) {
+  auto makeConstant = [&](const APInt &i) {
     return getConstantOrSplat(&lb, lb.getLoc(), type,
                               lb.getIntegerAttr(elementType, i));
   };
@@ -979,7 +979,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::RemOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::RemOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::RemOp::Adaptor adaptor, OpBuilder *b) {
   Type originalType = getElementTypeOrSelf(argTypes.front());
   if (originalType.isa<ComplexType, FloatType>()) {
     return MapStableHloOpToScalarOpImpl<IsFloatType, arith::RemFOp>{}(
@@ -1002,7 +1002,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::NegOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::NegOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::NegOp::Adaptor adaptor, OpBuilder *b) {
   Type elementType = getElementTypeOrSelf(adaptor.getOperand().getType());
   if (elementType.isa<ComplexType, FloatType>()) {
     return MapStableHloOpToScalarOpImpl<IsFloatType, ::mlir::arith::NegFOp,
@@ -1023,7 +1023,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::NotOp>(
     Location loc, ArrayRef<Type> /*ResultTypes*/, ArrayRef<Type> /*argTypes*/,
-    stablehlo::NotOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::NotOp::Adaptor adaptor, OpBuilder *b) {
   Type elementType = getElementTypeOrSelf(adaptor.getOperand().getType());
   if (auto integerType = elementType.dyn_cast<IntegerType>()) {
     // lmhlo.not(x) -> x ^ -1
@@ -1039,7 +1039,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::LogisticOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> /*argTypes*/,
-    stablehlo::LogisticOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::LogisticOp::Adaptor adaptor, OpBuilder *b) {
   // 1.0 / (1.0 - exp(-x))
   Value negX = mapStableHloOpToStdScalarOp<stablehlo::NegOp>(
       loc, resultTypes, resultTypes, {adaptor.getOperand()}, b);
@@ -1058,7 +1058,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::PowOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::PowOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::PowOp::Adaptor adaptor, OpBuilder *b) {
   auto lb = ImplicitLocOpBuilder(loc, *b);
   // Floating point can use std::powf
   auto resultType = resultTypes.front();
@@ -1088,7 +1088,7 @@
       lb.create<scf::ForOp>(
             lowerBound, upperBound, step,
             SmallVector<Value>({one, originalBase, originalExponent}),
-            [&](OpBuilder& b, Location, Value /*v*/, ValueRange iters) {
+            [&](OpBuilder &b, Location, Value /*v*/, ValueRange iters) {
               Value accum = iters[0];
               Value base = iters[1];
               Value exponent = iters[2];
@@ -1137,7 +1137,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::SelectOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
-    stablehlo::SelectOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::SelectOp::Adaptor adaptor, OpBuilder *b) {
   return MapStableHloOpToScalarOpImpl<::mlir::arith::SelectOp>{}(
       loc, resultTypes, argTypes, adaptor.getOperands(), b);
 }
@@ -1145,7 +1145,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::SignOp>(
     Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> /*argTypes*/,
-    stablehlo::SignOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::SignOp::Adaptor adaptor, OpBuilder *b) {
   Value operand = adaptor.getOperand();
   Type elementType = getElementTypeOrSelf(operand.getType());
   if (auto floatType = elementType.dyn_cast<FloatType>()) {
@@ -1185,7 +1185,7 @@
 
 /// Construct operations to select the saturated value if the shift amount is
 /// greater than the bitwidth of the type.
-inline Value selectShiftedOrSaturated(ImplicitLocOpBuilder& lb, Value rhs,
+inline Value selectShiftedOrSaturated(ImplicitLocOpBuilder &lb, Value rhs,
                                       Value shifted, Value saturated,
                                       Type type) {
   Type etype =
@@ -1201,7 +1201,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::ShiftLeftOp>(
     Location loc, ArrayRef<Type> /*ResultTypes*/, ArrayRef<Type> /*argTypes*/,
-    stablehlo::ShiftLeftOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::ShiftLeftOp::Adaptor adaptor, OpBuilder *b) {
   ImplicitLocOpBuilder lb(loc, *b);
   Value lhs = adaptor.getLhs();
   Value rhs = adaptor.getRhs();
@@ -1217,7 +1217,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::ShiftRightLogicalOp>(
     Location loc, ArrayRef<Type> /*ResultTypes*/, ArrayRef<Type> /*argTypes*/,
-    stablehlo::ShiftRightLogicalOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::ShiftRightLogicalOp::Adaptor adaptor, OpBuilder *b) {
   ImplicitLocOpBuilder lb(loc, *b);
   Value lhs = adaptor.getLhs();
   Value rhs = adaptor.getRhs();
@@ -1233,7 +1233,7 @@
 template <>
 inline Value mapStableHloOpToStdScalarOp<stablehlo::ShiftRightArithmeticOp>(
     Location loc, ArrayRef<Type> /*ResultTypes*/, ArrayRef<Type> /*argTypes*/,
-    stablehlo::ShiftRightArithmeticOp::Adaptor adaptor, OpBuilder* b) {
+    stablehlo::ShiftRightArithmeticOp::Adaptor adaptor, OpBuilder *b) {
   ImplicitLocOpBuilder lb(loc, *b);
   Value lhs = adaptor.getLhs();
   Value rhs = adaptor.getRhs();
@@ -1250,13 +1250,13 @@
 
   return selectShiftedOrSaturated(lb, rhs, shifted, saturatedShifted, type);
 }
-}  // namespace impl
+} // namespace impl
 
 struct StableHloOpToStdScalarOp {
   // Converts stablehlo 'op' to linalg and arith ops.
   template <typename StableHloOpTy>
   static Value mapOp(StableHloOpTy op, ArrayRef<Type> resultTypes,
-                     ValueRange args, OpBuilder* b) {
+                     ValueRange args, OpBuilder *b) {
     auto argTypes = llvm::to_vector(op->getOperandTypes());
     return mapOpWithArgTypes(op, resultTypes, argTypes, args, b);
   }
@@ -1266,7 +1266,7 @@
   template <typename StableHloOpTy>
   static Value mapOpWithArgTypes(StableHloOpTy op, ArrayRef<Type> resultTypes,
                                  ArrayRef<Type> argTypes, ValueRange args,
-                                 OpBuilder* b) {
+                                 OpBuilder *b) {
     static_assert(!std::is_same<StableHloOpTy, stablehlo::ConvertOp>::value);
     return mapOpOfType<StableHloOpTy>(
         op.getLoc(), resultTypes, argTypes,
@@ -1276,17 +1276,16 @@
   static Value mapOpWithArgTypes(stablehlo::ConvertOp op,
                                  ArrayRef<Type> resultTypes,
                                  ArrayRef<Type> argTypes, ValueRange args,
-                                 OpBuilder* b) {
+                                 OpBuilder *b) {
     return impl::mapConvertOpToStdScalarOp(op.getLoc(), op.getType(),
                                            resultTypes, argTypes, args, b);
   }
 
   // Converts stablehlo 'op' to linalg and arith ops.
   template <typename StableHloOpTy>
-  static Value mapOpOfType(Location loc, ArrayRef<Type> resultTypes,
-                           ArrayRef<Type> argTypes,
-                           typename StableHloOpTy::Adaptor adaptor,
-                           OpBuilder* b) {
+  static Value
+  mapOpOfType(Location loc, ArrayRef<Type> resultTypes, ArrayRef<Type> argTypes,
+              typename StableHloOpTy::Adaptor adaptor, OpBuilder *b) {
     if (std::is_same<StableHloOpTy, stablehlo::ConvertOp>::value) {
       // Note: this assumes that the caller is passing result/arg types with
       // appropriate signedness.
@@ -1298,7 +1297,7 @@
   }
 };
 
-}  // namespace stablehlo
-}  // namespace mlir
+} // namespace stablehlo
+} // namespace mlir
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_MAP_STABLEHLO_TO_SCALAR_OP_H
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_MAP_STABLEHLO_TO_SCALAR_OP_H
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/PassDetail.h b/compiler/src/iree/compiler/InputConversion/StableHLO/PassDetail.h
index 26e9cf31..6027d09 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/PassDetail.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/PassDetail.h
@@ -16,6 +16,6 @@
 #define GEN_PASS_DECL
 #include "iree/compiler/InputConversion/StableHLO/Passes.h.inc"
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PASSDETAIL_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PASSDETAIL_H_
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.cpp
index 6eef697..4f6c2a3 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.cpp
@@ -22,8 +22,8 @@
 namespace mlir::iree_compiler::stablehlo {
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/InputConversion/StableHLO/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/InputConversion/StableHLO/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 namespace {
 
@@ -31,14 +31,14 @@
   PassPipelineRegistration<StableHloOptions> stablehlo(
       "iree-stablehlo-input-transformation-pipeline",
       "Runs the StableHLO IREE flow dialect transformation pipeline",
-      [](OpPassManager& passManager, const StableHloOptions& options) {
+      [](OpPassManager &passManager, const StableHloOptions &options) {
         buildStableHLOInputConversionPassPipeline(passManager, options);
       });
 }
 
 // Prepare HLO for use as an input to the Flow dialect.
 void buildStableHLOInputConversionPassPipelineImpl(
-    OpPassManager& passManager, const StableHloOptions& options, bool detuple) {
+    OpPassManager &passManager, const StableHloOptions &options, bool detuple) {
   passManager.addNestedPass<func::FuncOp>(mlir::createCanonicalizerPass());
   passManager.addNestedPass<func::FuncOp>(createStableHLOCanonicalize());
   passManager.addNestedPass<func::FuncOp>(mlir::createCSEPass());
@@ -49,7 +49,8 @@
   // In the future it would be nice if we could have all of flow be both scf
   // and cfg compatible.
   passManager.addNestedPass<func::FuncOp>(createTopLevelSCFToCFGPass());
-  if (detuple) passManager.addPass(createFlattenTuplesInCFG());
+  if (detuple)
+    passManager.addPass(createFlattenTuplesInCFG());
 
   passManager.addPass(createStableHLOToStableHLOPreprocessing());
   passManager.addNestedPass<func::FuncOp>(createStableHLOCanonicalize());
@@ -104,16 +105,16 @@
 
   passManager.addPass(stablehlo::createVerifyCompilerStableHloInputLegality());
 }
-}  // namespace
+} // namespace
 
 void buildStableHLOInputConversionPassPipeline(
-    OpPassManager& passManager, const StableHloOptions& options) {
+    OpPassManager &passManager, const StableHloOptions &options) {
   buildStableHLOInputConversionPassPipelineImpl(passManager, options,
                                                 /*detuple=*/false);
 }
 
 void buildStableHLOXLAInputConversionPassPipeline(
-    OpPassManager& passManager, const StableHloOptions& options) {
+    OpPassManager &passManager, const StableHloOptions &options) {
   buildStableHLOInputConversionPassPipelineImpl(passManager, options,
                                                 /*detuple=*/true);
 }
@@ -126,4 +127,4 @@
   registerStableHLOConversionPassPipeline();
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.h b/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.h
index be8af77..c8fe79f 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Passes.h
@@ -26,13 +26,13 @@
 // Pipelines
 //===----------------------------------------------------------------------===//
 
-void buildStableHLOInputConversionPassPipeline(OpPassManager& passManager,
-                                               const StableHloOptions& options);
+void buildStableHLOInputConversionPassPipeline(OpPassManager &passManager,
+                                               const StableHloOptions &options);
 
 // Performs input legalization on programs that may have originated from an XLA
 // import (or made to interop with it).
 void buildStableHLOXLAInputConversionPassPipeline(
-    OpPassManager& passManager, const StableHloOptions& options);
+    OpPassManager &passManager, const StableHloOptions &options);
 
 //===----------------------------------------------------------------------===//
 // Register all Passes
@@ -40,6 +40,6 @@
 
 void registerStableHLOConversionPasses();
 
-}  // namespace iree_compiler::stablehlo
-}  // namespace mlir
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PASSES_H_
+} // namespace iree_compiler::stablehlo
+} // namespace mlir
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PASSES_H_
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Canonicalization.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Canonicalization.cpp
index 7c42d88..6d84aad 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Canonicalization.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Canonicalization.cpp
@@ -42,7 +42,8 @@
 
 static bool isIotaRange(ElementsAttr attr) {
   auto elems = attr.tryGetValues<APInt>();
-  if (!elems) return false;
+  if (!elems)
+    return false;
 
   for (auto [idx, value] : llvm::enumerate(*elems)) {
     if (idx != value) {
@@ -104,7 +105,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::AddOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type) return failure();
+    if (!type)
+      return failure();
 
     Value lhs = op.getLhs();
     Value rhs = op.getRhs();
@@ -151,7 +153,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::SubtractOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type) return failure();
+    if (!type)
+      return failure();
 
     Value lhs = op.getLhs();
     Value rhs = op.getRhs();
@@ -192,7 +195,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::MulOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type) return failure();
+    if (!type)
+      return failure();
 
     Value lhs = op.getLhs();
     Value rhs = op.getRhs();
@@ -240,23 +244,23 @@
   }
 };
 
-static mlir::stablehlo::ComparisonDirection invertDirection(
-    mlir::stablehlo::ComparisonDirection direction) {
+static mlir::stablehlo::ComparisonDirection
+invertDirection(mlir::stablehlo::ComparisonDirection direction) {
   using mlir::stablehlo::ComparisonDirection;
 
   switch (direction) {
-    case ComparisonDirection::EQ:
-      return ComparisonDirection::EQ;
-    case ComparisonDirection::GE:
-      return ComparisonDirection::LE;
-    case ComparisonDirection::LE:
-      return ComparisonDirection::GE;
-    case ComparisonDirection::GT:
-      return ComparisonDirection::LT;
-    case ComparisonDirection::LT:
-      return ComparisonDirection::GT;
-    case ComparisonDirection::NE:
-      return ComparisonDirection::NE;
+  case ComparisonDirection::EQ:
+    return ComparisonDirection::EQ;
+  case ComparisonDirection::GE:
+    return ComparisonDirection::LE;
+  case ComparisonDirection::LE:
+    return ComparisonDirection::GE;
+  case ComparisonDirection::GT:
+    return ComparisonDirection::LT;
+  case ComparisonDirection::LT:
+    return ComparisonDirection::GT;
+  case ComparisonDirection::NE:
+    return ComparisonDirection::NE;
   }
 
   llvm_unreachable("Unhandled case");
@@ -278,35 +282,35 @@
   // Signed comparison.
   if (kind == ComparisonType::SIGNED) {
     switch (direction) {
-      case ComparisonDirection::EQ:
-        return asBit(lhs == rhs);
-      case ComparisonDirection::GE:
-        return asBit(lhs.sge(rhs));
-      case ComparisonDirection::GT:
-        return asBit(lhs.sgt(rhs));
-      case ComparisonDirection::LE:
-        return asBit(lhs.sle(rhs));
-      case ComparisonDirection::LT:
-        return asBit(lhs.slt(rhs));
-      case ComparisonDirection::NE:
-        return asBit(lhs != rhs);
+    case ComparisonDirection::EQ:
+      return asBit(lhs == rhs);
+    case ComparisonDirection::GE:
+      return asBit(lhs.sge(rhs));
+    case ComparisonDirection::GT:
+      return asBit(lhs.sgt(rhs));
+    case ComparisonDirection::LE:
+      return asBit(lhs.sle(rhs));
+    case ComparisonDirection::LT:
+      return asBit(lhs.slt(rhs));
+    case ComparisonDirection::NE:
+      return asBit(lhs != rhs);
     }
   }
 
   // Unsigned comparison.
   switch (direction) {
-    case ComparisonDirection::EQ:
-      return asBit(lhs == rhs);
-    case ComparisonDirection::GE:
-      return asBit(lhs.uge(rhs));
-    case ComparisonDirection::GT:
-      return asBit(lhs.ugt(rhs));
-    case ComparisonDirection::LE:
-      return asBit(lhs.ule(rhs));
-    case ComparisonDirection::LT:
-      return asBit(lhs.ult(rhs));
-    case ComparisonDirection::NE:
-      return asBit(lhs != rhs);
+  case ComparisonDirection::EQ:
+    return asBit(lhs == rhs);
+  case ComparisonDirection::GE:
+    return asBit(lhs.uge(rhs));
+  case ComparisonDirection::GT:
+    return asBit(lhs.ugt(rhs));
+  case ComparisonDirection::LE:
+    return asBit(lhs.ule(rhs));
+  case ComparisonDirection::LT:
+    return asBit(lhs.ult(rhs));
+  case ComparisonDirection::NE:
+    return asBit(lhs != rhs);
   }
 
   llvm_unreachable("Unhandled case");
@@ -318,7 +322,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::CompareOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type) return failure();
+    if (!type)
+      return failure();
 
     // Bail out on non-integer comparison.
     // TODO: Support more comparison types.
@@ -337,20 +342,20 @@
 
     if (lhs == rhs) {
       switch (direction) {
-        case ComparisonDirection::EQ:
-        case ComparisonDirection::GE:
-        case ComparisonDirection::LE: {
-          rewriter.replaceOpWithNewOp<mlir::stablehlo::ConstantOp>(
-              op, SplatElementsAttr::get(type, rewriter.getBoolAttr(true)));
-          return success();
-        }
-        case ComparisonDirection::GT:
-        case ComparisonDirection::LT:
-        case ComparisonDirection::NE: {
-          rewriter.replaceOpWithNewOp<mlir::stablehlo::ConstantOp>(
-              op, rewriter.getZeroAttr(type));
-          return success();
-        }
+      case ComparisonDirection::EQ:
+      case ComparisonDirection::GE:
+      case ComparisonDirection::LE: {
+        rewriter.replaceOpWithNewOp<mlir::stablehlo::ConstantOp>(
+            op, SplatElementsAttr::get(type, rewriter.getBoolAttr(true)));
+        return success();
+      }
+      case ComparisonDirection::GT:
+      case ComparisonDirection::LT:
+      case ComparisonDirection::NE: {
+        rewriter.replaceOpWithNewOp<mlir::stablehlo::ConstantOp>(
+            op, rewriter.getZeroAttr(type));
+        return success();
+      }
       }
       llvm_unreachable("Unhandled case");
     }
@@ -391,7 +396,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::SelectOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type) return failure();
+    if (!type)
+      return failure();
 
     Value trueVal = op.getOnTrue();
     Value falseVal = op.getOnFalse();
@@ -417,13 +423,16 @@
 
     // Handle elementwise selection when both outcomes are also constants. This
     // will create a new, likely non-splat constant.
-    if (cond.getNumElements() > kFoldOpEltLimit) return failure();
+    if (cond.getNumElements() > kFoldOpEltLimit)
+      return failure();
 
     DenseElementsAttr trueAttr;
-    if (!matchPattern(trueVal, m_Constant(&trueAttr))) return failure();
+    if (!matchPattern(trueVal, m_Constant(&trueAttr)))
+      return failure();
 
     DenseElementsAttr falseAttr;
-    if (!matchPattern(falseVal, m_Constant(&falseAttr))) return failure();
+    if (!matchPattern(falseVal, m_Constant(&falseAttr)))
+      return failure();
 
     SmallVector<Attribute> newValues;
     newValues.reserve(cond.getNumElements());
@@ -446,11 +455,13 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::BroadcastInDimOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type) return failure();
+    if (!type)
+      return failure();
 
     Value operand = op.getOperand();
     auto operandTy = dyn_cast<RankedTensorType>(operand.getType());
-    if (!operandTy) return failure();
+    if (!operandTy)
+      return failure();
 
     // Fold when broadcast is a noop.
     DenseIntElementsAttr dims = op.getBroadcastDimensions();
@@ -511,10 +522,12 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::ConcatenateOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type || !type.hasStaticShape()) return failure();
+    if (!type || !type.hasStaticShape())
+      return failure();
 
     size_t numElems = type.getNumElements();
-    if (numElems > kFoldOpEltLimit) return failure();
+    if (numElems > kFoldOpEltLimit)
+      return failure();
 
     // Fold concatenate when all inputs are constants.
     OperandRange inputs = op.getInputs();
@@ -554,7 +567,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::ConvertOp op,
                                 PatternRewriter &rewriter) const override {
     // Check if this convert is a noop.
-    if (op.getOperand().getType() != op.getType()) return failure();
+    if (op.getOperand().getType() != op.getType())
+      return failure();
 
     rewriter.replaceOp(op, op.getOperand());
     return success();
@@ -647,7 +661,8 @@
     auto precedingBcast =
         bcast.getOperand()
             .getDefiningOp<mlir::stablehlo::DynamicBroadcastInDimOp>();
-    if (!precedingBcast) return failure();
+    if (!precedingBcast)
+      return failure();
 
     // Compose broadcast dimensions.
     DenseIntElementsAttr precedingBcastDims =
@@ -737,7 +752,8 @@
                                          "unranked input unsupported");
     }
 
-    if (!llvm::is_contained(elemTy.getShape(), 0)) return failure();
+    if (!llvm::is_contained(elemTy.getShape(), 0))
+      return failure();
 
     Location loc = op.getLoc();
     DenseIntElementsAttr empty = rewriter.getI64TensorAttr({});
@@ -776,7 +792,8 @@
                                 PatternRewriter &rewriter) const override {
     // This is a noop when the output type is already a static shape.
     auto type = dyn_cast<RankedTensorType>(op.getType());
-    if (!type || !type.hasStaticShape()) return failure();
+    if (!type || !type.hasStaticShape())
+      return failure();
 
     rewriter.replaceOpWithNewOp<mlir::stablehlo::ReshapeOp>(op, type,
                                                             op.getOperand());
@@ -792,7 +809,8 @@
                                 PatternRewriter &rewriter) const override {
     auto constructor =
         op.getOperand().getDefiningOp<mlir::stablehlo::TupleOp>();
-    if (!constructor) return failure();
+    if (!constructor)
+      return failure();
 
     Value result = constructor.getOperand(op.getIndex());
     rewriter.replaceOp(op, result);
@@ -806,7 +824,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::RealOp op,
                                 PatternRewriter &rewriter) const override {
     auto complex = op.getOperand().getDefiningOp<mlir::stablehlo::ComplexOp>();
-    if (!complex) return failure();
+    if (!complex)
+      return failure();
 
     rewriter.replaceOp(op, complex.getLhs());
     return success();
@@ -819,7 +838,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::ImagOp op,
                                 PatternRewriter &rewriter) const override {
     auto complex = op.getOperand().getDefiningOp<mlir::stablehlo::ComplexOp>();
-    if (!complex) return failure();
+    if (!complex)
+      return failure();
 
     rewriter.replaceOp(op, complex.getRhs());
     return success();
@@ -834,10 +854,12 @@
                                 PatternRewriter &rewriter) const override {
     // Fold get_dimension_size when the queried dim is statically known.
     auto tensorTy = dyn_cast<RankedTensorType>(op.getOperand().getType());
-    if (!tensorTy) return failure();
+    if (!tensorTy)
+      return failure();
 
     int64_t dimSize = tensorTy.getDimSize(op.getDimension());
-    if (dimSize < 0) return failure();
+    if (dimSize < 0)
+      return failure();
 
     auto elemTy = cast<IntegerType>(op.getType().getElementType());
     IntegerAttr elemVal = rewriter.getIntegerAttr(elemTy, dimSize);
@@ -874,7 +896,8 @@
 
     auto operandType =
         dyn_cast<RankedTensorType>(gather->getOperand(0).getType());
-    if (!operandType || !operandType.hasStaticShape()) return failure();
+    if (!operandType || !operandType.hasStaticShape())
+      return failure();
 
     auto sliceEnd =
         llvm::to_vector(gather.getSliceSizes().getValues<int64_t>());
@@ -961,7 +984,8 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::TransposeOp op,
                                 PatternRewriter &rewriter) const override {
     // Check if this transpose is a noop and use the operand instead.
-    if (!isIotaRange(op.getPermutation())) return failure();
+    if (!isIotaRange(op.getPermutation()))
+      return failure();
 
     rewriter.replaceOp(op, op.getOperand());
     return success();
@@ -981,7 +1005,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 void populateCanonicalizationPatterns(MLIRContext *context,
                                       RewritePatternSet *patterns,
                                       PatternBenefit benefit) {
@@ -1002,4 +1026,4 @@
       ConcatenateOpCanon, ConvertOpCanon, DynamicReshapeOpCanon, GatherOpCanon,
       ReshapeOpCanon, TransposeOpCanon>(context, benefit);
 }
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/DotGeneralToDot.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/DotGeneralToDot.cpp
index 37ef374..3459297 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/DotGeneralToDot.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/DotGeneralToDot.cpp
@@ -69,7 +69,8 @@
   auto transposeType = RankedTensorType::get(transposedShape, elementType);
   Value transposeResult = rewriter.create<mlir::stablehlo::TransposeOp>(
       loc, transposeType, arg, transposePermutationAttr);
-  if (noReshape) return transposeResult;
+  if (noReshape)
+    return transposeResult;
 
   // Return the final result.
   auto reshapedType = RankedTensorType::get({leftSize, rightSize}, elementType);
@@ -168,7 +169,8 @@
 
     ArrayAttr precisionConfig;
     auto opPrecisionConfig = op.getPrecisionConfig();
-    if (opPrecisionConfig.has_value()) precisionConfig = *opPrecisionConfig;
+    if (opPrecisionConfig.has_value())
+      precisionConfig = *opPrecisionConfig;
 
     auto resultTy = cast<ShapedType>(op.getType());
 
@@ -182,7 +184,8 @@
 
     RankedTensorType lhsTy = dyn_cast<RankedTensorType>(lhs.getType());
     RankedTensorType rhsTy = dyn_cast<RankedTensorType>(rhs.getType());
-    if (!lhsTy || !rhsTy) return failure();
+    if (!lhsTy || !rhsTy)
+      return failure();
 
     // The StableHLO dot operator directly supports a vector dot product
     // (two vectors reduce into a scalar) as well as a matrix vector
@@ -230,7 +233,8 @@
     // For any sparse situation, don't use any of the following rules, since
     // transposing and reshaping is not without cost. Instead, rely on the
     // default linalg lowering that follows later in the pipeline.
-    if (sparse_tensor::hasAnySparseOperandOrResult(op)) return failure();
+    if (sparse_tensor::hasAnySparseOperandOrResult(op))
+      return failure();
 
     // Compute the, possibly, transposed-reshaped operands.
     lhs = cast<mlir::TypedValue<mlir::TensorType>>(processDotArg(
@@ -241,7 +245,8 @@
     // Accept only static shaped types.
     auto lhsShapeType = dyn_cast_or_null<ShapedType>(lhs.getType());
     auto rhsShapeType = dyn_cast_or_null<ShapedType>(rhs.getType());
-    if (!lhsShapeType || !rhsShapeType) return failure();
+    if (!lhsShapeType || !rhsShapeType)
+      return failure();
 
     // Generate new dot operator on expanded types.
     ShapedType newTy = RankedTensorType::get(
@@ -322,11 +327,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populatePreprocessingDotGeneralToDotPatterns(mlir::MLIRContext *context,
                                                   RewritePatternSet *patterns) {
   patterns->add<GeneralDotConvert>(context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/EinsumToDotGeneral.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/EinsumToDotGeneral.cpp
index be3e784..e71e73f 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/EinsumToDotGeneral.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/EinsumToDotGeneral.cpp
@@ -167,11 +167,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populatePreprocessingEinsumToDotGeneralPatterns(
     mlir::MLIRContext *context, RewritePatternSet *patterns) {
   patterns->add<EinsumToDotGeneralPattern>(context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/FlattenTuplesInCFG.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/FlattenTuplesInCFG.cpp
index 0baad64..2412ac3 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/FlattenTuplesInCFG.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/FlattenTuplesInCFG.cpp
@@ -101,10 +101,9 @@
 }
 
 template <typename T>
-LogicalResult untupleAndLookupValues(T values,
-                                     llvm::SmallVectorImpl<Value> &newValues,
-                                     OpBuilder &builder, Location loc,
-                                     IRMapping &mapping) {
+LogicalResult
+untupleAndLookupValues(T values, llvm::SmallVectorImpl<Value> &newValues,
+                       OpBuilder &builder, Location loc, IRMapping &mapping) {
   for (auto operand : values) {
     auto newValue = mapping.lookupOrNull(operand);
     if (!newValue) {
@@ -339,5 +338,5 @@
   }
 };
 
-}  // namespace
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/GatherToTorchIndexSelect.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/GatherToTorchIndexSelect.cpp
index 5097799..528f6dc 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/GatherToTorchIndexSelect.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/GatherToTorchIndexSelect.cpp
@@ -135,11 +135,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populatePreprocessingGatherToTorchIndexSelectPatterns(
     mlir::MLIRContext *context, RewritePatternSet *patterns) {
   patterns->add<GatherIsTorchIndexSelectPattern>(context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/LowerComplex.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/LowerComplex.cpp
index c649f0b..c4d6ddf 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/LowerComplex.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/LowerComplex.cpp
@@ -111,15 +111,15 @@
 ElementsAttr getSplat(Builder *b, Value val, T constant) {
   return getSplat(b, cast<RankedTensorType>(val.getType()), constant);
 }
-}  // end anonymous namespace
+} // end anonymous namespace
 
 namespace {
 #include "iree/compiler/InputConversion/StableHLO/Preprocessing/ComplexLoweringPatterns.h.inc"
-}  // end anonymous namespace
+} // end anonymous namespace
 
 void populatePreprocessingComplexPatterns(MLIRContext *context,
                                           RewritePatternSet *patterns) {
   patterns->add<ConvertComplexDot>(context);
   populateWithGenerated(*patterns);
 }
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.cpp
index 5cc8b90..2633e60 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.cpp
@@ -9,12 +9,12 @@
 namespace mlir::iree_compiler::stablehlo {
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerStableHLOPreprocessingPasses() {
   // Generated.
   registerPasses();
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.h b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.h
index 43f2370..b5e6a2c 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Passes.h
@@ -22,6 +22,6 @@
 
 void registerStableHLOPreprocessingPasses();
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PREPROCESSING_PASSES_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PREPROCESSING_PASSES_H_
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Rewriters.h b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Rewriters.h
index ceccea1..280e8db 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Rewriters.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/Rewriters.h
@@ -43,6 +43,6 @@
 void populatePreprocessingUnfuseBatchNormPatterns(MLIRContext *context,
                                                   RewritePatternSet *patterns);
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PREPROCESSING_REWRITERS_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_PREPROCESSING_REWRITERS_H_
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/StableHLOToStableHLO.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/StableHLOToStableHLO.cpp
index edf18f4..79acf47 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/StableHLOToStableHLO.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/StableHLOToStableHLO.cpp
@@ -33,7 +33,8 @@
 
 bool isIota(ArrayRef<int64_t> array) {
   for (auto [idx, value] : llvm::enumerate(array)) {
-    if (static_cast<int64_t>(idx) != value) return false;
+    if (static_cast<int64_t>(idx) != value)
+      return false;
   }
   return true;
 }
@@ -129,7 +130,8 @@
                                 PatternRewriter &rewriter) const override {
     auto kernel = op.getRhs();
     auto kernelType = cast<ShapedType>(kernel.getType());
-    if (!kernelType.hasRank()) return failure();
+    if (!kernelType.hasRank())
+      return failure();
     auto kernelShape = kernelType.getShape();
 
     auto dimensionNumbers = op.getDimensionNumbers();
@@ -148,7 +150,8 @@
     permutation.push_back(outputFeatureDimension);
 
     // If the permutation is iota, then no transpose is required.
-    if (isIota(permutation)) return failure();
+    if (isIota(permutation))
+      return failure();
 
     llvm::SmallVector<int64_t> transposeShape;
     for (int64_t perm : permutation) {
@@ -258,7 +261,8 @@
 
 bool isConsecutive(ArrayRef<int64_t> array) {
   for (size_t i = 1, e = array.size(); i < e; ++i) {
-    if (array[i] - array[i - 1] != 1) return false;
+    if (array[i] - array[i - 1] != 1)
+      return false;
   }
   return true;
 }
@@ -278,7 +282,8 @@
 
   Value TransposeIfNonConsecutive(OpBuilder &b, Location loc, Value src,
                                   ArrayRef<int64_t> targetOrder) const {
-    if (isConsecutive(targetOrder)) return src;
+    if (isConsecutive(targetOrder))
+      return src;
 
     auto type = cast<RankedTensorType>(src.getType());
     SmallVector<int64_t> transposeShape;
@@ -315,7 +320,8 @@
     auto lhsShapeType = dyn_cast<RankedTensorType>(op.getLhs().getType());
     auto rhsShapeType = dyn_cast<RankedTensorType>(op.getRhs().getType());
     auto resultType = dyn_cast<RankedTensorType>(op.getResult().getType());
-    if (!lhsShapeType || !rhsShapeType || !resultType) return failure();
+    if (!lhsShapeType || !rhsShapeType || !resultType)
+      return failure();
 
     // TODO(jpienaar): This pattern is not safe for dynamic shapes and seems to
     // be (now) redundant with later pass that does handle them. To decouple
@@ -418,7 +424,8 @@
     // batching、lhs parallel、rhs parallel this order is a conversion
     SmallVector<int64_t> newShape = {lhsNewType.getShape()[0],
                                      lhsNewType.getShape()[1]};
-    if (rhsNewType.getRank() > 2) newShape.push_back(rhsNewType.getDimSize(2));
+    if (rhsNewType.getRank() > 2)
+      newShape.push_back(rhsNewType.getDimSize(2));
 
     TensorType newResultType =
         needReshapeResult
@@ -543,7 +550,8 @@
   static Value addUnitBatchDim(Location loc, Value value,
                                PatternRewriter &rewriter) {
     auto valueTy = cast<ShapedType>(value.getType());
-    if (!valueTy.hasRank()) return nullptr;
+    if (!valueTy.hasRank())
+      return nullptr;
 
     // Materialize the implicit indices dim.
     SmallVector<ReassociationExprs> reassociationMap(valueTy.getRank());
@@ -570,7 +578,8 @@
     auto indicesTy = llvm::dyn_cast<RankedTensorType>(indices.getType());
 
     // Check whether indices has no batch dimension.
-    if (!indicesTy) return failure();
+    if (!indicesTy)
+      return failure();
     if (indicesTy.getRank() != 1 || indexVectorDim != 0) {
       return rewriter.notifyMatchFailure(op,
                                          "no implicit batch dimension to add.");
@@ -622,7 +631,8 @@
   static Value collapseBatchDims(Location loc, Value value, int64_t batchCount,
                                  PatternRewriter &rewriter) {
     auto valueTy = dyn_cast<ShapedType>(value.getType());
-    if (!valueTy) return nullptr;
+    if (!valueTy)
+      return nullptr;
 
     SmallVector<ReassociationExprs> reassociationMap(1);
     reassociationMap.reserve(valueTy.getRank() - batchCount + 1);
@@ -732,10 +742,12 @@
       llvm::SmallVector<int64_t> perm;
       perm.reserve(indicesTy.getRank());
       for (int i = 0, s = indicesTy.getRank(); i < s; ++i) {
-        if (i != indexVectorDim) perm.push_back(i);
+        if (i != indexVectorDim)
+          perm.push_back(i);
       }
 
-      if (perm.size() < indicesTy.getRank()) perm.push_back(indexVectorDim);
+      if (perm.size() < indicesTy.getRank())
+        perm.push_back(indexVectorDim);
 
       llvm::SmallVector<int64_t> newShape;
       for (int i = 0, s = perm.size(); i < s; ++i) {
@@ -764,7 +776,8 @@
     llvm::SmallVector<int64_t> updatePerm;
     updatePerm.reserve(updates0Ty.getRank());
     for (int i = 0, s = isBatch.size(); i < s; ++i)
-      if (isBatch[i]) updatePerm.push_back(i);
+      if (isBatch[i])
+        updatePerm.push_back(i);
     updatePerm.append(updatedWindowDims.begin(), updatedWindowDims.end());
 
     llvm::SmallVector<int64_t> newUpdatedWindowDims;
@@ -875,7 +888,8 @@
     int64_t firstNonIndex = 0;
     for (int64_t s = scatterDimsToOperandDims.size(); firstNonIndex < s;
          ++firstNonIndex) {
-      if (!isIndexDim[firstNonIndex]) break;
+      if (!isIndexDim[firstNonIndex])
+        break;
     }
 
     llvm::SmallVector<bool> isInsertDims(operandTy.getRank(), false);
@@ -901,7 +915,8 @@
     reassociationMap.push_back({rewriter.getAffineDimExpr(0)});
 
     for (auto it : llvm::enumerate(llvm::ArrayRef<bool>(toInsertDims))) {
-      if (!it.value()) reassociationMap.push_back({});
+      if (!it.value())
+        reassociationMap.push_back({});
       reassociationMap.back().push_back(
           rewriter.getAffineDimExpr(it.index() + 1));
     }
@@ -951,7 +966,8 @@
 bool isFromBool(Value val) {
   while (true) {
     Operation *op = val.getDefiningOp();
-    if (!op) return false;
+    if (!op)
+      return false;
 
     if (auto convertOp = dyn_cast<mlir::stablehlo::ConvertOp>(op)) {
       auto inTy = llvm::cast<ShapedType>(convertOp.getOperand().getType());
@@ -981,14 +997,17 @@
   LogicalResult matchAndRewrite(mlir::stablehlo::MulOp op,
                                 PatternRewriter &rewriter) const override {
     auto resultTy = cast<ShapedType>(op.getType());
-    if (!isa<FloatType>(resultTy.getElementType())) return failure();
+    if (!isa<FloatType>(resultTy.getElementType()))
+      return failure();
     Value lhs = op.getLhs();
     Value rhs = op.getRhs();
     bool lhsIsBool = isFromBool(lhs);
     bool rhsIsBool = isFromBool(rhs);
 
-    if (lhsIsBool == rhsIsBool) return failure();
-    if (rhsIsBool) std::swap(lhs, rhs);
+    if (lhsIsBool == rhsIsBool)
+      return failure();
+    if (rhsIsBool)
+      std::swap(lhs, rhs);
 
     Type eType = resultTy.getElementType();
     auto lhsTy = cast<ShapedType>(lhs.getType());
@@ -1007,7 +1026,8 @@
       auto valueTy = cast<ShapedType>(value.getType());
       auto newTy =
           RankedTensorType::get(resultTy.getShape(), valueTy.getElementType());
-      if (valueTy == newTy) return value;
+      if (valueTy == newTy)
+        return value;
       auto dimensions = llvm::to_vector(
           llvm::seq<int64_t>(resultRank - valueTy.getRank(), resultRank));
       return rewriter.create<mlir::stablehlo::DynamicBroadcastInDimOp>(
@@ -1036,11 +1056,13 @@
     auto resTy = dyn_cast<RankedTensorType>(op.getType());
     // We can support static shapes, but it's easier to implement Box-Muller
     // transform if we know the number of elements.
-    if (!resTy || !resTy.hasStaticShape()) return failure();
+    if (!resTy || !resTy.hasStaticShape())
+      return failure();
 
     // The algorithm requires even numbers and will generate pairs.
     auto numElems = resTy.getNumElements();
-    if (numElems & 1) numElems++;
+    if (numElems & 1)
+      numElems++;
     auto halfNumElems = numElems / 2;
 
     ImplicitLocOpBuilder b(op.getLoc(), rewriter);
@@ -1237,7 +1259,8 @@
       return rewriter.notifyMatchFailure(op, "lhs and rhs must be rank-2");
     }
 
-    if (lhsTy.getDimSize(1) != 1) return failure();
+    if (lhsTy.getDimSize(1) != 1)
+      return failure();
 
     // Dynamically compute the shape of the result of the DotOp by querying
     // the 0-th dimensions, of the left, and the 1st dimension of the right.
@@ -1339,7 +1362,8 @@
     auto resultTy = dyn_cast<RankedTensorType>(op.getType());
     ImplicitLocOpBuilder builder(op.getLoc(), rewriter);
 
-    if (!lhsTy || !rhsTy || !resultTy) return failure();
+    if (!lhsTy || !rhsTy || !resultTy)
+      return failure();
 
     auto dNums = op.getDotDimensionNumbers();
     auto batchDimsL = dNums.getLhsBatchingDimensions();
@@ -1350,10 +1374,14 @@
     llvm::SmallVector<bool> isLhsParallelDim(lhsTy.getRank(), true);
     llvm::SmallVector<bool> isRhsParallelDim(rhsTy.getRank(), true);
 
-    for (auto dim : batchDimsL) isLhsParallelDim[dim] = false;
-    for (auto dim : batchDimsR) isRhsParallelDim[dim] = false;
-    for (auto dim : contractDimsL) isLhsParallelDim[dim] = false;
-    for (auto dim : contractDimsR) isRhsParallelDim[dim] = false;
+    for (auto dim : batchDimsL)
+      isLhsParallelDim[dim] = false;
+    for (auto dim : batchDimsR)
+      isRhsParallelDim[dim] = false;
+    for (auto dim : contractDimsL)
+      isLhsParallelDim[dim] = false;
+    for (auto dim : contractDimsR)
+      isRhsParallelDim[dim] = false;
 
     for (auto dim : contractDimsL) {
       if (lhsTy.getDimSize(dim) != 1) {
@@ -1369,11 +1397,13 @@
     permRhs.append(batchDimsR.begin(), batchDimsR.end());
 
     for (auto [idx, value] : llvm::enumerate(isLhsParallelDim)) {
-      if (value) permLhs.push_back(idx);
+      if (value)
+        permLhs.push_back(idx);
     }
 
     for (auto [idx, value] : llvm::enumerate(isRhsParallelDim)) {
-      if (value) permRhs.push_back(idx);
+      if (value)
+        permRhs.push_back(idx);
     }
 
     llvm::append_range(permLhs, contractDimsL);
@@ -1382,8 +1412,10 @@
     // Determine the transpose shape based on the generate permutations.
     llvm::SmallVector<int64_t> lhsTransposeShape;
     llvm::SmallVector<int64_t> rhsTransposeShape;
-    for (auto dim : permLhs) lhsTransposeShape.push_back(lhsTy.getDimSize(dim));
-    for (auto dim : permRhs) rhsTransposeShape.push_back(rhsTy.getDimSize(dim));
+    for (auto dim : permLhs)
+      lhsTransposeShape.push_back(lhsTy.getDimSize(dim));
+    for (auto dim : permRhs)
+      rhsTransposeShape.push_back(rhsTy.getDimSize(dim));
 
     // Transpose the left hand side and the right hand side.
     lhs = builder.create<mlir::stablehlo::TransposeOp>(
@@ -1825,5 +1857,5 @@
   }
 };
 
-}  // namespace
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/UnfuseBatchNorm.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/UnfuseBatchNorm.cpp
index 510ab8f..dc9ceee 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/UnfuseBatchNorm.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Preprocessing/UnfuseBatchNorm.cpp
@@ -26,7 +26,7 @@
 // a static broadcast.
 Value broadcastToFeatureDim(Location loc, RankedTensorType resultType,
                             Value value1d, Value shapeValue, int64_t featureDim,
-                            PatternRewriter& rewriter) {
+                            PatternRewriter &rewriter) {
   auto dimsType = RankedTensorType::get({1}, rewriter.getIntegerType(64));
   auto dims = DenseIntElementsAttr::get(dimsType, {featureDim});
   if (shapeValue) {
@@ -39,7 +39,7 @@
 }
 
 // Get the shape of operand, assuming it is a dynamic shape with static rank.
-Value getShapeValue(Location loc, Value operand, PatternRewriter& rewriter) {
+Value getShapeValue(Location loc, Value operand, PatternRewriter &rewriter) {
   RankedTensorType resultType = cast<RankedTensorType>(operand.getType());
   return rewriter.create<mlir::shape::ShapeOfOp>(
       loc,
@@ -47,9 +47,9 @@
       operand);
 }
 
-Value materializeEpsilon(Operation* op, FloatAttr epsilonAttr, FloatType fpType,
+Value materializeEpsilon(Operation *op, FloatAttr epsilonAttr, FloatType fpType,
                          Value broadcastTo, RankedTensorType broadcastToType,
-                         PatternRewriter& rewriter) {
+                         PatternRewriter &rewriter) {
   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
   if (epsilonAttr.getType() != fpType) {
     // Need to convert.
@@ -90,7 +90,7 @@
   using OpRewritePattern ::OpRewritePattern;
 
   LogicalResult matchAndRewrite(mlir::stablehlo::BatchNormInferenceOp bnOp,
-                                PatternRewriter& rewriter) const override {
+                                PatternRewriter &rewriter) const override {
     // Enforce type invariants.
     // Note that we deduce the actual element type from the variance,
     // which should not be subject to quantization at a higher level.
@@ -153,8 +153,8 @@
 // Create "stablehlo.reduce", "operand" is reduce input and "zero" is init
 // value, reduce sum from operand to operand[feature_index].
 Value createReduce(Location loc, Value operand, Value zero,
-                   SmallVector<int64_t>& reduceDims, int64_t featureIndex,
-                   PatternRewriter& rewriter) {
+                   SmallVector<int64_t> &reduceDims, int64_t featureIndex,
+                   PatternRewriter &rewriter) {
   auto operandType = cast<RankedTensorType>(operand.getType());
   auto reduceResultType = RankedTensorType::get(
       {operandType.getDimSize(featureIndex)}, operandType.getElementType());
@@ -163,8 +163,8 @@
       rewriter.getI64TensorAttr(reduceDims));
 
   // setup "stablehlo.reduce"'s body
-  Region& region = reduce.getBody();
-  Block& block = region.emplaceBlock();
+  Region &region = reduce.getBody();
+  Block &block = region.emplaceBlock();
   RankedTensorType blockArgumentType =
       RankedTensorType::get({}, operandType.getElementType());
   block.addArgument(blockArgumentType, loc);
@@ -184,10 +184,10 @@
 
 // Calculate total reduce size, assuming it is a dynamic shape with static rank.
 // Reduce from operand to operand[feature_index]/scale
-Value calculateReduceSize(Operation* op, Value operand,
+Value calculateReduceSize(Operation *op, Value operand,
                           RankedTensorType operandType, Value scale,
                           RankedTensorType scaleType, int64_t featureIndex,
-                          PatternRewriter& rewriter) {
+                          PatternRewriter &rewriter) {
   ImplicitLocOpBuilder b(op->getLoc(), rewriter);
   Type indexType = b.getIndexType();
   if (!operandType.hasStaticShape()) {
@@ -237,7 +237,7 @@
   using OpRewritePattern::OpRewritePattern;
 
   LogicalResult matchAndRewrite(mlir::stablehlo::BatchNormTrainingOp bnOp,
-                                PatternRewriter& rewriter) const override {
+                                PatternRewriter &rewriter) const override {
     auto operandType = dyn_cast<RankedTensorType>(bnOp.getOperand().getType());
     auto scaleType = dyn_cast<RankedTensorType>(bnOp.getScale().getType());
     if (!operandType || !scaleType) {
@@ -341,7 +341,7 @@
 };
 
 struct UnfuseBatchNorm final : impl::UnfuseBatchNormBase<UnfuseBatchNorm> {
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<arith::ArithDialect, shape::ShapeDialect,
                     tensor::TensorDialect>();
   }
@@ -355,13 +355,13 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
-void populatePreprocessingUnfuseBatchNormPatterns(mlir::MLIRContext* context,
-                                                  RewritePatternSet* patterns) {
+void populatePreprocessingUnfuseBatchNormPatterns(mlir::MLIRContext *context,
+                                                  RewritePatternSet *patterns) {
   patterns
       ->add<UnfuseBatchNormInferencePattern, UnfuseBatchNormTrainingPattern>(
           context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/Rewriters.h b/compiler/src/iree/compiler/InputConversion/StableHLO/Rewriters.h
index 440bd58..13b01fb 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/Rewriters.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/Rewriters.h
@@ -95,8 +95,8 @@
     MLIRContext *context, TypeConverter &typeConverter,
     RewritePatternSet *patterns,
     llvm::function_ref<bool(Operation *)> filterFn = nullptr);
-}  // namespace detail
+} // namespace detail
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_REWRITERS_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_REWRITERS_H_
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToArith.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToArith.cpp
index 36f8840..89492b8 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToArith.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToArith.cpp
@@ -25,10 +25,11 @@
       : OpConversionPattern<OpTy>(typeConverter, context, benefit),
         filterFn(filterFn) {}
 
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (filterFn && !filterFn(op)) return failure();
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (filterFn && !filterFn(op))
+      return failure();
 
     auto isScalar = [](Value v) {
       return cast<ShapedType>(v.getType()).getRank() == 0;
@@ -41,7 +42,8 @@
 
     auto resultTy = dyn_cast_or_null<ShapedType>(
         this->getTypeConverter()->convertType(op->getResultTypes().front()));
-    if (!resultTy) return failure();
+    if (!resultTy)
+      return failure();
 
     SmallVector<Value> operands;
     for (Value operand : adaptor.getOperands()) {
@@ -50,17 +52,18 @@
     }
     Value scalarResult = mlir::stablehlo::StableHloOpToStdScalarOp::mapOp(
         op, resultTy.getElementType(), operands, &rewriter);
-    if (!scalarResult) return failure();
+    if (!scalarResult)
+      return failure();
     rewriter.replaceOpWithNewOp<tensor::FromElementsOp>(op, resultTy,
                                                         scalarResult);
     return success();
   }
 
- private:
+private:
   llvm::function_ref<bool(Operation *)> filterFn;
 };
 
-}  // namespace
+} // namespace
 
 namespace detail {
 void populateScalarHloToArithConversionPatterns(
@@ -114,9 +117,9 @@
       ScalarHloToArithmeticPattern<mlir::stablehlo::SqrtOp>,
       ScalarHloToArithmeticPattern<mlir::stablehlo::SubtractOp>,
       ScalarHloToArithmeticPattern<mlir::stablehlo::TanhOp>,
-      ScalarHloToArithmeticPattern<mlir::stablehlo::XorOp> >(typeConverter,
-                                                             context, filterFn);
+      ScalarHloToArithmeticPattern<mlir::stablehlo::XorOp>>(typeConverter,
+                                                            context, filterFn);
 }
-}  // namespace detail
+} // namespace detail
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToIREEInputDialects.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToIREEInputDialects.cpp
index 9e22e52..c85e303 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToIREEInputDialects.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToIREEInputDialects.cpp
@@ -50,9 +50,9 @@
     : OpConversionPattern<mlir::stablehlo::ConcatenateOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ConcatenateOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ConcatenateOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultType =
         getTypeConverter()->convertType<RankedTensorType>(op.getType());
     if (!resultType || !resultType.hasStaticShape()) {
@@ -87,7 +87,8 @@
 
     auto toOpFoldResult = [](Value v) -> OpFoldResult {
       auto op = v.getDefiningOp<arith::ConstantIndexOp>();
-      if (!op) return v;
+      if (!op)
+        return v;
       return op.getValue();
     };
 
@@ -142,21 +143,19 @@
       b.create<linalg::FillOp>(loc, zero, emptyTensor).getResult(0);
 
   switch (llvm::cast<RankedTensorType>(lhs.getType()).getRank()) {
-    case 1:
-      return b
-          .create<linalg::VecmatOp>(loc, TypeRange{resultType},
-                                    ValueRange{lhs, rhs},
-                                    ValueRange{zeroTensor})
-          .getResult(0);
-    case 2:
-      return b
-          .create<linalg::MatmulOp>(loc, TypeRange{resultType},
-                                    ValueRange{lhs, rhs},
-                                    ValueRange{zeroTensor})
-          .getResult(0);
-    default:
-      assert(false && "unhandled matmul type");
-      return Value();
+  case 1:
+    return b
+        .create<linalg::VecmatOp>(loc, TypeRange{resultType},
+                                  ValueRange{lhs, rhs}, ValueRange{zeroTensor})
+        .getResult(0);
+  case 2:
+    return b
+        .create<linalg::MatmulOp>(loc, TypeRange{resultType},
+                                  ValueRange{lhs, rhs}, ValueRange{zeroTensor})
+        .getResult(0);
+  default:
+    assert(false && "unhandled matmul type");
+    return Value();
   }
 }
 
@@ -164,9 +163,9 @@
 struct FftOpConversion final : OpConversionPattern<mlir::stablehlo::FftOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::FftOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::FftOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (op.getFftType() != mlir::stablehlo::FftType::RFFT) {
       return rewriter.notifyMatchFailure(op,
                                          "non RFFT types are supported yet");
@@ -210,9 +209,9 @@
     : OpConversionPattern<mlir::stablehlo::OptimizationBarrierOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::OptimizationBarrierOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::OptimizationBarrierOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Value> outputs;
     for (Value operand : adaptor.getOperands()) {
       outputs.push_back(
@@ -231,7 +230,8 @@
   // TODO: switch to using a dialect-based exclusion list or some other way that
   // is not a big string table.
   for (auto attr : attrs) {
-    if (attr.getName() == "tf.aliasing_output") return false;
+    if (attr.getName() == "tf.aliasing_output")
+      return false;
   }
   return true;
 }
@@ -290,9 +290,9 @@
 struct BuiltinFuncOpPattern final : OpConversionPattern<func::FuncOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      func::FuncOp srcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(func::FuncOp srcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     FunctionType srcFuncType = srcOp.getFunctionType();
     TypeConverter::SignatureConversion signatureConversion(
         srcOp.getNumArguments());
@@ -339,12 +339,13 @@
 struct GlobalOpPattern final : OpConversionPattern<ml_program::GlobalOp> {
   using OpConversionPattern<ml_program::GlobalOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      ml_program::GlobalOp globalOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(ml_program::GlobalOp globalOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Type oldType = globalOp.getType();
     Type newType = getTypeConverter()->convertType(oldType);
-    if (newType == oldType) return failure();
+    if (newType == oldType)
+      return failure();
     if (!newType) {
       return rewriter.notifyMatchFailure(globalOp,
                                          "result type conversion failed");
@@ -365,9 +366,9 @@
                      MLIRContext *context, PatternBenefit benefit = 0)
       : ConversionPattern(converter, rootName, benefit, context) {}
 
-  LogicalResult matchAndRewrite(
-      Operation *op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
     llvm::SmallVector<NamedAttribute> newAttr;
     llvm::append_range(newAttr, op->getAttrs());
 
@@ -422,18 +423,21 @@
   auto filterOpAttrs = [&](Operation *op) {
     SmallVector<NamedAttribute> newAttrs;
     for (auto attr : op->getDialectAttrs()) {
-      if (!isAttrFiltered(attr)) newAttrs.push_back(attr);
+      if (!isAttrFiltered(attr))
+        newAttrs.push_back(attr);
     }
     op->setDialectAttrs(newAttrs);
   };
   auto filterAttrDicts = [&](ArrayAttr allOldAttrs,
                              SmallVectorImpl<DictionaryAttr> &newAttrs) {
-    if (!allOldAttrs) return false;
+    if (!allOldAttrs)
+      return false;
     for (auto oldAttrs : allOldAttrs.getAsRange<DictionaryAttr>()) {
       SmallVector<NamedAttribute> preservedAttrs;
       preservedAttrs.reserve(oldAttrs.size());
       for (auto attr : oldAttrs) {
-        if (!isAttrFiltered(attr)) preservedAttrs.push_back(attr);
+        if (!isAttrFiltered(attr))
+          preservedAttrs.push_back(attr);
       }
       newAttrs.push_back(
           DictionaryAttr::get(allOldAttrs.getContext(), preservedAttrs));
@@ -508,10 +512,12 @@
     auto isIllegalType = [&](Type t) { return !typeConverter->isLegal(t); };
     auto isLegallyTypedOp = [&](Operation *op) -> bool {
       for (Type type : op->getResultTypes()) {
-        if (isIllegalType(type)) return false;
+        if (isIllegalType(type))
+          return false;
       }
       for (Type type : op->getOperandTypes()) {
-        if (isIllegalType(type)) return false;
+        if (isIllegalType(type))
+          return false;
       }
       return true;
     };
@@ -534,14 +540,17 @@
         }
       }
       for (Type type : funcOp.getFunctionType().getInputs()) {
-        if (isIllegalType(type)) return false;
+        if (isIllegalType(type))
+          return false;
       }
       for (Type type : funcOp.getFunctionType().getResults()) {
-        if (isIllegalType(type)) return false;
+        if (isIllegalType(type))
+          return false;
       }
       for (Block &block : funcOp.getFunctionBody()) {
         for (Type type : block.getArgumentTypes()) {
-          if (isIllegalType(type)) return false;
+          if (isIllegalType(type))
+            return false;
         }
       }
       return true;
@@ -578,7 +587,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStableHloToLinalgOnTensorsConversionPatterns(
     MLIRContext *context, TypeConverter &typeConverter,
@@ -593,4 +602,4 @@
                                               /*enablePrimitiveOps=*/false);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalg.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalg.cpp
index 90d3fc7..523db14 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalg.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalg.cpp
@@ -50,19 +50,20 @@
 #include "iree/compiler/InputConversion/StableHLO/Passes.h.inc"
 
 namespace {
-Value getResultValue(Operation* op) { return op->getResult(0); }
+Value getResultValue(Operation *op) { return op->getResult(0); }
 
-ShapedType getHloOpResultType(Operation* op) {
+ShapedType getHloOpResultType(Operation *op) {
   return llvm::cast<ShapedType>(getResultValue(op).getType());
 }
 
 /// Extracts an element from a tensor and optionally converts it to an index
 /// type, based on the tensor's pre-type conversion type.
-Value extractIndexFromTensor(OpBuilder& builder, Location loc, Value tensor,
+Value extractIndexFromTensor(OpBuilder &builder, Location loc, Value tensor,
                              ShapedType originalType,
                              ArrayRef<Value> tensorIndex = {}) {
   Value extracted = builder.create<tensor::ExtractOp>(loc, tensor, tensorIndex);
-  if (extracted.getType().isIndex()) return extracted;
+  if (extracted.getType().isIndex())
+    return extracted;
   return originalType.getElementType().isUnsignedInteger()
              ? builder.createOrFold<arith::IndexCastUIOp>(
                    loc, builder.getIndexType(), extracted)
@@ -80,9 +81,9 @@
     : public OpConversionPattern<mlir::stablehlo::RngOp> {
   using OpConversionPattern<mlir::stablehlo::RngOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::RngOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::RngOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     // We only handle uniform distributions
     if (op.getRngDistribution() !=
         ::mlir::stablehlo::RngDistribution::UNIFORM) {
@@ -121,7 +122,7 @@
         /*outputs=*/emptyTensor, indexingMaps,
         getParallelAndReductionIterators(/*nLoops=*/targetRank,
                                          /*nReduction=*/0),
-        [&](OpBuilder& b, Location loc, ValueRange args) {
+        [&](OpBuilder &b, Location loc, ValueRange args) {
           llvm::SmallVector<Value> updateVec = {b.create<arith::ConstantOp>(
               loc, b.getI32IntegerAttr(kInitialSeed))};
           Value multiplier =
@@ -168,9 +169,9 @@
 // Looks through a set of dimension that has been marked as reduction axes,
 // if it is found within the set, then we set it as "reduction", otherwise
 // we can label it as "parallel".
-SmallVector<utils::IteratorType, 3> getEinsumLoopsAttrs(
-    const llvm::SmallSetVector<StringRef, 4>& inputInd,
-    const llvm::SmallSetVector<StringRef, 4>& reductionDims) {
+SmallVector<utils::IteratorType, 3>
+getEinsumLoopsAttrs(const llvm::SmallSetVector<StringRef, 4> &inputInd,
+                    const llvm::SmallSetVector<StringRef, 4> &reductionDims) {
   SmallVector<utils::IteratorType, 3> res;
   for (StringRef dim : inputInd) {
     if (!reductionDims.contains(dim)) {
@@ -182,22 +183,23 @@
   return res;
 }
 
-SmallVector<Value, 2> extractDynamicEinsumSizes(
-    OpBuilder& b, Location loc, Value lhs, Value rhs,
-    const SmallVector<std::string>& lhsLoopVec,
-    const SmallVector<std::string>& rhsLoopVec,
-    const SmallVector<std::string>& outputLoopVec) {
+SmallVector<Value, 2>
+extractDynamicEinsumSizes(OpBuilder &b, Location loc, Value lhs, Value rhs,
+                          const SmallVector<std::string> &lhsLoopVec,
+                          const SmallVector<std::string> &rhsLoopVec,
+                          const SmallVector<std::string> &outputLoopVec) {
   SmallVector<Value, 2> dynSizes;
-  for (const std::string& dimInd : outputLoopVec) {
+  for (const std::string &dimInd : outputLoopVec) {
     Value dimSize;
-    const auto* dimIndIt =
+    const auto *dimIndIt =
         std::find(lhsLoopVec.begin(), lhsLoopVec.end(), dimInd);
     if (dimIndIt != lhsLoopVec.end()) {
       // Query from lhs vars.
       auto dimIndPos = dimIndIt - lhsLoopVec.begin();
       auto lhsShape =
           llvm::dyn_cast<RankedTensorType>(lhs.getType()).getShape();
-      if (lhsShape[dimIndPos] != ShapedType::kDynamic) continue;
+      if (lhsShape[dimIndPos] != ShapedType::kDynamic)
+        continue;
       dimSize = b.create<tensor::DimOp>(loc, lhs, dimIndPos);
     } else {
       // query from rhs vars.
@@ -205,7 +207,8 @@
       auto dimIndPos = dimIndIt - rhsLoopVec.begin();
       auto rhsShape =
           llvm::dyn_cast<RankedTensorType>(rhs.getType()).getShape();
-      if (rhsShape[dimIndPos] != ShapedType::kDynamic) continue;
+      if (rhsShape[dimIndPos] != ShapedType::kDynamic)
+        continue;
       dimSize = b.create<tensor::DimOp>(loc, rhs, dimIndPos);
     }
     dynSizes.push_back(dimSize);
@@ -214,12 +217,13 @@
 }
 
 // Adds indices/axes that are missing from output set.
-llvm::SmallSetVector<StringRef, 4> findSummationAxes(
-    const llvm::SmallSetVector<StringRef, 4>& inputSet,
-    const llvm::SmallSetVector<StringRef, 4>& outputSet) {
+llvm::SmallSetVector<StringRef, 4>
+findSummationAxes(const llvm::SmallSetVector<StringRef, 4> &inputSet,
+                  const llvm::SmallSetVector<StringRef, 4> &outputSet) {
   llvm::SmallSetVector<StringRef, 4> summationAxes;
   for (StringRef ind : inputSet) {
-    if (!outputSet.contains(ind)) summationAxes.insert(ind);
+    if (!outputSet.contains(ind))
+      summationAxes.insert(ind);
   }
   return summationAxes;
 }
@@ -233,11 +237,11 @@
 // first_input_operand will get umap[{"a","b","c"}] -> (d0, d1, d2).
 // second_input_operand will get umap[{"c","b"}] -> (d2, d1).
 // output_operand will get umap[{"a","c","b"}] -> (d0, d2, d1).
-SmallVector<AffineExpr> getExprFromConfig(
-    const SmallVector<std::string>& loopDims,
-    const DenseMap<StringRef, AffineExpr>& strAffineDimUmap) {
+SmallVector<AffineExpr>
+getExprFromConfig(const SmallVector<std::string> &loopDims,
+                  const DenseMap<StringRef, AffineExpr> &strAffineDimUmap) {
   SmallVector<AffineExpr> exprs;
-  for (const auto& dim : loopDims) {
+  for (const auto &dim : loopDims) {
     exprs.push_back(strAffineDimUmap.lookup(dim));
   }
   return exprs;
@@ -259,12 +263,12 @@
 // rhs:[d2,d3], out:[d0,d1,d3]}.
 class EinsumToLinalgConverter
     : public OpConversionPattern<mlir::stablehlo::EinsumOp> {
- public:
+public:
   using OpConversionPattern<mlir::stablehlo::EinsumOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::EinsumOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::EinsumOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     auto getRank = [](Value v) {
       return llvm::cast<ShapedType>(v.getType()).getRank();
     };
@@ -341,14 +345,14 @@
     // Create a 1:1 map from f:strDimension -> affineDimension.
     int64_t nloops = inputInd.size();
     DenseMap<StringRef, AffineExpr> strAffineDimUmap;
-    for (const auto& it : llvm::enumerate(inputInd)) {
+    for (const auto &it : llvm::enumerate(inputInd)) {
       strAffineDimUmap[it.value()] = rewriter.getAffineDimExpr(it.index());
     }
 
     // From einsum_config of each operand in vector<string>, generate
     // the equivalent vector<AffineExpr>.
     SmallVector<AffineMap> maps;
-    for (const SmallVector<std::string>& loopOperand :
+    for (const SmallVector<std::string> &loopOperand :
          {lhsEin, rhsEin, outEin}) {
       auto exprs = getExprFromConfig(loopOperand, strAffineDimUmap);
       maps.push_back(AffineMap::get(nloops, 0, exprs, rewriter.getContext()));
@@ -357,7 +361,7 @@
     auto linalgOp = rewriter.create<linalg::GenericOp>(
         loc, resultTy ? resultTy : TypeRange{}, adaptor.getOperands(), output,
         maps, getEinsumLoopsAttrs(inputInd, reductionAxe),
-        [reductionAxe](OpBuilder& b, Location nestedLoc, ValueRange args) {
+        [reductionAxe](OpBuilder &b, Location nestedLoc, ValueRange args) {
           Value resultVal =
               b.create<mlir::arith::MulFOp>(nestedLoc, args[0], args[1]);
           if (!reductionAxe.empty()) {
@@ -371,7 +375,7 @@
     return success();
   }
 
- private:
+private:
   static constexpr StringRef kArrow = "->";
   static constexpr StringRef kComma = ",";
   static constexpr StringRef kEllipsis = "...";
@@ -391,18 +395,21 @@
 // Convert the representation from string/vector<char> to vector<string>.
 // i.e ("abc") -> {"a", "b", "c"}. For cases with ellipsis with batch rank 3:
 // get loop_dim = f("ab...cde") = {"a","b","0","1","2","c","d","e"}
-SmallVector<std::string> EinsumToLinalgConverter::getEinsumConfigAsVector(
-    StringRef loop, size_t operandRank) {
+SmallVector<std::string>
+EinsumToLinalgConverter::getEinsumConfigAsVector(StringRef loop,
+                                                 size_t operandRank) {
   SmallVector<std::string> loopDim;
   size_t preElip = loop.find(kEllipsis);
   bool hasElip = preElip != std::string::npos;
-  if (!hasElip) preElip = loop.size();
+  if (!hasElip)
+    preElip = loop.size();
   // Add the dimension until the end or up to ellipsis if it exist.
   for (int64_t preElipInd = 0; preElipInd < static_cast<int64_t>(preElip);
        preElipInd++) {
     loopDim.push_back(loop.substr(preElipInd, 1).str());
   }
-  if (!hasElip) return loopDim;
+  if (!hasElip)
+    return loopDim;
   // Case where Ellipsis presence:
   size_t nonBatchRank = loop.size() - kEllipsis.size();
   size_t batchRank = operandRank - nonBatchRank;
@@ -441,7 +448,8 @@
   bool batchHasEqualRank = true;
 
   // Condition is valid if only 1 operand or less have batches.
-  if (batchRankVec.size() < 2) return batchHasEqualRank;
+  if (batchRankVec.size() < 2)
+    return batchHasEqualRank;
   if (!std::equal(batchRankVec.begin() + 1, batchRankVec.end(),
                   batchRankVec.begin()) &&
       batchRankVec.size() > 1)
@@ -456,20 +464,22 @@
 /// and the output.
 template <typename Derived, typename OpTy>
 class DataMovementOpConverter : public OpConversionPattern<OpTy> {
- public:
+public:
   using OpConversionPattern<OpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
-    if (failed(verifyHloOpBufferOrTensorSemantics(op))) return failure();
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    if (failed(verifyHloOpBufferOrTensorSemantics(op)))
+      return failure();
     auto resultType = getHloOpResultType(op);
     resultType =
         llvm::cast<ShapedType>(this->typeConverter->convertType(resultType));
 
     SmallVector<AffineMap, 2> indexingMaps =
         Derived::getIndexingMaps(op, &rewriter);
-    if (indexingMaps.empty()) return failure();
+    if (indexingMaps.empty())
+      return failure();
 
     auto nloops = resultType.getRank();
     auto loc = op.getLoc();
@@ -482,7 +492,7 @@
         ValueRange{getEmptyTensorFor(rewriter, loc, resultType, op,
                                      adaptor.getOperands())},
         indexingMaps, getNParallelLoopsAttrs(nloops),
-        [&](OpBuilder& nestedBuilder, Location /*nested_loc*/,
+        [&](OpBuilder &nestedBuilder, Location /*nested_loc*/,
             ValueRange args) {
           nestedBuilder.create<linalg::YieldOp>(loc, *args.begin());
         },
@@ -496,12 +506,12 @@
 template <typename OpTy>
 class BroadcastConverter
     : public DataMovementOpConverter<BroadcastConverter<OpTy>, OpTy> {
- public:
+public:
   using DataMovementOpConverter<BroadcastConverter,
                                 OpTy>::DataMovementOpConverter;
 
   static SmallVector<AffineMap, 2> getIndexingMaps(OpTy broadcastOp,
-                                                   Builder* b) {
+                                                   Builder *b) {
     ShapedType inputType =
         llvm::cast<ShapedType>(broadcastOp.getOperand().getType());
     unsigned inputRank = inputType.getRank();
@@ -517,7 +527,7 @@
     }
 
     AffineMap inputMap;
-    MLIRContext* context = b->getContext();
+    MLIRContext *context = b->getContext();
     if (inputDimExprs.empty()) {
       // The input is a scalar, i.e. this is a scalar broadcast op.
       inputMap = AffineMap::get(nloops, /*symbolCount=*/0, context);
@@ -533,9 +543,9 @@
     : public OpConversionPattern<mlir::stablehlo::BroadcastOp> {
   using OpConversionPattern<mlir::stablehlo::BroadcastOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::BroadcastOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::BroadcastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultTy =
         llvm::cast<ShapedType>(typeConverter->convertType(op.getType()));
 
@@ -557,13 +567,13 @@
 class HloBroadcastInDimConverter
     : public DataMovementOpConverter<HloBroadcastInDimConverter,
                                      mlir::stablehlo::BroadcastInDimOp> {
- public:
+public:
   using DataMovementOpConverter<
       HloBroadcastInDimConverter,
       mlir::stablehlo::BroadcastInDimOp>::DataMovementOpConverter;
 
-  static SmallVector<AffineMap, 2> getIndexingMaps(
-      mlir::stablehlo::BroadcastInDimOp broadcastOp, Builder* b) {
+  static SmallVector<AffineMap, 2>
+  getIndexingMaps(mlir::stablehlo::BroadcastInDimOp broadcastOp, Builder *b) {
     auto resultType = getHloOpResultType(broadcastOp);
     auto operandType =
         llvm::cast<ShapedType>(broadcastOp.getOperand().getType());
@@ -580,7 +590,7 @@
     dimExprs.reserve(nloops);
 
     if (broadcastOp.getBroadcastDimensions()) {
-      for (const auto& broadcastDim :
+      for (const auto &broadcastDim :
            enumerate(broadcastOp.getBroadcastDimensions().getValues<APInt>())) {
         int size = broadcastDim.value().getSExtValue();
         bool expansionNeeded = operandShape[broadcastDim.index()] == 1 &&
@@ -595,8 +605,8 @@
   }
 };
 
-Value collapseExpandingDims(PatternRewriter& rewriter, Location loc,
-                            Value operand, SmallVector<int64_t>& dimensions,
+Value collapseExpandingDims(PatternRewriter &rewriter, Location loc,
+                            Value operand, SmallVector<int64_t> &dimensions,
                             llvm::function_ref<bool(int64_t)> isExpandingDim) {
   auto operandTy = llvm::cast<RankedTensorType>(operand.getType());
 
@@ -607,7 +617,7 @@
   SmallVector<int64_t> newOperandShape;
   SmallVector<int64_t> newDimensions;
 
-  for (const auto& [idx, dim] : llvm::enumerate(dimensions)) {
+  for (const auto &[idx, dim] : llvm::enumerate(dimensions)) {
     currentIndices.push_back(idx);
 
     if (!isExpandingDim(idx)) {
@@ -637,11 +647,12 @@
 // Insert linalg.transpose if broadcasted dimensions are not in sorded order.
 // linalg.broadcast does not support implicit transpose, so the input needs to
 // be explicitely transposed.
-Value transposeBroadcastOperand(PatternRewriter& rewriter, Location loc,
+Value transposeBroadcastOperand(PatternRewriter &rewriter, Location loc,
                                 Value operand,
-                                SmallVector<int64_t>& dimensions) {
+                                SmallVector<int64_t> &dimensions) {
   // Do not insert `transpose` is dimensions are already sorted.
-  if (llvm::is_sorted(dimensions)) return operand;
+  if (llvm::is_sorted(dimensions))
+    return operand;
 
   SmallVector<int64_t> permutation =
       llvm::to_vector(llvm::seq<int64_t>(0, dimensions.size()));
@@ -667,13 +678,13 @@
 
 class BroadcastInDimOpToBroadcastConverter
     : public OpConversionPattern<mlir::stablehlo::BroadcastInDimOp> {
- public:
+public:
   using OpConversionPattern<
       mlir::stablehlo::BroadcastInDimOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::BroadcastInDimOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::BroadcastInDimOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
 
     SmallVector<int64_t> broadcastDimensions =
@@ -723,19 +734,22 @@
 // be converted to a tensor dialect op similar to TF's `ConstantLikeOp`.
 class HloDynamicBroadcastInDimConverter
     : public OpConversionPattern<mlir::stablehlo::DynamicBroadcastInDimOp> {
- public:
+public:
   using OpConversionPattern<
       mlir::stablehlo::DynamicBroadcastInDimOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::DynamicBroadcastInDimOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::DynamicBroadcastInDimOp op,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     Value operand = adaptor.getOperand();
     auto operandType = llvm::dyn_cast<RankedTensorType>(operand.getType());
-    if (!operandType) return failure();
+    if (!operandType)
+      return failure();
     auto resultType = llvm::dyn_cast<RankedTensorType>(
         typeConverter->convertType(op.getType()));
-    if (!resultType) return failure();
+    if (!resultType)
+      return failure();
 
     // Determine dimension expressions based on whether the dimension is
     // expanding (0) or non-expanding (identity), and fail if we cannot decide
@@ -744,11 +758,12 @@
 
     // Use static type info.
     auto bcastDims =
-        llvm::map_to_vector(op.getBroadcastDimensions(), [](const APInt& d) {
+        llvm::map_to_vector(op.getBroadcastDimensions(), [](const APInt &d) {
           return static_cast<int64_t>(d.getLimitedValue());
         });
-    for (const auto& it : llvm::enumerate(operandType.getShape())) {
-      if (ShapedType::isDynamic(it.value())) continue;
+    for (const auto &it : llvm::enumerate(operandType.getShape())) {
+      if (ShapedType::isDynamic(it.value()))
+        continue;
       bool isExpanding = it.value() == 1;
       dimExprs[it.index()] =
           isExpanding ? rewriter.getAffineConstantExpr(0)
@@ -757,14 +772,14 @@
 
     // Use annotated expansion behavior, if available.
     if (op.getKnownExpandingDimensions()) {
-      for (const auto& it :
+      for (const auto &it :
            op.getKnownExpandingDimensions()->getValues<APInt>()) {
         auto i = it.getLimitedValue();
         dimExprs[i] = rewriter.getAffineConstantExpr(0);
       }
     }
     if (op.getKnownNonexpandingDimensions()) {
-      for (const auto& it :
+      for (const auto &it :
            op.getKnownNonexpandingDimensions()->getValues<APInt>()) {
         auto i = it.getLimitedValue();
         dimExprs[i] = rewriter.getAffineDimExpr(bcastDims[i]);
@@ -787,7 +802,7 @@
                                        dimExprs, rewriter.getContext()),
                         rewriter.getMultiDimIdentityMap(nloops)}),
         getNParallelLoopsAttrs(nloops),
-        [&](OpBuilder& nestedBuilder, Location /*nested_loc*/,
+        [&](OpBuilder &nestedBuilder, Location /*nested_loc*/,
             ValueRange args) {
           nestedBuilder.create<linalg::YieldOp>(loc, *args.begin());
         },
@@ -798,21 +813,24 @@
 
 class DynamicBroadcastInDimOpToBroadcastConverter
     : public OpConversionPattern<mlir::stablehlo::DynamicBroadcastInDimOp> {
- public:
+public:
   using OpConversionPattern<
       mlir::stablehlo::DynamicBroadcastInDimOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::DynamicBroadcastInDimOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::DynamicBroadcastInDimOp op,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     Location loc = op.getLoc();
 
     Value operand = adaptor.getOperand();
     auto operandTy = llvm::dyn_cast<RankedTensorType>(operand.getType());
-    if (!operandTy) return failure();
+    if (!operandTy)
+      return failure();
     auto resultTy = llvm::dyn_cast<RankedTensorType>(
         typeConverter->convertType(op.getType()));
-    if (!resultTy) return failure();
+    if (!resultTy)
+      return failure();
 
     SmallVector<int64_t> broadcastDimensions =
         llvm::to_vector(op.getBroadcastDimensions().getValues<int64_t>());
@@ -821,20 +839,21 @@
         broadcastDimensions.size());
 
     // Use static type info.
-    for (const auto& [idx, dim] : llvm::enumerate(operandTy.getShape())) {
-      if (ShapedType::isDynamic(dim)) continue;
+    for (const auto &[idx, dim] : llvm::enumerate(operandTy.getShape())) {
+      if (ShapedType::isDynamic(dim))
+        continue;
       expansionBehavior[idx] = (dim == 1);
     }
 
     // Use annotated expansion behavior, if available.
     if (op.getKnownExpandingDimensions()) {
-      for (const auto& it :
+      for (const auto &it :
            op.getKnownExpandingDimensions()->getValues<int64_t>()) {
         expansionBehavior[it] = true;
       }
     }
     if (op.getKnownNonexpandingDimensions()) {
-      for (const auto& it :
+      for (const auto &it :
            op.getKnownNonexpandingDimensions()->getValues<int64_t>()) {
         expansionBehavior[it] = false;
       }
@@ -882,16 +901,17 @@
     return success();
   }
 
- private:
-  static Value getBroadcastOperand(
-      PatternRewriter& rewriter, Location loc, Value operand,
-      llvm::function_ref<bool(int64_t)> isExpandingDim) {
+private:
+  static Value
+  getBroadcastOperand(PatternRewriter &rewriter, Location loc, Value operand,
+                      llvm::function_ref<bool(int64_t)> isExpandingDim) {
     auto operandTy = llvm::dyn_cast<RankedTensorType>(operand.getType());
 
     SmallVector<int64_t> updatedOperandShape =
         llvm::to_vector(operandTy.getShape());
     for (size_t i = 0; i < updatedOperandShape.size(); ++i) {
-      if (isExpandingDim(i)) updatedOperandShape[i] = 1;
+      if (isExpandingDim(i))
+        updatedOperandShape[i] = 1;
     }
 
     auto updatedOperandTy =
@@ -904,15 +924,17 @@
     return operand;
   }
 
-  static ShapedType getBroadcastResultType(
-      Value operand, RankedTensorType resultTy, ArrayRef<int64_t> dimensions,
-      llvm::function_ref<bool(int64_t)> isExpandingDim) {
+  static ShapedType
+  getBroadcastResultType(Value operand, RankedTensorType resultTy,
+                         ArrayRef<int64_t> dimensions,
+                         llvm::function_ref<bool(int64_t)> isExpandingDim) {
     auto operandShape =
         llvm::cast<RankedTensorType>(operand.getType()).getShape();
     auto broadcastResultShape = llvm::to_vector(resultTy.getShape());
 
     for (auto [operandIndex, resultIndex] : llvm::enumerate(dimensions)) {
-      if (isExpandingDim(operandIndex)) continue;
+      if (isExpandingDim(operandIndex))
+        continue;
       broadcastResultShape[resultIndex] = operandShape[operandIndex];
     }
 
@@ -924,15 +946,15 @@
 template <typename OpTy>
 class TransposeConverter
     : public DataMovementOpConverter<TransposeConverter<OpTy>, OpTy> {
- public:
+public:
   using DataMovementOpConverter<TransposeConverter<OpTy>,
                                 OpTy>::DataMovementOpConverter;
-  static SmallVector<AffineMap, 2> getIndexingMaps(OpTy op, Builder* b) {
+  static SmallVector<AffineMap, 2> getIndexingMaps(OpTy op, Builder *b) {
     auto resultType = llvm::cast<ShapedType>(getHloOpResultType(op));
     auto nloops = resultType.getRank();
     SmallVector<AffineExpr, 2> inputExprs;
     inputExprs.resize(resultType.getRank());
-    for (const auto& permutation : llvm::enumerate(op.getPermutation())) {
+    for (const auto &permutation : llvm::enumerate(op.getPermutation())) {
       inputExprs[permutation.value().getZExtValue()] =
           b->getAffineDimExpr(permutation.index());
     }
@@ -946,9 +968,9 @@
     : public OpConversionPattern<mlir::stablehlo::TransposeOp> {
   using OpConversionPattern<mlir::stablehlo::TransposeOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::TransposeOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::TransposeOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto resultTy =
         llvm::cast<ShapedType>(typeConverter->convertType(op.getType()));
 
@@ -970,10 +992,11 @@
     : public OpConversionPattern<mlir::stablehlo::BitcastConvertOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::BitcastConvertOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
-    if (failed(verifyHloOpBufferOrTensorSemantics(op))) return failure();
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::BitcastConvertOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    if (failed(verifyHloOpBufferOrTensorSemantics(op)))
+      return failure();
 
     auto inputType =
         llvm::cast<RankedTensorType>(adaptor.getOperand().getType());
@@ -1015,7 +1038,7 @@
     rewriter.replaceOpWithNewOp<linalg::GenericOp>(
         op, outputType, adaptor.getOperand(), output, indexingMaps,
         getParallelAndReductionIterators(maxRank, isContraction ? 1 : 0),
-        [&](OpBuilder& nestedBuilder, Location nestedLoc, ValueRange args) {
+        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
           auto inIntType = nestedBuilder.getIntegerType(inputBitWidth);
           auto outIntType = nestedBuilder.getIntegerType(outputBitWidth);
           Value innerResult = args.front();
@@ -1073,23 +1096,25 @@
 // arith/tensor dialect ops.
 class RealDynamicSliceConverter
     : public OpConversionPattern<mlir::stablehlo::RealDynamicSliceOp> {
- public:
+public:
   using OpConversionPattern<
       mlir::stablehlo::RealDynamicSliceOp>::OpConversionPattern;
 
   // Computes size of a slice as
   //   size = ceil((limit - start)/stride)
   static Value computeSize(Location loc, Value start, Value limit, Value stride,
-                           ConversionPatternRewriter& b) {
+                           ConversionPatternRewriter &b) {
     Value delta = b.create<arith::SubIOp>(loc, limit, start);
     Value ret = b.create<arith::CeilDivUIOp>(loc, delta, stride);
-    if (ret.getType().isIndex()) return ret;
+    if (ret.getType().isIndex())
+      return ret;
     return b.create<arith::IndexCastOp>(loc, b.getIndexType(), ret);
   }
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::RealDynamicSliceOp realDynamicSliceOp, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::RealDynamicSliceOp realDynamicSliceOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     Location loc = realDynamicSliceOp.getLoc();
     auto argType = llvm::dyn_cast<ShapedType>(adaptor.getOperand().getType());
     if (!argType || !argType.hasRank()) {
@@ -1172,20 +1197,22 @@
 // or expansion of dimensions of the operand.
 class ReshapeOpConverter
     : public OpConversionPattern<mlir::stablehlo::ReshapeOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReshapeOp reshapeOp,
-      mlir::stablehlo::ReshapeOp::Adaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
-    if (failed(verifyHloOpBufferOrTensorSemantics(reshapeOp))) return failure();
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReshapeOp reshapeOp,
+                  mlir::stablehlo::ReshapeOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    if (failed(verifyHloOpBufferOrTensorSemantics(reshapeOp)))
+      return failure();
     auto operand = adaptor.getOperand();
     auto operandType = llvm::cast<ShapedType>(operand.getType());
     auto elemType = operandType.getElementType();
     auto resultType = llvm::cast<ShapedType>(reshapeOp.getType());
 
-    if (!resultType.hasStaticShape()) return failure();
+    if (!resultType.hasStaticShape())
+      return failure();
 
     // If any of the output dimensions is 0, the tensor has no elements. In that
     // case, we can just replace the reshape with an empty op.
@@ -1221,12 +1248,14 @@
         // collapsed to a static-sized dimension in the output, to 1.
         SmallVector<int64_t> shape(operandType.getShape().begin(),
                                    operandType.getShape().end());
-        for (const auto& map : llvm::enumerate(*reassociationMap)) {
+        for (const auto &map : llvm::enumerate(*reassociationMap)) {
           // If the result dim is dynamic, we do not mind dynamic entries in the
           // source.
-          if (resultType.isDynamicDim(map.index())) continue;
+          if (resultType.isDynamicDim(map.index()))
+            continue;
           for (auto targetDim : map.value()) {
-            if (shape[targetDim] == ShapedType::kDynamic) shape[targetDim] = 1;
+            if (shape[targetDim] == ShapedType::kDynamic)
+              shape[targetDim] = 1;
           }
         }
         // Insert a cast if types are not the same (ignoring sparse encoding).
@@ -1251,7 +1280,8 @@
     Location loc = reshapeOp.getLoc();
     auto getIdentityExprs = [&rewriter](int64_t n) {
       SmallVector<AffineExpr> exprs;
-      for (int i = 0; i < n; ++i) exprs.push_back(rewriter.getAffineDimExpr(i));
+      for (int i = 0; i < n; ++i)
+        exprs.push_back(rewriter.getAffineDimExpr(i));
       return exprs;
     };
     // Otherwise, we need to first reduce all source dimensions into one and
@@ -1288,14 +1318,15 @@
 
 template <typename OpTy>
 class IotaConverter : public OpConversionPattern<OpTy> {
- public:
+public:
   using OpConversionPattern<OpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      OpTy iotaOp, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(OpTy iotaOp, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     ShapedType resultShapedType = getHloOpResultType(iotaOp);
-    if (!resultShapedType) return failure();
+    if (!resultShapedType)
+      return failure();
     resultShapedType = llvm::dyn_cast<ShapedType>(
         this->typeConverter->convertType(resultShapedType));
 
@@ -1316,7 +1347,7 @@
                                      adaptor.getOperands())},
         llvm::ArrayRef(rewriter.getMultiDimIdentityMap(nloops)),
         getNParallelLoopsAttrs(nloops),
-        [&](OpBuilder& nestedBuilder, Location nestedLoc, ValueRange /*args*/) {
+        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange /*args*/) {
           Value indexOp = nestedBuilder.create<linalg::IndexOp>(
               nestedLoc, iotaOp.getIotaDimension());
           Type unwrappedResultElementType = resultElementType;
@@ -1342,14 +1373,15 @@
 
 template <typename OpTy>
 class IotaToMapConverter : public OpConversionPattern<OpTy> {
- public:
+public:
   using OpConversionPattern<OpTy>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      OpTy iotaOp, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(OpTy iotaOp, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     ShapedType resultTy = getHloOpResultType(iotaOp);
-    if (!resultTy) return failure();
+    if (!resultTy)
+      return failure();
     resultTy =
         llvm::dyn_cast<ShapedType>(this->typeConverter->convertType(resultTy));
 
@@ -1359,7 +1391,7 @@
 
     auto linalgOp = rewriter.create<linalg::MapOp>(
         loc, ValueRange{}, empty,
-        [&](OpBuilder& nestedBuilder, Location nestedLoc, ValueRange /*args*/) {
+        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange /*args*/) {
           Value index = nestedBuilder.create<linalg::IndexOp>(
               nestedLoc, iotaOp.getIotaDimension());
           index = nestedBuilder.create<arith::IndexCastOp>(
@@ -1381,9 +1413,9 @@
     : public OpConversionPattern<mlir::stablehlo::ConcatenateOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ConcatenateOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ConcatenateOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Shortcut the one-operand case, simplifies code below.
     if (adaptor.getOperands().size() == 1) {
       rewriter.replaceOp(op, adaptor.getOperands()[0]);
@@ -1392,7 +1424,8 @@
 
     auto resultType = llvm::dyn_cast<RankedTensorType>(
         this->typeConverter->convertType(op.getResult().getType()));
-    if (!resultType) return failure();
+    if (!resultType)
+      return failure();
 
     uint64_t dim = op.getDimension();
     Location loc = op.getLoc();
@@ -1411,7 +1444,7 @@
         /*inputs=*/ValueRange{}, /*outputBuffers=*/result,
         llvm::ArrayRef(rewriter.getMultiDimIdentityMap(nloops)),
         getNParallelLoopsAttrs(nloops),
-        [&](OpBuilder& nestedBuilder, Location loc, ValueRange) {
+        [&](OpBuilder &nestedBuilder, Location loc, ValueRange) {
           OpBuilder b = nestedBuilder;
           Value concatDimSize = zero;
           Value result;
@@ -1423,7 +1456,7 @@
           }
 
           Value indexOp = b.create<linalg::IndexOp>(loc, dim);
-          for (const auto& it : llvm::enumerate(adaptor.getOperands())) {
+          for (const auto &it : llvm::enumerate(adaptor.getOperands())) {
             Value arg = it.value();
             Value newConcatDimSize;
             scf::IfOp ifOp;
@@ -1468,19 +1501,19 @@
 
 class ConstConverterTensor
     : public OpConversionPattern<mlir::stablehlo::ConstantOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ConstantOp constOp, OpAdaptor /*adaptor*/,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ConstantOp constOp, OpAdaptor /*adaptor*/,
+                  ConversionPatternRewriter &rewriter) const final {
     auto valueAttr = llvm::cast<DenseElementsAttr>(constOp.getValue());
     auto type =
         llvm::cast<ShapedType>(typeConverter->convertType(constOp.getType()));
     if (type != constOp.getType()) {
       // Signedness conversion.
       valueAttr = valueAttr.mapValues(type.getElementType(),
-                                      [](const APInt& i) { return i; });
+                                      [](const APInt &i) { return i; });
     }
     rewriter.replaceOpWithNewOp<arith::ConstantOp>(constOp, type, valueAttr);
     return success();
@@ -1491,11 +1524,11 @@
 class ReverseConverter
     : public DataMovementOpConverter<ReverseConverter,
                                      mlir::stablehlo::ReverseOp> {
- public:
+public:
   using DataMovementOpConverter<
       ReverseConverter, mlir::stablehlo::ReverseOp>::DataMovementOpConverter;
-  static SmallVector<AffineMap, 2> getIndexingMaps(
-      mlir::stablehlo::ReverseOp op, Builder* b) {
+  static SmallVector<AffineMap, 2>
+  getIndexingMaps(mlir::stablehlo::ReverseOp op, Builder *b) {
     auto resultType = llvm::cast<ShapedType>(getHloOpResultType(op));
     auto nloops = resultType.getRank();
     SmallVector<AffineExpr, 2> inputExprs;
@@ -1504,7 +1537,8 @@
       inputExprs.push_back(b->getAffineDimExpr(i));
     for (auto dim : op.getDimensions()) {
       int i = dim.getZExtValue();
-      if (resultType.isDynamicDim(i)) return {};
+      if (resultType.isDynamicDim(i))
+        return {};
       int n = resultType.getShape()[i];
       inputExprs[i] = b->getAffineConstantExpr(n - 1) - inputExprs[i];
     }
@@ -1515,13 +1549,13 @@
 };
 
 class SliceConverter : public OpConversionPattern<mlir::stablehlo::SliceOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::SliceOp sliceOp,
-      typename mlir::stablehlo::SliceOp::Adaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::SliceOp sliceOp,
+                  typename mlir::stablehlo::SliceOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     auto argType =
         llvm::dyn_cast<ShapedType>(adaptor.getOperands()[0].getType());
     if (!argType || !argType.hasRank()) {
@@ -1550,13 +1584,14 @@
 
 class DynamicSliceConverter
     : public OpConversionPattern<mlir::stablehlo::DynamicSliceOp> {
- public:
+public:
   using OpConversionPattern<
       mlir::stablehlo::DynamicSliceOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::DynamicSliceOp dynamicSliceOp, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::DynamicSliceOp dynamicSliceOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     auto loc = dynamicSliceOp.getLoc();
     auto argType = llvm::dyn_cast<ShapedType>(adaptor.getOperand().getType());
     if (!argType || !argType.hasRank()) {
@@ -1567,7 +1602,7 @@
     SmallVector<OpFoldResult, 3> startIndices, sizes;
     auto originalStartIndexType = llvm::cast<ShapedType>(
         dynamicSliceOp.getStartIndices().front().getType());
-    for (const auto& en : llvm::enumerate(
+    for (const auto &en : llvm::enumerate(
              llvm::zip(adaptor.getStartIndices(),
                        dynamicSliceOp.getSliceSizes().getValues<int64_t>()))) {
       int64_t size = std::get<1>(en.value());
@@ -1607,13 +1642,13 @@
 
 class DynamicUpdateSliceConverter
     : public OpConversionPattern<mlir::stablehlo::DynamicUpdateSliceOp> {
- public:
+public:
   using OpConversionPattern<
       mlir::stablehlo::DynamicUpdateSliceOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::DynamicUpdateSliceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::DynamicUpdateSliceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     auto loc = op.getLoc();
     auto operandType =
         llvm::dyn_cast<RankedTensorType>(adaptor.getOperand().getType());
@@ -1639,7 +1674,7 @@
 
     SmallVector<OpFoldResult, 3> startIndices;
     Value zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    for (const auto& en : llvm::enumerate(adaptor.getStartIndices())) {
+    for (const auto &en : llvm::enumerate(adaptor.getStartIndices())) {
       // By stablehlo.DynamicUpdateSlice definition:
       //   `start_indices[i] = clamp(start_indices[i],
       //       0, operand.dimension_size[i] - update.dimension_size[i])`
@@ -1666,12 +1701,13 @@
 
 class MapOpToGenericConverter
     : public OpConversionPattern<mlir::stablehlo::MapOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::MapOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
-    if (failed(verifyHloOpBufferOrTensorSemantics(op))) return failure();
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::MapOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    if (failed(verifyHloOpBufferOrTensorSemantics(op)))
+      return failure();
 
     auto resultType =
         llvm::cast<ShapedType>(typeConverter->convertType(op.getType()));
@@ -1692,12 +1728,12 @@
 
     // Convert the signature of the body. We scalarize the operands and add a
     // scalar operand representing the output tensor.
-    Region& region = linalgOp.getRegion();
+    Region &region = linalgOp.getRegion();
     rewriter.inlineRegionBefore(op.getComputation(), region, region.end());
     TypeConverter::SignatureConversion signatureConverter(op.getNumOperands() +
                                                           1);
 
-    for (const auto& it : llvm::enumerate(op.getOperation()->getOperands())) {
+    for (const auto &it : llvm::enumerate(op.getOperation()->getOperands())) {
       signatureConverter.addInputs(
           it.index(),
           typeConverter->convertType(
@@ -1713,12 +1749,13 @@
 };
 
 class MapOpToMapConverter : public OpConversionPattern<mlir::stablehlo::MapOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::MapOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
-    if (failed(verifyHloOpBufferOrTensorSemantics(op))) return failure();
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::MapOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    if (failed(verifyHloOpBufferOrTensorSemantics(op)))
+      return failure();
 
     auto resultType =
         llvm::cast<ShapedType>(typeConverter->convertType(op.getType()));
@@ -1743,11 +1780,11 @@
 
     // Convert the signature of the body. We scalarize the operands and add a
     // scalar operand representing the output tensor.
-    Region& region = linalgOp.getRegion();
+    Region &region = linalgOp.getRegion();
     rewriter.inlineRegionBefore(op.getComputation(), region, region.end());
     TypeConverter::SignatureConversion signatureConverter(op.getNumOperands());
 
-    for (const auto& it : llvm::enumerate(op.getOperation()->getOperands())) {
+    for (const auto &it : llvm::enumerate(op.getOperation()->getOperands())) {
       signatureConverter.addInputs(
           it.index(),
           typeConverter->convertType(
@@ -1774,9 +1811,9 @@
 struct GatherConversion final : OpConversionPattern<mlir::stablehlo::GatherOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::GatherOp gatherOp, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::GatherOp gatherOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = gatherOp.getLoc();
 
     Value startIndices = adaptor.getStartIndices();
@@ -1829,8 +1866,8 @@
         /*bodyBuild=*/nullptr, linalg::getPrunedAttributeList(gatherOp));
 
     // Now populate the linalg generic region
-    Region& region = linalgOp.getRegion();
-    Block* block = rewriter.createBlock(&region, region.end());
+    Region &region = linalgOp.getRegion();
+    Block *block = rewriter.createBlock(&region, region.end());
     block->addArguments(resultType.getElementType(), loc);
     OpBuilder::InsertionGuard guard(rewriter);
     rewriter.setInsertionPointToEnd(block);
@@ -1914,7 +1951,8 @@
       }
 
       // If this is a skipped dimension, we're done and don't have to clamp.
-      if (remappedIndexFromIndices[i] == constants[0]) continue;
+      if (remappedIndexFromIndices[i] == constants[0])
+        continue;
 
       Value operandDimSize =
           rewriter.createOrFold<tensor::DimOp>(loc, operand, i);
@@ -1975,9 +2013,9 @@
   using OpConversionPattern<
       mlir::stablehlo::SelectAndScatterOp>::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::SelectAndScatterOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::SelectAndScatterOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     Location loc = op.getLoc();
     ImplicitLocOpBuilder b(loc, rewriter);
     Value source = op.getSource();
@@ -2111,7 +2149,7 @@
         /*bodyBuild=*/nullptr, linalg::getPrunedAttributeList(op));
 
     // First we clone in the selection block.
-    auto& reduceRegion = reduceGeneric.getRegion();
+    auto &reduceRegion = reduceGeneric.getRegion();
     rewriter.setInsertionPoint(reduceGeneric);
     rewriter.cloneRegionBefore(op.getSelect(), reduceRegion,
                                reduceRegion.end());
@@ -2128,8 +2166,8 @@
 
     // Grab the terminator and use the turned value to now select the
     // correct index and value.
-    auto& reduceBlock = reduceRegion.front();
-    auto* reduceTerminator = reduceBlock.getTerminator();
+    auto &reduceBlock = reduceRegion.front();
+    auto *reduceTerminator = reduceBlock.getTerminator();
     Value selectPred = reduceTerminator->getOperand(0);
     Value selectInVal = reduceBlock.getArgument(0);
     Value selectOutVal = reduceBlock.getArgument(2);
@@ -2222,7 +2260,7 @@
 
     // Clone the scattering combination logic and perform the tensor-to-scalar
     // conversion.
-    auto& scatterRegion = scatterGeneric.getRegion();
+    auto &scatterRegion = scatterGeneric.getRegion();
     b.setInsertionPoint(scatterGeneric);
     rewriter.cloneRegionBefore(op.getScatter(), scatterRegion,
                                scatterRegion.end());
@@ -2234,7 +2272,7 @@
     rewriter.applySignatureConversion(&scatterRegion, scatterSignConverter,
                                       getTypeConverter());
 
-    auto& scatterBlock = scatterRegion.front();
+    auto &scatterBlock = scatterRegion.front();
     auto scatterTerminator = scatterBlock.getTerminator();
     b.setInsertionPoint(scatterTerminator);
 
@@ -2275,7 +2313,8 @@
     SmallVector<ReassociationIndices> reassociationMap;
     for (int i = 0, s = window.size(); i < s; ++i) {
       SmallVector<int64_t, 2> dims = {collapseDim};
-      if (strides[i] > 1) dims.push_back(collapseDim + 1);
+      if (strides[i] > 1)
+        dims.push_back(collapseDim + 1);
 
       reassociationMap.push_back(ReassociationIndices(dims));
       collapseDim += dims.size();
@@ -2334,9 +2373,9 @@
     : public OpConversionPattern<mlir::stablehlo::PadOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::PadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::PadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<int64_t> padLow;
     SmallVector<int64_t> padHigh;
     SmallVector<OpFoldResult> sliceStarts;
@@ -2363,7 +2402,8 @@
     }
 
     // If there's no negative edge padding we're done.
-    if (!hasNegativePadding) return failure();
+    if (!hasNegativePadding)
+      return failure();
 
     // Create a new pad op with the positive values.
     Value pad = rewriter.create<mlir::stablehlo::PadOp>(
@@ -2373,10 +2413,12 @@
 
     // Then slice according to the negative edge padding. Static shapes only for
     // now.
-    if (!op.getType().hasStaticShape()) return failure();
-    SmallVector<OpFoldResult> sizes(llvm::map_range(
-        op.getType().getShape(),
-        [&](int64_t dim) { return rewriter.getIndexAttr(dim); }));
+    if (!op.getType().hasStaticShape())
+      return failure();
+    SmallVector<OpFoldResult> sizes(
+        llvm::map_range(op.getType().getShape(), [&](int64_t dim) {
+          return rewriter.getIndexAttr(dim);
+        }));
     SmallVector<OpFoldResult> strides(sliceStarts.size(),
                                       rewriter.getIndexAttr(1));
     rewriter.replaceOpWithNewOp<tensor::ExtractSliceOp>(op, pad, sliceStarts,
@@ -2389,14 +2431,14 @@
 struct PadOpConversion : public OpConversionPattern<mlir::stablehlo::PadOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::PadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::PadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
     auto resultType = typeConverter->convertType(op.getResult().getType());
 
     // Negative edge padding is decomposed separately.
-    auto isNegative = [](const APInt& intVal) { return intVal.isNegative(); };
+    auto isNegative = [](const APInt &intVal) { return intVal.isNegative(); };
     if (llvm::any_of(op.getEdgePaddingLow().getValues<APInt>(), isNegative) ||
         llvm::any_of(op.getEdgePaddingHigh().getValues<APInt>(), isNegative))
       return failure();
@@ -2409,7 +2451,7 @@
 
     // If there is no interior padding lower to tensor.pad directly.
     if (llvm::all_of(op.getInteriorPadding().getValues<APInt>(),
-                     [](const APInt& intVal) { return intVal.isZero(); })) {
+                     [](const APInt &intVal) { return intVal.isZero(); })) {
       SmallVector<OpFoldResult> high(
           op.getEdgePaddingHigh().getValues<IntegerAttr>());
       auto padTensorOp = rewriter.create<tensor::PadOp>(
@@ -2454,17 +2496,19 @@
     : public OpConversionPattern<mlir::stablehlo::TorchIndexSelectOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::TorchIndexSelectOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::TorchIndexSelectOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     int axis = static_cast<int>(op.getDim());
     int batch = static_cast<int>(op.getBatchDims());
     auto indexShapedType = llvm::cast<ShapedType>(adaptor.getIndex().getType());
     int numIndices = static_cast<int>(indexShapedType.getRank());
     auto operandShapedType =
         llvm::cast<ShapedType>(adaptor.getOperand().getType());
-    if (axis < 0) axis += static_cast<int>(operandShapedType.getRank());
-    if (batch < 0) batch += numIndices;
+    if (axis < 0)
+      axis += static_cast<int>(operandShapedType.getRank());
+    if (batch < 0)
+      batch += numIndices;
 
     Location loc = op.getLoc();
     auto resultType = llvm::cast<ShapedType>(
@@ -2475,7 +2519,8 @@
     //   `params[:axis] + indices[batch_dims:] + params[axis + 1:]`
     SmallVector<Value> dynSizes;
     for (int i = 0; i < rank; ++i) {
-      if (!resultType.isDynamicDim(i)) continue;
+      if (!resultType.isDynamicDim(i))
+        continue;
       if (i < axis) {
         dynSizes.push_back(
             rewriter.create<tensor::DimOp>(loc, adaptor.getOperand(), i));
@@ -2498,14 +2543,16 @@
     for (int i = 0; i < axis; ++i) {
       sliceExprs.push_back(rewriter.getAffineDimExpr(i));
       sliceShape.push_back(resultShape[i]);
-      if (!resultType.isDynamicDim(i)) continue;
+      if (!resultType.isDynamicDim(i))
+        continue;
       dynSliceSizes.push_back(
           rewriter.create<tensor::DimOp>(loc, adaptor.getOperand(), i));
     }
     for (int i = axis + numIndices - batch; i < rank; ++i) {
       sliceExprs.push_back(rewriter.getAffineDimExpr(i));
       sliceShape.push_back(resultShape[i]);
-      if (!resultType.isDynamicDim(i)) continue;
+      if (!resultType.isDynamicDim(i))
+        continue;
       int idx = i - (axis + numIndices - batch) + axis + 1;
       dynSliceSizes.push_back(
           rewriter.create<tensor::DimOp>(loc, adaptor.getOperand(), idx));
@@ -2541,8 +2588,8 @@
     SmallVector<Type> bodyArgTypes;
     SmallVector<Value, 2> linalgOpArgs = {adaptor.getIndex(), sliceOp};
     // Add a block to the region.
-    auto* region = &linalgOp.getRegion();
-    auto* block = rewriter.createBlock(region, region->end());
+    auto *region = &linalgOp.getRegion();
+    auto *block = rewriter.createBlock(region, region->end());
     for (auto blockArgs : linalgOpArgs) {
       bodyArgTypes.push_back(
           llvm::cast<ShapedType>(blockArgs.getType()).getElementType());
@@ -2575,12 +2622,13 @@
 
 class SetDimensionSizeConverter
     : public OpConversionPattern<mlir::stablehlo::SetDimensionSizeOp> {
- public:
+public:
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::SetDimensionSizeOp setDimensionSizeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::SetDimensionSizeOp setDimensionSizeOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     // We can lower SetDimensionSize to tensor extract. This turns into a
     // regular dynamic shape. Note that the bounds annotation is still around
     // but may be no longer valid depending on choices made by bufferization.
@@ -2615,14 +2663,14 @@
     : impl::ConvertStableHloToLinalgBase<ConvertStableHloToLinalg> {
   using ConvertStableHloToLinalgBase::ConvertStableHloToLinalgBase;
 
-  void getDependentDialects(DialectRegistry& registry) const override {
+  void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<bufferization::BufferizationDialect, linalg::LinalgDialect,
                     scf::SCFDialect, complex::ComplexDialect, math::MathDialect,
                     memref::MemRefDialect, shape::ShapeDialect>();
   }
 
   void runOnOperation() override {
-    MLIRContext& ctx = getContext();
+    MLIRContext &ctx = getContext();
     RewritePatternSet patterns(&ctx);
     ConversionTarget target(ctx);
     target.addLegalDialect<
@@ -2644,11 +2692,11 @@
   }
 };
 
-}  // namespace
+} // namespace
 
-void populateStableHloToLinalgConversionPatterns(MLIRContext* context,
-                                                 TypeConverter& typeConverter,
-                                                 RewritePatternSet* patterns,
+void populateStableHloToLinalgConversionPatterns(MLIRContext *context,
+                                                 TypeConverter &typeConverter,
+                                                 RewritePatternSet *patterns,
                                                  bool enablePrimitiveOps) {
   // clang-format off
   patterns->add<
@@ -2714,4 +2762,4 @@
   return std::make_unique<LinalgTypeConverter>();
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgConvolution.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgConvolution.cpp
index 62cd0c9..e7473cb 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgConvolution.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgConvolution.cpp
@@ -171,14 +171,16 @@
     : OpConversionPattern<mlir::stablehlo::ConvolutionOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ConvolutionOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ConvolutionOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!hasCanonicalDimensionNumbers(op.getDimensionNumbers())) {
       return failure();
     }
-    if (op.getFeatureGroupCount() != 1u) return failure();
-    if (op.getBatchGroupCount() != 1u) return failure();
+    if (op.getFeatureGroupCount() != 1u)
+      return failure();
+    if (op.getBatchGroupCount() != 1u)
+      return failure();
 
     Location loc = op.getLoc();
     Value input = adaptor.getLhs();
@@ -228,33 +230,33 @@
                                     rewriter);
 
     switch (rank) {
-      case 2: {
-        res = rewriter.create<linalg::MatmulOp>(
-            loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
-            linalg::getPrunedAttributeList(op));
-        break;
-      }
-      case 3: {
-        res = rewriter.create<linalg::Conv1DNwcWcfOp>(
-            loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
-            strides, dilations, linalg::getPrunedAttributeList(op));
-        break;
-      }
-      case 4: {
-        res = rewriter.create<linalg::Conv2DNhwcHwcfOp>(
-            loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
-            strides, dilations, linalg::getPrunedAttributeList(op));
-        break;
-      }
-      case 5: {
-        res = rewriter.create<linalg::Conv3DNdhwcDhwcfOp>(
-            loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
-            strides, dilations, linalg::getPrunedAttributeList(op));
-        break;
-      }
-      default: {
-        return rewriter.notifyMatchFailure(op, "expected 1/2/3D conv op");
-      }
+    case 2: {
+      res = rewriter.create<linalg::MatmulOp>(
+          loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
+          linalg::getPrunedAttributeList(op));
+      break;
+    }
+    case 3: {
+      res = rewriter.create<linalg::Conv1DNwcWcfOp>(
+          loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
+          strides, dilations, linalg::getPrunedAttributeList(op));
+      break;
+    }
+    case 4: {
+      res = rewriter.create<linalg::Conv2DNhwcHwcfOp>(
+          loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
+          strides, dilations, linalg::getPrunedAttributeList(op));
+      break;
+    }
+    case 5: {
+      res = rewriter.create<linalg::Conv3DNdhwcDhwcfOp>(
+          loc, resultType, ValueRange{input, filter}, ValueRange{zeroTensor},
+          strides, dilations, linalg::getPrunedAttributeList(op));
+      break;
+    }
+    default: {
+      return rewriter.notifyMatchFailure(op, "expected 1/2/3D conv op");
+    }
     }
     rewriter.replaceOp(op, res.getOperation()->getResults());
     return success();
@@ -291,9 +293,9 @@
   /// 7. Create the linalg.generic that computes the multiply-add
   /// 8. Reshape the output to the original shape if it was reshaped by the
   ///    feature or group count attributes.
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ConvolutionOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ConvolutionOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     MLIRContext *ctx = op.getContext();
 
@@ -304,7 +306,8 @@
     }
 
     auto reshapedResultShape = resultType.getShape().vec();
-    if (!resultType.hasStaticShape()) return failure();
+    if (!resultType.hasStaticShape())
+      return failure();
 
     // Immediately emit an EmptyOp for output tensors with zero dimension.
     if (llvm::is_contained(reshapedResultShape, 0)) {
@@ -432,9 +435,9 @@
       {
         dstExprs.insert(dstExprs.begin() + outputFeatureDimension, parallelDim);
         updateDimMappingFromOffset(resultIndexMapping, outputFeatureDimension);
-        reshapedResultShape.insert(
-            reshapedResultShape.begin() + outputFeatureDimension,
-            featureGroupCount);
+        reshapedResultShape.insert(reshapedResultShape.begin() +
+                                       outputFeatureDimension,
+                                   featureGroupCount);
         reshapedResultShape[outputFeatureDimension + 1] /= featureGroupCount;
       }
     }
@@ -567,12 +570,14 @@
     : OpConversionPattern<mlir::stablehlo::ConvolutionOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ConvolutionOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (op.getBatchGroupCount() != 1) return failure();
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ConvolutionOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (op.getBatchGroupCount() != 1)
+      return failure();
     // Fall into the normal convolution cases.
-    if (op.getFeatureGroupCount() == 1) return failure();
+    if (op.getFeatureGroupCount() == 1)
+      return failure();
 
     const mlir::stablehlo::ConvDimensionNumbersAttr &dimensionNumbers =
         op.getDimensionNumbers();
@@ -646,7 +651,8 @@
     auto getReassociationIndicesToCollapseLastTwoDims = [](Value v) {
       SmallVector<ReassociationIndices> reassociations;
       int64_t rank = cast<ShapedType>(v.getType()).getRank();
-      for (int64_t i = 0; i < rank - 1; ++i) reassociations.emplace_back(1, i);
+      for (int64_t i = 0; i < rank - 1; ++i)
+        reassociations.emplace_back(1, i);
       reassociations.back().push_back(rank - 1);
       return reassociations;
     };
@@ -696,38 +702,38 @@
           reshapedOutputDims, resultType.getElementType());
       Value conv;
       switch (spatialRank) {
-        case 1: {
-          conv = rewriter
-                     .create<linalg::DepthwiseConv1DNwcWcmOp>(
-                         loc, reshapedOutputType,
-                         ValueRange{input, reshapedFilter},
-                         ValueRange{zeroTensor}, windowStrides, rhsDilation,
-                         linalg::getPrunedAttributeList(op))
-                     .getResult(0);
-          break;
-        }
-        case 2: {
-          conv = rewriter
-                     .create<linalg::DepthwiseConv2DNhwcHwcmOp>(
-                         loc, reshapedOutputType,
-                         ValueRange{input, reshapedFilter},
-                         ValueRange{zeroTensor}, windowStrides, rhsDilation,
-                         linalg::getPrunedAttributeList(op))
-                     .getResult(0);
-          break;
-        }
-        case 3: {
-          conv = rewriter
-                     .create<linalg::DepthwiseConv3DNdhwcDhwcmOp>(
-                         loc, reshapedOutputType,
-                         ValueRange{input, reshapedFilter},
-                         ValueRange{zeroTensor}, windowStrides, rhsDilation,
-                         linalg::getPrunedAttributeList(op))
-                     .getResult(0);
-          break;
-        }
-        default:
-          llvm_unreachable("Unhandled case");
+      case 1: {
+        conv =
+            rewriter
+                .create<linalg::DepthwiseConv1DNwcWcmOp>(
+                    loc, reshapedOutputType, ValueRange{input, reshapedFilter},
+                    ValueRange{zeroTensor}, windowStrides, rhsDilation,
+                    linalg::getPrunedAttributeList(op))
+                .getResult(0);
+        break;
+      }
+      case 2: {
+        conv =
+            rewriter
+                .create<linalg::DepthwiseConv2DNhwcHwcmOp>(
+                    loc, reshapedOutputType, ValueRange{input, reshapedFilter},
+                    ValueRange{zeroTensor}, windowStrides, rhsDilation,
+                    linalg::getPrunedAttributeList(op))
+                .getResult(0);
+        break;
+      }
+      case 3: {
+        conv =
+            rewriter
+                .create<linalg::DepthwiseConv3DNdhwcDhwcmOp>(
+                    loc, reshapedOutputType, ValueRange{input, reshapedFilter},
+                    ValueRange{zeroTensor}, windowStrides, rhsDilation,
+                    linalg::getPrunedAttributeList(op))
+                .getResult(0);
+        break;
+      }
+      default:
+        llvm_unreachable("Unhandled case");
       }
 
       // Create a Linalg reshape op that converts the output from 5 dimensions
@@ -760,24 +766,24 @@
           getReassociationIndicesToCollapseLastTwoDims(filter));
 
       switch (spatialRank) {
-        case 1:
-          rewriter.replaceOpWithNewOp<linalg::DepthwiseConv1DNwcWcOp>(
-              op, resultType, ValueRange{input, reshapedFilter},
-              ValueRange{zeroTensor}, windowStrides, rhsDilation,
-              linalg::getPrunedAttributeList(op));
-          break;
-        case 2:
-          rewriter.replaceOpWithNewOp<linalg::DepthwiseConv2DNhwcHwcOp>(
-              op, resultType, ValueRange{input, reshapedFilter},
-              ValueRange{zeroTensor}, windowStrides, rhsDilation,
-              linalg::getPrunedAttributeList(op));
-          break;
-        case 3:
-          rewriter.replaceOpWithNewOp<linalg::DepthwiseConv3DNdhwcDhwcOp>(
-              op, resultType, ValueRange{input, reshapedFilter},
-              ValueRange{zeroTensor}, windowStrides, rhsDilation,
-              linalg::getPrunedAttributeList(op));
-          break;
+      case 1:
+        rewriter.replaceOpWithNewOp<linalg::DepthwiseConv1DNwcWcOp>(
+            op, resultType, ValueRange{input, reshapedFilter},
+            ValueRange{zeroTensor}, windowStrides, rhsDilation,
+            linalg::getPrunedAttributeList(op));
+        break;
+      case 2:
+        rewriter.replaceOpWithNewOp<linalg::DepthwiseConv2DNhwcHwcOp>(
+            op, resultType, ValueRange{input, reshapedFilter},
+            ValueRange{zeroTensor}, windowStrides, rhsDilation,
+            linalg::getPrunedAttributeList(op));
+        break;
+      case 3:
+        rewriter.replaceOpWithNewOp<linalg::DepthwiseConv3DNdhwcDhwcOp>(
+            op, resultType, ValueRange{input, reshapedFilter},
+            ValueRange{zeroTensor}, windowStrides, rhsDilation,
+            linalg::getPrunedAttributeList(op));
+        break;
       }
     }
 
@@ -785,7 +791,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 namespace detail {
 void populateStableHloConvolutionToLinalgConversionPatterns(
@@ -799,5 +805,5 @@
 
   patterns->add<ConvolutionOpGeneralConversion>(typeConverter, context);
 }
-}  // namespace detail
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace detail
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgDotProd.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgDotProd.cpp
index e19e978..3c6f060 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgDotProd.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgDotProd.cpp
@@ -53,31 +53,31 @@
   return DotOperationType::kUnsupported;
 }
 
-SmallVector<Value, 2> getDotOpEmptyTensorDynSizes(OpBuilder& b, Location loc,
+SmallVector<Value, 2> getDotOpEmptyTensorDynSizes(OpBuilder &b, Location loc,
                                                   Value lhs, Value rhs,
                                                   DotOperationType type) {
   SmallVector<Value, 2> dynShape;
   switch (type) {
-    case DotOperationType::kMatrixMatrix: {
-      if (llvm::cast<ShapedType>(lhs.getType()).isDynamicDim(0))
-        dynShape.push_back(b.create<tensor::DimOp>(loc, lhs, 0));
-      if (llvm::cast<ShapedType>(rhs.getType()).isDynamicDim(1))
-        dynShape.push_back(b.create<tensor::DimOp>(loc, rhs, 1));
-      break;
-    }
-    case DotOperationType::kMatrixVector: {
-      if (llvm::cast<ShapedType>(lhs.getType()).isDynamicDim(0))
-        dynShape.push_back(b.create<tensor::DimOp>(loc, lhs, 0));
-      break;
-    }
-    case DotOperationType::kVectorMatrix: {
-      if (llvm::cast<ShapedType>(rhs.getType()).isDynamicDim(1))
-        dynShape.push_back(b.create<tensor::DimOp>(loc, rhs, 1));
-      break;
-    }
-    case DotOperationType::kVectorDot:
-    case DotOperationType::kUnsupported:
-      break;
+  case DotOperationType::kMatrixMatrix: {
+    if (llvm::cast<ShapedType>(lhs.getType()).isDynamicDim(0))
+      dynShape.push_back(b.create<tensor::DimOp>(loc, lhs, 0));
+    if (llvm::cast<ShapedType>(rhs.getType()).isDynamicDim(1))
+      dynShape.push_back(b.create<tensor::DimOp>(loc, rhs, 1));
+    break;
+  }
+  case DotOperationType::kMatrixVector: {
+    if (llvm::cast<ShapedType>(lhs.getType()).isDynamicDim(0))
+      dynShape.push_back(b.create<tensor::DimOp>(loc, lhs, 0));
+    break;
+  }
+  case DotOperationType::kVectorMatrix: {
+    if (llvm::cast<ShapedType>(rhs.getType()).isDynamicDim(1))
+      dynShape.push_back(b.create<tensor::DimOp>(loc, rhs, 1));
+    break;
+  }
+  case DotOperationType::kVectorDot:
+  case DotOperationType::kUnsupported:
+    break;
   }
   return dynShape;
 }
@@ -87,13 +87,14 @@
   using OpConversionPattern<mlir::stablehlo::DotOp>::OpConversionPattern;
   using OpAdaptor = mlir::stablehlo::DotOp::Adaptor;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::DotOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::DotOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     if (failed(verifyHloOpBufferOrTensorSemantics(op))) {
       return failure();
     }
-    if (getDotOperationType(op) != op_type) return failure();
+    if (getDotOperationType(op) != op_type)
+      return failure();
 
     Location loc = op.getLoc();
     // Convert unsigned to signed. This works because signed and unsigned
@@ -119,9 +120,9 @@
     : OpConversionPattern<mlir::stablehlo::DotGeneralOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::DotGeneralOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::DotGeneralOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     if (failed(verifyHloOpBufferOrTensorSemantics(op))) {
       return failure();
     }
@@ -162,7 +163,7 @@
     Value emptyTensor =
         getEmptyTensorFor(rewriter, loc, outputType, op, adaptor.getOperands());
     Value zeroTensor = fillTensorWithZeros(rewriter, loc, emptyTensor);
-    Operation* linalgOp = rewriter.create<linalg::BatchMatmulOp>(
+    Operation *linalgOp = rewriter.create<linalg::BatchMatmulOp>(
         loc, /*resultTensorTypes=*/TypeRange{outputType},
         /*inputs=*/ValueRange{adaptor.getLhs(), adaptor.getRhs()},
         /*outputBuffers=*/ValueRange{zeroTensor},
@@ -176,9 +177,9 @@
 struct DotGeneralOpConversion final
     : OpConversionPattern<mlir::stablehlo::DotGeneralOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::DotGeneralOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::DotGeneralOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     if (failed(verifyHloOpBufferOrTensorSemantics(op))) {
       return failure();
     }
@@ -220,10 +221,10 @@
     auto getMap = [&](int64_t rank, ArrayRef<int64_t> batchingDims,
                       ArrayRef<int64_t> contractingDims, size_t extraDims) {
       llvm::SmallVector<AffineExpr> indices(rank);
-      for (const auto& i : llvm::enumerate(batchingDims)) {
+      for (const auto &i : llvm::enumerate(batchingDims)) {
         indices[i.value()] = rewriter.getAffineDimExpr(i.index());
       }
-      for (const auto& i : llvm::enumerate(contractingDims)) {
+      for (const auto &i : llvm::enumerate(contractingDims)) {
         indices[i.value()] = rewriter.getAffineDimExpr(i.index() + targetRank);
       }
       for (int i = 0; i < rank; ++i) {
@@ -250,14 +251,14 @@
                                             op.getContext()));
     }
 
-    Operation* linalgOp = rewriter.create<linalg::GenericOp>(
+    Operation *linalgOp = rewriter.create<linalg::GenericOp>(
         loc, /*resultTensorTypes=*/TypeRange{outputType},
         /*inputs=*/ValueRange{adaptor.getLhs(), adaptor.getRhs()},
         /*outputBuffers=*/ValueRange{zeroTensor}, indexingMaps,
         getParallelAndReductionIterators(
             /*nLoops=*/totalLoopCount,
             /*nReduction=*/numContracting),
-        [](OpBuilder& b, Location loc, ValueRange) {
+        [](OpBuilder &b, Location loc, ValueRange) {
           ImplicitLocOpBuilder builder(loc, b);
           linalg::MatmulOp::regionBuilder(builder, *b.getInsertionBlock(), {});
         },
@@ -268,12 +269,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 namespace detail {
 void populateStableHloDotProdToLinalgConversionPatterns(
-    MLIRContext* context, TypeConverter& typeConverter,
-    RewritePatternSet* patterns) {
+    MLIRContext *context, TypeConverter &typeConverter,
+    RewritePatternSet *patterns) {
   // Ensure specialized patterns are higher priority than their generic
   // versions.
   patterns
@@ -286,5 +287,5 @@
   patterns->add<DotGeneralOpConversion>(typeConverter, context,
                                         PatternBenefit(1));
 }
-}  // namespace detail
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace detail
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgExt.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgExt.cpp
index 8a84fd6..316d0e6 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgExt.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgExt.cpp
@@ -47,7 +47,8 @@
 }
 
 std::optional<Type> convertRank0TensorToScalar(RankedTensorType tensorType) {
-  if (tensorType.getRank() != 0) return std::nullopt;
+  if (tensorType.getRank() != 0)
+    return std::nullopt;
   Type elementType = tensorType.getElementType();
   if (auto intType = dyn_cast<IntegerType>(elementType)) {
     elementType = convertIntegerToSignless(intType);
@@ -67,7 +68,8 @@
   assert(inputs.size() == 1 && "too many inputs to type conversion");
   Value fromValue = inputs[0];
   auto fromType = dyn_cast<RankedTensorType>(fromValue.getType());
-  if (!fromType) return std::nullopt;
+  if (!fromType)
+    return std::nullopt;
 
   if (auto intFromType = dyn_cast<IntegerType>(fromType.getElementType())) {
     Type castType = getElementTypeOrSelf(toType);
@@ -82,7 +84,8 @@
     }
   }
 
-  if (fromType.getRank() != 0) return fromValue;
+  if (fromType.getRank() != 0)
+    return fromValue;
 
   Type extractType = getElementTypeOrSelf(toType);
   return builder.createOrFold<tensor::ExtractOp>(loc, extractType, fromValue);
@@ -122,12 +125,14 @@
 template <typename OpTy>
 struct LinalgExtRegionHLOOpConversion final : OpConversionPattern<OpTy> {
   using OpConversionPattern<OpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      OpTy op, typename OpTy::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!isInBodyOfLinalgExtOps(op)) return failure();
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!isInBodyOfLinalgExtOps(op))
+      return failure();
     TensorType origRetType = dyn_cast<TensorType>(op.getType());
-    if (!origRetType) return failure();
+    if (!origRetType)
+      return failure();
     SmallVector<Value> scalarArgs;
     Type newRetType = getElementTypeOrSelf(
         this->typeConverter->convertType(origRetType.getElementType()));
@@ -141,10 +146,11 @@
 struct LinalgExtRegionReturnOpConversion final
     : OpConversionPattern<mlir::stablehlo::ReturnOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReturnOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!isInBodyOfLinalgExtOps(op)) return failure();
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReturnOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!isInBodyOfLinalgExtOps(op))
+      return failure();
     rewriter.replaceOpWithNewOp<IREE::LinalgExt::YieldOp>(
         op, adaptor.getOperands());
     return success();
@@ -158,9 +164,9 @@
 struct SortOpConversion final : OpConversionPattern<mlir::stablehlo::SortOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::SortOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const final {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::SortOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
     Location loc = op.getLoc();
 
     llvm::SmallVector<Type> resultTypes;
@@ -213,27 +219,33 @@
     auto indexDepth = indicesType.getShape().back();
     auto scatterDimsToOperandDims = dimNumbers.getScatterDimsToOperandDims();
 
-    if (indicesRank != 2) return false;
-    if (indexVectorDim != indicesRank - 1) return false;
-    if (scatterDimsToOperandDims.size() != indexDepth) return false;
+    if (indicesRank != 2)
+      return false;
+    if (indexVectorDim != indicesRank - 1)
+      return false;
+    if (scatterDimsToOperandDims.size() != indexDepth)
+      return false;
 
     auto insertedWindowDims = dimNumbers.getInsertedWindowDims();
     for (auto [idx, dim] : llvm::enumerate(insertedWindowDims)) {
-      if (idx != dim) return false;
+      if (idx != dim)
+        return false;
     }
 
     // Check that there is only one batch dimension in the updates.
     for (auto [idx, dim] : llvm::enumerate(dimNumbers.getUpdateWindowDims())) {
-      if (idx + 1 != dim) return false;
+      if (idx + 1 != dim)
+        return false;
     }
 
     return true;
   }
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ScatterOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
-    if (!hasCanonicalDimensionNumbers(op)) return failure();
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ScatterOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (!hasCanonicalDimensionNumbers(op))
+      return failure();
     if (llvm::size(op.getInputs()) != 1)
       return op.emitError("NYI variadic operands scatter");
     if (llvm::size(op.getUpdates()) != 1)
@@ -350,9 +362,9 @@
                                     DenseFPElementsAttr::get(type, imag))};
   }
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::FftOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::FftOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Only handle 2^n fft length.
     auto operandType =
         llvm::dyn_cast<RankedTensorType>(adaptor.getOperand().getType());
@@ -406,11 +418,12 @@
     : OpConversionPattern<mlir::stablehlo::ReverseOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReverseOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReverseOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto ty = dyn_cast<RankedTensorType>(adaptor.getOperands()[0].getType());
-    if (!ty) return failure();
+    if (!ty)
+      return failure();
 
     Location loc = op.getLoc();
     SmallVector<OpFoldResult> mixedSizes =
@@ -430,9 +443,9 @@
 
 struct TopkOpConversion final : OpConversionPattern<chlo::TopKOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      chlo::TopKOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(chlo::TopKOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     Value operand = adaptor.getOperand();
 
@@ -568,7 +581,7 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 void populateStableHloToLinalgExtConversionPatterns(
     MLIRContext *context, TypeConverter &typeConverter,
@@ -625,4 +638,4 @@
       LinalgExtRegionReturnOpConversion>(typeConverter, context);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgPointwise.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgPointwise.cpp
index f4e7090..6c9fdf1 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgPointwise.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgPointwise.cpp
@@ -54,9 +54,10 @@
 
 /// Checks the preconditions for conversion of pointwise HLO ops to linalg.
 /// Returns the max operand rank and the result type on success.
-FailureOr<PointwiseConversionInfo> checkOperandsAndResults(
-    Operation* op, ValueRange operands, TypeConverter& typeConverter,
-    ConversionPatternRewriter& rewriter) {
+FailureOr<PointwiseConversionInfo>
+checkOperandsAndResults(Operation *op, ValueRange operands,
+                        TypeConverter &typeConverter,
+                        ConversionPatternRewriter &rewriter) {
   int64_t maxRank = getMaxRank(operands);
 
   // Apply only if all operands are scalar or have the same rank. Some ops,
@@ -83,7 +84,8 @@
 
   // All-scalar pointwise ops inside of linalg ops are processes by
   // ScalarHloToArithmeticPattern.
-  if (maxRank == 0 && isInBodyOfLinalgOps(op)) return failure();
+  if (maxRank == 0 && isInBodyOfLinalgOps(op))
+    return failure();
 
   return PointwiseConversionInfo{maxRank, resultTy};
 }
@@ -95,9 +97,9 @@
   using OpConversionPattern<OpTy>::OpConversionPattern;
   using OpAdaptor = typename OpTy::Adaptor;
 
-  LogicalResult matchAndRewrite(
-      OpTy op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto conversionInfo = checkOperandsAndResults(
         op, adaptor.getOperands(), *this->typeConverter, rewriter);
     if (failed(conversionInfo)) {
@@ -129,7 +131,7 @@
 
     auto mapOp = rewriter.create<linalg::MapOp>(
         loc, mappedInputs, emptyTensor,
-        [&](OpBuilder& b, Location loc, ValueRange args) {
+        [&](OpBuilder &b, Location loc, ValueRange args) {
           Value innerResult = mlir::stablehlo::StableHloOpToStdScalarOp::mapOp(
               op, getElementTypeOrSelf(emptyTensor),
               interleaveScalarAndBlockArgs(scalarInputs, args), &b);
@@ -150,9 +152,9 @@
   using OpConversionPattern<OpTy>::OpConversionPattern;
   using OpAdaptor = typename OpTy::Adaptor;
 
-  LogicalResult matchAndRewrite(
-      OpTy op, OpAdaptor adaptor,
-      ConversionPatternRewriter& rewriter) const override {
+  LogicalResult
+  matchAndRewrite(OpTy op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto conversionInfo = checkOperandsAndResults(
         op, adaptor.getOperands(), *this->typeConverter, rewriter);
     if (failed(conversionInfo)) {
@@ -172,7 +174,8 @@
     AffineMap scalarMap = AffineMap::get(maxRank, 0, rewriter.getContext());
     AffineMap idMap = rewriter.getMultiDimIdentityMap(maxRank);
     SmallVector<AffineMap> maps;
-    for (Value v : inputs) maps.push_back(isScalar(v) ? scalarMap : idMap);
+    for (Value v : inputs)
+      maps.push_back(isScalar(v) ? scalarMap : idMap);
     maps.push_back(idMap);
 
     // Build `linalg.generic` op.
@@ -180,7 +183,7 @@
     auto linalgOp = rewriter.create<linalg::GenericOp>(
         loc, resultTy ? resultTy : TypeRange{}, inputs, output, maps,
         getNParallelLoopsAttrs(maxRank),
-        [&](OpBuilder& nestedBuilder, Location /*nested_loc*/,
+        [&](OpBuilder &nestedBuilder, Location /*nested_loc*/,
             ValueRange args) {
           Type innerResultTy = getElementTypeOrSelf(output);
           auto argvec = llvm::to_vector<2>(args.take_front(inputs.size()));
@@ -195,18 +198,19 @@
           }
         },
         linalg::getPrunedAttributeList(op));
-    if (failed) return failure();
+    if (failed)
+      return failure();
 
     rewriter.replaceOp(op, linalgOp->getResults());
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 namespace detail {
 void populatePointwiseStableHloToLinalgConversionPatterns(
-    MLIRContext* context, TypeConverter& typeConverter,
-    RewritePatternSet* patterns, bool enablePrimitiveOps) {
+    MLIRContext *context, TypeConverter &typeConverter,
+    RewritePatternSet *patterns, bool enablePrimitiveOps) {
   if (enablePrimitiveOps) {
     patterns->add<
         PointwiseToLinalgMapConverter<mlir::stablehlo::AbsOp>,
@@ -308,5 +312,5 @@
             PointwiseToLinalgConverter<mlir::stablehlo::XorOp>>(typeConverter,
                                                                 context);
 }
-}  // namespace detail
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace detail
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgRandom.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgRandom.cpp
index 204064a..4372d5f 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgRandom.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgRandom.cpp
@@ -18,7 +18,7 @@
 namespace mlir::iree_compiler::stablehlo {
 namespace {
 class ArithOpBuilder {
- public:
+public:
   ArithOpBuilder(OpBuilder b, Location l, Value v)
       : builder(b), loc(l), value(v) {}
 
@@ -38,7 +38,8 @@
   }
 
   ArithOpBuilder truncI(int64_t bits) {
-    if (value.getType().getIntOrFloatBitWidth() == bits) return *this;
+    if (value.getType().getIntOrFloatBitWidth() == bits)
+      return *this;
     Value trunc = builder.create<arith::TruncIOp>(
         loc, builder.getIntegerType(bits), value);
     return ArithOpBuilder(builder, loc, trunc);
@@ -100,7 +101,7 @@
     return ArithOpBuilder(builder, loc, shr);
   }
 
- private:
+private:
   OpBuilder builder;
   Location loc;
   Value value;
@@ -123,8 +124,9 @@
 // Implements the ThreeFry counter-based PRNG algorithm.
 // Salmon et al. SC 2011. Parallel random numbers: as easy as 1, 2, 3.
 // http://www.thesalmons.org/john/random123/papers/random123sc11.pdf
-std::pair<ArithOpBuilder, ArithOpBuilder> runThreeFry2xi32(
-    ArithOpBuilder key0, ArithOpBuilder key1, ArithOpBuilder initialState) {
+std::pair<ArithOpBuilder, ArithOpBuilder>
+runThreeFry2xi32(ArithOpBuilder key0, ArithOpBuilder key1,
+                 ArithOpBuilder initialState) {
   ArithOpBuilder index = initialState.linalgIndex(0);
   index = index.indexCast(64);
   index = index + initialState;
@@ -169,7 +171,8 @@
 std::pair<Value, Value> extractKey32(OpBuilder &builder, Location loc,
                                      Value store) {
   auto storeTy = cast<ShapedType>(store.getType());
-  if (storeTy.getRank() != 1) return {nullptr, nullptr};
+  if (storeTy.getRank() != 1)
+    return {nullptr, nullptr};
 
   Type storeETy = storeTy.getElementType();
   IntegerType i32Ty = builder.getIntegerType(32);
@@ -199,7 +202,8 @@
 // Extract and potentially reconstruct the i64 state as necessary.
 Value extractState64(OpBuilder &builder, Location loc, Value store) {
   auto storeTy = cast<ShapedType>(store.getType());
-  if (storeTy.getRank() != 1) return nullptr;
+  if (storeTy.getRank() != 1)
+    return nullptr;
 
   Type storeETy = storeTy.getElementType();
   IntegerType i64Ty = builder.getIntegerType(64);
@@ -228,7 +232,8 @@
 
 Value setState64(OpBuilder &b, Location loc, Value store, Value state) {
   auto storeTy = cast<ShapedType>(store.getType());
-  if (storeTy.getRank() != 1) return nullptr;
+  if (storeTy.getRank() != 1)
+    return nullptr;
 
   Type storeETy = storeTy.getElementType();
 
@@ -293,7 +298,8 @@
       std::max_element(shape.begin(), shape.end()) - shape.begin();
 
   for (int i = 0, s = shape.size(); i < s; i++) {
-    if (shape[i] & 0x1) continue;
+    if (shape[i] & 0x1)
+      continue;
     halfDim = i;
     break;
   }
@@ -321,10 +327,12 @@
 
   // Extract the stateful values as an i64 and increment the state ahead.
   Value initialState = extractState64(builder, loc, store);
-  if (!initialState) return failure();
+  if (!initialState)
+    return failure();
 
   std::pair<Value, Value> keys = extractKey32(builder, loc, store);
-  if (!keys.first || !keys.second) return failure();
+  if (!keys.first || !keys.second)
+    return failure();
 
   ArithOpBuilder key0(builder, loc, keys.first);
   ArithOpBuilder key1(builder, loc, keys.second);
@@ -412,10 +420,12 @@
 
   // Extract the stateful values as an i64 and increment the state ahead.
   Value initialState = extractState64(builder, loc, store);
-  if (!initialState) return failure();
+  if (!initialState)
+    return failure();
 
   std::pair<Value, Value> keys = extractKey32(builder, loc, store);
-  if (!keys.first || !keys.second) return failure();
+  if (!keys.first || !keys.second)
+    return failure();
 
   ArithOpBuilder key0(builder, loc, keys.first);
   ArithOpBuilder key1(builder, loc, keys.second);
@@ -523,10 +533,12 @@
   Type resultETy = resultTy.getElementType();
 
   Value initialState = extractState64(builder, loc, store);
-  if (!initialState) return failure();
+  if (!initialState)
+    return failure();
 
   std::pair<Value, Value> keys = extractKey32(builder, loc, store);
-  if (!keys.first || !keys.second) return failure();
+  if (!keys.first || !keys.second)
+    return failure();
 
   int64_t numElements = resultTy.getNumElements();
   int64_t count = (numElements + 3) / 4;
@@ -621,10 +633,12 @@
   Type resultETy = resultTy.getElementType();
 
   Value initialState = extractState64(builder, loc, store);
-  if (!initialState) return failure();
+  if (!initialState)
+    return failure();
 
   std::pair<Value, Value> keys = extractKey32(builder, loc, store);
-  if (!keys.first || !keys.second) return failure();
+  if (!keys.first || !keys.second)
+    return failure();
 
   int64_t numElements = resultTy.getNumElements();
   int64_t count = (numElements + 1) / 2;
@@ -744,9 +758,9 @@
     : OpConversionPattern<mlir::stablehlo::RngBitGeneratorOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::RngBitGeneratorOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::RngBitGeneratorOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     Value state = adaptor.getInitialState();
     auto resultTy = dyn_cast_or_null<ShapedType>(
@@ -784,9 +798,9 @@
     : OpConversionPattern<mlir::stablehlo::RngOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::RngOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::RngOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // We only handle uniform distributions.
     if (op.getRngDistribution() != mlir::stablehlo::RngDistribution::UNIFORM) {
       return failure();
@@ -864,7 +878,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 namespace detail {
 void populateStableHloRandomToLinalgConversionPatterns(
@@ -873,5 +887,5 @@
   patterns->add<RngBitGeneratorConverter, RngUniformConversion>(typeConverter,
                                                                 context);
 }
-}  // namespace detail
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace detail
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgReduce.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgReduce.cpp
index 9845f20..d4a2d71 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgReduce.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/StableHLOToLinalgReduce.cpp
@@ -23,7 +23,8 @@
 static bool isUnsupported(mlir::stablehlo::ReduceOp op) {
   // Empty reductions are not supported. We expect canonicalization patterns to
   // handle them.
-  if (op.getDimensions().empty()) return true;
+  if (op.getDimensions().empty())
+    return true;
 
   // We require all reduce shapes to be the same, up to the element types, so
   // we can just the first operand and the first result as a representative.
@@ -56,17 +57,20 @@
   return inversePermutation(map);
 }
 
-SmallVector<Value, 8> getReduceOpEmptyTensorDynSizes(
-    OpBuilder &b, Location loc, Value arg, ShapedType resultType,
-    ArrayRef<int64_t> reductionDims) {
+SmallVector<Value, 8>
+getReduceOpEmptyTensorDynSizes(OpBuilder &b, Location loc, Value arg,
+                               ShapedType resultType,
+                               ArrayRef<int64_t> reductionDims) {
   llvm::SmallSetVector<int, 4> s(reductionDims.begin(), reductionDims.end());
 
   SmallVector<unsigned> parallelDims;
   SmallVector<Value, 8> dynShape;
   int rank = cast<RankedTensorType>(arg.getType()).getRank();
   for (int i = 0, j = 0; i < rank; ++i) {
-    if (s.contains(i)) continue;
-    if (!resultType.isDynamicDim(j++)) continue;
+    if (s.contains(i))
+      continue;
+    if (!resultType.isDynamicDim(j++))
+      continue;
     dynShape.push_back(b.create<tensor::DimOp>(loc, arg, i));
   }
 
@@ -77,9 +81,9 @@
     : OpConversionPattern<mlir::stablehlo::ReturnOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReturnOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReturnOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!isInBodyOfLinalgOps(op)) {
       return failure();
     }
@@ -100,9 +104,9 @@
     : OpConversionPattern<mlir::stablehlo::ReduceOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReduceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReduceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (isUnsupported(op)) {
       return rewriter.notifyMatchFailure(op,
                                          "unsupported reduce (noop or empty)");
@@ -145,10 +149,10 @@
     // Prepare indexing maps for linalg generic op. The elements are for src
     // and dst. Transpose `src` to make the reduction loops be the innermost,
     // because it's easier to fully utilize processors.
-    indexingMaps.append(
-        numOperands,
-        getTransposeMapForReduction(rewriter.getContext(),
-                                    static_cast<int>(srcRank), reductionDims));
+    indexingMaps.append(numOperands,
+                        getTransposeMapForReduction(rewriter.getContext(),
+                                                    static_cast<int>(srcRank),
+                                                    reductionDims));
 
     // The indexing map of `dst` should drop the reduction loops. Since the
     // reduction loops now are all in the innermost, drops
@@ -208,9 +212,9 @@
     : OpConversionPattern<mlir::stablehlo::ReduceOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReduceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReduceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (isUnsupported(op)) {
       return rewriter.notifyMatchFailure(op,
                                          "unsupported reduce (noop or empty)");
@@ -317,9 +321,9 @@
     : OpConversionPattern<mlir::stablehlo::ReduceWindowOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReduceWindowOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReduceWindowOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     MLIRContext *ctx = op->getContext();
     Location loc = op.getLoc();
     llvm::SmallVector<Value> initValues = adaptor.getInitValues();
@@ -361,7 +365,8 @@
     for (int64_t i = 0; i < rank; i++) {
       AffineExpr srcExpr = mlir::getAffineDimExpr(i, ctx);
 
-      if (windowStrides[i] != 1) srcExpr = srcExpr * windowStrides[i];
+      if (windowStrides[i] != 1)
+        srcExpr = srcExpr * windowStrides[i];
 
       if (windowDimensions[i] != 1) {
         filteredWindowDims.push_back(windowDimensions[i]);
@@ -396,7 +401,8 @@
     for (uint64_t i = 0, s = initValues.size(); i < s; i++) {
       Value initValue = initValues[i];
       auto resultTy = llvm::cast<ShapedType>(resultTypes[i]);
-      if (!resultTy.hasStaticShape()) return failure();
+      if (!resultTy.hasStaticShape())
+        return failure();
 
       auto broadcastSizes = rewriter.getI64TensorAttr(resultTy.getShape());
       broadcastValues.push_back(rewriter.create<mlir::stablehlo::BroadcastOp>(
@@ -492,14 +498,17 @@
     auto returnOp =
         cast<mlir::stablehlo::ReturnOp>(op.getBody().front().getTerminator());
     Operation *computeOp = returnOp.getResults()[resultIndex].getDefiningOp();
-    if (computeOp->getNumOperands() != 2) return nullptr;
+    if (computeOp->getNumOperands() != 2)
+      return nullptr;
     auto arg0 = llvm::dyn_cast<BlockArgument>(computeOp->getOperand(0));
     auto arg1 = llvm::dyn_cast<BlockArgument>(computeOp->getOperand(1));
-    if (!arg0 || !arg1) return nullptr;
+    if (!arg0 || !arg1)
+      return nullptr;
     int64_t arg0Num = arg0.getArgNumber();
     int64_t arg1Num = arg1.getArgNumber();
     int64_t otherArgIndex = resultIndex + op.getInputs().size();
-    if (arg0Num == resultIndex && arg1Num == otherArgIndex) return computeOp;
+    if (arg0Num == resultIndex && arg1Num == otherArgIndex)
+      return computeOp;
     if (arg0Num == otherArgIndex && arg1Num == resultIndex &&
         computeOp->hasTrait<mlir::OpTrait::IsCommutative>())
       return computeOp;
@@ -540,9 +549,9 @@
     return PoolingType::kInvalid;
   }
 
-  LogicalResult matchAndRewrite(
-      mlir::stablehlo::ReduceWindowOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(mlir::stablehlo::ReduceWindowOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
     int rank = llvm::cast<ShapedType>(op.getResultTypes()[0]).getRank();
     if (rank != 4 && rank != 5) {
@@ -620,7 +629,8 @@
 
       SmallVector<Value> resultDynamicDims;
       for (const auto &en : llvm::enumerate(resultType.getShape())) {
-        if (en.value() != ShapedType::kDynamic) continue;
+        if (en.value() != ShapedType::kDynamic)
+          continue;
         Value dimSize = rewriter.create<tensor::DimOp>(loc, input, en.index());
         if (en.index() == 0 || static_cast<int64_t>(en.index()) == rank - 1) {
           // batch dims and channel dims can be derived from input dims
@@ -666,38 +676,32 @@
       linalg::LinalgOp poolingOp;
       PoolingType poolingType = getPoolingType(op, result.getResultNumber());
       switch (poolingType) {
-        case PoolingType::k2DMin: {
-          poolingOp =
-              createOp(static_cast<linalg::PoolingNhwcMinOp *>(nullptr));
-          break;
-        }
-        case PoolingType::k3DMin: {
-          poolingOp =
-              createOp(static_cast<linalg::PoolingNdhwcMinOp *>(nullptr));
-          break;
-        }
-        case PoolingType::k2DMax: {
-          poolingOp =
-              createOp(static_cast<linalg::PoolingNhwcMaxOp *>(nullptr));
-          break;
-        }
-        case PoolingType::k3DMax: {
-          poolingOp =
-              createOp(static_cast<linalg::PoolingNdhwcMaxOp *>(nullptr));
-          break;
-        }
-        case PoolingType::k2DAdd: {
-          poolingOp =
-              createOp(static_cast<linalg::PoolingNhwcSumOp *>(nullptr));
-          break;
-        }
-        case PoolingType::k3DAdd: {
-          poolingOp =
-              createOp(static_cast<linalg::PoolingNdhwcSumOp *>(nullptr));
-          break;
-        }
-        case PoolingType::kInvalid:
-          return rewriter.notifyMatchFailure(op, "unknown reduction operation");
+      case PoolingType::k2DMin: {
+        poolingOp = createOp(static_cast<linalg::PoolingNhwcMinOp *>(nullptr));
+        break;
+      }
+      case PoolingType::k3DMin: {
+        poolingOp = createOp(static_cast<linalg::PoolingNdhwcMinOp *>(nullptr));
+        break;
+      }
+      case PoolingType::k2DMax: {
+        poolingOp = createOp(static_cast<linalg::PoolingNhwcMaxOp *>(nullptr));
+        break;
+      }
+      case PoolingType::k3DMax: {
+        poolingOp = createOp(static_cast<linalg::PoolingNdhwcMaxOp *>(nullptr));
+        break;
+      }
+      case PoolingType::k2DAdd: {
+        poolingOp = createOp(static_cast<linalg::PoolingNhwcSumOp *>(nullptr));
+        break;
+      }
+      case PoolingType::k3DAdd: {
+        poolingOp = createOp(static_cast<linalg::PoolingNdhwcSumOp *>(nullptr));
+        break;
+      }
+      case PoolingType::kInvalid:
+        return rewriter.notifyMatchFailure(op, "unknown reduction operation");
       }
       poolingOps.push_back(poolingOp->getResult(0));
     }
@@ -706,7 +710,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 namespace detail {
 void populateStableHloReductionToLinalgConversionPatterns(
@@ -726,5 +730,5 @@
   patterns->add<ReduceWindowOpConversion>(typeConverter, context,
                                           PatternBenefit(2));
 }
-}  // namespace detail
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace detail
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.cpp
index b5b5ebc..6501fcd 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.cpp
@@ -31,7 +31,7 @@
   return shapedType;
 }
 
-std::optional<Value> materializeCastFromIllegal(OpBuilder& builder, Type type,
+std::optional<Value> materializeCastFromIllegal(OpBuilder &builder, Type type,
                                                 ValueRange inputs,
                                                 Location loc) {
   Type fromType = getElementTypeOrSelf(inputs[0].getType());
@@ -44,7 +44,7 @@
       ->getResult(0);
 }
 
-std::optional<Value> materializeCastToIllegal(OpBuilder& builder, Type type,
+std::optional<Value> materializeCastToIllegal(OpBuilder &builder, Type type,
                                               ValueRange inputs, Location loc) {
   Type fromType = getElementTypeOrSelf(inputs[0].getType());
   Type toType = getElementTypeOrSelf(type);
@@ -56,7 +56,7 @@
       ->getResult(0);
 }
 
-std::optional<Value> scalarToTensor(OpBuilder& builder, Type /*type*/,
+std::optional<Value> scalarToTensor(OpBuilder &builder, Type /*type*/,
                                     ValueRange inputs, Location loc) {
   assert(inputs.size() == 1);
   if (llvm::isa<ShapedType>(inputs.front().getType())) {
@@ -69,7 +69,7 @@
       .getResult();
 }
 
-}  // namespace
+} // namespace
 
 RemoveSignTypeConverter::RemoveSignTypeConverter() {
   addConversion([](Type type) { return type; });
@@ -86,4 +86,4 @@
   addArgumentMaterialization(scalarToTensor);
 }
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.h b/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.h
index 47d249b..73e0f7c 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.h
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/TypeConversion.h
@@ -16,7 +16,7 @@
 // Type converter to use as part of lowerings from dialects that carry signs
 // in their types to those that are signless.
 class RemoveSignTypeConverter : public TypeConverter {
- public:
+public:
   RemoveSignTypeConverter();
 };
 
@@ -25,10 +25,10 @@
 // This is the type converter used by the test pass and is the sanctioned
 // way to use the underlying patterns.
 class LinalgTypeConverter : public RemoveSignTypeConverter {
- public:
+public:
   LinalgTypeConverter();
 };
 
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace mlir::iree_compiler::stablehlo
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_TYPE_CONVERSION_H
+#endif // IREE_COMPILER_INPUTCONVERSION_STABLEHLO_TYPE_CONVERSION_H
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/VerifyCompilerInputLegality.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/VerifyCompilerInputLegality.cpp
index 5491593..05f5902 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/VerifyCompilerInputLegality.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/VerifyCompilerInputLegality.cpp
@@ -67,5 +67,5 @@
   }
 };
 
-}  // namespace
-}  // namespace mlir::iree_compiler::stablehlo
+} // namespace
+} // namespace mlir::iree_compiler::stablehlo
diff --git a/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp b/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp
index acbf547..14a642a 100644
--- a/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp
+++ b/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp
@@ -47,7 +47,8 @@
   LogicalResult matchAndRewrite(mlir::torch::TMTensor::ScatterOp op,
                                 PatternRewriter &rewriter) const override {
     auto indicesTy = op.getIndicesType();
-    if (!indicesTy.hasRank()) return failure();
+    if (!indicesTy.hasRank())
+      return failure();
 
     if (indicesTy.isDynamicDim(indicesTy.getRank() - 1)) {
       return rewriter.notifyMatchFailure(op, "number of indices is unknown");
@@ -55,7 +56,8 @@
 
     auto numIndices = indicesTy.getShape().back();
     llvm::SmallVector<int64_t> dimMap(numIndices);
-    for (int i = 0; i < numIndices; i++) dimMap[i] = i;
+    for (int i = 0; i < numIndices; i++)
+      dimMap[i] = i;
 
     auto scatterOp = rewriter.create<IREE::LinalgExt::ScatterOp>(
         op.getLoc(), op->getResultTypes(), op.getInputs(), op.getOutputs(),
@@ -67,7 +69,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 static Value collapseBatches(PatternRewriter &rewriter, Location loc,
                              Value val) {
@@ -82,7 +84,8 @@
 
   auto rank = valSizes.size();
   SmallVector<int64_t> collapsed;
-  for (auto i = 0; i < rank - 2; i++) collapsed.push_back(i);
+  for (auto i = 0; i < rank - 2; i++)
+    collapsed.push_back(i);
 
   SmallVector<ReassociationIndices> reassociation(3);
   reassociation[0].append(collapsed);
@@ -103,7 +106,8 @@
   Type newType = RankedTensorType::get(newSizes, elementType);
 
   SmallVector<ReassociationIndices> reassociation(3);
-  for (auto i = 0; i < batchSizes.size(); i++) reassociation[0].push_back(i);
+  for (auto i = 0; i < batchSizes.size(); i++)
+    reassociation[0].push_back(i);
   reassociation[1].push_back(rank - 2);
   reassociation[2].push_back(rank - 1);
 
@@ -174,9 +178,9 @@
     RewritePatternSet patterns(context);
     ConversionTarget target(*context);
 
-#define INSERT_TMTENSOR_CONVERSION_PATTERN(Op)                               \
-  patterns.add<                                                              \
-      TMTensorOpConversion<mlir::torch::TMTensor::Op, IREE::LinalgExt::Op>>( \
+#define INSERT_TMTENSOR_CONVERSION_PATTERN(Op)                                 \
+  patterns.add<                                                                \
+      TMTensorOpConversion<mlir::torch::TMTensor::Op, IREE::LinalgExt::Op>>(   \
       context);
 
     INSERT_TMTENSOR_CONVERSION_PATTERN(YieldOp);
@@ -194,13 +198,13 @@
     }
   }
 };
-}  // namespace
+} // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
 createConvertTMTensorToLinalgExtPass() {
   return std::make_unique<ConvertTMTensorToLinalgExtPass>();
 }
 
-}  // namespace TMTensor
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace TMTensor
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/TMTensor/PassDetail.h b/compiler/src/iree/compiler/InputConversion/TMTensor/PassDetail.h
index 9a1f982..ec7e226 100644
--- a/compiler/src/iree/compiler/InputConversion/TMTensor/PassDetail.h
+++ b/compiler/src/iree/compiler/InputConversion/TMTensor/PassDetail.h
@@ -17,8 +17,8 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/InputConversion/TMTensor/Passes.h.inc"
 
-}  // namespace TMTensor
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace TMTensor
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_TMTENSOR_PASSDETAIL_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_TMTENSOR_PASSDETAIL_H_
diff --git a/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.cpp b/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.cpp
index 9dd2361..13d23fa 100644
--- a/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.cpp
+++ b/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.cpp
@@ -12,14 +12,14 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/InputConversion/TMTensor/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/InputConversion/TMTensor/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerTMTensorConversionPasses() {
   // Generated.
   registerPasses();
 }
 
-}  // namespace TMTensor
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace TMTensor
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.h b/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.h
index dfd55a6..e471676 100644
--- a/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.h
+++ b/compiler/src/iree/compiler/InputConversion/TMTensor/Passes.h
@@ -23,8 +23,8 @@
 
 void registerTMTensorConversionPasses();
 
-}  // namespace TMTensor
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace TMTensor
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_TMTENSOR_PASSES_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_TMTENSOR_PASSES_H_
diff --git a/compiler/src/iree/compiler/InputConversion/TOSA/PassDetail.h b/compiler/src/iree/compiler/InputConversion/TOSA/PassDetail.h
index b519e7d..8987600 100644
--- a/compiler/src/iree/compiler/InputConversion/TOSA/PassDetail.h
+++ b/compiler/src/iree/compiler/InputConversion/TOSA/PassDetail.h
@@ -21,7 +21,7 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/InputConversion/TOSA/Passes.h.inc"
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_TOSA_PASSDETAIL_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_TOSA_PASSDETAIL_H_
diff --git a/compiler/src/iree/compiler/InputConversion/TOSA/Passes.cpp b/compiler/src/iree/compiler/InputConversion/TOSA/Passes.cpp
index 07a1577..0d6da19 100644
--- a/compiler/src/iree/compiler/InputConversion/TOSA/Passes.cpp
+++ b/compiler/src/iree/compiler/InputConversion/TOSA/Passes.cpp
@@ -74,8 +74,8 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/InputConversion/TOSA/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/InputConversion/TOSA/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerTOSAConversionPasses() {
   // Generated.
@@ -85,5 +85,5 @@
   registerTOSAConversionPassPipeline();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/TOSA/Passes.h b/compiler/src/iree/compiler/InputConversion/TOSA/Passes.h
index 7a74dc1..bcb2cd9 100644
--- a/compiler/src/iree/compiler/InputConversion/TOSA/Passes.h
+++ b/compiler/src/iree/compiler/InputConversion/TOSA/Passes.h
@@ -44,7 +44,7 @@
 
 void registerTOSAConversionPasses();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_INPUTCONVERSION_TOSA_PASSES_H_
+#endif // IREE_COMPILER_INPUTCONVERSION_TOSA_PASSES_H_
diff --git a/compiler/src/iree/compiler/InputConversion/TOSA/TosaToLinalgExt.cpp b/compiler/src/iree/compiler/InputConversion/TOSA/TosaToLinalgExt.cpp
index 0ed05e7..9a1c400 100644
--- a/compiler/src/iree/compiler/InputConversion/TOSA/TosaToLinalgExt.cpp
+++ b/compiler/src/iree/compiler/InputConversion/TOSA/TosaToLinalgExt.cpp
@@ -32,7 +32,7 @@
 // LinalgExt version is not batched therefore we materialize the batch index
 // for each update.
 class ScatterConversion : public OpRewritePattern<tosa::ScatterOp> {
- public:
+public:
   using OpRewritePattern<tosa::ScatterOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(tosa::ScatterOp op,
@@ -174,5 +174,5 @@
   return std::make_unique<TosaToLinalgExtPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/InputConversion/TOSA/VerifyCompilerTOSAInputLegality.cpp b/compiler/src/iree/compiler/InputConversion/TOSA/VerifyCompilerTOSAInputLegality.cpp
index aa063ae..719c015 100644
--- a/compiler/src/iree/compiler/InputConversion/TOSA/VerifyCompilerTOSAInputLegality.cpp
+++ b/compiler/src/iree/compiler/InputConversion/TOSA/VerifyCompilerTOSAInputLegality.cpp
@@ -69,5 +69,5 @@
   return std::make_unique<VerifyCompilerTOSAInputLegalityPass>();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.cpp b/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.cpp
index 3036f5f..82da66b 100644
--- a/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.cpp
+++ b/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.cpp
@@ -23,9 +23,9 @@
 template <typename T, typename Adaptor = typename T::Adaptor>
 struct OptionalCheckImportConversion : public VMImportOpConversion<T, Adaptor> {
   using VMImportOpConversion<T, Adaptor>::VMImportOpConversion;
-  LogicalResult matchAndRewrite(
-      T op, typename T::Adaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(T op, typename T::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto hasImport = rewriter.create<IREE::VM::ImportResolvedOp>(
         op.getLoc(), rewriter.getI32Type(), this->importOp.getName());
     auto *followingBlock = rewriter.splitBlock(rewriter.getInsertionBlock(),
@@ -37,7 +37,8 @@
     rewriter.setInsertionPointToStart(callBlock);
     auto results = rewriteToCall(op, adaptor, this->importOp,
                                  *this->getTypeConverter(), rewriter);
-    if (!results.has_value()) return failure();
+    if (!results.has_value())
+      return failure();
     rewriter.replaceOp(op, results.value());
     rewriter.create<IREE::VM::BranchOp>(op.getLoc(), followingBlock);
     return success();
@@ -72,7 +73,7 @@
                                                               typeConverter);
 }
 
-}  // namespace Check
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Check
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.h b/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.h
index a34472c..a08327c 100644
--- a/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.h
+++ b/compiler/src/iree/compiler/Modules/Check/Conversion/ConversionPatterns.h
@@ -26,9 +26,9 @@
                                 RewritePatternSet &patterns,
                                 TypeConverter &typeConverter);
 
-}  // namespace Check
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Check
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_CHECK_CONVERSION_CONVERSION_PATTERNS_H_
+#endif // IREE_COMPILER_MODULES_CHECK_CONVERSION_CONVERSION_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.cpp b/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.cpp
index 244e65d..dbdb4e1 100644
--- a/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.cpp
+++ b/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.cpp
@@ -22,7 +22,7 @@
 
 namespace {
 class CheckToVmConversionInterface : public VMConversionDialectInterface {
- public:
+public:
   using VMConversionDialectInterface::VMConversionDialectInterface;
 
   OwningOpRef<mlir::ModuleOp> parseVMImportModule() const override {
@@ -32,17 +32,18 @@
         getDialect()->getContext());
   }
 
-  void populateVMConversionPatterns(
-      SymbolTable &importSymbols, RewritePatternSet &patterns,
-      ConversionTarget &conversionTarget,
-      TypeConverter &typeConverter) const override {
+  void
+  populateVMConversionPatterns(SymbolTable &importSymbols,
+                               RewritePatternSet &patterns,
+                               ConversionTarget &conversionTarget,
+                               TypeConverter &typeConverter) const override {
     populateCheckToVMPatterns(getDialect()->getContext(), importSymbols,
                               patterns, typeConverter);
   }
 };
 
 class CheckToHalConversionInterface : public HALConversionDialectInterface {
- public:
+public:
   using HALConversionDialectInterface::HALConversionDialectInterface;
 
   void setupConversionTarget(ConversionTarget &target,
@@ -52,7 +53,7 @@
                                typeConverter);
   }
 };
-}  // namespace
+} // namespace
 
 CheckDialect::CheckDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<CheckDialect>()) {
@@ -64,7 +65,7 @@
       >();
 }
 
-}  // namespace Check
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Check
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.h b/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.h
index 8496c87..eb1b46c 100644
--- a/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.h
+++ b/compiler/src/iree/compiler/Modules/Check/IR/CheckDialect.h
@@ -15,14 +15,14 @@
 namespace Check {
 
 class CheckDialect : public Dialect {
- public:
+public:
   explicit CheckDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "check"; }
 };
 
-}  // namespace Check
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Check
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_CHECK_IR_CHECK_DIALECT_H_
+#endif // IREE_COMPILER_MODULES_CHECK_IR_CHECK_DIALECT_H_
diff --git a/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.cpp b/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.cpp
index 17a7e2c..a651bfe 100644
--- a/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.cpp
+++ b/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.cpp
@@ -28,7 +28,7 @@
     return success();
   }
 };
-}  // namespace
+} // namespace
 
 void ExpectEqConstOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
@@ -42,10 +42,10 @@
           context);
 }
 
-}  // namespace Check
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Check
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 #define GET_OP_CLASSES
 #include "iree/compiler/Modules/Check/IR/CheckOps.cpp.inc"
diff --git a/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.h b/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.h
index b3844c2..940159a 100644
--- a/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.h
+++ b/compiler/src/iree/compiler/Modules/Check/IR/CheckOps.h
@@ -11,6 +11,6 @@
 #include "mlir/IR/OpDefinition.h"
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Modules/Check/IR/CheckOps.h.inc"  // IWYU pragma: export
+#include "iree/compiler/Modules/Check/IR/CheckOps.h.inc" // IWYU pragma: export
 
-#endif  // IREE_COMPILER_MODULES_CHECK_IR_CHECK_OPS_H_
+#endif // IREE_COMPILER_MODULES_CHECK_IR_CHECK_OPS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.cpp
index 838b28d..804effa 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.cpp
@@ -67,5 +67,5 @@
       context, importSymbols, typeConverter, "hal_inline.device.query.i64");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.h b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.h
index 1d56a95..481ec1c 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALInlineToVM/Patterns.h
@@ -21,7 +21,7 @@
                                    SymbolTable &importSymbols,
                                    RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_INLINE_CONVERSION_HALINLINETOVM_PATTERNS_H_
+#endif // IREE_COMPILER_MODULES_HAL_INLINE_CONVERSION_HALINLINETOVM_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.cpp
index fb481c5..7137202 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.cpp
@@ -24,9 +24,9 @@
 struct BufferSubspanOpPattern
     : public OpConversionPattern<IREE::HAL::BufferSubspanOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferSubspanOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferSubspanOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto bufferType = getTypeConverter()->convertType(op.getResult().getType());
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferSubspanOp>(
         op, bufferType, adaptor.getSourceBuffer(), adaptor.getSourceOffset(),
@@ -38,9 +38,9 @@
 struct BufferLengthOpPattern
     : public OpConversionPattern<IREE::HAL::BufferLengthOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferLengthOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferLengthOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto sizeType = getTypeConverter()->convertType(op.getResult().getType());
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferLengthOp>(
         op, sizeType, adaptor.getBuffer());
@@ -51,9 +51,9 @@
 struct BufferLoadOpPattern
     : public OpConversionPattern<IREE::HAL::BufferLoadOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferLoadOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferLoadOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value storageBuffer =
         rewriter.createOrFold<IREE::HAL::Inline::BufferStorageOp>(
             op.getLoc(), adaptor.getSourceBuffer());
@@ -72,9 +72,9 @@
 struct BufferStoreOpPattern
     : public OpConversionPattern<IREE::HAL::BufferStoreOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferStoreOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferStoreOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value storageBuffer =
         rewriter.createOrFold<IREE::HAL::Inline::BufferStorageOp>(
             op.getLoc(), adaptor.getTargetBuffer());
@@ -92,9 +92,9 @@
 struct BufferViewCreateOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewCreateOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewCreateOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewCreateOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewCreateOp>(
         op, adaptor.getSourceBuffer(), adaptor.getSourceOffset(),
         adaptor.getSourceLength(), adaptor.getElementType(),
@@ -106,9 +106,9 @@
 struct BufferViewBufferOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewBufferOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewBufferOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewBufferOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewBufferOp>(
         op, rewriter.getType<IREE::HAL::BufferType>(), adaptor.getBufferView());
     return success();
@@ -118,9 +118,9 @@
 struct BufferViewAssertOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewAssertOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewAssertOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewAssertOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewAssertOp>(
         op, adaptor.getBufferView(), adaptor.getMessage(),
         adaptor.getElementType(), adaptor.getEncodingType(),
@@ -132,9 +132,9 @@
 struct BufferViewElementTypeOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewElementTypeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewElementTypeOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewElementTypeOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewElementTypeOp>(
         op, op.getResult().getType(), adaptor.getBufferView());
     return success();
@@ -144,9 +144,9 @@
 struct BufferViewEncodingTypeOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewEncodingTypeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewEncodingTypeOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewEncodingTypeOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewEncodingTypeOp>(
         op, op.getResult().getType(), adaptor.getBufferView());
     return success();
@@ -156,9 +156,9 @@
 struct BufferViewRankOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewRankOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewRankOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewRankOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewRankOp>(
         op, op.getResult().getType(), adaptor.getBufferView());
     return success();
@@ -168,9 +168,9 @@
 struct BufferViewDimOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewDimOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewDimOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewDimOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewDimOp>(
         op, op.getResult().getType(), adaptor.getBufferView(),
         adaptor.getIndexAttr());
@@ -181,16 +181,16 @@
 struct BufferViewTraceOpPattern
     : public OpConversionPattern<IREE::HAL::BufferViewTraceOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::BufferViewTraceOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::BufferViewTraceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewTraceOp>(
         op, adaptor.getKeyAttr(), adaptor.getOperands());
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateHALToHALInlinePatterns(MLIRContext *context,
                                     ConversionTarget &conversionTarget,
@@ -229,5 +229,5 @@
   patterns.insert<BufferViewTraceOpPattern>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.h b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.h
index aa8d21a..856d852 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/Patterns.h
@@ -19,7 +19,7 @@
                                     TypeConverter &typeConverter,
                                     RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_INLINE_CONVERSION_HALTOHALINLINE_PATTERNS_H_
+#endif // IREE_COMPILER_MODULES_HAL_INLINE_CONVERSION_HALTOHALINLINE_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp
index 0c27cd5..2422688 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp
@@ -60,9 +60,9 @@
 struct ResourceAllocOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceAllocOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceAllocOp allocOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceAllocOp allocOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto deviceBufferType = rewriter.getType<IREE::HAL::BufferType>();
     auto hostBufferType = rewriter.getType<IREE::Util::BufferType>();
 
@@ -87,9 +87,9 @@
 struct ResourceAllocaOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceAllocaOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceAllocaOp allocaOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceAllocaOp allocaOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto deviceBufferType = rewriter.getType<IREE::HAL::BufferType>();
     auto hostBufferType = rewriter.getType<IREE::Util::BufferType>();
 
@@ -112,9 +112,10 @@
 struct ResourceDeallocaOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceDeallocaOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceDeallocaOp deallocaOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceDeallocaOp deallocaOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // TODO(benvanik): discard op?
     auto resolvedTimepoint =
         rewriter.create<arith::ConstantIntOp>(deallocaOp.getLoc(), 0, 64)
@@ -127,9 +128,9 @@
 struct ResourceSizeOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceSizeOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceSizeOp sizeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceSizeOp sizeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(sizeOp, getResourceSize(sizeOp.getLoc(),
                                                adaptor.getOperand(), rewriter));
     return success();
@@ -142,9 +143,9 @@
 struct ResourceMapOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceMapOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceMapOp mapOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceMapOp mapOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::Util::BufferSubspanOp>(
         mapOp, adaptor.getSource(),
         getResourceSize(mapOp.getLoc(), adaptor.getSource(), rewriter),
@@ -159,9 +160,9 @@
 struct ResourceTryMapOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceTryMapOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceTryMapOp tryMapOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceTryMapOp tryMapOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     Value subspan = rewriter.create<IREE::Util::BufferSubspanOp>(
         tryMapOp.getLoc(), adaptor.getSource(),
         getResourceSize(tryMapOp.getLoc(), adaptor.getSource(), rewriter),
@@ -176,9 +177,9 @@
 struct ResourceLoadOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceLoadOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceLoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceLoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = loadOp.getLoc();
     auto storage = getResourceStorage(loc, adaptor.getSource(),
                                       adaptor.getSourceSize(), rewriter);
@@ -196,9 +197,9 @@
 struct ResourceStoreOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceStoreOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceStoreOp storeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceStoreOp storeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = storeOp.getLoc();
     auto storage = getResourceStorage(loc, adaptor.getTarget(),
                                       adaptor.getTargetSize(), rewriter);
@@ -214,9 +215,9 @@
 struct ResourceSubviewOpPattern
     : public OpConversionPattern<IREE::Stream::ResourceSubviewOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::ResourceSubviewOp subviewOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::ResourceSubviewOp subviewOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (llvm::isa<IREE::HAL::BufferType>(adaptor.getSource().getType())) {
       auto bufferType = rewriter.getType<IREE::HAL::BufferType>();
       // NOTE: this aliases! We assume at this point all useful alias analysis
@@ -236,9 +237,9 @@
 struct TensorImportBufferOpPattern
     : public OpConversionPattern<IREE::Stream::TensorImportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!llvm::isa<IREE::HAL::BufferType>(importOp.getSource().getType())) {
       return failure();
     }
@@ -253,9 +254,9 @@
 struct TensorImportBufferViewOpPattern
     : public OpConversionPattern<IREE::Stream::TensorImportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorImportOp importOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto sourceType = importOp.getSource().getType();
     if (!llvm::isa<IREE::HAL::BufferViewType>(sourceType) &&
         !llvm::isa<TensorType>(sourceType)) {
@@ -273,9 +274,9 @@
 struct TensorExportBufferOpPattern
     : public OpConversionPattern<IREE::Stream::TensorExportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     if (!llvm::isa<IREE::HAL::BufferType>(exportOp.getResult().getType())) {
       return failure();
     }
@@ -287,9 +288,9 @@
 struct TensorExportBufferViewOpPattern
     : public OpConversionPattern<IREE::Stream::TensorExportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorExportOp exportOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto targetType = exportOp.getResult().getType();
     if (!llvm::isa<IREE::HAL::BufferViewType>(targetType) &&
         !llvm::isa<TensorType>(targetType)) {
@@ -333,9 +334,9 @@
 struct TensorTraceOpPattern
     : public OpConversionPattern<IREE::Stream::TensorTraceOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TensorTraceOp traceOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TensorTraceOp traceOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<IREE::HAL::Inline::BufferViewTraceOp>(
         traceOp, traceOp.getKeyAttr(), adaptor.getOperands());
     return success();
@@ -345,9 +346,9 @@
 struct CmdFlushOpPattern
     : public OpConversionPattern<IREE::Stream::CmdFlushOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdFlushOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdFlushOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
@@ -356,9 +357,9 @@
 struct CmdInvalidateOpPattern
     : public OpConversionPattern<IREE::Stream::CmdInvalidateOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdInvalidateOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdInvalidateOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
@@ -367,9 +368,9 @@
 struct CmdDiscardOpPattern
     : public OpConversionPattern<IREE::Stream::CmdDiscardOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdDiscardOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdDiscardOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
@@ -377,9 +378,9 @@
 
 struct CmdFillOpPattern : public OpConversionPattern<IREE::Stream::CmdFillOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdFillOp fillOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdFillOp fillOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = fillOp.getLoc();
     auto storage = getResourceStorage(loc, adaptor.getTarget(),
                                       adaptor.getTargetSize(), rewriter);
@@ -392,9 +393,9 @@
 
 struct CmdCopyOpPattern : public OpConversionPattern<IREE::Stream::CmdCopyOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdCopyOp copyOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdCopyOp copyOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = copyOp.getLoc();
     auto sourceStorage = getResourceStorage(loc, adaptor.getSource(),
                                             adaptor.getSourceSize(), rewriter);
@@ -412,17 +413,16 @@
 struct CmdDispatchOpPattern
     : public OpConversionPattern<IREE::Stream::CmdDispatchOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdDispatchOp dispatchOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdDispatchOp dispatchOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = dispatchOp.getLoc();
 
     auto callee = dispatchOp->getAttrOfType<SymbolRefAttr>("hal_inline.target");
     if (!callee) {
       return rewriter.notifyMatchFailure(
-          dispatchOp,
-          "missing hal_inline.target annotation from the "
-          "--iree-hal-inline-executables pass");
+          dispatchOp, "missing hal_inline.target annotation from the "
+                      "--iree-hal-inline-executables pass");
     }
 
     // The InlineExecutables pass has already done the hard work here; we just
@@ -451,9 +451,9 @@
 
 struct CmdFuncOpPattern : public OpConversionPattern<IREE::Stream::CmdFuncOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdFuncOp funcOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdFuncOp funcOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Type> newArgTypes;
     SmallVector<Type> newResultTypes;
     if (failed(getTypeConverter()->convertTypes(funcOp.getArgumentTypes(),
@@ -474,9 +474,9 @@
 
 struct CmdCallOpPattern : public OpConversionPattern<IREE::Stream::CmdCallOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdCallOp callOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdCallOp callOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     SmallVector<Value> operands;
     size_t resourceIndex = 0;
     for (auto [originalOperand, convertedOperand] : llvm::zip_equal(
@@ -516,9 +516,9 @@
 struct CmdExecuteOpPattern
     : public OpConversionPattern<IREE::Stream::CmdExecuteOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdExecuteOp executeOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdExecuteOp executeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Inline the serial execution region.
     rewriter.inlineBlockBefore(&executeOp.getBody().front(), executeOp,
                                adaptor.getResourceOperands());
@@ -534,9 +534,9 @@
 struct CmdSerialOpPattern
     : public OpConversionPattern<IREE::Stream::CmdSerialOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdSerialOp serialOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdSerialOp serialOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Inline the serial execution region.
     rewriter.inlineBlockBefore(&serialOp.getBody().front(), serialOp);
     rewriter.eraseOp(serialOp);
@@ -547,9 +547,9 @@
 struct CmdConcurrentOpPattern
     : public OpConversionPattern<IREE::Stream::CmdConcurrentOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdConcurrentOp concurrentOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdConcurrentOp concurrentOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Inline the concurrent execution region.
     rewriter.inlineBlockBefore(&concurrentOp.getBody().front(), concurrentOp);
     rewriter.eraseOp(concurrentOp);
@@ -562,11 +562,12 @@
 struct GlobalTimepointConversionPattern
     : public OpConversionPattern<IREE::Util::GlobalOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Util::GlobalOp op, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Util::GlobalOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto initialValue = op.getInitialValue();
-    if (!initialValue.has_value()) return failure();
+    if (!initialValue.has_value())
+      return failure();
     if (!llvm::isa<IREE::Stream::TimepointAttr>(*initialValue))
       return failure();
     rewriter.updateRootInPlace(
@@ -578,9 +579,10 @@
 struct TimepointImmediateOpPattern
     : public OpConversionPattern<IREE::Stream::TimepointImmediateOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointImmediateOp immediateOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointImmediateOp immediateOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<arith::ConstantIntOp>(immediateOp, 0, 64);
     return success();
   }
@@ -589,9 +591,9 @@
 struct TimepointImportOpPattern
     : public OpConversionPattern<IREE::Stream::TimepointImportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointImportOp importOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointImportOp importOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     return rewriter.notifyMatchFailure(
         importOp,
         "timepoints are not supported across the ABI with inline execution");
@@ -601,9 +603,9 @@
 struct TimepointExportOpPattern
     : public OpConversionPattern<IREE::Stream::TimepointExportOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointExportOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointExportOp exportOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     return rewriter.notifyMatchFailure(
         exportOp,
         "timepoints are not supported across the ABI with inline execution");
@@ -613,9 +615,10 @@
 struct TimepointChainExternalOpPattern
     : public OpConversionPattern<IREE::Stream::TimepointChainExternalOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointChainExternalOp exportOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointChainExternalOp exportOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     return rewriter.notifyMatchFailure(
         exportOp,
         "timepoints are not supported across the ABI with inline execution");
@@ -625,9 +628,9 @@
 struct TimepointJoinOpPattern
     : public OpConversionPattern<IREE::Stream::TimepointJoinOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointJoinOp joinOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointJoinOp joinOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<arith::ConstantIntOp>(joinOp, 0, 64);
     return success();
   }
@@ -636,9 +639,9 @@
 struct TimepointBarrierOpPattern
     : public OpConversionPattern<IREE::Stream::TimepointBarrierOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointBarrierOp barrierOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointBarrierOp barrierOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(barrierOp, {
                                       adaptor.getResource(),
                                       rewriter.create<arith::ConstantIntOp>(
@@ -651,9 +654,9 @@
 struct TimepointAwaitOpPattern
     : public OpConversionPattern<IREE::Stream::TimepointAwaitOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::TimepointAwaitOp awaitOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::TimepointAwaitOp awaitOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.replaceOp(awaitOp, adaptor.getResourceOperands());
     return success();
   }
@@ -661,15 +664,15 @@
 
 struct ElideYieldOpPattern : public OpConversionPattern<IREE::Stream::YieldOp> {
   using OpConversionPattern::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      IREE::Stream::YieldOp yieldOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::YieldOp yieldOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     rewriter.eraseOp(yieldOp);
     return success();
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStreamToHALInlinePatterns(MLIRContext *context,
                                        ConversionTarget &conversionTarget,
@@ -724,5 +727,5 @@
   patterns.insert<ElideYieldOpPattern>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.h b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.h
index 3787744..3d634ab 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.h
@@ -19,7 +19,7 @@
                                        TypeConverter &typeConverter,
                                        RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_INLINE_CONVERSION_STREAMTOHALINLINE_PATTERNS_H_
+#endif // IREE_COMPILER_MODULES_HAL_INLINE_CONVERSION_STREAMTOHALINLINE_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.cpp
index f82c5d5..ae98125 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.cpp
@@ -24,7 +24,7 @@
 namespace {
 
 class HALInlineToVMConversionInterface : public VMConversionDialectInterface {
- public:
+public:
   using VMConversionDialectInterface::VMConversionDialectInterface;
 
   OwningOpRef<mlir::ModuleOp> parseVMImportModule() const override {
@@ -34,17 +34,18 @@
         getDialect()->getContext());
   }
 
-  void populateVMConversionPatterns(
-      SymbolTable &importSymbols, RewritePatternSet &patterns,
-      ConversionTarget &conversionTarget,
-      TypeConverter &typeConverter) const override {
+  void
+  populateVMConversionPatterns(SymbolTable &importSymbols,
+                               RewritePatternSet &patterns,
+                               ConversionTarget &conversionTarget,
+                               TypeConverter &typeConverter) const override {
     conversionTarget.addIllegalDialect<IREE::HAL::Inline::HALInlineDialect>();
     populateHALInlineToVMPatterns(getDialect()->getContext(), conversionTarget,
                                   typeConverter, importSymbols, patterns);
   }
 };
 
-}  // namespace
+} // namespace
 
 HALInlineDialect::HALInlineDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<HALInlineDialect>()) {
@@ -56,8 +57,8 @@
       >();
 }
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.h b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.h
index c4b8f22..f7ec7ad 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.h
@@ -17,15 +17,15 @@
 namespace Inline {
 
 class HALInlineDialect : public Dialect {
- public:
+public:
   explicit HALInlineDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "hal_inline"; }
 };
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_INLINE_IR_HALINLINEDIALECT_H_
+#endif // IREE_COMPILER_MODULES_HAL_INLINE_IR_HALINLINEDIALECT_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.cpp
index 6dbc737..416779c 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.cpp
@@ -107,7 +107,8 @@
 
 OpFoldResult BufferStorageOp::fold(FoldAdaptor operands) {
   auto *definingOp = getBuffer().getDefiningOp();
-  if (!definingOp) return {};
+  if (!definingOp)
+    return {};
   if (auto sourceOp =
           dyn_cast_or_null<IREE::HAL::Inline::BufferAllocateOp>(definingOp)) {
     return sourceOp.getStorage();
@@ -171,7 +172,8 @@
       needsUpdate = true;
     }
     rewriter.restoreInsertionPoint(ip);
-    if (!needsUpdate) return failure();
+    if (!needsUpdate)
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       op.getSourceBufferMutable().assign(newSourceBuffer);
       op.getSourceOffsetMutable().assign(newSourceOffset);
@@ -180,7 +182,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BufferViewCreateOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                      MLIRContext *context) {
@@ -213,7 +215,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void BufferViewBufferOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                      MLIRContext *context) {
@@ -238,11 +240,11 @@
   return success();
 }
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // TableGen definitions (intentionally last)
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.h b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.h
index 30ce5ff..6f04599 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.h
@@ -21,6 +21,6 @@
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.h.inc" // IWYU pragma: keep
 
-#endif  // IREE_COMPILER_MODULES_HAL_INLINE_IR_HALINLINEOPS_H_
+#endif // IREE_COMPILER_MODULES_HAL_INLINE_IR_HALINLINEOPS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Conversion.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Conversion.cpp
index 664cec9..d372b6b 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Conversion.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Conversion.cpp
@@ -34,7 +34,7 @@
 
 // Runs conversion with registered input dialects.
 class ConversionPass : public ConversionBase<ConversionPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect, IREE::HAL::HALDialect,
                     IREE::HAL::Inline::HALInlineDialect,
@@ -97,8 +97,8 @@
   return std::make_unique<ConversionPass>();
 }
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
index 70e6cc8..f8c65fa 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
@@ -30,7 +30,7 @@
 
 class InlineExecutablesPass
     : public InlineExecutablesBase<InlineExecutablesPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect, IREE::HAL::HALDialect,
                     IREE::HAL::Inline::HALInlineDialect, arith::ArithDialect,
@@ -110,11 +110,11 @@
       }
       SmallVector<Type> inputTypes;
       inputTypes.append(exportOp.getWorkgroupCountBody()->getNumArguments() - 1,
-                        indexType);  // workload
+                        indexType); // workload
       inputTypes.append(layoutAttr.getPushConstants(), i32Type);
-      inputTypes.append(totalBindingCount, bufferType);  // buffers
-      inputTypes.append(totalBindingCount, indexType);   // offsets
-      inputTypes.append(totalBindingCount, indexType);   // lengths
+      inputTypes.append(totalBindingCount, bufferType); // buffers
+      inputTypes.append(totalBindingCount, indexType);  // offsets
+      inputTypes.append(totalBindingCount, indexType);  // lengths
       auto dispatchFuncType =
           innerModuleBuilder.getFunctionType(inputTypes, {});
 
@@ -137,7 +137,7 @@
                                              bodyFuncOp))) {
           return failure();
         }
-        bodyFuncOp.setPrivate();  // so we only do it once
+        bodyFuncOp.setPrivate(); // so we only do it once
       }
       buildDispatchFunc(exportOp, layoutAttr, totalBindingCount, bodyFuncOp,
                         dispatchFuncOp);
@@ -180,9 +180,9 @@
   // Whenever better IPO and util.list optimizations are added we could back
   // this out to keep things vanilla and have fewer places making assumptions
   // about the function signatures.
-  LogicalResult rewriteWorkgroupSignature(
-      IREE::HAL::PipelineLayoutAttr layoutAttr, size_t totalBindingCount,
-      func::FuncOp bodyFuncOp) {
+  LogicalResult
+  rewriteWorkgroupSignature(IREE::HAL::PipelineLayoutAttr layoutAttr,
+                            size_t totalBindingCount, func::FuncOp bodyFuncOp) {
     auto *entryBlock = &bodyFuncOp.front();
     auto builder = OpBuilder::atBlockBegin(entryBlock);
     auto indexType = builder.getIndexType();
@@ -384,13 +384,13 @@
     }
 
     int workgroupXYZOffset = workgroupArgs.size();
-    workgroupArgs.push_back(nullptr);            // workgroup_x, set below
-    workgroupArgs.push_back(nullptr);            // workgroup_y, set below
-    workgroupArgs.push_back(nullptr);            // workgroup_z, set below
-    workgroupArgs.append(3, indexSet.get(1));    // workgroup_size_xyz
-    workgroupArgs.push_back(workgroupCount[0]);  // workgroup_count_x
-    workgroupArgs.push_back(workgroupCount[1]);  // workgroup_count_y
-    workgroupArgs.push_back(workgroupCount[2]);  // workgroup_count_z
+    workgroupArgs.push_back(nullptr);           // workgroup_x, set below
+    workgroupArgs.push_back(nullptr);           // workgroup_y, set below
+    workgroupArgs.push_back(nullptr);           // workgroup_z, set below
+    workgroupArgs.append(3, indexSet.get(1));   // workgroup_size_xyz
+    workgroupArgs.push_back(workgroupCount[0]); // workgroup_count_x
+    workgroupArgs.push_back(workgroupCount[1]); // workgroup_count_y
+    workgroupArgs.push_back(workgroupCount[2]); // workgroup_count_z
 
     // Z -> Y -> Z loop nest.
     builder.create<scf::ForOp>(
@@ -426,8 +426,8 @@
   return std::make_unique<InlineExecutablesPass>();
 }
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/PassDetail.h b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/PassDetail.h
index d4c9b8e..8197d36 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/PassDetail.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/PassDetail.h
@@ -20,10 +20,10 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/Modules/HAL/Inline/Transforms/Passes.h.inc"
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_INLINE_TRANSFORMS_PASS_DETAIL_H_
+#endif // IREE_COMPILER_MODULES_HAL_INLINE_TRANSFORMS_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp
index 1372328..c47e526 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.cpp
@@ -95,7 +95,7 @@
 namespace {
 #define GEN_PASS_REGISTRATION
 #include "iree/compiler/Modules/HAL/Inline/Transforms/Passes.h.inc"
-}  // namespace
+} // namespace
 
 void registerHALInlinePasses() {
   // Generated.
@@ -111,8 +111,8 @@
       });
 }
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h
index 984a4c8..021f117 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/Passes.h
@@ -56,10 +56,10 @@
 
 void registerHALInlinePasses();
 
-}  // namespace Inline
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Inline
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_INLINE_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_MODULES_HAL_INLINE_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp
index df88010..091287c 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp
@@ -27,7 +27,8 @@
 
 // Casts |value| to i32 if it is not already.
 static Value castToI32(Value value, OpBuilder &builder) {
-  if (value.getType().isInteger(32)) return value;
+  if (value.getType().isInteger(32))
+    return value;
   return builder.createOrFold<IREE::VM::TruncI64I32Op>(
       value.getLoc(), builder.getI32Type(), value);
 }
@@ -40,9 +41,9 @@
     importOp = importSymbols.lookup<IREE::VM::ImportOp>(importName);
     assert(importOp);
   }
-  LogicalResult matchAndRewrite(
-      IREE::HAL::Loader::ExecutableLoadOp loadOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::Loader::ExecutableLoadOp loadOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     // Get format string as a rodata blob.
     auto executableFormatStr = rewriter.create<IREE::VM::RodataInlineOp>(
         loadOp.getLoc(), loadOp.getFormatAttr());
@@ -64,7 +65,7 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
@@ -78,9 +79,10 @@
     importOp = importSymbols.lookup<IREE::VM::ImportOp>(importName);
     assert(importOp);
   }
-  LogicalResult matchAndRewrite(
-      IREE::HAL::Loader::ExecutableDispatchOp dispatchOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::Loader::ExecutableDispatchOp dispatchOp,
+                  OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto entryPoint = rewriter.create<IREE::VM::ConstI32Op>(
         dispatchOp.getLoc(),
         static_cast<int32_t>(adaptor.getEntryPoint().getZExtValue()));
@@ -121,11 +123,11 @@
     return success();
   }
 
- private:
+private:
   mutable IREE::VM::ImportOp importOp;
 };
 
-}  // namespace
+} // namespace
 
 void populateHALLoaderToVMPatterns(MLIRContext *context,
                                    ConversionTarget &conversionTarget,
@@ -142,5 +144,5 @@
       context, importSymbols, typeConverter, "hal_loader.executable.dispatch");
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.h b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.h
index 53459f8..a5f1d2f 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.h
@@ -21,7 +21,7 @@
                                    SymbolTable &importSymbols,
                                    RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_LOADER_CONVERSION_HALLOADERTOVM_PATTERNS_H_
+#endif // IREE_COMPILER_MODULES_HAL_LOADER_CONVERSION_HALLOADERTOVM_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.cpp
index 497fbd0..005d442 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.cpp
@@ -41,9 +41,9 @@
     : public OpConversionPattern<IREE::Stream::CmdDispatchOp> {
   CmdDispatchOpPattern(TypeConverter &typeConverter, MLIRContext *context)
       : OpConversionPattern(typeConverter, context, PatternBenefit(10000)) {}
-  LogicalResult matchAndRewrite(
-      IREE::Stream::CmdDispatchOp dispatchOp, OpAdaptor adaptor,
-      ConversionPatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::Stream::CmdDispatchOp dispatchOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
     auto loc = dispatchOp.getLoc();
 
     // TODO(benvanik): support a lightweight switch builder for picking variants
@@ -143,7 +143,7 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void populateStreamToHALLoaderPatterns(MLIRContext *context,
                                        ConversionTarget &conversionTarget,
@@ -157,5 +157,5 @@
   patterns.insert<CmdDispatchOpPattern>(typeConverter, context);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.h b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.h
index a2c279a..8ac692d 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/Patterns.h
@@ -19,7 +19,7 @@
                                        TypeConverter &typeConverter,
                                        RewritePatternSet &patterns);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_LOADER_CONVERSION_STREAMTOHALLOADER_PATTERNS_H_
+#endif // IREE_COMPILER_MODULES_HAL_LOADER_CONVERSION_STREAMTOHALLOADER_PATTERNS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.cpp
index 0f269b7..419d0cf 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.cpp
@@ -24,7 +24,7 @@
 namespace {
 
 class HALLoaderToVMConversionInterface : public VMConversionDialectInterface {
- public:
+public:
   using VMConversionDialectInterface::VMConversionDialectInterface;
 
   OwningOpRef<mlir::ModuleOp> parseVMImportModule() const override {
@@ -34,17 +34,18 @@
         getDialect()->getContext());
   }
 
-  void populateVMConversionPatterns(
-      SymbolTable &importSymbols, RewritePatternSet &patterns,
-      ConversionTarget &conversionTarget,
-      TypeConverter &typeConverter) const override {
+  void
+  populateVMConversionPatterns(SymbolTable &importSymbols,
+                               RewritePatternSet &patterns,
+                               ConversionTarget &conversionTarget,
+                               TypeConverter &typeConverter) const override {
     conversionTarget.addIllegalDialect<IREE::HAL::Loader::HALLoaderDialect>();
     populateHALLoaderToVMPatterns(getDialect()->getContext(), conversionTarget,
                                   typeConverter, importSymbols, patterns);
   }
 };
 
-}  // namespace
+} // namespace
 
 HALLoaderDialect::HALLoaderDialect(MLIRContext *context)
     : Dialect(getDialectNamespace(), context, TypeID::get<HALLoaderDialect>()) {
@@ -56,8 +57,8 @@
       >();
 }
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.h b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.h
index df5b412..f1989a5 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderDialect.h
@@ -17,15 +17,15 @@
 namespace Loader {
 
 class HALLoaderDialect : public Dialect {
- public:
+public:
   explicit HALLoaderDialect(MLIRContext *context);
   static StringRef getDialectNamespace() { return "hal_loader"; }
 };
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_LOADER_IR_HALLOADERDIALECT_H_
+#endif // IREE_COMPILER_MODULES_HAL_LOADER_IR_HALLOADERDIALECT_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.cpp
index 033fc39..bca9345 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.cpp
@@ -109,8 +109,8 @@
   setNameFn(getResult(), "exe");
 }
 
-LogicalResult ExecutableLookupOp::verifySymbolUses(
-    SymbolTableCollection &symbolTable) {
+LogicalResult
+ExecutableLookupOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   Operation *op = getOperation();
   auto exportOp = symbolTable.lookupNearestSymbolFrom<IREE::HAL::ExecutableOp>(
       op, getExecutableAttr());
@@ -182,7 +182,8 @@
       bindingBuffers.push_back(subspanOp.getSource());
       bindingOffsets.push_back(newOffset);
     }
-    if (!didChangeAny) return failure();
+    if (!didChangeAny)
+      return failure();
     rewriter.updateRootInPlace(op, [&]() {
       op.getBindingBuffersMutable().assign(bindingBuffers);
       op.getBindingOffsetsMutable().assign(bindingOffsets);
@@ -191,18 +192,18 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 void ExecutableDispatchOp::getCanonicalizationPatterns(
     RewritePatternSet &results, MLIRContext *context) {
   results.insert<FoldBindingSubspansIntoDispatchOp>(context);
 }
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // TableGen definitions (intentionally last)
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.h b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.h
index 2e6e606..b61de56 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.h
@@ -21,6 +21,6 @@
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
 #define GET_OP_CLASSES
-#include "iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.h.inc" // IWYU pragma: keep
 
-#endif  // IREE_COMPILER_MODULES_HAL_LOADER_IR_HALLOADEROPS_H_
+#endif // IREE_COMPILER_MODULES_HAL_LOADER_IR_HALLOADEROPS_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Conversion.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Conversion.cpp
index 0a2c458..93bf44f 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Conversion.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Conversion.cpp
@@ -36,7 +36,7 @@
 
 // Runs conversion with registered input dialects.
 class ConversionPass : public ConversionBase<ConversionPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect, IREE::HAL::HALDialect,
                     IREE::HAL::Inline::HALInlineDialect,
@@ -105,8 +105,8 @@
   return std::make_unique<ConversionPass>();
 }
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/MaterializeExecutables.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/MaterializeExecutables.cpp
index 66fb740..d4b6cdc 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/MaterializeExecutables.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/MaterializeExecutables.cpp
@@ -116,7 +116,7 @@
     Value binaryData = loadBuilder.create<IREE::Util::BufferConstantOp>(
         binaryLoc, binaryOp.getNameAttr(), binaryOp.getData(), alignmentAttr,
         binaryOp.getMimeTypeAttr());
-    SmallVector<Value> constants;  // TBD
+    SmallVector<Value> constants; // TBD
     Value executable = loadBuilder.create<IREE::HAL::Loader::ExecutableLoadOp>(
         binaryLoc, executableType, binaryOp.getFormatAttr(), binaryData,
         constants);
@@ -131,7 +131,7 @@
 // Runs conversion with registered input dialects.
 class MaterializeExecutablesPass
     : public MaterializeExecutablesBase<MaterializeExecutablesPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Util::UtilDialect, IREE::HAL::HALDialect,
                     IREE::HAL::Loader::HALLoaderDialect, arith::ArithDialect,
@@ -166,8 +166,8 @@
   return std::make_unique<MaterializeExecutablesPass>();
 }
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/PassDetail.h b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/PassDetail.h
index 55fdb52..693ae6b 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/PassDetail.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/PassDetail.h
@@ -20,10 +20,10 @@
 #define GEN_PASS_CLASSES
 #include "iree/compiler/Modules/HAL/Loader/Transforms/Passes.h.inc"
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_LOADER_TRANSFORMS_PASS_DETAIL_H_
+#endif // IREE_COMPILER_MODULES_HAL_LOADER_TRANSFORMS_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp
index 12f46d9..0dbfb5f 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.cpp
@@ -116,7 +116,7 @@
 namespace {
 #define GEN_PASS_REGISTRATION
 #include "iree/compiler/Modules/HAL/Loader/Transforms/Passes.h.inc"
-}  // namespace
+} // namespace
 
 void registerHALLoaderPasses() {
   // Generated.
@@ -132,8 +132,8 @@
       });
 }
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h
index 6294387..9ba7f5a 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/Passes.h
@@ -61,10 +61,10 @@
 
 void registerHALLoaderPasses();
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_MODULES_HAL_LOADER_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_MODULES_HAL_LOADER_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/ResolveExportOrdinals.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/ResolveExportOrdinals.cpp
index 24e2026..d0d19e0 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/ResolveExportOrdinals.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/ResolveExportOrdinals.cpp
@@ -20,9 +20,9 @@
 struct ResolveExecutableDispatchSymbolOp
     : public OpRewritePattern<IREE::HAL::Loader::ExecutableDispatchSymbolOp> {
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(
-      IREE::HAL::Loader::ExecutableDispatchSymbolOp op,
-      PatternRewriter &rewriter) const override {
+  LogicalResult
+  matchAndRewrite(IREE::HAL::Loader::ExecutableDispatchSymbolOp op,
+                  PatternRewriter &rewriter) const override {
     auto symbol = SymbolTable::lookupNearestSymbolFrom(op, op.getEntryPoint());
     assert(symbol && "missing ExecutableEntryPoint symbol");
     auto exportOp = cast<IREE::HAL::ExecutableExportOp>(symbol);
@@ -36,7 +36,7 @@
 
 class ResolveExportOrdinalsPass
     : public ResolveExportOrdinalsBase<ResolveExportOrdinalsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::HAL::Loader::HALLoaderDialect>();
   }
@@ -58,8 +58,8 @@
 
 static PassRegistration<ResolveExportOrdinalsPass> pass;
 
-}  // namespace Loader
-}  // namespace HAL
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace Loader
+} // namespace HAL
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Pipelines/Options.cpp b/compiler/src/iree/compiler/Pipelines/Options.cpp
index 2326e98..d377acd 100644
--- a/compiler/src/iree/compiler/Pipelines/Options.cpp
+++ b/compiler/src/iree/compiler/Pipelines/Options.cpp
@@ -79,7 +79,7 @@
       "iree-input-promote-bf16-to-f32", promoteBF16ToF32,
       llvm::cl::desc("Converts all bf16 ops and values into f32 counterparts."),
       llvm::cl::cat(category));
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 }
 
 void HighLevelOptimizationOptions::bindOptions(OptionsBinder &binder) {
@@ -165,5 +165,5 @@
       llvm::cl::cat(category));
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Pipelines/Options.h b/compiler/src/iree/compiler/Pipelines/Options.h
index 52a9a2f..0ae7dbb 100644
--- a/compiler/src/iree/compiler/Pipelines/Options.h
+++ b/compiler/src/iree/compiler/Pipelines/Options.h
@@ -42,15 +42,15 @@
     // Special case of 'stablehlo' legalization which also performs some XLA
     // preprocessing, e.g., flattening of tuples.
     stablehlo_xla,
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
     // Legalizes input defined over TMTensor ops.
     tm_tensor,
-#endif  // IREE_HAVE_TORCH_INPUT
+#endif // IREE_HAVE_TORCH_INPUT
 #ifdef IREE_HAVE_TOSA_INPUT
     // Legalizes input defined over TOSA ops.
     tosa,
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
   };
   Type type = Type::auto_detect;
 
@@ -140,7 +140,7 @@
   using FromFlags = OptionsFromFlags<PreprocessingOptions>;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_PIPELINES_OPTIONS_H_
+#endif // IREE_COMPILER_PIPELINES_OPTIONS_H_
diff --git a/compiler/src/iree/compiler/Pipelines/Pipelines.cpp b/compiler/src/iree/compiler/Pipelines/Pipelines.cpp
index 39b2eb1..ab036c8 100644
--- a/compiler/src/iree/compiler/Pipelines/Pipelines.cpp
+++ b/compiler/src/iree/compiler/Pipelines/Pipelines.cpp
@@ -21,13 +21,13 @@
 
 #ifdef IREE_HAVE_STABLEHLO_INPUT
 #include "iree/compiler/InputConversion/StableHLO/Passes.h"
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
 #include "iree/compiler/InputConversion/TMTensor/Passes.h"
-#endif  // IREE_HAVE_TORCH_INPUT
+#endif // IREE_HAVE_TORCH_INPUT
 #ifdef IREE_HAVE_TOSA_INPUT
 #include "iree/compiler/InputConversion/TOSA/Passes.h"
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 
 namespace mlir {
 namespace iree_compiler {
@@ -66,39 +66,40 @@
   stablehloOptions.demoteI64ToI32 = inputOptions.demoteI64ToI32;
   stablehloOptions.demoteF64ToF32 = inputOptions.demoteF64ToF32;
   stablehloOptions.promoteBF16ToF32 = inputOptions.promoteBF16ToF32;
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 
   switch (inputOptions.type) {
-    case InputDialectOptions::Type::none:
-      break;
-    case InputDialectOptions::Type::auto_detect:
-      passManager.addPass(createAutoInputConversionPipelinePass(autoOptions));
-      break;
+  case InputDialectOptions::Type::none:
+    break;
+  case InputDialectOptions::Type::auto_detect:
+    passManager.addPass(createAutoInputConversionPipelinePass(autoOptions));
+    break;
 #ifdef IREE_HAVE_STABLEHLO_INPUT
-    case InputDialectOptions::Type::stablehlo:
-      stablehlo::buildStableHLOInputConversionPassPipeline(passManager,
-                                                           stablehloOptions);
-      break;
-    case InputDialectOptions::Type::stablehlo_xla:
-      stablehlo::buildStableHLOXLAInputConversionPassPipeline(passManager,
-                                                              stablehloOptions);
-      break;
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+  case InputDialectOptions::Type::stablehlo:
+    stablehlo::buildStableHLOInputConversionPassPipeline(passManager,
+                                                         stablehloOptions);
+    break;
+  case InputDialectOptions::Type::stablehlo_xla:
+    stablehlo::buildStableHLOXLAInputConversionPassPipeline(passManager,
+                                                            stablehloOptions);
+    break;
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
-    case InputDialectOptions::Type::tm_tensor:
-      passManager.addNestedPass<func::FuncOp>(
-          TMTensor::createConvertTMTensorToLinalgExtPass());
-      break;
-#endif  // IREE_HAVE_TORCH_INPUT
+  case InputDialectOptions::Type::tm_tensor:
+    passManager.addNestedPass<func::FuncOp>(
+        TMTensor::createConvertTMTensorToLinalgExtPass());
+    break;
+#endif // IREE_HAVE_TORCH_INPUT
 #ifdef IREE_HAVE_TOSA_INPUT
-    case InputDialectOptions::Type::tosa:
-      buildTOSAInputConversionPassPipeline(passManager);
-      break;
-#endif  // IREE_HAVE_TOSA_INPUT
+  case InputDialectOptions::Type::tosa:
+    buildTOSAInputConversionPassPipeline(passManager);
+    break;
+#endif // IREE_HAVE_TOSA_INPUT
   }
   buildCommonInputConversionPassPipeline(passManager);
   IREE_TRACE_ADD_END_FRAME_PASS(passManager, "Input");
-  if (compileTo == IREEVMPipelinePhase::Input) return;  // early-exit
+  if (compileTo == IREEVMPipelinePhase::Input)
+    return; // early-exit
 
   // Now that inputs are legalized, generate wrapper for entry functions.
   IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "ABI");
@@ -115,7 +116,8 @@
     IREE::TFLite::buildTransformPassPipeline(passManager);
   }
   IREE_TRACE_ADD_END_FRAME_PASS(passManager, "ABI");
-  if (compileTo == IREEVMPipelinePhase::ABI) return;  // early-exit
+  if (compileTo == IREEVMPipelinePhase::ABI)
+    return; // early-exit
 
   IREE::Flow::TransformOptions flowOptions;
   flowOptions.constExprHoisting =
@@ -149,74 +151,76 @@
   streamOptions.dumpStatisticsFile = schedulingOptions.dumpStatisticsFile;
 
   switch (schedulingOptions.executionModel) {
-    case SchedulingOptions::ExecutionModel::HostOnly:
-      // No flow/stream processing (implies no tensors).
-      break;
-    default:
-      IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "Preprocessing");
-      IREE::buildPreprocessingPassPipeline(passManager, preprocessingOptions,
-                                           hooks.pipelineExtensions);
-      IREE_TRACE_ADD_END_FRAME_PASS(passManager, "Preprocessing");
-      if (compileTo == IREEVMPipelinePhase::Preprocessing)
-        return;  // early-exit
+  case SchedulingOptions::ExecutionModel::HostOnly:
+    // No flow/stream processing (implies no tensors).
+    break;
+  default:
+    IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "Preprocessing");
+    IREE::buildPreprocessingPassPipeline(passManager, preprocessingOptions,
+                                         hooks.pipelineExtensions);
+    IREE_TRACE_ADD_END_FRAME_PASS(passManager, "Preprocessing");
+    if (compileTo == IREEVMPipelinePhase::Preprocessing)
+      return; // early-exit
 
-      IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "Flow");
-      IREE::Flow::buildFlowTransformPassPipeline(passManager, flowOptions);
-      IREE_TRACE_ADD_END_FRAME_PASS(passManager, "Flow");
-      if (compileTo == IREEVMPipelinePhase::Flow) return;  // early-exit
+    IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "Flow");
+    IREE::Flow::buildFlowTransformPassPipeline(passManager, flowOptions);
+    IREE_TRACE_ADD_END_FRAME_PASS(passManager, "Flow");
+    if (compileTo == IREEVMPipelinePhase::Flow)
+      return; // early-exit
 
-      IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "Stream");
-      IREE::Stream::buildStreamTransformPassPipeline(passManager,
-                                                     streamOptions);
-      IREE_TRACE_ADD_END_FRAME_PASS(passManager, "Stream");
-      if (compileTo == IREEVMPipelinePhase::Stream) return;  // early-exit
-      break;
+    IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "Stream");
+    IREE::Stream::buildStreamTransformPassPipeline(passManager, streamOptions);
+    IREE_TRACE_ADD_END_FRAME_PASS(passManager, "Stream");
+    if (compileTo == IREEVMPipelinePhase::Stream)
+      return; // early-exit
+    break;
   }
 
   IREE::HAL::PipelinePhase halCompileTo;
   switch (compileTo) {
-    default:
-      halCompileTo = IREE::HAL::PipelinePhase::End;
-      break;
-    case IREEVMPipelinePhase::ExecutableSources:
-      halCompileTo = IREE::HAL::PipelinePhase::ExecutableSources;
-      break;
-    case IREEVMPipelinePhase::ExecutableTargets:
-      halCompileTo = IREE::HAL::PipelinePhase::ExecutableTargets;
-      break;
+  default:
+    halCompileTo = IREE::HAL::PipelinePhase::End;
+    break;
+  case IREEVMPipelinePhase::ExecutableSources:
+    halCompileTo = IREE::HAL::PipelinePhase::ExecutableSources;
+    break;
+  case IREEVMPipelinePhase::ExecutableTargets:
+    halCompileTo = IREE::HAL::PipelinePhase::ExecutableTargets;
+    break;
   }
 
   IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "HAL");
   switch (schedulingOptions.executionModel) {
-    case SchedulingOptions::ExecutionModel::HostOnly:
-      // No HAL required.
-      break;
-    default:
-    case SchedulingOptions::ExecutionModel::AsyncInternal:
-    case SchedulingOptions::ExecutionModel::AsyncExternal:
-      IREE::HAL::buildHALTransformPassPipeline(passManager, targetRegistry,
-                                               executableOptions, halCompileTo);
-      break;
-    case SchedulingOptions::ExecutionModel::InlineStatic:
-      IREE::HAL::Inline::buildHALInlineStaticTransformPassPipeline(
-          passManager, targetRegistry, executableOptions);
-      break;
-    case SchedulingOptions::ExecutionModel::InlineDynamic:
-      IREE::HAL::Loader::buildHALInlineDynamicTransformPassPipeline(
-          passManager, targetRegistry, executableOptions);
-      break;
+  case SchedulingOptions::ExecutionModel::HostOnly:
+    // No HAL required.
+    break;
+  default:
+  case SchedulingOptions::ExecutionModel::AsyncInternal:
+  case SchedulingOptions::ExecutionModel::AsyncExternal:
+    IREE::HAL::buildHALTransformPassPipeline(passManager, targetRegistry,
+                                             executableOptions, halCompileTo);
+    break;
+  case SchedulingOptions::ExecutionModel::InlineStatic:
+    IREE::HAL::Inline::buildHALInlineStaticTransformPassPipeline(
+        passManager, targetRegistry, executableOptions);
+    break;
+  case SchedulingOptions::ExecutionModel::InlineDynamic:
+    IREE::HAL::Loader::buildHALInlineDynamicTransformPassPipeline(
+        passManager, targetRegistry, executableOptions);
+    break;
   }
   IREE_TRACE_ADD_END_FRAME_PASS(passManager, "HAL");
   if (compileTo == IREEVMPipelinePhase::HAL ||
       halCompileTo != IREE::HAL::PipelinePhase::End) {
-    return;  // early-exit
+    return; // early-exit
   }
 
   IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, "VM");
   IREE::VM::buildVMTransformPassPipeline(passManager, targetOptions);
   passManager.addPass(IREE::Util::createDropCompilerHintsPass());
   IREE_TRACE_ADD_END_FRAME_PASS(passManager, "VM");
-  if (compileTo == IREEVMPipelinePhase::VM) return;  // early-exit
+  if (compileTo == IREEVMPipelinePhase::VM)
+    return; // early-exit
 }
 
 void buildDefaultIREEVMTransformPassPipeline(OpPassManager &passManager) {
@@ -250,5 +254,5 @@
       });
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Pipelines/Pipelines.h b/compiler/src/iree/compiler/Pipelines/Pipelines.h
index 9ada1a0..f3a010f 100644
--- a/compiler/src/iree/compiler/Pipelines/Pipelines.h
+++ b/compiler/src/iree/compiler/Pipelines/Pipelines.h
@@ -95,7 +95,7 @@
 // Registration hooks.
 void registerIREEVMTransformPassPipeline();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_PIPELINES_PIPELINES_H_
+#endif // IREE_COMPILER_PIPELINES_PIPELINES_H_
diff --git a/compiler/src/iree/compiler/PluginAPI/Client.cpp b/compiler/src/iree/compiler/PluginAPI/Client.cpp
index 341fac8..632976e 100644
--- a/compiler/src/iree/compiler/PluginAPI/Client.cpp
+++ b/compiler/src/iree/compiler/PluginAPI/Client.cpp
@@ -44,4 +44,4 @@
   }
 }
 
-}  // namespace mlir::iree_compiler
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/PluginAPI/Client.h b/compiler/src/iree/compiler/PluginAPI/Client.h
index 909f6cd..73ebfde 100644
--- a/compiler/src/iree/compiler/PluginAPI/Client.h
+++ b/compiler/src/iree/compiler/PluginAPI/Client.h
@@ -18,7 +18,7 @@
 class DialectRegistry;
 class MLIRContext;
 class OpPassManager;
-}  // namespace mlir
+} // namespace mlir
 
 namespace mlir::iree_compiler {
 
@@ -26,7 +26,7 @@
 // Forward declared only from Dialect/HAL/Target/TargetRegistry.h so as to avoid
 // bringing full dependencies into the plugin API.
 class TargetBackendList;
-}  // namespace IREE::HAL
+} // namespace IREE::HAL
 
 class AbstractPluginSession;
 class PluginRegistrar;
@@ -43,7 +43,7 @@
 // Entrypoints for extending IREE's pass pipelines at various stages.
 // Override what is needed.
 class PipelineExtensions {
- public:
+public:
   virtual ~PipelineExtensions();
 
   // Add passes to the input preprocessing pipeline, which allows to process the
@@ -76,7 +76,7 @@
 // This is typically not instantiated directly but via the PluginSession
 // CRTP helper which manages most details.
 class AbstractPluginRegistration {
- public:
+public:
   AbstractPluginRegistration(std::string pluginId)
       : pluginId(std::move(pluginId)) {}
   virtual ~AbstractPluginRegistration();
@@ -121,10 +121,10 @@
   // the session instance. This will be called globally for all available
   // plugins so that option registration can happen first. It must have
   // no overhead beyond allocating some memory and setting up options.
-  virtual std::unique_ptr<AbstractPluginSession> createUninitializedSession(
-      OptionsBinder &localOptionsBinder) = 0;
+  virtual std::unique_ptr<AbstractPluginSession>
+  createUninitializedSession(OptionsBinder &localOptionsBinder) = 0;
 
- private:
+private:
   std::string pluginId;
 };
 
@@ -139,7 +139,7 @@
 // which adds some niceties and support for global command line option
 // registration.
 class AbstractPluginSession : public PipelineExtensions {
- public:
+public:
   virtual ~AbstractPluginSession();
 
   // Called prior to context initialization in order to register dialects.
@@ -153,10 +153,10 @@
 
   // If the plugin contributes HAL target backends, then it must return a
   // pointer to the plugin session-owned registry here. Otherwise, nullptr.
-  virtual void populateHALTargetBackends(
-      IREE::HAL::TargetBackendList &targets) {}
+  virtual void
+  populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {}
 
- protected:
+protected:
   // Called from registerDialects() prior to initializing the context and
   // prior to onActivate().
   virtual void onRegisterDialects(DialectRegistry &registry) {}
@@ -172,7 +172,7 @@
           PluginActivationPolicy activationPolicy =
               PluginActivationPolicy::Explicit>
 class PluginSession : public AbstractPluginSession {
- public:
+public:
   using Options = OptionsTy;
   const Options &getOptions() { return options; }
 
@@ -200,8 +200,8 @@
       // Forward to the CRTP derived type.
       DerivedTy::registerGlobalDialects(registry);
     }
-    std::unique_ptr<AbstractPluginSession> createUninitializedSession(
-        OptionsBinder &localOptionsBinder) override {
+    std::unique_ptr<AbstractPluginSession>
+    createUninitializedSession(OptionsBinder &localOptionsBinder) override {
       auto instance = std::make_unique<DerivedTy>();
       if (globalCLIOptions) {
         // Bootstrap the local options with global CLI options.
@@ -214,7 +214,7 @@
     std::optional<OptionsTy *> globalCLIOptions;
   };
 
- protected:
+protected:
   OptionsTy options;
   friend struct Registration;
 };
@@ -222,7 +222,7 @@
 // Interface to the registration system.
 // Implemented by PluginManager.
 class PluginRegistrar {
- public:
+public:
   // Register a plugin based on a registration class.
   void registerPlugin(std::unique_ptr<AbstractPluginRegistration> registration);
 
@@ -237,10 +237,10 @@
     registerPlugin(std::move(registration));
   }
 
- protected:
+protected:
   llvm::StringMap<std::unique_ptr<AbstractPluginRegistration>> registrations;
 };
 
-}  // namespace mlir::iree_compiler
+} // namespace mlir::iree_compiler
 
-#endif  // IREE_COMPILER_PLUGINAPI_CLIENT_H_
+#endif // IREE_COMPILER_PLUGINAPI_CLIENT_H_
diff --git a/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp b/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp
index ee0e857..79f8039 100644
--- a/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp
+++ b/compiler/src/iree/compiler/PluginAPI/PluginManager.cpp
@@ -15,8 +15,8 @@
 #include "mlir/IR/Location.h"
 
 // Declare entrypoints for each statically registered plugin.
-#define HANDLE_PLUGIN_ID(plugin_id)                          \
-  extern "C" bool iree_register_compiler_plugin_##plugin_id( \
+#define HANDLE_PLUGIN_ID(plugin_id)                                            \
+  extern "C" bool iree_register_compiler_plugin_##plugin_id(                   \
       mlir::iree_compiler::PluginRegistrar *);
 #include "iree/compiler/PluginAPI/Config/StaticLinkedPlugins.inc"
 #undef HANDLE_PLUGIN_ID
@@ -41,8 +41,9 @@
 
 bool PluginManager::loadAvailablePlugins() {
 // Initialize static plugins.
-#define HANDLE_PLUGIN_ID(plugin_id) \
-  if (!iree_register_compiler_plugin_##plugin_id(this)) return false;
+#define HANDLE_PLUGIN_ID(plugin_id)                                            \
+  if (!iree_register_compiler_plugin_##plugin_id(this))                        \
+    return false;
 #include "iree/compiler/PluginAPI/Config/StaticLinkedPlugins.inc"
 #undef HANDLE_PLUGIN_ID
   return true;
@@ -135,7 +136,8 @@
     }
 
     // Skip if already initialized.
-    if (!initializedIds.insert(it.first()).second) continue;
+    if (!initializedIds.insert(it.first()).second)
+      continue;
 
     if (options.printPluginInfo) {
       llvm::errs() << "[IREE plugins]: Initializing default '" << it.first()
@@ -154,7 +156,8 @@
     }
 
     // Skip if already initialized.
-    if (!initializedIds.insert(pluginId).second) continue;
+    if (!initializedIds.insert(pluginId).second)
+      continue;
 
     if (options.printPluginInfo) {
       llvm::errs() << "[IREE plugins]: Initializing plugin '" << pluginId
@@ -184,7 +187,8 @@
 
 LogicalResult PluginManagerSession::activatePlugins(MLIRContext *context) {
   for (auto *s : initializedSessions) {
-    if (failed(s->activate(context))) return failure();
+    if (failed(s->activate(context)))
+      return failure();
   }
   return success();
 }
@@ -196,4 +200,4 @@
   }
 }
 
-}  // namespace mlir::iree_compiler
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/PluginAPI/PluginManager.h b/compiler/src/iree/compiler/PluginAPI/PluginManager.h
index 7c5805b..8debb5b 100644
--- a/compiler/src/iree/compiler/PluginAPI/PluginManager.h
+++ b/compiler/src/iree/compiler/PluginAPI/PluginManager.h
@@ -23,7 +23,7 @@
 
 // Command line options for the plugin manager.
 class PluginManagerOptions {
- public:
+public:
   // Plugins to be activated in a session.
   llvm::SmallVector<std::string> plugins;
 
@@ -47,7 +47,7 @@
 // Most of the work of a plugin is done at session initialization time when
 // an MLIRContext is available.
 class PluginManager : public PluginRegistrar {
- public:
+public:
   PluginManager();
 
   // Initializes the plugin manager. Since this may do shared library opening
@@ -74,13 +74,13 @@
   // Gets a list of all loaded plugin names.
   llvm::SmallVector<std::string> getLoadedPlugins();
 
- private:
+private:
   friend class PluginManagerSession;
 };
 
 // Holds activated plugins for an |iree_compiler_session_t|.
 class PluginManagerSession : public PipelineExtensions {
- public:
+public:
   PluginManagerSession(PluginManager &pluginManager, OptionsBinder &binder,
                        PluginManagerOptions &options);
 
@@ -113,7 +113,7 @@
   // plugins.
   void populateHALTargetBackends(IREE::HAL::TargetBackendList &list);
 
- private:
+private:
   PluginManagerOptions &options;
   // At construction, uninitialized plugin sessions are created for all
   // registered plugins so that CLI options can be set properly.
@@ -126,6 +126,6 @@
   llvm::SmallVector<AbstractPluginSession *> initializedSessions;
 };
 
-}  // namespace mlir::iree_compiler
+} // namespace mlir::iree_compiler
 
-#endif  // IREE_COMPILER_PLUGINAPI_PLUGINMANAGER_H_
+#endif // IREE_COMPILER_PLUGINAPI_PLUGINMANAGER_H_
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/ConvertConv2DToImg2Col.cpp b/compiler/src/iree/compiler/Preprocessing/Common/ConvertConv2DToImg2Col.cpp
index 4269378..12d9c49 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/ConvertConv2DToImg2Col.cpp
+++ b/compiler/src/iree/compiler/Preprocessing/Common/ConvertConv2DToImg2Col.cpp
@@ -30,13 +30,15 @@
 
 static Value createAdd(Location loc, Value x, Value y, bool isInt,
                        OpBuilder &builder) {
-  if (isInt) return builder.create<arith::AddIOp>(loc, x, y);
+  if (isInt)
+    return builder.create<arith::AddIOp>(loc, x, y);
   return builder.create<arith::AddFOp>(loc, x, y);
 }
 
 static Value createMul(Location loc, Value x, Value y, bool isInt,
                        OpBuilder &builder) {
-  if (isInt) return builder.create<arith::MulIOp>(loc, x, y);
+  if (isInt)
+    return builder.create<arith::MulIOp>(loc, x, y);
   return builder.create<arith::MulFOp>(loc, x, y);
 }
 
@@ -77,7 +79,7 @@
 // multplication.
 class ConvertConv2DNhwcHwcf final
     : public OpRewritePattern<linalg::Conv2DNhwcHwcfOp> {
- public:
+public:
   using OpRewritePattern::OpRewritePattern;
 
   LogicalResult matchAndRewrite(linalg::Conv2DNhwcHwcfOp convOp,
@@ -91,7 +93,8 @@
     }
 
     // TODO: Support dilation.
-    if (!hasAllOneValues(convOp.getDilations())) return failure();
+    if (!hasAllOneValues(convOp.getDilations()))
+      return failure();
 
     Value input = convOp.getInputs()[0];
     Value filter = convOp.getInputs()[1];
@@ -228,7 +231,7 @@
 // is a batched matrix-vector product.
 class ConvertDepthwiseConv2DNhwcHwc final
     : public OpRewritePattern<linalg::DepthwiseConv2DNhwcHwcOp> {
- public:
+public:
   using OpRewritePattern<linalg::DepthwiseConv2DNhwcHwcOp>::OpRewritePattern;
 
   LogicalResult matchAndRewrite(linalg::DepthwiseConv2DNhwcHwcOp convOp,
@@ -245,7 +248,8 @@
     }
 
     // TODO: Support dilation.
-    if (!hasAllOneValues(convOp.getDilations())) return failure();
+    if (!hasAllOneValues(convOp.getDilations()))
+      return failure();
 
     auto loc = convOp.getLoc();
 
@@ -384,7 +388,7 @@
 // (i.e. (D, C x Kh x Kw) * (C x Kh x Kw, Ho x Wo))
 class ConvertConv2DNchwFchw final
     : public OpRewritePattern<linalg::Conv2DNchwFchwOp> {
- public:
+public:
   using OpRewritePattern::OpRewritePattern;
 
   LogicalResult matchAndRewrite(linalg::Conv2DNchwFchwOp convOp,
@@ -398,7 +402,8 @@
     }
 
     // TODO: Support dilation.
-    if (!hasAllOneValues(convOp.getDilations())) return failure();
+    if (!hasAllOneValues(convOp.getDilations()))
+      return failure();
 
     Value input = convOp.getInputs()[0];
     Value filter = convOp.getInputs()[1];
@@ -545,12 +550,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createConvertConv2DToImg2ColPass() {
   return std::make_unique<ConvertConv2DToImg2ColPass>();
 }
 
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/PadLinalgOps.cpp b/compiler/src/iree/compiler/Preprocessing/Common/PadLinalgOps.cpp
index 43fb017..676498a 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/PadLinalgOps.cpp
+++ b/compiler/src/iree/compiler/Preprocessing/Common/PadLinalgOps.cpp
@@ -22,7 +22,7 @@
 /// A pattern to pad statically shaped matmul operands to the next integer
 /// multiple of padSize.
 class PadMatmulOp : public OpInterfaceRewritePattern<linalg::LinalgOp> {
- public:
+public:
   PadMatmulOp(MLIRContext *context, int size, PatternBenefit benefit = 1)
       : OpInterfaceRewritePattern(context, benefit), paddingSize(size) {}
 
@@ -31,7 +31,8 @@
     Operation *op = linalgOp.getOperation();
     const bool isBatchMatmul = isa<linalg::BatchMatmulOp>(op);
     const bool isMatmul = isa<linalg::MatmulOp>(op);
-    if (!isBatchMatmul && !isMatmul) return failure();
+    if (!isBatchMatmul && !isMatmul)
+      return failure();
 
     Location loc = linalgOp.getLoc();
     Value lhs = linalgOp.getDpsInputOperand(0)->get();
@@ -42,7 +43,8 @@
     auto rhsType = llvm::dyn_cast<RankedTensorType>(rhs.getType());
     auto resultType = llvm::dyn_cast<RankedTensorType>(result.getType());
 
-    if (!lhsType || !rhsType) return failure();
+    if (!lhsType || !rhsType)
+      return failure();
 
     if (!lhsType.hasStaticShape() || !rhsType.hasStaticShape())
       return failure();
@@ -67,7 +69,8 @@
 
     auto getFullShape = [&](ArrayRef<int> dims) {
       SmallVector<int64_t, 3> shape;
-      if (isBatchMatmul) shape.push_back(B);
+      if (isBatchMatmul)
+        shape.push_back(B);
       llvm::append_range(shape, dims);
       return shape;
     };
@@ -148,12 +151,12 @@
     return success();
   }
 
- private:
+private:
   int paddingSize;
 };
 
 class PadLinalgOpsPass : public PadLinalgOpsBase<PadLinalgOpsPass> {
- public:
+public:
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<linalg::LinalgDialect>();
   }
@@ -168,12 +171,12 @@
   }
 };
 
-}  // namespace
+} // namespace
 
 std::unique_ptr<Pass> createPadLinalgOpsToIntegerMultiplePass() {
   return std::make_unique<PadLinalgOpsPass>();
 }
 
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/PassDetail.h b/compiler/src/iree/compiler/Preprocessing/Common/PassDetail.h
index 707dc81..6a9fdf4 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/PassDetail.h
+++ b/compiler/src/iree/compiler/Preprocessing/Common/PassDetail.h
@@ -16,10 +16,10 @@
 namespace IREE {
 
 #define GEN_PASS_CLASSES
-#include "iree/compiler/Preprocessing/Common/Passes.h.inc"  // IWYU pragma: keep
+#include "iree/compiler/Preprocessing/Common/Passes.h.inc" // IWYU pragma: keep
 
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_PREPROCESSING_COMMON_PASS_DETAIL_H_
+#endif // IREE_COMPILER_PREPROCESSING_COMMON_PASS_DETAIL_H_
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/Passes.cpp b/compiler/src/iree/compiler/Preprocessing/Common/Passes.cpp
index 759f388..50e3fb2 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/Passes.cpp
+++ b/compiler/src/iree/compiler/Preprocessing/Common/Passes.cpp
@@ -11,11 +11,11 @@
 
 namespace {
 #define GEN_PASS_REGISTRATION
-#include "iree/compiler/Preprocessing/Common/Passes.h.inc"  // IWYU pragma: export
-}  // namespace
+#include "iree/compiler/Preprocessing/Common/Passes.h.inc" // IWYU pragma: export
+} // namespace
 
 void registerCommonPreprocessingPasses() { registerPasses(); }
 
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/Passes.h b/compiler/src/iree/compiler/Preprocessing/Common/Passes.h
index f36dae5..5b4f9f5 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/Passes.h
+++ b/compiler/src/iree/compiler/Preprocessing/Common/Passes.h
@@ -29,8 +29,8 @@
 
 void registerCommonPreprocessingPasses();
 
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_PREPROCESSING_COMMON_PASSES_H_W_TRANSFORMS_PASSES_H_
+#endif // IREE_COMPILER_PREPROCESSING_COMMON_PASSES_H_W_TRANSFORMS_PASSES_H_
diff --git a/compiler/src/iree/compiler/Preprocessing/Passes.cpp b/compiler/src/iree/compiler/Preprocessing/Passes.cpp
index 27e0132..3c28c7d 100644
--- a/compiler/src/iree/compiler/Preprocessing/Passes.cpp
+++ b/compiler/src/iree/compiler/Preprocessing/Passes.cpp
@@ -54,7 +54,7 @@
   });
 }
 
-}  // namespace
+} // namespace
 
 void buildPreprocessingPassPipeline(
     OpPassManager &passManager,
@@ -73,6 +73,6 @@
 
 void registerPreprocessingPasses() { registerCommonPreprocessingPasses(); }
 
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Preprocessing/Passes.h b/compiler/src/iree/compiler/Preprocessing/Passes.h
index 22ed1b5..2170bdc 100644
--- a/compiler/src/iree/compiler/Preprocessing/Passes.h
+++ b/compiler/src/iree/compiler/Preprocessing/Passes.h
@@ -29,8 +29,8 @@
 
 void registerPreprocessingPasses();
 
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_PREPROCESSING_PASSES_H_
+#endif // IREE_COMPILER_PREPROCESSING_PASSES_H_
diff --git a/compiler/src/iree/compiler/Tools/init_compiler_modules.h b/compiler/src/iree/compiler/Tools/init_compiler_modules.h
index 090c14b..c5a83e4 100644
--- a/compiler/src/iree/compiler/Tools/init_compiler_modules.h
+++ b/compiler/src/iree/compiler/Tools/init_compiler_modules.h
@@ -19,7 +19,7 @@
   // clang-format on
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_COMPILER_MODULES_H_
+#endif // IREE_COMPILER_TOOLS_INIT_COMPILER_MODULES_H_
diff --git a/compiler/src/iree/compiler/Tools/init_dialects.h b/compiler/src/iree/compiler/Tools/init_dialects.h
index 804c382..62fb776 100644
--- a/compiler/src/iree/compiler/Tools/init_dialects.h
+++ b/compiler/src/iree/compiler/Tools/init_dialects.h
@@ -28,7 +28,7 @@
   mlir::iree_compiler::registerIreeCompilerModuleDialects(registry);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_DIALECTS_H_
+#endif // IREE_COMPILER_TOOLS_INIT_DIALECTS_H_
diff --git a/compiler/src/iree/compiler/Tools/init_input_dialects.cc b/compiler/src/iree/compiler/Tools/init_input_dialects.cc
index d7a3296..db21dde 100644
--- a/compiler/src/iree/compiler/Tools/init_input_dialects.cc
+++ b/compiler/src/iree/compiler/Tools/init_input_dialects.cc
@@ -9,13 +9,13 @@
 #ifdef IREE_HAVE_STABLEHLO_INPUT
 #include "stablehlo/dialect/ChloOps.h"
 #include "stablehlo/dialect/StablehloOps.h"
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
 #include "torch-mlir-dialects/Dialect/TMTensor/IR/TMTensorDialect.h"
 #endif
 #ifdef IREE_HAVE_TOSA_INPUT
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 
 namespace mlir {
 namespace iree_compiler {
@@ -23,14 +23,14 @@
 void registerInputDialects(DialectRegistry &registry) {
 #ifdef IREE_HAVE_STABLEHLO_INPUT
   registry.insert<mlir::chlo::ChloDialect, mlir::stablehlo::StablehloDialect>();
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
   registry.insert<mlir::torch::TMTensor::TMTensorDialect>();
-#endif  // IREE_HAVE_TORCH_INPUT
+#endif // IREE_HAVE_TORCH_INPUT
 #ifdef IREE_HAVE_TOSA_INPUT
   registry.insert<tosa::TosaDialect>();
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Tools/init_input_dialects.h b/compiler/src/iree/compiler/Tools/init_input_dialects.h
index 654f46e..d4d2957 100644
--- a/compiler/src/iree/compiler/Tools/init_input_dialects.h
+++ b/compiler/src/iree/compiler/Tools/init_input_dialects.h
@@ -19,7 +19,7 @@
 
 void registerInputDialects(DialectRegistry &registry);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_INPUT_DIALECTS_H_
+#endif // IREE_COMPILER_TOOLS_INIT_INPUT_DIALECTS_H_
diff --git a/compiler/src/iree/compiler/Tools/init_input_passes.cc b/compiler/src/iree/compiler/Tools/init_input_passes.cc
index ab2faec..73718e5 100644
--- a/compiler/src/iree/compiler/Tools/init_input_passes.cc
+++ b/compiler/src/iree/compiler/Tools/init_input_passes.cc
@@ -10,15 +10,15 @@
 
 #ifdef IREE_HAVE_STABLEHLO_INPUT
 #include "iree/compiler/InputConversion/StableHLO/Passes.h"
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
 #include "iree/compiler/InputConversion/TMTensor/Passes.h"
-#endif  // IREE_HAVE_TORCH_INPUT
+#endif // IREE_HAVE_TORCH_INPUT
 #ifdef IREE_HAVE_TOSA_INPUT
 #include "iree/compiler/InputConversion/TOSA/Passes.h"
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Dialect/Tosa/Transforms/Passes.h"
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 
 namespace mlir {
 namespace iree_compiler {
@@ -28,7 +28,7 @@
 
 #ifdef IREE_HAVE_STABLEHLO_INPUT
   stablehlo::registerStableHLOConversionPasses();
-#endif  // IREE_HAVE_STABLEHLO_INPUT
+#endif // IREE_HAVE_STABLEHLO_INPUT
 #ifdef IREE_HAVE_TORCH_INPUT
   TMTensor::registerTMTensorConversionPasses();
 #endif
@@ -37,8 +37,8 @@
   registerTosaToArithPass();
   registerTosaToLinalgPass();
   registerTosaToTensorPass();
-#endif  // IREE_HAVE_TOSA_INPUT
+#endif // IREE_HAVE_TOSA_INPUT
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Tools/init_input_passes.h b/compiler/src/iree/compiler/Tools/init_input_passes.h
index af3eca8..ae338c2 100644
--- a/compiler/src/iree/compiler/Tools/init_input_passes.h
+++ b/compiler/src/iree/compiler/Tools/init_input_passes.h
@@ -18,7 +18,7 @@
 // Registers IREE input conversion passes with the global registry.
 void registerInputPasses();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_INPUT_PASSES_H_
+#endif // IREE_COMPILER_TOOLS_INIT_INPUT_PASSES_H_
diff --git a/compiler/src/iree/compiler/Tools/init_iree_dialects.h b/compiler/src/iree/compiler/Tools/init_iree_dialects.h
index c556c62..4dda2a2 100644
--- a/compiler/src/iree/compiler/Tools/init_iree_dialects.h
+++ b/compiler/src/iree/compiler/Tools/init_iree_dialects.h
@@ -58,7 +58,7 @@
   registerUKernelBufferizationInterface(registry);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_IREE_DIALECTS_H_
+#endif // IREE_COMPILER_TOOLS_INIT_IREE_DIALECTS_H_
diff --git a/compiler/src/iree/compiler/Tools/init_iree_passes.h b/compiler/src/iree/compiler/Tools/init_iree_passes.h
index 55d5962..b1930a6 100644
--- a/compiler/src/iree/compiler/Tools/init_iree_passes.h
+++ b/compiler/src/iree/compiler/Tools/init_iree_passes.h
@@ -33,7 +33,7 @@
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
 // TODO: Remove these once rolled up into explicit registration.
 #include "iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h"
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
 
 namespace mlir {
 namespace iree_compiler {
@@ -67,10 +67,10 @@
   // TODO: Eliminate these.
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
   IREE::VM::createConvertVMToEmitCPass();
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_IREE_PASSES_H_
+#endif // IREE_COMPILER_TOOLS_INIT_IREE_PASSES_H_
diff --git a/compiler/src/iree/compiler/Tools/init_llvmir_translations.h b/compiler/src/iree/compiler/Tools/init_llvmir_translations.h
index 3c49753..dc83e12 100644
--- a/compiler/src/iree/compiler/Tools/init_llvmir_translations.h
+++ b/compiler/src/iree/compiler/Tools/init_llvmir_translations.h
@@ -26,7 +26,7 @@
   mlir::registerArmNeonDialectTranslation(registry);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_LLVMIR_TRANSLATIONS_H_
+#endif // IREE_COMPILER_TOOLS_INIT_LLVMIR_TRANSLATIONS_H_
diff --git a/compiler/src/iree/compiler/Tools/init_mlir_dialects.h b/compiler/src/iree/compiler/Tools/init_mlir_dialects.h
index 114b756..80d22d8 100644
--- a/compiler/src/iree/compiler/Tools/init_mlir_dialects.h
+++ b/compiler/src/iree/compiler/Tools/init_mlir_dialects.h
@@ -41,7 +41,7 @@
 
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
 
 namespace mlir {
 
@@ -78,9 +78,9 @@
 
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
   registry.insert<emitc::EmitCDialect>();
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
 }
 
-}  // namespace mlir
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_MLIR_DIALECTS_H_
+#endif // IREE_COMPILER_TOOLS_INIT_MLIR_DIALECTS_H_
diff --git a/compiler/src/iree/compiler/Tools/init_mlir_passes.h b/compiler/src/iree/compiler/Tools/init_mlir_passes.h
index d1f2d18..2e8216a 100644
--- a/compiler/src/iree/compiler/Tools/init_mlir_passes.h
+++ b/compiler/src/iree/compiler/Tools/init_mlir_passes.h
@@ -80,6 +80,6 @@
   registerConvertFuncToSPIRVPass();
 }
 
-}  // namespace mlir
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_MLIR_PASSES_H_
+#endif // IREE_COMPILER_TOOLS_INIT_MLIR_PASSES_H_
diff --git a/compiler/src/iree/compiler/Tools/init_passes.h b/compiler/src/iree/compiler/Tools/init_passes.h
index 30dce53..b8ea190 100644
--- a/compiler/src/iree/compiler/Tools/init_passes.h
+++ b/compiler/src/iree/compiler/Tools/init_passes.h
@@ -29,7 +29,7 @@
   registerHALConversionPasses();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_PASSES_H_
+#endif // IREE_COMPILER_TOOLS_INIT_PASSES_H_
diff --git a/compiler/src/iree/compiler/Tools/init_targets.cc b/compiler/src/iree/compiler/Tools/init_targets.cc
index f9c7d13..296b0e0 100644
--- a/compiler/src/iree/compiler/Tools/init_targets.cc
+++ b/compiler/src/iree/compiler/Tools/init_targets.cc
@@ -10,22 +10,22 @@
 
 #ifdef IREE_HAVE_LLVM_CPU_TARGET
 #include "iree/compiler/Dialect/HAL/Target/LLVMCPU/LLVMCPUTarget.h"
-#endif  // IREE_HAVE_LLVM_CPU_TARGET
+#endif // IREE_HAVE_LLVM_CPU_TARGET
 #ifdef IREE_HAVE_METALSPIRV_TARGET
 #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h"
-#endif  // IREE_HAVE_METALSPIRV_TARGET
+#endif // IREE_HAVE_METALSPIRV_TARGET
 #ifdef IREE_HAVE_ROCM_TARGET
 #include "iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.h"
-#endif  // IREE_HAVE_ROCM_TARGET
+#endif // IREE_HAVE_ROCM_TARGET
 #ifdef IREE_HAVE_VMVX_TARGET
 #include "iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h"
-#endif  // IREE_HAVE_VMVX_TARGET
+#endif // IREE_HAVE_VMVX_TARGET
 #ifdef IREE_HAVE_VULKANSPIRV_TARGET
 #include "iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h"
-#endif  // IREE_HAVE_VULKANSPIRV_TARGET
+#endif // IREE_HAVE_VULKANSPIRV_TARGET
 #ifdef IREE_HAVE_WEBGPU_TARGET
 #include "iree/compiler/Dialect/HAL/Target/WebGPU/WebGPUTarget.h"
-#endif  // IREE_HAVE_WEBGPU_TARGET
+#endif // IREE_HAVE_WEBGPU_TARGET
 
 namespace mlir {
 namespace iree_compiler {
@@ -40,28 +40,28 @@
 #ifdef IREE_HAVE_LLVM_CPU_TARGET
     IREE::HAL::registerLLVMCPUTargetBackends(
         []() { return IREE::HAL::getLLVMTargetOptionsFromFlags(); });
-#endif  // IREE_HAVE_LLVM_CPU_TARGET
+#endif // IREE_HAVE_LLVM_CPU_TARGET
 #ifdef IREE_HAVE_METALSPIRV_TARGET
     IREE::HAL::registerMetalSPIRVTargetBackends();
-#endif  // IREE_HAVE_METALSPIRV_TARGET
+#endif // IREE_HAVE_METALSPIRV_TARGET
 #ifdef IREE_HAVE_ROCM_TARGET
     IREE::HAL::registerROCMTargetBackends();
-#endif  // IREE_HAVE_ROCM_TARGET
+#endif // IREE_HAVE_ROCM_TARGET
 #ifdef IREE_HAVE_VMVX_TARGET
     IREE::HAL::registerVMVXTargetBackends();
-#endif  // IREE_HAVE_VMVX_TARGET
+#endif // IREE_HAVE_VMVX_TARGET
 #ifdef IREE_HAVE_VULKANSPIRV_TARGET
     IREE::HAL::registerVulkanSPIRVTargetBackends(
         []() { return IREE::HAL::getVulkanSPIRVTargetOptionsFromFlags(); });
-#endif  // IREE_HAVE_VULKANSPIRV_TARGET
+#endif // IREE_HAVE_VULKANSPIRV_TARGET
 #ifdef IREE_HAVE_WEBGPU_TARGET
     IREE::HAL::registerWebGPUTargetBackends(
         []() { return IREE::HAL::getWebGPUTargetOptionsFromFlags(); });
-#endif  // IREE_HAVE_WEBGPU_TARGET
+#endif // IREE_HAVE_WEBGPU_TARGET
     return true;
   }();
   (void)init_once;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Tools/init_targets.h b/compiler/src/iree/compiler/Tools/init_targets.h
index 07dcf06..20be09a 100644
--- a/compiler/src/iree/compiler/Tools/init_targets.h
+++ b/compiler/src/iree/compiler/Tools/init_targets.h
@@ -16,7 +16,7 @@
 // need.
 void registerHALTargetBackends();
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_INIT_TARGETS_H_
+#endif // IREE_COMPILER_TOOLS_INIT_TARGETS_H_
diff --git a/compiler/src/iree/compiler/Tools/iree_compile_lib.cc b/compiler/src/iree/compiler/Tools/iree_compile_lib.cc
index 951ea55..c0bc241 100644
--- a/compiler/src/iree/compiler/Tools/iree_compile_lib.cc
+++ b/compiler/src/iree/compiler/Tools/iree_compile_lib.cc
@@ -43,10 +43,10 @@
   hal_executable,
 };
 
-}  // namespace
+} // namespace
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 int mlir::iree_compiler::runIreecMain(int argc, char **argv) {
   static llvm::cl::OptionCategory mainOptions("IREE Main Options");
@@ -69,7 +69,7 @@
                      "IREE VM Bytecode (default)"),
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
           clEnumValN(OutputFormat::vm_c, "vm-c", "C source module"),
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
           clEnumValN(OutputFormat::vm_asm, "vm-asm", "IREE VM MLIR Assembly")),
       llvm::cl::init(OutputFormat::vm_bytecode), llvm::cl::cat(mainOptions));
 
@@ -185,25 +185,26 @@
     ireeCompilerInvocationSetCompileToPhase(
         r.inv, compileToPhases[static_cast<int>(compileTo.getValue())].c_str());
     ireeCompilerInvocationSetVerifyIR(r.inv, verifyIR);
-    if (!ireeCompilerInvocationParseSource(r.inv, source)) return false;
+    if (!ireeCompilerInvocationParseSource(r.inv, source))
+      return false;
 
     // Switch on compileMode to choose a pipeline to run.
     switch (compileMode) {
-      case CompileMode::std:
-        if (!ireeCompilerInvocationPipeline(r.inv, IREE_COMPILER_PIPELINE_STD))
-          return false;
-        break;
-      case CompileMode::vm:
-        break;
-      case CompileMode::hal_executable: {
-        if (!ireeCompilerInvocationPipeline(
-                r.inv, IREE_COMPILER_PIPELINE_HAL_EXECUTABLE))
-          return false;
-        break;
-      }
-      default:
-        llvm::errs() << "INTERNAL ERROR: unknown compile mode\n";
+    case CompileMode::std:
+      if (!ireeCompilerInvocationPipeline(r.inv, IREE_COMPILER_PIPELINE_STD))
         return false;
+      break;
+    case CompileMode::vm:
+      break;
+    case CompileMode::hal_executable: {
+      if (!ireeCompilerInvocationPipeline(
+              r.inv, IREE_COMPILER_PIPELINE_HAL_EXECUTABLE))
+        return false;
+      break;
+    }
+    default:
+      llvm::errs() << "INTERNAL ERROR: unknown compile mode\n";
+      return false;
     }
 
     // Ending early and just emitting IR.
@@ -218,25 +219,24 @@
     // Switch based on output format.
     iree_compiler_error_t *outputError = nullptr;
     switch (outputFormat) {
-      case OutputFormat::vm_asm:
-        outputError = ireeCompilerInvocationOutputIR(r.inv, s.output);
-        break;
-      case OutputFormat::vm_bytecode:
-        outputError = ireeCompilerInvocationOutputVMBytecode(r.inv, s.output);
-        break;
+    case OutputFormat::vm_asm:
+      outputError = ireeCompilerInvocationOutputIR(r.inv, s.output);
+      break;
+    case OutputFormat::vm_bytecode:
+      outputError = ireeCompilerInvocationOutputVMBytecode(r.inv, s.output);
+      break;
 #ifdef IREE_HAVE_C_OUTPUT_FORMAT
-      case OutputFormat::vm_c:
-        outputError = ireeCompilerInvocationOutputVMCSource(r.inv, s.output);
-        break;
-#endif  // IREE_HAVE_C_OUTPUT_FORMAT
-      case OutputFormat::hal_executable: {
-        outputError =
-            ireeCompilerInvocationOutputHALExecutable(r.inv, s.output);
-        break;
-      }
-      default:
-        llvm::errs() << "INTERNAL ERROR: Unknown output format\n";
-        return false;
+    case OutputFormat::vm_c:
+      outputError = ireeCompilerInvocationOutputVMCSource(r.inv, s.output);
+      break;
+#endif // IREE_HAVE_C_OUTPUT_FORMAT
+    case OutputFormat::hal_executable: {
+      outputError = ireeCompilerInvocationOutputHALExecutable(r.inv, s.output);
+      break;
+    }
+    default:
+      llvm::errs() << "INTERNAL ERROR: Unknown output format\n";
+      return false;
     }
 
     if (outputError) {
@@ -269,7 +269,8 @@
       return 1;
     }
   } else {
-    if (!processBuffer(s.source)) return 1;
+    if (!processBuffer(s.source))
+      return 1;
   }
 
   ireeCompilerOutputKeep(s.output);
diff --git a/compiler/src/iree/compiler/Tools/iree_compile_lib.h b/compiler/src/iree/compiler/Tools/iree_compile_lib.h
index 396b802..7956f71 100644
--- a/compiler/src/iree/compiler/Tools/iree_compile_lib.h
+++ b/compiler/src/iree/compiler/Tools/iree_compile_lib.h
@@ -12,7 +12,7 @@
 
 int runIreecMain(int argc, char **argv);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_TOOLS_IREE_COMPILE_LIB_H
\ No newline at end of file
+#endif // IREE_COMPILER_TOOLS_IREE_COMPILE_LIB_H
\ No newline at end of file
diff --git a/compiler/src/iree/compiler/Tools/version.h b/compiler/src/iree/compiler/Tools/version.h
index 20f3385..c14f86d 100644
--- a/compiler/src/iree/compiler/Tools/version.h
+++ b/compiler/src/iree/compiler/Tools/version.h
@@ -15,6 +15,6 @@
 // defined.
 std::string getIreeRevision();
 
-}  // namespace mlir::iree_compiler
+} // namespace mlir::iree_compiler
 
-#endif  // IREE_COMPILER_TOOLS_VERSION_H
+#endif // IREE_COMPILER_TOOLS_VERSION_H
diff --git a/compiler/src/iree/compiler/Utils/ConversionUtils.cpp b/compiler/src/iree/compiler/Utils/ConversionUtils.cpp
index 9e29886..55724dd 100644
--- a/compiler/src/iree/compiler/Utils/ConversionUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/ConversionUtils.cpp
@@ -46,7 +46,8 @@
       illegalOps.insert(op);
     }
   });
-  if (illegalOps.empty()) return success();
+  if (illegalOps.empty())
+    return success();
   emitLegalizationErrors(op->getLoc(), illegalOps);
   return failure();
 }
@@ -60,12 +61,14 @@
 
   // Return the same attribute if it doesn't have a type.
   auto typedOldAttr = llvm::dyn_cast<TypedAttr>(oldAttr);
-  if (!typedOldAttr) return oldAttr;
+  if (!typedOldAttr)
+    return oldAttr;
 
   // Convert the attribute type - if it's the same then it's already legal.
   auto oldType = typedOldAttr.getType();
   auto newType = typeConverter.convertType(oldType);
-  if (oldType == newType) return typedOldAttr;
+  if (oldType == newType)
+    return typedOldAttr;
 
   if (auto intAttr = llvm::dyn_cast<IntegerAttr>(typedOldAttr)) {
     APInt value = intAttr.getValue();
@@ -123,5 +126,5 @@
   return oldAttr;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Utils/ConversionUtils.h b/compiler/src/iree/compiler/Utils/ConversionUtils.h
index a2ee0c3..5a561fc 100644
--- a/compiler/src/iree/compiler/Utils/ConversionUtils.h
+++ b/compiler/src/iree/compiler/Utils/ConversionUtils.h
@@ -25,7 +25,7 @@
 Attribute convertAttribute(Location loc, Attribute oldAttr,
                            TypeConverter &typeConverter);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_CONVERSIONUTILS_H_
+#endif // IREE_COMPILER_UTILS_CONVERSIONUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp b/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp
index 313eedc..cb15b56 100644
--- a/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/ElementPackingUtils.cpp
@@ -31,16 +31,19 @@
 Type legalizeStorageElementType(Type elementType) {
   // Only handle integers; floats in MLIR all have aligned widths (today).
   auto intType = dyn_cast<IntegerType>(elementType);
-  if (!intType) return elementType;
+  if (!intType)
+    return elementType;
 
   // For sub-byte elements, default to pack them into bytes.
   unsigned bitWidth = intType.getWidth();
-  if (needToPackSubByteElementBitWidth(bitWidth)) return elementType;
+  if (needToPackSubByteElementBitWidth(bitWidth))
+    return elementType;
 
   // Otherwise, extend them to the next power-of-two bitwidth.
   unsigned alignedBitWidth =
       IREE::Util::getRoundedElementByteWidth(intType) * 8;
-  if (alignedBitWidth == bitWidth) return elementType;
+  if (alignedBitWidth == bitWidth)
+    return elementType;
   return IntegerType::get(elementType.getContext(), alignedBitWidth,
                           intType.getSignedness());
 }
@@ -59,7 +62,8 @@
     staticCount *= IREE::Util::getRoundedElementByteWidth(alignedElementType);
   }
   for (unsigned i = 0; i < shapedType.getRank(); ++i) {
-    if (!shapedType.isDynamicDim(i)) staticCount *= shapedType.getDimSize(i);
+    if (!shapedType.isDynamicDim(i))
+      staticCount *= shapedType.getDimSize(i);
   }
 
   // Scale by dynamic dims, if present.
@@ -110,5 +114,5 @@
                                              elementBytes);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Utils/ElementPackingUtils.h b/compiler/src/iree/compiler/Utils/ElementPackingUtils.h
index 06423bec..ba3ebef 100644
--- a/compiler/src/iree/compiler/Utils/ElementPackingUtils.h
+++ b/compiler/src/iree/compiler/Utils/ElementPackingUtils.h
@@ -46,7 +46,7 @@
                                            Value linearizedIndex,
                                            OpBuilder &builder);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_TYPEUTILS_H_
+#endif // IREE_COMPILER_UTILS_TYPEUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp b/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp
index 2dbb505..3419c10 100644
--- a/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp
@@ -25,8 +25,8 @@
 // builder is paged. If we end up with a custom attribute type for this that
 // does not support storage uniquing then we can directly allocate and copy
 // the pages into the buffer without the extra copy.
-static SmallVector<uint8_t, 32> cloneBufferIntoContiguousBytes(
-    FlatbufferBuilder &fbb) {
+static SmallVector<uint8_t, 32>
+cloneBufferIntoContiguousBytes(FlatbufferBuilder &fbb) {
   size_t packedSize = flatcc_builder_get_buffer_size(fbb);
   SmallVector<uint8_t, 32> packedData(packedSize);
   void *result =
@@ -40,8 +40,9 @@
 
 FlatbufferBuilder::~FlatbufferBuilder() { flatcc_builder_clear(&builder); }
 
-flatbuffers_uint8_vec_ref_t FlatbufferBuilder::streamUint8Vec(
-    std::function<bool(raw_ostream &stream)> fn, size_t alignment) {
+flatbuffers_uint8_vec_ref_t
+FlatbufferBuilder::streamUint8Vec(std::function<bool(raw_ostream &stream)> fn,
+                                  size_t alignment) {
   flatcc_builder_start_vector(*this, 1, alignment, FLATBUFFERS_COUNT_MAX(1));
   raw_flatbuffer_uint8_vec_ostream stream(*this);
   if (!fn(stream)) {
@@ -123,5 +124,5 @@
   return success();
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Utils/FlatbufferUtils.h b/compiler/src/iree/compiler/Utils/FlatbufferUtils.h
index 711d4a0..baea17e 100644
--- a/compiler/src/iree/compiler/Utils/FlatbufferUtils.h
+++ b/compiler/src/iree/compiler/Utils/FlatbufferUtils.h
@@ -45,7 +45,7 @@
 //   // ... and finally capture the results as an mlir::Attribute.
 //   auto attr = builder.getBufferAttr(mlirContext);
 class FlatbufferBuilder {
- public:
+public:
   FlatbufferBuilder();
   ~FlatbufferBuilder();
 
@@ -53,7 +53,8 @@
 
   // Creates a string with the given string contents (including zeros).
   flatbuffers_string_ref_t createString(StringRef value) {
-    if (value.empty()) return 0;
+    if (value.empty())
+      return 0;
     return flatbuffers_string_create(*this, value.data(), value.size());
   }
 
@@ -63,7 +64,8 @@
     auto stringRefs = llvm::map_to_vector<8>(Range, [&](StringRef value) {
       return flatbuffers_string_create(*this, value.data(), value.size());
     });
-    if (stringRefs.empty()) return 0;
+    if (stringRefs.empty())
+      return 0;
     return flatbuffers_string_vec_create(*this, stringRefs.data(),
                                          stringRefs.size());
   }
@@ -71,7 +73,8 @@
   // Creates an offset vector with the given values. The source values will not
   // be modified.
   flatbuffers_vec_ref_t createOffsetVec(ArrayRef<flatcc_builder_ref_t> values) {
-    if (values.empty()) return 0;
+    if (values.empty())
+      return 0;
     return flatcc_builder_create_offset_vector(*this, values.data(),
                                                values.size());
   }
@@ -79,9 +82,10 @@
   // Creates an offset vector with the given values.
   // Unlike createOffsetVec this will destroy the input values array during
   // serialization but be much faster.
-  flatbuffers_vec_ref_t createOffsetVecDestructive(
-      SmallVectorImpl<flatcc_builder_ref_t> &values) {
-    if (values.empty()) return 0;
+  flatbuffers_vec_ref_t
+  createOffsetVecDestructive(SmallVectorImpl<flatcc_builder_ref_t> &values) {
+    if (values.empty())
+      return 0;
     return flatcc_builder_create_offset_vector_direct(*this, values.data(),
                                                       values.size());
   }
@@ -89,7 +93,8 @@
   // Creates an [int32] vec with the contents of the given range.
   template <typename RangeTy>
   flatbuffers_int32_vec_ref_t createInt32Vec(RangeTy &&Range) {
-    if (Range.empty()) return 0;
+    if (Range.empty())
+      return 0;
     flatbuffers_int32_vec_start(*this);
     for (int32_t v : Range) {
       flatbuffers_int32_vec_push_create(*this, v);
@@ -108,8 +113,9 @@
   //   ...
   //   my_type_uint8_vec_field_add(builder, ref);  // use vec reference
   //   ...
-  flatbuffers_uint8_vec_ref_t streamUint8Vec(
-      std::function<bool(raw_ostream &stream)> fn, size_t alignment = 16);
+  flatbuffers_uint8_vec_ref_t
+  streamUint8Vec(std::function<bool(raw_ostream &stream)> fn,
+                 size_t alignment = 16);
 
   // Captures the current contents of the flatbuffer builder and returns them
   // as a shaped `vector<SIZExi8>` dense attr. The builder is left unmodified.
@@ -145,7 +151,7 @@
                                   print_json_fn_t printJsonFn,
                                   llvm::raw_ostream &output);
 
- private:
+private:
   flatcc_builder_t builder;
 };
 
@@ -160,13 +166,13 @@
 //   stream.flush();  // *********** IMPORTANT ***********
 //   flatbuffers_uint8_vec_ref_t ref = flatbuffers_uint8_vec_end(builder);
 class raw_flatbuffer_uint8_vec_ostream : public llvm::raw_ostream {
- public:
+public:
   explicit raw_flatbuffer_uint8_vec_ostream(flatcc_builder_t *builder)
       : raw_ostream(/*unbuffered=*/true), builder(builder) {}
 
   ~raw_flatbuffer_uint8_vec_ostream() override { flush(); }
 
- private:
+private:
   void write_impl(const char *Ptr, size_t Size) override {
     flatbuffers_uint8_vec_append(builder,
                                  reinterpret_cast<const uint8_t *>(Ptr), Size);
@@ -179,7 +185,7 @@
   uint64_t pos = 0;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_FLATBUFFERUTILS_H_
+#endif // IREE_COMPILER_UTILS_FLATBUFFERUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/IndexSet.h b/compiler/src/iree/compiler/Utils/IndexSet.h
index 168d82d..ca9b894 100644
--- a/compiler/src/iree/compiler/Utils/IndexSet.h
+++ b/compiler/src/iree/compiler/Utils/IndexSet.h
@@ -17,13 +17,14 @@
 // Simple cache for generated index values.
 // Always inserts at the location specified by the builder when constructed.
 class IndexSet {
- public:
+public:
   explicit IndexSet(Location loc, OpBuilder builder)
       : loc(loc), builder(builder) {}
 
   Value get(int64_t value) {
     auto it = memoizedIndices.find(value);
-    if (it != memoizedIndices.end()) return it->second;
+    if (it != memoizedIndices.end())
+      return it->second;
     auto memoizedValue =
         builder.create<arith::ConstantIndexOp>(loc, value).getResult();
     memoizedIndices[value] = memoizedValue;
@@ -39,13 +40,13 @@
     }
   }
 
- private:
+private:
   Location loc;
   OpBuilder builder;
   DenseMap<int64_t, Value> memoizedIndices;
 };
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_INDEXSET_H_
+#endif // IREE_COMPILER_UTILS_INDEXSET_H_
diff --git a/compiler/src/iree/compiler/Utils/ModuleUtils.cpp b/compiler/src/iree/compiler/Utils/ModuleUtils.cpp
index a49a1fa..43c71a0 100644
--- a/compiler/src/iree/compiler/Utils/ModuleUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/ModuleUtils.cpp
@@ -26,18 +26,22 @@
     // Recurse through fused locations.
     for (auto &childLoc : loc.getLocations()) {
       auto childResult = findFirstFileLoc(childLoc);
-      if (childResult) return childResult;
+      if (childResult)
+        return childResult;
     }
   } else if (auto loc = llvm::dyn_cast<CallSiteLoc>(baseLoc)) {
     // First check caller...
     auto callerResult = findFirstFileLoc(loc.getCaller());
-    if (callerResult) return callerResult;
+    if (callerResult)
+      return callerResult;
     // Then check callee...
     auto calleeResult = findFirstFileLoc(loc.getCallee());
-    if (calleeResult) return calleeResult;
+    if (calleeResult)
+      return calleeResult;
   } else if (auto loc = llvm::dyn_cast<NameLoc>(baseLoc)) {
     auto childResult = findFirstFileLoc(loc.getChildLoc());
-    if (childResult) return childResult;
+    if (childResult)
+      return childResult;
   } else if (auto loc = llvm::dyn_cast<OpaqueLoc>(baseLoc)) {
     // TODO(scotttodd): Use loc.fallbackLocation()?
   } else if (auto loc = llvm::dyn_cast<UnknownLoc>(baseLoc)) {
@@ -49,7 +53,8 @@
 
 std::string guessModuleName(mlir::ModuleOp moduleOp, StringRef defaultName) {
   std::string moduleName = moduleOp.getName().value_or("").str();
-  if (!moduleName.empty()) return moduleName;
+  if (!moduleName.empty())
+    return moduleName;
   auto loc = findFirstFileLoc(moduleOp.getLoc());
   if (loc.has_value()) {
     return sanitizeSymbolName(
@@ -100,7 +105,8 @@
 
   // Resolve conflicts and move the op.
   for (auto &sourceOp : sourceOps) {
-    if (sourceOp->hasTrait<OpTrait::IsTerminator>()) continue;
+    if (sourceOp->hasTrait<OpTrait::IsTerminator>())
+      continue;
     if (auto symbolOp = dyn_cast<SymbolOpInterface>(sourceOp)) {
       auto symbolName = symbolOp.getName();
 
@@ -165,5 +171,5 @@
   return mergeModuleInto(*sourceModuleRef, targetOp, targetBuilder);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Utils/ModuleUtils.h b/compiler/src/iree/compiler/Utils/ModuleUtils.h
index 0518a94..30a71ac 100644
--- a/compiler/src/iree/compiler/Utils/ModuleUtils.h
+++ b/compiler/src/iree/compiler/Utils/ModuleUtils.h
@@ -41,7 +41,7 @@
                                     Operation *targetOp,
                                     OpBuilder &targetBuilder);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_MODULEUTILS_H_
+#endif // IREE_COMPILER_UTILS_MODULEUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/OptionUtils.cpp b/compiler/src/iree/compiler/Utils/OptionUtils.cpp
index 6174e67..e1fcaf3 100644
--- a/compiler/src/iree/compiler/Utils/OptionUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/OptionUtils.cpp
@@ -74,12 +74,14 @@
   return success();
 }
 
-llvm::SmallVector<std::string> OptionsBinder::printArguments(
-    bool nonDefaultOnly) {
+llvm::SmallVector<std::string>
+OptionsBinder::printArguments(bool nonDefaultOnly) {
   llvm::SmallVector<std::string> values;
   for (auto &info : localOptions) {
-    if (!info.print) continue;
-    if (nonDefaultOnly && !info.isChanged()) continue;
+    if (!info.print)
+      continue;
+    if (nonDefaultOnly && !info.isChanged())
+      continue;
 
     std::string s;
     llvm::raw_string_ostream os(s);
@@ -90,8 +92,8 @@
   return values;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 // Parses a byte size in |value| and returns the value in |out_size|.
 //
@@ -128,8 +130,8 @@
 namespace cl {
 template class basic_parser<ByteSize>;
 template class basic_parser<PowerOf2ByteSize>;
-}  // namespace cl
-}  // namespace llvm
+} // namespace cl
+} // namespace llvm
 
 using ByteSize = llvm::cl::ByteSize;
 using PowerOf2ByteSize = llvm::cl::PowerOf2ByteSize;
diff --git a/compiler/src/iree/compiler/Utils/OptionUtils.h b/compiler/src/iree/compiler/Utils/OptionUtils.h
index 33b17a9..c3c0b82 100644
--- a/compiler/src/iree/compiler/Utils/OptionUtils.h
+++ b/compiler/src/iree/compiler/Utils/OptionUtils.h
@@ -36,7 +36,7 @@
 // options of built-in scalar types (string, ints, bool, etc) and enums. Lists
 // of built-in scalar types are also supported.
 class OptionsBinder {
- public:
+public:
   static OptionsBinder global() { return OptionsBinder(); }
 
   static OptionsBinder local() {
@@ -104,7 +104,7 @@
   // and is stable.
   llvm::SmallVector<std::string> printArguments(bool nonDefaultOnly = false);
 
- private:
+private:
   struct LocalOptionInfo {
     using ChangedCallback = std::function<bool()>;
     using PrintCallback = std::function<void(llvm::raw_ostream &)>;
@@ -183,8 +183,8 @@
 
   // List changed specialization.
   template <typename V>
-  static LocalOptionInfo::ChangedCallback makeListChangedCallback(
-      V *currentValue) {
+  static LocalOptionInfo::ChangedCallback
+  makeListChangedCallback(V *currentValue) {
     return [currentValue]() -> bool { return !currentValue->empty(); };
   }
 
@@ -199,7 +199,8 @@
     return [optionName, values](llvm::raw_ostream &os) {
       os << "--" << optionName << "=";
       for (auto it : llvm::enumerate(*values)) {
-        if (it.index() > 0) os << ",";
+        if (it.index() > 0)
+          os << ",";
         os << it.value();
       }
     };
@@ -216,26 +217,26 @@
 //   IREE_DEFINE_COMPILER_OPTION_FLAGS(DerivedTy);
 template <typename DerivedTy>
 class OptionsFromFlags {
- public:
+public:
   static DerivedTy &get();
 };
 
-#define IREE_DEFINE_COMPILER_OPTION_FLAGS(DerivedTy)                   \
-  template <>                                                          \
-  DerivedTy &mlir::iree_compiler::OptionsFromFlags<DerivedTy>::get() { \
-    struct InitializedTy : DerivedTy {                                 \
-      InitializedTy() {                                                \
-        mlir::iree_compiler::OptionsBinder binder =                    \
-            mlir::iree_compiler::OptionsBinder::global();              \
-        DerivedTy::bindOptions(binder);                                \
-      }                                                                \
-    };                                                                 \
-    static InitializedTy singleton;                                    \
-    return singleton;                                                  \
+#define IREE_DEFINE_COMPILER_OPTION_FLAGS(DerivedTy)                           \
+  template <>                                                                  \
+  DerivedTy &mlir::iree_compiler::OptionsFromFlags<DerivedTy>::get() {         \
+    struct InitializedTy : DerivedTy {                                         \
+      InitializedTy() {                                                        \
+        mlir::iree_compiler::OptionsBinder binder =                            \
+            mlir::iree_compiler::OptionsBinder::global();                      \
+        DerivedTy::bindOptions(binder);                                        \
+      }                                                                        \
+    };                                                                         \
+    static InitializedTy singleton;                                            \
+    return singleton;                                                          \
   }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 namespace llvm {
 namespace cl {
@@ -256,7 +257,7 @@
 
 template <>
 class parser<ByteSize> : public basic_parser<ByteSize> {
- public:
+public:
   parser(Option &O) : basic_parser(O) {}
   bool parse(Option &O, StringRef ArgName, StringRef Arg, ByteSize &Val);
   StringRef getValueName() const override { return "byte size"; }
@@ -267,7 +268,7 @@
 
 template <>
 class parser<PowerOf2ByteSize> : public basic_parser<PowerOf2ByteSize> {
- public:
+public:
   parser(Option &O) : basic_parser(O) {}
   bool parse(Option &O, StringRef ArgName, StringRef Arg,
              PowerOf2ByteSize &Val);
@@ -277,7 +278,7 @@
   void anchor() override;
 };
 
-}  // namespace cl
-}  // namespace llvm
+} // namespace cl
+} // namespace llvm
 
-#endif  // IREE_COMPILER_UTILS_FLAG_UTILS_H
+#endif // IREE_COMPILER_UTILS_FLAG_UTILS_H
diff --git a/compiler/src/iree/compiler/Utils/PassUtils.cpp b/compiler/src/iree/compiler/Utils/PassUtils.cpp
index 8ac2753..9557672 100644
--- a/compiler/src/iree/compiler/Utils/PassUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/PassUtils.cpp
@@ -25,5 +25,5 @@
   rootOp->setAttr("iree.fixedpoint.modified", UnitAttr::get(context));
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Utils/PassUtils.h b/compiler/src/iree/compiler/Utils/PassUtils.h
index 18cf953..8841915 100644
--- a/compiler/src/iree/compiler/Utils/PassUtils.h
+++ b/compiler/src/iree/compiler/Utils/PassUtils.h
@@ -26,7 +26,7 @@
 ///     .addPredicatedPass(enable, createMyOtherPass);
 template <typename... OpTys>
 struct MultiOpNest {
- public:
+public:
   MultiOpNest(OpPassManager &parentPm) : parentPm(parentPm) {
     addNest<0, OpTys...>();
   }
@@ -52,7 +52,7 @@
     return *this;
   }
 
- private:
+private:
   // Initialize a nest.
   template <int index, typename T, typename... Rest>
   void addNest() {
@@ -83,7 +83,7 @@
 // has been made which requires another iteration. No-op otherwise.
 void signalFixedPointModified(Operation *rootOp);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_FUNCTIONUTILS_H_
+#endif // IREE_COMPILER_UTILS_FUNCTIONUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/PatternUtils.h b/compiler/src/iree/compiler/Utils/PatternUtils.h
index 7dcd761..b940232 100644
--- a/compiler/src/iree/compiler/Utils/PatternUtils.h
+++ b/compiler/src/iree/compiler/Utils/PatternUtils.h
@@ -65,9 +65,9 @@
     Pattern(MLIRContext *context, GenericOpRewritePattern<OpTy> f,
             PatternBenefit benefit)
         : OpConversionPattern<OpTy>(context, benefit), f(f) {}
-    LogicalResult matchAndRewrite(
-        OpTy op, typename OpTy::Adaptor adaptor,
-        ConversionPatternRewriter &rewriter) const override {
+    LogicalResult
+    matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                    ConversionPatternRewriter &rewriter) const override {
       return f(op, adaptor, rewriter);
     }
     GenericOpRewritePattern<OpTy> f;
@@ -75,7 +75,7 @@
   patterns.insert<Pattern>(context, f, benefit);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_PATTERNUTILS_H_
+#endif // IREE_COMPILER_UTILS_PATTERNUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/StringUtils.cpp b/compiler/src/iree/compiler/Utils/StringUtils.cpp
index bfdbbee..9f65e20 100644
--- a/compiler/src/iree/compiler/Utils/StringUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/StringUtils.cpp
@@ -55,5 +55,5 @@
   return result;
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Utils/StringUtils.h b/compiler/src/iree/compiler/Utils/StringUtils.h
index 8f7eddf..d1663ba 100644
--- a/compiler/src/iree/compiler/Utils/StringUtils.h
+++ b/compiler/src/iree/compiler/Utils/StringUtils.h
@@ -47,7 +47,7 @@
 //  `a$-æb` -> `a_-_b`
 std::string sanitizeFileName(StringRef name);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_STRINGUTILS_H_
+#endif // IREE_COMPILER_UTILS_STRINGUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/ToolUtils.cpp b/compiler/src/iree/compiler/Utils/ToolUtils.cpp
index ca1548b..134c145 100644
--- a/compiler/src/iree/compiler/Utils/ToolUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/ToolUtils.cpp
@@ -21,7 +21,7 @@
   return "\"" + component + "\"";
 #else
   return component;
-#endif  // _WIN32
+#endif // _WIN32
 }
 
 StringRef unescapeCommandLineComponent(StringRef component) {
@@ -29,7 +29,7 @@
   if (component.starts_with("\"") && component.ends_with("\"")) {
     return component.drop_front(1).drop_back(1);
   }
-#endif  // _WIN32
+#endif // _WIN32
   return component;
 }
 
@@ -38,7 +38,7 @@
   return toolName + ".exe";
 #else
   return toolName;
-#endif  // _WIN32
+#endif // _WIN32
 }
 
 static std::string findToolAtPath(SmallVector<std::string> normalizedToolNames,
@@ -56,8 +56,8 @@
   return "";
 }
 
-static SmallVector<std::string> normalizeToolNames(
-    SmallVector<std::string> toolNames) {
+static SmallVector<std::string>
+normalizeToolNames(SmallVector<std::string> toolNames) {
   SmallVector<std::string> normalizedToolNames;
   normalizedToolNames.reserve(toolNames.size());
   for (auto toolName : toolNames) {
@@ -126,11 +126,13 @@
 
   // Search the install or build dir.
   std::string executableDirPath = findToolFromExecutableDir(toolNames);
-  if (!executableDirPath.empty()) return executableDirPath;
+  if (!executableDirPath.empty())
+    return executableDirPath;
 
   // Currently fall back on searching the environment.
   std::string environmentPath = findToolInEnvironment(toolNames);
-  if (!environmentPath.empty()) return environmentPath;
+  if (!environmentPath.empty())
+    return environmentPath;
 
   return "";
 }
@@ -140,5 +142,5 @@
   return findTool(toolNames);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/compiler/src/iree/compiler/Utils/ToolUtils.h b/compiler/src/iree/compiler/Utils/ToolUtils.h
index 8d5f5e5..e623942 100644
--- a/compiler/src/iree/compiler/Utils/ToolUtils.h
+++ b/compiler/src/iree/compiler/Utils/ToolUtils.h
@@ -40,7 +40,7 @@
 std::string findTool(SmallVector<std::string> toolNames);
 std::string findTool(std::string toolName);
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_TOOLUTILS_H_
+#endif // IREE_COMPILER_UTILS_TOOLUTILS_H_
diff --git a/compiler/src/iree/compiler/Utils/TracingUtils.cpp b/compiler/src/iree/compiler/Utils/TracingUtils.cpp
index c09396c..a5fb865 100644
--- a/compiler/src/iree/compiler/Utils/TracingUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/TracingUtils.cpp
@@ -6,7 +6,7 @@
 
 #include "iree/compiler/Utils/TracingUtils.h"
 
-#if IREE_ENABLE_COMPILER_TRACING && \
+#if IREE_ENABLE_COMPILER_TRACING &&                                            \
     IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
 
 #include "mlir/IR/SymbolTable.h"
@@ -21,7 +21,7 @@
 
 namespace {
 thread_local llvm::SmallVector<iree_zone_id_t, 8> passTraceZonesStack;
-}  // namespace
+} // namespace
 
 static void prettyPrintOpBreadcrumb(Operation *op, llvm::raw_ostream &os) {
   auto parentOp = op->getParentOp();
@@ -63,7 +63,7 @@
 
 class TraceFrameMarkBeginPass
     : public PassWrapper<TraceFrameMarkBeginPass, OperationPass<ModuleOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TraceFrameMarkBeginPass);
 
   TraceFrameMarkBeginPass() = default;
@@ -83,7 +83,7 @@
 
 class TraceFrameMarkEndPass
     : public PassWrapper<TraceFrameMarkEndPass, OperationPass<ModuleOp>> {
- public:
+public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TraceFrameMarkEndPass);
 
   TraceFrameMarkEndPass() = default;
@@ -98,20 +98,20 @@
   llvm::StringRef name;
 };
 
-}  // namespace
+} // namespace
 
-std::unique_ptr<OperationPass<ModuleOp>> createTraceFrameMarkBeginPass(
-    llvm::StringRef name) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createTraceFrameMarkBeginPass(llvm::StringRef name) {
   return std::make_unique<TraceFrameMarkBeginPass>(name);
 }
 
-std::unique_ptr<OperationPass<ModuleOp>> createTraceFrameMarkEndPass(
-    llvm::StringRef name) {
+std::unique_ptr<OperationPass<ModuleOp>>
+createTraceFrameMarkEndPass(llvm::StringRef name) {
   return std::make_unique<TraceFrameMarkEndPass>(name);
 }
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // Allocation tracking
@@ -140,11 +140,11 @@
 // Avoid potential sharp edge by making allocation tracking and sanitizers
 // mutually exclusive. They _might_ work together, but here's a warning anyway.
 #if defined(__has_feature)
-#if __has_feature(address_sanitizer) || __has_feature(memory_sanitizer) || \
+#if __has_feature(address_sanitizer) || __has_feature(memory_sanitizer) ||     \
     __has_feature(thread_sanitizer)
 #error Compiler IREE_TRACING_FEATURE_ALLOCATION_TRACKING not compatible with sanitizers
-#endif  // __has_feature(*_sanitizer)
-#endif  // defined(__has_feature)
+#endif // __has_feature(*_sanitizer)
+#endif // defined(__has_feature)
 
 #include <new>
 
@@ -245,6 +245,6 @@
   iree_aligned_free(ptr);
 }
 
-#endif  // IREE_TRACING_FEATURE_ALLOCATION_TRACKING
+#endif // IREE_TRACING_FEATURE_ALLOCATION_TRACKING
 
-#endif  // IREE_ENABLE_COMPILER_TRACING + IREE_TRACING_FEATURE_INSTRUMENTATION
+#endif // IREE_ENABLE_COMPILER_TRACING + IREE_TRACING_FEATURE_INSTRUMENTATION
diff --git a/compiler/src/iree/compiler/Utils/TracingUtils.h b/compiler/src/iree/compiler/Utils/TracingUtils.h
index 6a4498f..134a55e 100644
--- a/compiler/src/iree/compiler/Utils/TracingUtils.h
+++ b/compiler/src/iree/compiler/Utils/TracingUtils.h
@@ -23,15 +23,15 @@
   PassTracing() {}
   ~PassTracing() override = default;
 
-#if IREE_ENABLE_COMPILER_TRACING && \
+#if IREE_ENABLE_COMPILER_TRACING &&                                            \
     IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
   void runBeforePass(Pass *pass, Operation *op) override;
   void runAfterPass(Pass *pass, Operation *op) override;
   void runAfterPassFailed(Pass *pass, Operation *op) override;
-#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
 };
 
-#if IREE_ENABLE_COMPILER_TRACING && \
+#if IREE_ENABLE_COMPILER_TRACING &&                                            \
     IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
 
 enum {
@@ -43,36 +43,36 @@
 };
 
 // Fork of IREE_TRACE_MESSAGE_DYNAMIC, taking std::string (or llvm::StringRef).
-#define IREE_COMPILER_TRACE_MESSAGE_DYNAMIC(level, value_string)   \
-  ___tracy_emit_messageC(value_string.data(), value_string.size(), \
+#define IREE_COMPILER_TRACE_MESSAGE_DYNAMIC(level, value_string)               \
+  ___tracy_emit_messageC(value_string.data(), value_string.size(),             \
                          IREE_TRACING_COMPILER_MESSAGE_LEVEL_##level, 0)
 
 // Adds a pass to |passManager| that marks the beginning of a named frame.
 // * |frameName| must be a null-terminated string
 // * |frameName| must use the same underlying storage as the name used with
 //   IREE_TRACE_ADD_END_FRAME_PASS
-#define IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, frameName) \
+#define IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, frameName)                \
   passManager.addPass(createTraceFrameMarkBeginPass(frameName));
 
 // Adds a pass to |passManager| that marks the end of a named frame.
 // * |frameName| must be a null-terminated string
 // * |frameName| must use the same underlying storage as the name used with
 //   IREE_TRACE_ADD_BEGIN_FRAME_PASS
-#define IREE_TRACE_ADD_END_FRAME_PASS(passManager, frameName) \
+#define IREE_TRACE_ADD_END_FRAME_PASS(passManager, frameName)                  \
   passManager.addPass(createTraceFrameMarkEndPass(frameName));
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createTraceFrameMarkBeginPass(
-    llvm::StringRef name = "");
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createTraceFrameMarkEndPass(
-    llvm::StringRef name = "");
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createTraceFrameMarkBeginPass(llvm::StringRef name = "");
+std::unique_ptr<OperationPass<mlir::ModuleOp>>
+createTraceFrameMarkEndPass(llvm::StringRef name = "");
 
 #else
 #define IREE_COMPILER_TRACE_MESSAGE_DYNAMIC(level, value_string)
 #define IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, frameName)
 #define IREE_TRACE_ADD_END_FRAME_PASS(passManager, frameName)
-#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+#endif // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
 
-}  // namespace iree_compiler
-}  // namespace mlir
+} // namespace iree_compiler
+} // namespace mlir
 
-#endif  // IREE_COMPILER_UTILS_TRACINGUTILS_H_
+#endif // IREE_COMPILER_UTILS_TRACINGUTILS_H_