Integrate llvm/llvm-project@4f15267d (#12226)

* Reset third_party/llvm-project:
4f15267d3dd797a15901fe9352f0d5fa121b9095 (2023-02-15 16:52:25 +0100):
[libc++][NFC] Replace _LIBCPP_STD_VER > x with _LIBCPP_STD_VER >= x
* Updated to tensorflow/tensorflow@75eaca4
* Updated to tensorflow/mlir-hlo@a913e03
* Cherry-picked MLIR bug fix llvm/llvm-project@3cf7f22
* Cherry-picked MLIR bug fix llvm/llvm-project@e44f405
* Used `llvm/TargetParser/Host.h` to replace `llvm/Support/Host.h`
* Used `llvm::bit_floor` to replace `llvm::PowerOf2Floor`
* Updated GPU memory space handling in converting to LLVM
* Ran `python compiler/src/iree/compiler/API2/generate_exports.py`
* Fixed bufferization issue in transform dialect path

---------

Co-authored-by: Hanhan Wang <hanchung@google.com>
Co-authored-by: Thomas Raoux <thomasraoux@google.com>
Co-authored-by: Matthias Springer <springerm@google.com>
diff --git a/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp b/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp
index f5dd5a4..ec63010 100644
--- a/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp
+++ b/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp
@@ -22,11 +22,11 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/CrashRecoveryContext.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/Process.h"
+#include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/Triple.h"
 
 using namespace lld;
diff --git a/compiler/src/iree/compiler/API2/api_exports.c b/compiler/src/iree/compiler/API2/api_exports.c
index 7fd6604..c82f363 100644
--- a/compiler/src/iree/compiler/API2/api_exports.c
+++ b/compiler/src/iree/compiler/API2/api_exports.c
@@ -302,6 +302,8 @@
 extern void mlirFlatSymbolRefAttrGet();
 extern void mlirFlatSymbolRefAttrGetValue();
 extern void mlirFloat8E4M3FNTypeGet();
+extern void mlirFloat8E4M3FNUZTypeGet();
+extern void mlirFloat8E5M2FNUZTypeGet();
 extern void mlirFloat8E5M2TypeGet();
 extern void mlirFloatAttrDoubleGet();
 extern void mlirFloatAttrDoubleGetChecked();
@@ -533,7 +535,9 @@
 extern void mlirTypeIsAF32();
 extern void mlirTypeIsAF64();
 extern void mlirTypeIsAFloat8E4M3FN();
+extern void mlirTypeIsAFloat8E4M3FNUZ();
 extern void mlirTypeIsAFloat8E5M2();
+extern void mlirTypeIsAFloat8E5M2FNUZ();
 extern void mlirTypeIsAFunction();
 extern void mlirTypeIsAIndex();
 extern void mlirTypeIsAInteger();
@@ -880,6 +884,8 @@
   x += (uintptr_t)&mlirFlatSymbolRefAttrGet;
   x += (uintptr_t)&mlirFlatSymbolRefAttrGetValue;
   x += (uintptr_t)&mlirFloat8E4M3FNTypeGet;
+  x += (uintptr_t)&mlirFloat8E4M3FNUZTypeGet;
+  x += (uintptr_t)&mlirFloat8E5M2FNUZTypeGet;
   x += (uintptr_t)&mlirFloat8E5M2TypeGet;
   x += (uintptr_t)&mlirFloatAttrDoubleGet;
   x += (uintptr_t)&mlirFloatAttrDoubleGetChecked;
@@ -1111,7 +1117,9 @@
   x += (uintptr_t)&mlirTypeIsAF32;
   x += (uintptr_t)&mlirTypeIsAF64;
   x += (uintptr_t)&mlirTypeIsAFloat8E4M3FN;
+  x += (uintptr_t)&mlirTypeIsAFloat8E4M3FNUZ;
   x += (uintptr_t)&mlirTypeIsAFloat8E5M2;
+  x += (uintptr_t)&mlirTypeIsAFloat8E5M2FNUZ;
   x += (uintptr_t)&mlirTypeIsAFunction;
   x += (uintptr_t)&mlirTypeIsAIndex;
   x += (uintptr_t)&mlirTypeIsAInteger;
diff --git a/compiler/src/iree/compiler/API2/api_exports.def b/compiler/src/iree/compiler/API2/api_exports.def
index e3316c9..6e91e69 100644
--- a/compiler/src/iree/compiler/API2/api_exports.def
+++ b/compiler/src/iree/compiler/API2/api_exports.def
@@ -294,6 +294,8 @@
   mlirFlatSymbolRefAttrGet
   mlirFlatSymbolRefAttrGetValue
   mlirFloat8E4M3FNTypeGet
+  mlirFloat8E4M3FNUZTypeGet
+  mlirFloat8E5M2FNUZTypeGet
   mlirFloat8E5M2TypeGet
   mlirFloatAttrDoubleGet
   mlirFloatAttrDoubleGetChecked
@@ -525,7 +527,9 @@
   mlirTypeIsAF32
   mlirTypeIsAF64
   mlirTypeIsAFloat8E4M3FN
+  mlirTypeIsAFloat8E4M3FNUZ
   mlirTypeIsAFloat8E5M2
+  mlirTypeIsAFloat8E5M2FNUZ
   mlirTypeIsAFunction
   mlirTypeIsAIndex
   mlirTypeIsAInteger
diff --git a/compiler/src/iree/compiler/API2/api_exports.ld b/compiler/src/iree/compiler/API2/api_exports.ld
index 435d025..55522d7 100644
--- a/compiler/src/iree/compiler/API2/api_exports.ld
+++ b/compiler/src/iree/compiler/API2/api_exports.ld
@@ -295,6 +295,8 @@
     mlirFlatSymbolRefAttrGet;
     mlirFlatSymbolRefAttrGetValue;
     mlirFloat8E4M3FNTypeGet;
+    mlirFloat8E4M3FNUZTypeGet;
+    mlirFloat8E5M2FNUZTypeGet;
     mlirFloat8E5M2TypeGet;
     mlirFloatAttrDoubleGet;
     mlirFloatAttrDoubleGetChecked;
@@ -526,7 +528,9 @@
     mlirTypeIsAF32;
     mlirTypeIsAF64;
     mlirTypeIsAFloat8E4M3FN;
+    mlirTypeIsAFloat8E4M3FNUZ;
     mlirTypeIsAFloat8E5M2;
+    mlirTypeIsAFloat8E5M2FNUZ;
     mlirTypeIsAFunction;
     mlirTypeIsAIndex;
     mlirTypeIsAInteger;
diff --git a/compiler/src/iree/compiler/API2/api_exports.macos.lst b/compiler/src/iree/compiler/API2/api_exports.macos.lst
index b9786ad..40d66a0 100644
--- a/compiler/src/iree/compiler/API2/api_exports.macos.lst
+++ b/compiler/src/iree/compiler/API2/api_exports.macos.lst
@@ -293,6 +293,8 @@
 _mlirFlatSymbolRefAttrGet
 _mlirFlatSymbolRefAttrGetValue
 _mlirFloat8E4M3FNTypeGet
+_mlirFloat8E4M3FNUZTypeGet
+_mlirFloat8E5M2FNUZTypeGet
 _mlirFloat8E5M2TypeGet
 _mlirFloatAttrDoubleGet
 _mlirFloatAttrDoubleGetChecked
@@ -524,7 +526,9 @@
 _mlirTypeIsAF32
 _mlirTypeIsAF64
 _mlirTypeIsAFloat8E4M3FN
+_mlirTypeIsAFloat8E4M3FNUZ
 _mlirTypeIsAFloat8E5M2
+_mlirTypeIsAFloat8E5M2FNUZ
 _mlirTypeIsAFunction
 _mlirTypeIsAIndex
 _mlirTypeIsAInteger
diff --git a/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td b/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td
index 56429f8..705c1aa 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td
+++ b/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td
@@ -14,7 +14,9 @@
 
 // Canonicalize unnecessary tensor_load when the load is used just for
 // an extract
-def : Pat<(Tensor_ExtractOp (Bufferization_ToTensorOp $value), $indices),
+def : Pat<(Tensor_ExtractOp(Bufferization_ToTensorOp $value, $restrict,
+                            $writable),
+           $indices),
           (LoadOp $value, $indices, ConstBoolAttrFalse)>;
 
 #endif // IREE_COMPILER_CODEGEN_COMMON_FOLDTENSOREXTRACTOP
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
index b281ed8..ca7a346 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
@@ -1054,12 +1054,7 @@
   options.memCpyFn = memCpyFn;
   options.testAnalysisOnly = getTestAnalysisOnly();
   options.printConflicts = getPrintConflicts();
-  WalkResult res = state.getTopLevel()->walk([&](ModuleOp moduleOp) {
-    if (failed(runIREEOneShotBufferize(moduleOp, options)))
-      return WalkResult::interrupt();
-    return WalkResult::advance();
-  });
-  if (res.wasInterrupted())
+  if (failed(runIREEOneShotBufferize(state.getTopLevel(), options)))
     return DiagnosedSilenceableFailure::definiteFailure();
 
   // Early exit if test_analysis_only is set.
@@ -1071,7 +1066,7 @@
   //   3. Post-bufferization passes are fine.
   PassManager pm(getContext());
   addIREEPostBufferizationPasses(pm);
-  res = state.getTopLevel()->walk([&](ModuleOp moduleOp) {
+  WalkResult res = state.getTopLevel()->walk([&](ModuleOp moduleOp) {
     if (failed(pm.run(moduleOp))) {
       getOperation()->emitError()
           << "failed to post-bufferization passes on module:\n"
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 4fff146..2925394 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -767,9 +767,7 @@
   // Cast to the desired memref element type.
   auto elementType = typeConverter->convertType(memRefType.getElementType());
   Value typedPtrValue = builder.create<LLVM::BitcastOp>(
-      loc,
-      LLVM::LLVMPointerType::get(elementType, memRefType.getMemorySpaceAsInt()),
-      basePtrValue);
+      loc, LLVM::LLVMPointerType::get(elementType), basePtrValue);
 
   // Construct the MemRefDescriptor type based on the information we have.
   // NOTE: we could use the binding length to clamp this/check that the
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 8308849..901f93f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -374,8 +374,8 @@
     }
     // Fallback to power of 2 if there's no hint or can't find the ideal size.
     if (vectorSize <= 1 || candidateTileSize == 1) {
-      candidateTileSize =
-          std::max<int64_t>(llvm::PowerOf2Floor(targetSize), minTileSizes[i]);
+      candidateTileSize = std::max<int64_t>(
+          llvm::bit_floor<uint64_t>(targetSize), minTileSizes[i]);
     }
 
     // Limit the workload per workgroup to the default being the max to keep the
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
index e397b48..1acc08e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
@@ -154,7 +154,12 @@
 
   LogicalResult matchAndRewrite(memref::AllocOp allocOp,
                                 PatternRewriter &rewriter) const override {
-    if (allocOp.getType().getMemorySpaceAsInt() != 3) return failure();
+    auto addressSpace = allocOp.getType()
+                            .getMemorySpace()
+                            .dyn_cast_or_null<gpu::AddressSpaceAttr>();
+    if (!addressSpace ||
+        addressSpace.getValue() != gpu::GPUDialect::getWorkgroupAddressSpace())
+      return failure();
     ArrayRef<int64_t> shape = allocOp.getType().getShape();
     if (llvm::any_of(shape,
                      [](int64_t dim) { return dim == ShapedType::kDynamic; })) {
@@ -266,8 +271,7 @@
     funcOp.walk([&](IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
       auto memrefType = subspanOp.getType().cast<MemRefType>();
       Type elType = memrefType.getElementType();
-      auto llvmType =
-          LLVM::LLVMPointerType::get(elType, memrefType.getMemorySpaceAsInt());
+      auto llvmType = LLVM::LLVMPointerType::get(elType);
       llvmInputTypes[argMapping[SetBinding(subspanOp.getSet(),
                                            subspanOp.getBinding())]] = llvmType;
     });
@@ -388,8 +392,7 @@
           loc, llvmBufferBasei8Ptr.getType(), llvmBufferBasei8Ptr,
           adaptor.getByteOffset());
     }
-    auto llvmPtrType = LLVM::LLVMPointerType::get(
-        memrefType.getElementType(), memrefType.getMemorySpaceAsInt());
+    auto llvmPtrType = LLVM::LLVMPointerType::get(memrefType.getElementType());
     Value llvmBufferBasePtr =
         rewriter.create<LLVM::BitcastOp>(loc, llvmPtrType, llvmBufferBasei8Ptr);
     if (memrefType.hasStaticShape()) {
@@ -519,5 +522,20 @@
   return std::make_unique<TestLLVMGPULegalizeOpPass>();
 }
 
+static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
+  return IntegerAttr::get(IntegerType::get(ctx, 64), space);
+}
+
+void populateGpuMemorySpaceAttributeConversions(
+    TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
+  typeConverter.addTypeAttributeConversion(
+      [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
+        gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
+        unsigned addressSpace = mapping(memorySpace);
+        return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
+                                      addressSpace);
+      });
+}
+
 }  // namespace iree_compiler
 }  // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
index e10f93a..e683869 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
@@ -10,6 +10,9 @@
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 
 namespace mlir {
+namespace gpu {
+enum class AddressSpace : uint32_t;
+}
 namespace iree_compiler {
 
 /// Verifies compatibility of the module for application of the LLVM
@@ -33,6 +36,11 @@
 
 void ConvertToDynamicSharedMemory(ModuleOp moduleOp);
 
+using MemorySpaceMapping =
+    std::function<unsigned(gpu::AddressSpace gpuAddressSpace)>;
+void populateGpuMemorySpaceAttributeConversions(
+    TypeConverter &typeConverter, const MemorySpaceMapping &mapping);
+
 }  // namespace iree_compiler
 }  // namespace mlir
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
index 9095dc6..0d4959a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
@@ -42,13 +42,16 @@
 
   LogicalResult matchAndRewrite(memref::DeallocOp op,
                                 PatternRewriter &rewriter) const override {
-    unsigned addressSpace =
-        op.getMemref().getType().cast<MemRefType>().getMemorySpaceAsInt();
-    if (addressSpace == NVVM::NVVMMemorySpace::kSharedMemorySpace) {
-      rewriter.eraseOp(op);
-      return success();
-    }
-    return failure();
+    auto addressSpace = op.getMemref()
+                            .getType()
+                            .cast<MemRefType>()
+                            .getMemorySpace()
+                            .dyn_cast_or_null<gpu::AddressSpaceAttr>();
+    if (!addressSpace ||
+        addressSpace.getValue() != gpu::GPUDialect::getWorkgroupAddressSpace())
+      return failure();
+    rewriter.eraseOp(op);
+    return success();
   }
 };
 
@@ -72,6 +75,21 @@
     LowerToLLVMOptions options(m.getContext(), DataLayout(m));
     options.overrideIndexBitwidth(64);
     LLVMTypeConverter converter(m.getContext(), options);
+    populateGpuMemorySpaceAttributeConversions(
+        converter, [](gpu::AddressSpace space) -> unsigned {
+          switch (space) {
+            case gpu::AddressSpace::Global:
+              return static_cast<unsigned>(
+                  NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+            case gpu::AddressSpace::Workgroup:
+              return static_cast<unsigned>(
+                  NVVM::NVVMMemorySpace::kSharedMemorySpace);
+            case gpu::AddressSpace::Private:
+              return 0;
+          }
+          llvm_unreachable("unknown address space enum value");
+          return 0;
+        });
     // Lowering for MMAMatrixType.
     converter.addConversion([&](gpu::MMAMatrixType type) -> Type {
       return convertMMAToLLVMType(type);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
index 1e393a4..c9c5ce6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
@@ -48,6 +48,19 @@
     LowerToLLVMOptions options(m.getContext(), DataLayout(m));
     options.overrideIndexBitwidth(64);
     LLVMTypeConverter converter(m.getContext(), options);
+    populateGpuMemorySpaceAttributeConversions(
+        converter, [](gpu::AddressSpace space) {
+          switch (space) {
+            case gpu::AddressSpace::Global:
+              return 1;
+            case gpu::AddressSpace::Workgroup:
+              return 3;
+            case gpu::AddressSpace::Private:
+              return 5;
+          }
+          llvm_unreachable("unknown address space enum value");
+          return 0;
+        });
     // Apply in-dialect lowering first. In-dialect lowering will replace ops
     // which need to be lowered further, which is not supported by a single
     // conversion pass.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
index a0e1d38..39fe02e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
@@ -68,8 +68,9 @@
       return signalPassFailure();
     }
 
+    IRRewriter rewriter(&getContext());
     if (llvmgpuUseMMASync) {
-      if (failed(convertVectorToNVVMCompatibleMMASync(funcOp))) {
+      if (failed(convertVectorToNVVMCompatibleMMASync(rewriter, funcOp))) {
         return signalPassFailure();
       }
       // Using TF32 for Float.
@@ -81,7 +82,9 @@
         return signalPassFailure();
       }
     } else {
-      convertVectorToMMAOps(funcOp);
+      if (failed(convertVectorToMMAOps(rewriter, funcOp))) {
+        return signalPassFailure();
+      }
     }
     createAsyncGroups(funcOp, llvmgpuUseMMASync);
 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
index 7e51dc2..b13a5b9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
@@ -410,7 +410,6 @@
   pm.addNestedPass<func::FuncOp>(memref::createExpandOpsPass());
   pm.addPass(memref::createExpandStridedMetadataPass());
   pm.addPass(createLowerAffinePass());
-  pm.addPass(createGPULowerMemorySpaceAttributesPass());
   // Strip out the debug info for the kernel as CUDA driver doesn't diggest PTX
   // debug info well.
   pm.addPass(createStripDebugInfoPass());
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
index e276361..5aa0b5a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
@@ -655,10 +655,14 @@
   mlir::vector::populateCastAwayVectorLeadingOneDimPatterns(patterns);
   populatePrepareVectorToMMAPatterns(patterns, /*llvmgpuUseMMASync=*/false);
   if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) {
+    target->emitOpError("vector to mma preparation patterns failed to apply");
+    return emitDefaultDefiniteFailure(target);
+  }
+  IRRewriter rewriter(getContext());
+  if (failed(convertVectorToMMAOps(rewriter, target))) {
     target->emitOpError("vector to mma patterns failed to apply");
     return emitDefaultDefiniteFailure(target);
   }
-  convertVectorToMMAOps(target);
 
   results.push_back(target);
   return DiagnosedSilenceableFailure::success();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
index c9dd5a5..93c7bad 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
@@ -201,12 +201,12 @@
       func.func @shared_memory_lowering() {
         %c0 = arith.constant 0 : index
         %cst = arith.constant dense<0.000000e+00> : vector<4xf32>
-        %0 = memref.alloc() : memref<1x16x32xf32, 3>
-        %1 = memref.alloc() : memref<1x32x16xf32, 3>
-        %2 = memref.alloc() : memref<1x8x16xf32, 3>
-        vector.store %cst, %1[%c0, %c0, %c0] : memref<1x32x16xf32, 3>, vector<4xf32>
-        vector.store %cst, %2[%c0, %c0, %c0] : memref<1x8x16xf32, 3>, vector<4xf32>
-        vector.store %cst, %0[%c0, %c0, %c0] : memref<1x16x32xf32, 3>, vector<4xf32>
+        %0 = memref.alloc() : memref<1x16x32xf32, #gpu.address_space<workgroup>>
+        %1 = memref.alloc() : memref<1x32x16xf32, #gpu.address_space<workgroup>>
+        %2 = memref.alloc() : memref<1x8x16xf32, #gpu.address_space<workgroup>>
+        vector.store %cst, %1[%c0, %c0, %c0] : memref<1x32x16xf32, #gpu.address_space<workgroup>>, vector<4xf32>
+        vector.store %cst, %2[%c0, %c0, %c0] : memref<1x8x16xf32, #gpu.address_space<workgroup>>, vector<4xf32>
+        vector.store %cst, %0[%c0, %c0, %c0] : memref<1x16x32xf32, #gpu.address_space<workgroup>>, vector<4xf32>
         return
       }
     }
@@ -246,10 +246,10 @@
         %f0 = arith.constant 0.0 : f32
         %c0 = arith.constant 0 : index
         //     CHECK: llvm.mlir.addressof @__dynamic_shared_memory__ : !llvm.ptr<array<0 x i8>, 3>
-        %0 = memref.alloc() : memref<1xf32, 3>
-        memref.store %f0, %0[%c0] : memref<1xf32, 3>
+        %0 = memref.alloc() : memref<1xf32, #gpu.address_space<workgroup>>
+        memref.store %f0, %0[%c0] : memref<1xf32, #gpu.address_space<workgroup>>
         // CHECK-NOT: free
-        memref.dealloc %0 : memref<1xf32, 3>
+        memref.dealloc %0 : memref<1xf32, #gpu.address_space<workgroup>>
         return
       }
     }
@@ -271,10 +271,10 @@
         %c0 = arith.constant 0 : index
         %cst_f32 = arith.constant 0.000000e+00 : f32
         %cst_i8 = arith.constant 0 : i8
-        %0 = memref.alloc() : memref<1xi8, 3>
-        %1 = memref.alloc() : memref<32xf32, 3>
-        memref.store %cst_i8, %0[%c0] : memref<1xi8, 3>
-        memref.store %cst_f32, %1[%c0] : memref<32xf32, 3>
+        %0 = memref.alloc() : memref<1xi8, #gpu.address_space<workgroup>>
+        %1 = memref.alloc() : memref<32xf32, #gpu.address_space<workgroup>>
+        memref.store %cst_i8, %0[%c0] : memref<1xi8, #gpu.address_space<workgroup>>
+        memref.store %cst_f32, %1[%c0] : memref<32xf32, #gpu.address_space<workgroup>>
         return
       }
     }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir
index d1b5b6c..a06a2c5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir
@@ -37,13 +37,13 @@
 
 // -----
 
-// CHECK: memref.global "private" @__shared_memory__ : memref<16x16xf32, 3>
+// CHECK: memref.global "private" @__shared_memory__ : memref<16x16xf32, #gpu.address_space<workgroup>>
 // CHECK: func.func @allocation
-// CHECK:   %[[A:.*]] = memref.get_global @__shared_memory__ : memref<16x16xf32, 3>
-// CHECK:   memref.store %{{.*}}, %[[A]][%{{.*}}, %{{.*}}] : memref<16x16xf32, 3>
+// CHECK:   %[[A:.*]] = memref.get_global @__shared_memory__ : memref<16x16xf32, #gpu.address_space<workgroup>>
+// CHECK:   memref.store %{{.*}}, %[[A]][%{{.*}}, %{{.*}}] : memref<16x16xf32, #gpu.address_space<workgroup>>
 func.func @allocation(%arg0: f32) {
-  %0 = memref.alloc() : memref<16x16xf32, 3>
+  %0 = memref.alloc() : memref<16x16xf32, #gpu.address_space<workgroup>>
   %c0 = arith.constant 0 : index
-  memref.store %arg0, %0[%c0, %c0] : memref<16x16xf32, 3>
+  memref.store %arg0, %0[%c0, %c0] : memref<16x16xf32, #gpu.address_space<workgroup>>
   return
 }
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
index e790f90..2a59748 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
@@ -386,7 +386,8 @@
                         int64_t &tileSize) {
   // Deduce the configuration for the K dimension. We need some power of two
   // here so that we can do vector load.
-  for (int64_t t = llvm::PowerOf2Floor(residualTilingFactor); t >= 2; t >>= 1) {
+  for (int64_t t = llvm::bit_floor<uint64_t>(residualTilingFactor); t >= 2;
+       t >>= 1) {
     if (dimK % t == 0) {
       tileSize = t;
       return true;
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
index dce9cf4..0d4c86e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
@@ -40,7 +40,11 @@
       return signalPassFailure();
     }
 
-    convertVectorToMMAOps(funcOp);
+    IRRewriter rewriter(&getContext());
+    if (failed(convertVectorToMMAOps(rewriter, funcOp))) {
+      funcOp->emitError("failed conversion to GPU subgroup MMA ops");
+      return signalPassFailure();
+    }
 
     // Make sure we actually generate GPU subgroup mma ops.
     WalkResult result = funcOp.walk([](Operation* op) {
@@ -48,7 +52,7 @@
                                                 : WalkResult::advance();
     });
     if (!result.wasInterrupted()) {
-      funcOp->emitError("failed conversion to GPU subgroup MMA ops");
+      funcOp->emitError("no GPU subgroup mma compute ops generated");
       return signalPassFailure();
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
index ebf3702..6d7ce09 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
@@ -91,6 +91,7 @@
         "@llvm-project//llvm:Passes",
         "@llvm-project//llvm:Support",
         "@llvm-project//llvm:Target",
+        "@llvm-project//llvm:TargetParser",
         "@llvm-project//mlir:Support",
     ],
 )
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
index 9d4b5d6..93959e5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
@@ -73,6 +73,7 @@
     LLVMPasses
     LLVMSupport
     LLVMTarget
+    LLVMTargetParser
     MLIRSupport
   PUBLIC
 )
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp
index 42a3049..01425cf 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp
@@ -414,7 +414,7 @@
       func.setDSOLocal(true);
       func.setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
     }
-    for (auto &global : llvmModule->getGlobalList()) {
+    for (auto &global : llvmModule->globals()) {
       global.setDSOLocal(true);
       global.setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
     }
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
index 1f249fa..1f86f76 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
@@ -17,8 +17,8 @@
 #include "llvm/Passes/StandardInstrumentations.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
index f2d8a5e..71c1113 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
@@ -12,8 +12,8 @@
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/TargetParser/X86TargetParser.h"
 
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp
index 98e4b0f..216d95b 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp
@@ -8,7 +8,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Host.h"
+#include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/Triple.h"
 
 #define DEBUG_TYPE "llvm-linker"
diff --git a/integrations/tensorflow/WORKSPACE b/integrations/tensorflow/WORKSPACE
index e94d707..7232e2a 100644
--- a/integrations/tensorflow/WORKSPACE
+++ b/integrations/tensorflow/WORKSPACE
@@ -7,7 +7,7 @@
 
 load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
 
-TENSORFLOW_COMMIT = "d1c09628895840a713e92e8557b5fd90afa94c18"
+TENSORFLOW_COMMIT = "75eaca49ed62c37278113b270a3e19edab0ba26d"
 
 git_repository(
     name = "org_tensorflow",
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
index 5d75f46..4fd5b1c 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
@@ -981,16 +981,16 @@
   pm.addPass(createLowerAffinePass());
   pm.addPass(createConvertSCFToCFPass());
   pm.addPass(createConvertLinalgToLLVMPass());
-  pm.addPass(createConvertVectorToLLVMPass(
-      // clang-format off
-      LowerVectorToLLVMOptions()
-        .enableReassociateFPReductions(getReassociateFpReductions())
-        .enableIndexOptimizations(getEnableIndexOptimizations())
-        .enableArmNeon(getEnableArmNeon())
-        .enableArmSVE(getEnableArmSve())
-        .enableAMX(getEnableAmx())
-        .enableX86Vector(getEnableX86vector())));
-  // clang-format on
+  {
+    auto options = ConvertVectorToLLVMPassOptions();
+    options.reassociateFPReductions = getReassociateFpReductions();
+    options.force32BitVectorIndices = getEnableIndexOptimizations();
+    options.armNeon = getEnableArmNeon();
+    options.armSVE = getEnableArmSve();
+    options.amx = getEnableAmx();
+    options.x86Vector = getEnableX86vector();
+    pm.addPass(createConvertVectorToLLVMPass(options));
+  }
   pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
   pm.addPass(createFinalizeMemRefToLLVMConversionPass());
   if (getEnableAsync())
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 947c8b4..66616cb 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 947c8b4b5869f18b1807863138271b3149ffc245
+Subproject commit 66616cb9136ed360b0b8b3545fbd64050b32c8f6
diff --git a/third_party/mlir-hlo b/third_party/mlir-hlo
index 63ac2f8..a913e03 160000
--- a/third_party/mlir-hlo
+++ b/third_party/mlir-hlo
@@ -1 +1 @@
-Subproject commit 63ac2f8f3989266b65fc3150bd25648f75e34a62
+Subproject commit a913e03964df57009a51b46ccba09b322f2ba31b