Merge google -> main (#8027)

* 580800efa Synchronize submodules with LLVM at llvm/llvm-project@c5965a411c63
* c554bc99e Merge pull request #8025 from not-jenni:main-to-google
* 631e8e3cc Synchronize submodules with LLVM at llvm/llvm-project@c5965a411c63
* 28cd81b73 Integrate LLVM at llvm/llvm-project@c5965a411c63
* 01fe2ffdf Integrate LLVM at llvm/llvm-project@564bcf9d0243
* 723fe7351 Synchronize submodules with LLVM at llvm/llvm-project@b5149f4e66a4
* 82f2f2107 Merge pull request #8016 from not-jenni:main-to-google
diff --git a/SUBMODULE_VERSIONS.txt b/SUBMODULE_VERSIONS.txt
index b4fd94a..9645eb0 100644
--- a/SUBMODULE_VERSIONS.txt
+++ b/SUBMODULE_VERSIONS.txt
@@ -4,14 +4,14 @@
 aa533abfd4232b01f9e57041d70114d5a77e6de0 third_party/googletest
 88b845dee001723c4a0db1fe5477de735b6d3bb0 third_party/liburing
 f8f760f7387d2cc56a2fc7b1be313a3bf3f7f58c third_party/libyaml
-b5149f4e66a49a98b67e8e2de4e24a4af8e2781b third_party/llvm-project
-8ff74aee67ecc3778a878936d47b005a7e614067 third_party/mlir-hlo
+c5965a411c635106a47738b8d2e24db822b7416f third_party/llvm-project
+a05f8a3f45c95e0386d54909d32b04dc21122a81 third_party/mlir-hlo
 3f701faace7addc75d16dea8a6cd769fa5b3f260 third_party/musl
 59aa99860c60bd171b9565e9920f125fdb749267 third_party/pybind11
 e9cc6403341baf0edd430a4027b074d0a06b782f third_party/spirv_cross
 d53b49635b7484e86959608a65a64d8121e6a385 third_party/spirv_headers
 af1a5bc352164740c1cc1354942b1c6b72eacb8a third_party/stblib
-0de7e6d509182de06c9c03349938ff103232e09e third_party/tensorflow
+ee5994ed22c2eec04e3527241fe26ffa2f7c21f9 third_party/tensorflow
 058e89011fceca912d43638ebb6b85992147fcfe third_party/tracy
 9e62d027636cd7210f60d934f56107ed6e1579b8 third_party/vulkan_headers
 5c8b3ba955f0dbb30d18afc420f3a38adc779231 third_party/vulkan_memory_allocator
diff --git a/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp b/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
index 31e458e..88afc4c 100644
--- a/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
+++ b/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
@@ -67,20 +67,14 @@
  public:
   explicit IREEComprehensiveBufferizePass(
       std::unique_ptr<linalg::comprehensive_bufferize::AllocationCallbacks>
-          allocationFn) {
-    options.allocationFns = std::move(allocationFn);
-    options.testAnalysisOnly = false;
-    addPostAnalysisTransformations(options);
-  }
+          allocationFn)
+      : allocationFn(std::move(allocationFn)) {}
 
   IREEComprehensiveBufferizePass(const IREEComprehensiveBufferizePass &other) {
-    options.allocationFns =
+    allocationFn =
         std::make_unique<linalg::comprehensive_bufferize::AllocationCallbacks>(
-            other.options.allocationFns->allocationFn,
-            other.options.allocationFns->deallocationFn,
-            other.options.allocationFns->memCpyFn);
-    options.testAnalysisOnly = other.options.testAnalysisOnly;
-    addPostAnalysisTransformations(options);
+            other.allocationFn->allocationFn,
+            other.allocationFn->deallocationFn, other.allocationFn->memCpyFn);
   }
 
   void getDependentDialects(DialectRegistry &registry) const override {
@@ -95,7 +89,8 @@
   void runOnOperation() override;
 
  private:
-  linalg::comprehensive_bufferize::BufferizationOptions options;
+  std::unique_ptr<linalg::comprehensive_bufferize::AllocationCallbacks>
+      allocationFn;
 };
 }  // namespace
 
@@ -104,7 +99,16 @@
 /// Run comprehensive bufferize.
 void IREEComprehensiveBufferizePass::runOnOperation() {
   ModuleOp moduleOp = getOperation();
-  if (failed(runComprehensiveBufferize(moduleOp, options))) {
+  auto options =
+      std::make_unique<linalg::comprehensive_bufferize::BufferizationOptions>();
+  options->allocationFns =
+      std::make_unique<linalg::comprehensive_bufferize::AllocationCallbacks>(
+          allocationFn->allocationFn, allocationFn->deallocationFn,
+          allocationFn->memCpyFn);
+  options->testAnalysisOnly = false;
+  addPostAnalysisTransformations(*options);
+
+  if (failed(runComprehensiveBufferize(moduleOp, std::move(options)))) {
     return signalPassFailure();
   }
 }
diff --git a/iree/compiler/Codegen/Common/LinalgBufferizePass.cpp b/iree/compiler/Codegen/Common/LinalgBufferizePass.cpp
index 39e3820..f971880 100644
--- a/iree/compiler/Codegen/Common/LinalgBufferizePass.cpp
+++ b/iree/compiler/Codegen/Common/LinalgBufferizePass.cpp
@@ -386,16 +386,28 @@
     OpBuilder &b, Value value, const BufferizationPlan &plan,
     const BlockAndValueMapping &bvm,
     SmallVectorImpl<std::pair<OpOperand *, Value>> &traversedUses) {
-  Operation *user = nullptr;
+  Operation *op = value.getDefiningOp();
+  if (!op) return nullptr;
+  Operation *opParent = op->getParentOp();
+  if (!opParent) return nullptr;
   while (value.hasOneUse()) {
     OpOperand &use = *value.use_begin();
-    user = use.getOwner();
-    if (isa<IREE::Flow::DispatchTensorStoreOp, tensor::InsertSliceOp>(user)) {
+    Operation *user = use.getOwner();
+    bool isUserInSameScope = user->getParentOp() == opParent;
+    if (isUserInSameScope &&
+        isa<IREE::Flow::DispatchTensorStoreOp, tensor::InsertSliceOp>(user)) {
       return getSubviewOpForTensorStoreOp(b, user, bvm);
     }
-    value = getTiedResultForOperand(use, plan);
+    if (!isUserInSameScope && isa<scf::YieldOp>(user)) {
+      value = cast<scf::ForOp>(user->getParentOp())
+                  .getResult(use.getOperandNumber());
+    } else {
+      value = getTiedResultForOperand(use, plan);
+    }
     if (!value) return nullptr;
-    traversedUses.push_back(std::make_pair(&use, value));
+    if (isUserInSameScope) {
+      traversedUses.push_back(std::make_pair(&use, value));
+    }
     if (auto resultBuffer = bvm.lookupOrNull(value)) return resultBuffer;
   }
   return nullptr;
diff --git a/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
index afe73fa..11a8c61 100644
--- a/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
+++ b/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
@@ -62,12 +62,12 @@
                          layout, memorySpace);
 }
 
-static Value getSubspanBuffer(Value tensor, OpBuilder &b,
+static Value getSubspanBuffer(Value tensor, RewriterBase &rewriter,
                               BufferizationState &state) {
   FlowBufferizationState &flowState = getFlowBufferizationState(state);
 
   if (!flowState.subspan_to_buffer.count(tensor)) {
-    OpBuilder::InsertionGuard g(b);
+    OpBuilder::InsertionGuard g(rewriter);
     auto subspanOp =
         tensor.getDefiningOp<IREE::HAL::InterfaceBindingSubspanOp>();
     assert(subspanOp && "expected LoadOp/StoreOp source/target is SubspanOp");
@@ -77,10 +77,10 @@
                           .dyn_cast<IREE::Flow::DispatchTensorType>();
     assert(shapedType && shapedType.hasRank());
 
-    b.setInsertionPoint(subspanOp);
+    rewriter.setInsertionPoint(subspanOp);
     // Just change the result type of the InterfaceBindingSubspanOp.
     auto memRefType = getMemrefTypeForTensor(shapedType);
-    Value baseBuffer = b.create<IREE::HAL::InterfaceBindingSubspanOp>(
+    Value baseBuffer = rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
         subspanOp->getLoc(), memRefType, subspanOp.set(), subspanOp.binding(),
         subspanOp.type(), subspanOp.byte_offset(), subspanOp.dynamic_dims(),
         subspanOp.alignmentAttr());
@@ -107,16 +107,14 @@
     return shapedType.getAccess() != IREE::Flow::TensorAccess::ReadOnly;
   }
 
-  LogicalResult bufferize(Operation *op, OpBuilder &b,
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           BufferizationState &state) const {
-    OpBuilder::InsertionGuard g(b);
-    b.setInsertionPoint(op);
     auto loadOp = cast<IREE::Flow::DispatchTensorLoadOp>(op);
-    Value source = getSubspanBuffer(loadOp.source(), b, state);
+    Value source = getSubspanBuffer(loadOp.source(), rewriter, state);
 
     // Bufferize to subview.
     state.replaceOpWithNewOp<memref::SubViewOp>(
-        b, op, source, loadOp.getMixedOffsets(), loadOp.getMixedSizes(),
+        rewriter, op, source, loadOp.getMixedOffsets(), loadOp.getMixedSizes(),
         loadOp.getMixedStrides());
 
     return success();
@@ -176,10 +174,8 @@
     return OpResult();
   }
 
-  LogicalResult bufferize(Operation *op, OpBuilder &b,
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           BufferizationState &state) const {
-    OpBuilder::InsertionGuard g(b);
-    b.setInsertionPoint(op);
     auto storeOp = cast<IREE::Flow::DispatchTensorStoreOp>(op);
     auto &flowState = getFlowBufferizationState(state);
 
@@ -187,15 +183,15 @@
     // target buffer already.
     bool needCopy = !flowState.store_ops_without_copy.contains(op);
     if (needCopy) {
-      Value target = getSubspanBuffer(storeOp.target(), b, state);
-      Value subView = b.create<memref::SubViewOp>(
+      Value target = getSubspanBuffer(storeOp.target(), rewriter, state);
+      Value subView = rewriter.create<memref::SubViewOp>(
           storeOp->getLoc(), target, storeOp.getMixedOffsets(),
           storeOp.getMixedSizes(), storeOp.getMixedStrides());
-      Value srcMemref = state.lookupBuffer(storeOp.value());
-      state.createMemCpy(b, storeOp->getLoc(), srcMemref, subView);
+      Value srcMemref = state.lookupBuffer(rewriter, storeOp.value());
+      state.createMemCpy(rewriter, storeOp->getLoc(), srcMemref, subView);
     }
 
-    storeOp.erase();
+    rewriter.eraseOp(storeOp);
     return success();
   }
 };
diff --git a/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
index 2415004..31d8632 100644
--- a/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -601,9 +601,10 @@
           op,
           "failed to convert interface.binding.subspan result to memref type");
     }
-    auto memRefDesc = abi.loadBinding(
-        op->getLoc(), newOperands.binding().getInt(), newOperands.byte_offset(),
-        memRefType, newOperands.dynamic_dims(), rewriter);
+    auto memRefDesc =
+        abi.loadBinding(op->getLoc(), newOperands.bindingAttr().getInt(),
+                        newOperands.byte_offset(), memRefType,
+                        newOperands.dynamic_dims(), rewriter);
     rewriter.replaceOp(op, {memRefDesc});
     return success();
   }
diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileFuseAndVectorizeLinalgTensorOps.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileFuseAndVectorizeLinalgTensorOps.cpp
index 84cce2a..1207064 100644
--- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileFuseAndVectorizeLinalgTensorOps.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileFuseAndVectorizeLinalgTensorOps.cpp
@@ -215,10 +215,11 @@
   }
 
   funcOp.walk([&](linalg::ContractionOpInterface op) {
-    if (failed(linalg::vectorizeLinalgOpPrecondition(op))) {
+    if (cast<linalg::LinalgOp>(op.getOperation()).hasDynamicShape()) {
       lowerToVectors = false;
     }
   });
+
   if (!lowerToVectors) {
     // Apply second level of tiling patterns if they are not vectorizable. This
     // will trigger LLVM auto-vectorization, which gains better performance.
diff --git a/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir b/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir
index b2e297e..bf95e8a 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir
@@ -43,8 +43,8 @@
 //      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 64)>
 //      CHECK: func @dot_384x512x128_dispatch_0() {
 //  CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
-//  CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 //  CHECK-DAG: %[[CST_VECTOR:.+]] = arith.constant dense<0.000000e+00> : vector<16x16xf32>
+//  CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 //  CHECK-DAG: %[[C384:.+]] = arith.constant 384 : index
 //  CHECK-DAG: %[[C512:.+]] = arith.constant 512 : index
 //  CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index
diff --git a/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp b/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp
index d96aec4..55b10cb 100644
--- a/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/InterchangeGenericOps.cpp
@@ -47,10 +47,7 @@
         interchange.push_back(iter.index());
       }
     }
-    rewriter.updateRootInPlace(genericOp, [&]() {
-      interchangeGenericOp(rewriter, genericOp, interchange);
-    });
-    return success();
+    return interchangeGenericOp(rewriter, genericOp, interchange);
   }
 };
 
diff --git a/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/ConvertStreamToHAL.cpp b/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/ConvertStreamToHAL.cpp
index d7e2f4a..06aa3fc 100644
--- a/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/ConvertStreamToHAL.cpp
+++ b/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/ConvertStreamToHAL.cpp
@@ -536,8 +536,7 @@
     }
 
     auto loc = exportOp.getLoc();
-    auto tensorType =
-        adaptor.source_encoding().getValue().cast<RankedTensorType>();
+    auto tensorType = adaptor.source_encoding().cast<RankedTensorType>();
     auto dynamicDims = adaptor.source_encoding_dims();
 
     // NOTE: we should have verified supported encodings/types at entry into the
diff --git a/iree/compiler/Dialect/Stream/Conversion/FlowToStream/ConvertFlowToStream.cpp b/iree/compiler/Dialect/Stream/Conversion/FlowToStream/ConvertFlowToStream.cpp
index 1325b69..1725c45 100644
--- a/iree/compiler/Dialect/Stream/Conversion/FlowToStream/ConvertFlowToStream.cpp
+++ b/iree/compiler/Dialect/Stream/Conversion/FlowToStream/ConvertFlowToStream.cpp
@@ -280,7 +280,7 @@
     rewriter.replaceOpWithNewOp<IREE::Stream::AsyncDispatchOp>(
         op, resultTypes, adaptor.workgroup_count(), adaptor.entry_point(),
         dispatchOperands, dispatchOperandSizes, resultSizes,
-        adaptor.tied_operands(),
+        adaptor.tied_operandsAttr(),
         /*affinity=*/nullptr);
     return success();
   }
diff --git a/iree/compiler/Dialect/Stream/Conversion/UtilToStream/ConvertUtilToStream.cpp b/iree/compiler/Dialect/Stream/Conversion/UtilToStream/ConvertUtilToStream.cpp
index e50a0f8..22c6cdf 100644
--- a/iree/compiler/Dialect/Stream/Conversion/UtilToStream/ConvertUtilToStream.cpp
+++ b/iree/compiler/Dialect/Stream/Conversion/UtilToStream/ConvertUtilToStream.cpp
@@ -149,8 +149,7 @@
       ConversionPatternRewriter &rewriter) const override {
     // Only apply to expanded types (tensors/etc).
     if (!isExpandedType(loadOp.getType())) return failure();
-    auto &expandedGlobal =
-        expansionState->globalMap[adaptor.global().getValue()];
+    auto &expandedGlobal = expansionState->globalMap[adaptor.global()];
 
     // Insert a load/transfer to the unknown resource lifetime.
     auto unknownType = IREE::Stream::ResourceType::get(rewriter.getContext());
@@ -181,8 +180,7 @@
       ConversionPatternRewriter &rewriter) const override {
     // Only apply to expanded types (tensors/etc).
     if (!isExpandedType(storeOp.value().getType())) return failure();
-    auto &expandedGlobal =
-        expansionState->globalMap[adaptor.global().getValue()];
+    auto &expandedGlobal = expansionState->globalMap[adaptor.global()];
 
     // Insert a transfer/store to the global with unknown lifetime. Lifetime
     // refinement will make this go away if possible.
diff --git a/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp b/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
index a1d28f0..238b277 100644
--- a/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
+++ b/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
@@ -732,8 +732,8 @@
     auto invertedCondition = rewriter.createOrFold<IREE::VM::XorI32Op>(
         srcOp.getLoc(), adaptor.getArg().getType(), adaptor.getArg(),
         rewriter.createOrFold<IREE::VM::ConstI32Op>(srcOp.getLoc(), 1));
-    rewriter.replaceOpWithNewOp<IREE::VM::CondFailOp>(
-        srcOp, invertedCondition, status, adaptor.getMsg().getValue());
+    rewriter.replaceOpWithNewOp<IREE::VM::CondFailOp>(srcOp, invertedCondition,
+                                                      status, adaptor.getMsg());
     return success();
   }
 };
diff --git a/iree/compiler/InputConversion/MHLO/BroadcastingToLinalgPatterns.cpp b/iree/compiler/InputConversion/MHLO/BroadcastingToLinalgPatterns.cpp
index 9f29da7..88ebfaa 100644
--- a/iree/compiler/InputConversion/MHLO/BroadcastingToLinalgPatterns.cpp
+++ b/iree/compiler/InputConversion/MHLO/BroadcastingToLinalgPatterns.cpp
@@ -428,7 +428,7 @@
     chlo::BroadcastCompareOpAdaptor adaptor(operands, op->getAttrDictionary());
     return builder.create<mhlo::CompareOp>(
         loc, resultType, broadcastValues.first, broadcastValues.second,
-        adaptor.comparison_direction(), adaptor.compare_type());
+        adaptor.comparison_direction(), adaptor.compare_typeAttr());
   }
 };
 
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/PyDM/Transforms/ToIREE/LoweringPatterns.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/PyDM/Transforms/ToIREE/LoweringPatterns.cpp
index 7bac2c6..37a6f9a 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/PyDM/Transforms/ToIREE/LoweringPatterns.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/PyDM/Transforms/ToIREE/LoweringPatterns.cpp
@@ -211,7 +211,7 @@
     if (auto pyIntegerType = pyLeftType.dyn_cast<PYDM::IntegerType>()) {
       bool isSigned = pyIntegerType.isSigned();
       Value converted =
-          convertIntegerOp(srcOp.getLoc(), adaptor.dunder_name().getValue(),
+          convertIntegerOp(srcOp.getLoc(), adaptor.dunder_name(),
                            adaptor.left(), adaptor.right(), isSigned, rewriter);
       if (!converted)
         return rewriter.notifyMatchFailure(srcOp, "unsupported operation");
@@ -219,8 +219,8 @@
       return success();
     } else if (leftType.isa<mlir::FloatType>()) {
       Value converted =
-          convertFloatOp(srcOp.getLoc(), adaptor.dunder_name().getValue(),
-                         adaptor.left(), adaptor.right(), rewriter);
+          convertFloatOp(srcOp.getLoc(), adaptor.dunder_name(), adaptor.left(),
+                         adaptor.right(), rewriter);
       if (!converted)
         return rewriter.notifyMatchFailure(srcOp, "unsupported operation");
       rewriter.replaceOp(srcOp, converted);
@@ -285,7 +285,7 @@
     }
     if (leftType.isa<mlir::IntegerType>()) {
       bool isSigned = true;  // TODO: Unsigned.
-      auto predicate = convertIntegerComparePredicate(adaptor.dunder_name(),
+      auto predicate = convertIntegerComparePredicate(adaptor.dunder_nameAttr(),
                                                       isSigned, rewriter);
       if (!predicate)
         return rewriter.notifyMatchFailure(srcOp, "unsupported predicate");
@@ -294,7 +294,7 @@
       return success();
     } else if (leftType.isa<mlir::FloatType>()) {
       auto predicate =
-          convertFpComparePredicate(adaptor.dunder_name(), rewriter);
+          convertFpComparePredicate(adaptor.dunder_nameAttr(), rewriter);
       if (!predicate)
         return rewriter.notifyMatchFailure(srcOp, "unsupported predicate");
       rewriter.replaceOpWithNewOp<arith::CmpFOp>(
diff --git a/third_party/llvm-project b/third_party/llvm-project
index b5149f4..c5965a4 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit b5149f4e66a49a98b67e8e2de4e24a4af8e2781b
+Subproject commit c5965a411c635106a47738b8d2e24db822b7416f
diff --git a/third_party/mlir-hlo b/third_party/mlir-hlo
index 8ff74ae..a05f8a3 160000
--- a/third_party/mlir-hlo
+++ b/third_party/mlir-hlo
@@ -1 +1 @@
-Subproject commit 8ff74aee67ecc3778a878936d47b005a7e614067
+Subproject commit a05f8a3f45c95e0386d54909d32b04dc21122a81
diff --git a/third_party/tensorflow b/third_party/tensorflow
index 0de7e6d..ee5994e 160000
--- a/third_party/tensorflow
+++ b/third_party/tensorflow
@@ -1 +1 @@
-Subproject commit 0de7e6d509182de06c9c03349938ff103232e09e
+Subproject commit ee5994ed22c2eec04e3527241fe26ffa2f7c21f9