Bump LLVM to llvm/llvm-project@730f498c961f (#16286)

- Revert llvm/llvm-project@2800448 locally because it crashes many
RISC-V compilations. After we raised the issue, the author reverted it
upstream.
- Revert llvm/llvm-project@66347e51 locally because it causes a
regression. It has already been reverted upstream.
- The CUDA matmul regression appears in the LLVM NVPTX backend: we
generate the same LLVM bitcode before [converting to
ISA](https://github.com/openxla/iree/blob/861815c/compiler/plugins/target/CUDA/CUDATarget.cpp#L600).
CUDA performance is not our priority, we don't have folks deeply
familiar with that layer, and fixing it there is quite involved, so we
are fine taking the hit for now and revisiting later. (It may get fixed
upstream eventually.)

Additional fixes (see the reduced sketch after this list):

- Apply fixes for
https://github.com/llvm/llvm-project/commit/ce7cc723b9d51ad9c741bbaeecb5e008b2b81338
- Apply fixes for
https://github.com/llvm/llvm-project/commit/b91bba89edfb25d011e1f2366cda5dec605c87f6

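Most of the MLIR test updates below follow one pattern: `memref.subview` result layouts now spell out the statically known unit stride, and where surrounding IR still expects the older, less static type, the hoisting transform inserts a `memref.cast` back to it (see the `Codegen/Transforms/Transforms.cpp` change). A reduced, hypothetical sketch of the resulting IR; the function and value names are illustrative and not taken from any test in this PR:

```mlir
func.func @hoist_example(%size: index, %value: i32) {
  %c0 = arith.constant 0 : index
  // The hoisted allocation now has a static shape.
  %alloca = memref.alloca() : memref<16xi32>
  // The subview's inferred layout is strided<[1]>, not strided<[?], offset: ?>.
  %subview = memref.subview %alloca[0] [%size] [1]
      : memref<16xi32> to memref<?xi32, strided<[1]>>
  // Cast back to the dynamic type the consumer was built against.
  %cast = memref.cast %subview
      : memref<?xi32, strided<[1]>> to memref<?xi32>
  memref.store %value, %cast[%c0] : memref<?xi32>
  return
}
```
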
---------

Co-authored-by: Diego Caballero <diegocaballero@google.com>
Co-authored-by: Lei Zhang <antiagainst@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir b/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir
index c3d356e..8264822 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir
@@ -32,7 +32,8 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW:.+]] = memref.subview %[[ALLOCA]][0] [%[[SIZE]]] [1]
-//       CHECK:     memref.store %{{.+}}, %[[SUBVIEW]]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW]]
+//       CHECK:     memref.store %{{.+}}, %[[CAST]]
 
 // -----
 
@@ -54,8 +55,9 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW:.+]] = memref.subview %[[ALLOCA]][0] [%[[SIZE]]] [1]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW]]
 //       CHECK:     linalg.fill
-//  CHECK-SAME:         outs(%[[SUBVIEW]] :
+//  CHECK-SAME:         outs(%[[CAST]] :
 
 // -----
 
@@ -78,7 +80,8 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW1:.+]] = memref.subview %[[ALLOCA]][0, 0] [%[[SIZE]], %[[SIZE]]] [1, 1]
-//       CHECK:     memref.subview %[[SUBVIEW1]]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW1]]
+//       CHECK:     memref.subview %[[CAST]]
 
 // -----
 
@@ -150,6 +153,7 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW:.+]] = memref.subview %[[ALLOC]][0] [%[[SIZE]]] [1]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW]]
 //       CHECK:     linalg.fill
-//  CHECK-SAME:         outs(%[[SUBVIEW]] :
+//  CHECK-SAME:         outs(%[[CAST]] :
 //       CHECK:   memref.dealloc %[[ALLOC:.+]] : memref<16xi32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
index 6c31315..66cb0af 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
@@ -630,13 +630,13 @@
         %slice_size_x = flow.dispatch.workload.ordinal %cl_9, 9: index
         %source = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<?x?xi32>{%source_size_y, %source_size_x}
         %dest = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<?x?xi32>{%dest_size_y, %dest_size_x}
-        %source_subview = memref.subview %source[%source_offset_y, %source_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, ?], offset : ?>>
-        %dest_subview = memref.subview %dest[%dest_offset_y, %dest_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, ?], offset : ?>>
+        %source_subview = memref.subview %source[%source_offset_y, %source_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
+        %dest_subview = memref.subview %dest[%dest_offset_y, %dest_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
         linalg.generic {
             indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel"]}
-            ins(%source_subview : memref<?x?xi32, strided<[?, ?], offset : ?>>)
-            outs(%dest_subview : memref<?x?xi32, strided<[?, ?], offset : ?>>)
+            ins(%source_subview : memref<?x?xi32, strided<[?, 1], offset : ?>>)
+            outs(%dest_subview : memref<?x?xi32, strided<[?, 1], offset : ?>>)
             attrs = {lowering_config = #config} {
           ^bb0(%arg0: i32, %arg1: i32):
             linalg.yield %arg0 : i32
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index b07c29f..d6e3a32 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -451,11 +451,11 @@
         %o1 = hal.interface.constant.load[5] : index
         %source = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<?x?xi32>{%d0, %d1}
         %dest = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<?x?xi32>{%d2, %d3}
-        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32,  strided<[?, ?], offset : ?>>
+        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32,  strided<[?, 1], offset : ?>>
         linalg.generic {
             indexing_maps = [affine_map<(d0, d1) -> (d0, d1)> , affine_map<(d0, d1) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel"]}
-            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, ?], offset : ?>>) {
+            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, 1], offset : ?>>) {
           ^bb0(%arg0 : i32, %arg1 : i32):
             linalg.yield %arg0 : i32
           }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir
index c6e3281..2c43e03 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir
@@ -33,7 +33,7 @@
   scf.for %iv = %c0 to %arg0 step %c1 {
     %0 = affine.min #map(%iv)
     %1 = memref.alloc() : memref<16x16xi32>
-    %2 = memref.subview %1[%o0, %o1][%c1, %0][1, 1] : memref<16x16xi32> to memref<?x?xi32, strided<[?, 1], offset: ?>>
+    %2 = memref.subview %1[%o0, %o1][%c1, %0][1, 1] : memref<16x16xi32> to memref<?x?xi32, strided<[16, 1], offset: ?>>
     memref.dealloc %1 : memref<16x16xi32>
     scf.yield
   }
@@ -77,7 +77,8 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW1:.+]] = memref.subview %[[ALLOC]][0, 0] [%[[SIZE]], %[[SIZE]]] [1, 1]
-//       CHECK:     memref.subview %[[SUBVIEW1]]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW1]]
+//       CHECK:     memref.subview %[[CAST]]
 //  CHECK-NEXT:   }
 //  CHECK-NEXT:   memref.dealloc %[[ALLOC]] : memref<16x16xi32>
 
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
index 399398e..2c7fdbc 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
@@ -103,14 +103,21 @@
       return signalPassFailure();
     }
 
-    auto target = spirv::getMemorySpaceToStorageClassTarget(*context);
     spirv::MemorySpaceToStorageClassConverter converter(memorySpaceMap);
+    // Perform the replacement.
+    spirv::convertMemRefTypesAndAttrs(op, converter);
 
-    RewritePatternSet patterns(context);
-    spirv::populateMemorySpaceToStorageClassPatterns(converter, patterns);
-
-    if (failed(applyFullConversion(op, *target, std::move(patterns))))
-      return signalPassFailure();
+    // Check if there are any illegal ops remaining.
+    std::unique_ptr<ConversionTarget> target =
+        spirv::getMemorySpaceToStorageClassTarget(*context);
+    op->walk([&target, this](Operation *childOp) {
+      if (target->isIllegal(childOp)) {
+        childOp->emitOpError("failed to legalize memory space");
+        signalPassFailure();
+        return WalkResult::interrupt();
+      }
+      return WalkResult::advance();
+    });
   }
 };
 
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
index a9348aa..32d3f4e 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
@@ -154,8 +154,7 @@
   {
     OpBuilder::InsertionGuard g(builder);
     builder.setInsertionPointToStart(&funcOp.getFunctionBody().front());
-    auto allocationType =
-        MemRefType::get(staticShape, allocLikeType.getElementType());
+    auto allocationType = allocLikeType.clone(staticShape);
     allocation =
         builder.create<AllocLikeOpType>(loc, allocationType, alignmentAttr);
   }
@@ -163,10 +162,17 @@
   Value subviewOp = builder.create<memref::SubViewOp>(loc, allocation, offsets,
                                                       subviewSizes, strides);
 
+  // Cast it back to the original types to prevent consumer op's verification
+  // error. It could happen when the consumer op is a memref.subview op.
+  if (subviewOp.getType() != allocLikeType) {
+    subviewOp = builder.create<memref::CastOp>(loc, allocLikeType, subviewOp);
+  }
+
   if (std::is_same<AllocLikeOpType, memref::AllocOp>::value) {
     builder.setInsertionPoint(funcOp.getFunctionBody().front().getTerminator());
     builder.create<memref::DeallocOp>(loc, allocation);
   }
+
   return subviewOp;
 }
 
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
index 23c520a..a330e07 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
@@ -60,11 +60,11 @@
         %o1 = hal.interface.constant.load[5] : index
         %source = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<?x?xi32>{%d0, %d1}
         %dest = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<?x?xi32>{%d2, %d3}
-        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, ?], offset : ?>>
+        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
         linalg.generic {
             indexing_maps = [affine_map<(d0, d1) -> (d0, d1)> , affine_map<(d0, d1) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel"]}
-            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, ?], offset : ?>>) {
+            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, 1], offset : ?>>) {
           ^bb0(%arg0 : i32, %arg1 : i32):
             linalg.yield %arg0 : i32
           }
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
index a576420..1fb89bc 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
@@ -35,10 +35,10 @@
       %5 = affine.min #map3(%4, %arg0)
 
       %6 = memref.subview %arg2[%3] [%5] [%c1] : memref<?xf32> to memref<?xf32, strided<[?], offset:?>>
-      %7 = memref.subview %arg1[%3] [%5] [1] : memref<?xf32> to memref<?xf32, strided<[?], offset:?>>
+      %7 = memref.subview %arg1[%3] [%5] [1] : memref<?xf32> to memref<?xf32, strided<[1], offset:?>>
 
       linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel"]}
-        ins(%7 : memref<?xf32, strided<[?], offset:?>>) outs(%6 : memref<?xf32, strided<[?], offset:?>>) {
+        ins(%7 : memref<?xf32, strided<[1], offset:?>>) outs(%6 : memref<?xf32, strided<[?], offset:?>>) {
       ^bb0(%arg4: f32, %arg5: f32):  // no predecessors
         %9 = arith.mulf %arg4, %cst : f32
         linalg.yield %9 : f32
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 82f5f49..0c419b2 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 82f5f494ece899f867088f77c70a99627a48ff3f
+Subproject commit 0c419b222b2bbfe6c180b8c6efaccf52a599c47c