Bump LLVM to llvm/llvm-project@730f498c961f (#16286)

- Revert llvm/llvm-project@2800448 locally because it crashes many
RISC-V compilations. After we raised the issue, the author reverted it
upstream.
- Revert llvm/llvm-project@66347e51 locally because it causes a
regression. It has already been reverted upstream.
- The CUDA matmul regression appears in the LLVM NVPTX backend: we
generate the same LLVM bitcode before [converting to
ISA](https://github.com/openxla/iree/blob/861815c/compiler/plugins/target/CUDA/CUDATarget.cpp#L600).
CUDA performance is not our priority, we don't have folks deeply
familiar with that layer, and fixing it there is quite involved, so we
are fine taking the hit for now and revisiting later. (It may get fixed
upstream eventually.)

Additional fixes (see the reduced sketch after this list):

- Apply fixes for
https://github.com/llvm/llvm-project/commit/ce7cc723b9d51ad9c741bbaeecb5e008b2b81338
- Apply fixes for
https://github.com/llvm/llvm-project/commit/b91bba89edfb25d011e1f2366cda5dec605c87f6

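Most of the MLIR test updates below follow one pattern: `memref.subview` result layouts now spell out the statically known unit stride, and where surrounding IR still expects the older, less static type, the hoisting transform inserts a `memref.cast` back to it (see the `Codegen/Transforms/Transforms.cpp` change). A reduced, hypothetical sketch of the resulting IR; the function and value names are illustrative and not taken from any test in this PR:

```mlir
func.func @hoist_example(%size: index, %value: i32) {
  %c0 = arith.constant 0 : index
  // The hoisted allocation now has a static shape.
  %alloca = memref.alloca() : memref<16xi32>
  // The subview's inferred layout is strided<[1]>, not strided<[?], offset: ?>.
  %subview = memref.subview %alloca[0] [%size] [1]
      : memref<16xi32> to memref<?xi32, strided<[1]>>
  // Cast back to the dynamic type the consumer was built against.
  %cast = memref.cast %subview
      : memref<?xi32, strided<[1]>> to memref<?xi32>
  memref.store %value, %cast[%c0] : memref<?xi32>
  return
}
```
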
---------

Co-authored-by: Diego Caballero <diegocaballero@google.com>
Co-authored-by: Lei Zhang <antiagainst@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir b/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir
index c3d356e..8264822 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/hoist_statically_bound_allocations.mlir
@@ -32,7 +32,8 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW:.+]] = memref.subview %[[ALLOCA]][0] [%[[SIZE]]] [1]
-//       CHECK:     memref.store %{{.+}}, %[[SUBVIEW]]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW]]
+//       CHECK:     memref.store %{{.+}}, %[[CAST]]
 
 // -----
 
@@ -54,8 +55,9 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW:.+]] = memref.subview %[[ALLOCA]][0] [%[[SIZE]]] [1]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW]]
 //       CHECK:     linalg.fill
-//  CHECK-SAME:         outs(%[[SUBVIEW]] :
+//  CHECK-SAME:         outs(%[[CAST]] :
 
 // -----
 
@@ -78,7 +80,8 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW1:.+]] = memref.subview %[[ALLOCA]][0, 0] [%[[SIZE]], %[[SIZE]]] [1, 1]
-//       CHECK:     memref.subview %[[SUBVIEW1]]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW1]]
+//       CHECK:     memref.subview %[[CAST]]
 
 // -----
 
@@ -150,6 +153,7 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW:.+]] = memref.subview %[[ALLOC]][0] [%[[SIZE]]] [1]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW]]
 //       CHECK:     linalg.fill
-//  CHECK-SAME:         outs(%[[SUBVIEW]] :
+//  CHECK-SAME:         outs(%[[CAST]] :
 //       CHECK:   memref.dealloc %[[ALLOC:.+]] : memref<16xi32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
index 6c31315..66cb0af 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
@@ -630,13 +630,13 @@
         %slice_size_x = flow.dispatch.workload.ordinal %cl_9, 9: index
         %source = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<?x?xi32>{%source_size_y, %source_size_x}
         %dest = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<?x?xi32>{%dest_size_y, %dest_size_x}
-        %source_subview = memref.subview %source[%source_offset_y, %source_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, ?], offset : ?>>
-        %dest_subview = memref.subview %dest[%dest_offset_y, %dest_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, ?], offset : ?>>
+        %source_subview = memref.subview %source[%source_offset_y, %source_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
+        %dest_subview = memref.subview %dest[%dest_offset_y, %dest_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
         linalg.generic {
             indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel"]}
-            ins(%source_subview : memref<?x?xi32, strided<[?, ?], offset : ?>>)
-            outs(%dest_subview : memref<?x?xi32, strided<[?, ?], offset : ?>>)
+            ins(%source_subview : memref<?x?xi32, strided<[?, 1], offset : ?>>)
+            outs(%dest_subview : memref<?x?xi32, strided<[?, 1], offset : ?>>)
             attrs = {lowering_config = #config} {
           ^bb0(%arg0: i32, %arg1: i32):
             linalg.yield %arg0 : i32
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index b07c29f..d6e3a32 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -451,11 +451,11 @@
         %o1 = hal.interface.constant.load[5] : index
         %source = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<?x?xi32>{%d0, %d1}
         %dest = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<?x?xi32>{%d2, %d3}
-        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32,  strided<[?, ?], offset : ?>>
+        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32,  strided<[?, 1], offset : ?>>
         linalg.generic {
             indexing_maps = [affine_map<(d0, d1) -> (d0, d1)> , affine_map<(d0, d1) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel"]}
-            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, ?], offset : ?>>) {
+            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, 1], offset : ?>>) {
           ^bb0(%arg0 : i32, %arg1 : i32):
             linalg.yield %arg0 : i32
           }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir
index c6e3281..2c43e03 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_hoist_allocs.mlir
@@ -33,7 +33,7 @@
   scf.for %iv = %c0 to %arg0 step %c1 {
     %0 = affine.min #map(%iv)
     %1 = memref.alloc() : memref<16x16xi32>
-    %2 = memref.subview %1[%o0, %o1][%c1, %0][1, 1] : memref<16x16xi32> to memref<?x?xi32, strided<[?, 1], offset: ?>>
+    %2 = memref.subview %1[%o0, %o1][%c1, %0][1, 1] : memref<16x16xi32> to memref<?x?xi32, strided<[16, 1], offset: ?>>
     memref.dealloc %1 : memref<16x16xi32>
     scf.yield
   }
@@ -77,7 +77,8 @@
 //       CHECK:   scf.for
 //       CHECK:     %[[SIZE:.+]] = affine.min
 //       CHECK:     %[[SUBVIEW1:.+]] = memref.subview %[[ALLOC]][0, 0] [%[[SIZE]], %[[SIZE]]] [1, 1]
-//       CHECK:     memref.subview %[[SUBVIEW1]]
+//       CHECK:     %[[CAST:.+]] = memref.cast %[[SUBVIEW1]]
+//       CHECK:     memref.subview %[[CAST]]
 //  CHECK-NEXT:   }
 //  CHECK-NEXT:   memref.dealloc %[[ALLOC]] : memref<16x16xi32>
 
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
index 399398e..2c7fdbc 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVMapMemRefStorageClass.cpp
@@ -103,14 +103,21 @@
       return signalPassFailure();
     }
 
-    auto target = spirv::getMemorySpaceToStorageClassTarget(*context);
     spirv::MemorySpaceToStorageClassConverter converter(memorySpaceMap);
+    // Perform the replacement.
+    spirv::convertMemRefTypesAndAttrs(op, converter);
 
-    RewritePatternSet patterns(context);
-    spirv::populateMemorySpaceToStorageClassPatterns(converter, patterns);
-
-    if (failed(applyFullConversion(op, *target, std::move(patterns))))
-      return signalPassFailure();
+    // Check if there are any illegal ops remaining.
+    std::unique_ptr<ConversionTarget> target =
+        spirv::getMemorySpaceToStorageClassTarget(*context);
+    op->walk([&target, this](Operation *childOp) {
+      if (target->isIllegal(childOp)) {
+        childOp->emitOpError("failed to legalize memory space");
+        signalPassFailure();
+        return WalkResult::interrupt();
+      }
+      return WalkResult::advance();
+    });
   }
 };
 
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
index a9348aa..32d3f4e 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
@@ -154,8 +154,7 @@
   {
     OpBuilder::InsertionGuard g(builder);
     builder.setInsertionPointToStart(&funcOp.getFunctionBody().front());
-    auto allocationType =
-        MemRefType::get(staticShape, allocLikeType.getElementType());
+    auto allocationType = allocLikeType.clone(staticShape);
     allocation =
         builder.create<AllocLikeOpType>(loc, allocationType, alignmentAttr);
   }
@@ -163,10 +162,17 @@
   Value subviewOp = builder.create<memref::SubViewOp>(loc, allocation, offsets,
                                                       subviewSizes, strides);
 
+  // Cast it back to the original types to prevent consumer op's verification
+  // error. It could happen when the consumer op is a memref.subview op.
+  if (subviewOp.getType() != allocLikeType) {
+    subviewOp = builder.create<memref::CastOp>(loc, allocLikeType, subviewOp);
+  }
+
   if (std::is_same<AllocLikeOpType, memref::AllocOp>::value) {
     builder.setInsertionPoint(funcOp.getFunctionBody().front().getTerminator());
     builder.create<memref::DeallocOp>(loc, allocation);
   }
+
   return subviewOp;
 }
 
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
index 23c520a..a330e07 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
@@ -60,11 +60,11 @@
         %o1 = hal.interface.constant.load[5] : index
         %source = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<?x?xi32>{%d0, %d1}
         %dest = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<?x?xi32>{%d2, %d3}
-        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, ?], offset : ?>>
+        %dest_view = memref.subview %dest[%o0, %o1] [%d0, %d1] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
         linalg.generic {
             indexing_maps = [affine_map<(d0, d1) -> (d0, d1)> , affine_map<(d0, d1) -> (d0, d1)>],
             iterator_types = ["parallel", "parallel"]}
-            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, ?], offset : ?>>) {
+            ins(%source : memref<?x?xi32>) outs(%dest_view : memref<?x?xi32, strided<[?, 1], offset : ?>>) {
           ^bb0(%arg0 : i32, %arg1 : i32):
             linalg.yield %arg0 : i32
           }
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
index a576420..1fb89bc 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
@@ -35,10 +35,10 @@
       %5 = affine.min #map3(%4, %arg0)
 
       %6 = memref.subview %arg2[%3] [%5] [%c1] : memref<?xf32> to memref<?xf32, strided<[?], offset:?>>
-      %7 = memref.subview %arg1[%3] [%5] [1] : memref<?xf32> to memref<?xf32, strided<[?], offset:?>>
+      %7 = memref.subview %arg1[%3] [%5] [1] : memref<?xf32> to memref<?xf32, strided<[1], offset:?>>
 
       linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel"]}
-        ins(%7 : memref<?xf32, strided<[?], offset:?>>) outs(%6 : memref<?xf32, strided<[?], offset:?>>) {
+        ins(%7 : memref<?xf32, strided<[1], offset:?>>) outs(%6 : memref<?xf32, strided<[?], offset:?>>) {
       ^bb0(%arg4: f32, %arg5: f32):  // no predecessors
         %9 = arith.mulf %arg4, %cst : f32
         linalg.yield %9 : f32
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 82f5f49..0c419b2 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 82f5f494ece899f867088f77c70a99627a48ff3f
+Subproject commit 0c419b222b2bbfe6c180b8c6efaccf52a599c47c