Integrate llvm 1_20_2025 (#19740)

## Reverts

RISC-V backend

-
https://github.com/llvm/llvm-project/commit/169c32eb49fa9b559d388b9b8f4374ff9e1be9be

Python-related changes

-
https://github.com/llvm/llvm-project/commit/08e2c15a287df132ca2186f2d56669219a7ed8a1

NVPTX changes

-
https://github.com/llvm/llvm-project/commit/29b5c18e466cea867e9e785e650d9ec2e5c295b9
-
https://github.com/llvm/llvm-project/commit/e7a83fc74db78445c36a27f113e9b045f90f699a

MLIR API changes (this change is breaking something in HLO)
-
https://github.com/llvm/llvm-project/commit/7a77f14c0abfbecbfb800ea8d974e66d81ee516a.

TOSA (tosa.tile operation is changed and torch-mlir needs to catch up)
-
https://github.com/llvm/llvm-project/commit/f09db6a3af971ab7d9bbc7ba574a8dc0c10b2940


## Updates to Torch-MLIR
Floating-point type changes to be upstreamed:

https://github.com/iree-org/torch-mlir/tree/fix_forward_iree_llvm_integrate_2025

## Style changes
- The `.get<Ty>` and `.is<Ty>` member functions are deprecated upstream, and are
therefore replaced with the free `cast`/`isa` functions in this PR.

---------

Signed-off-by: Nirvedh Meshram <nirvedh@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp b/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp
index d7e8581..0bc1b84 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ConcretizePadResultShape.cpp
@@ -37,7 +37,7 @@
       return val;
     matchPattern(val, m_Constant(&attr));
   } else {
-    attr = llvm::cast<IntegerAttr>(attrOrValue.get<Attribute>());
+    attr = llvm::cast<IntegerAttr>(cast<Attribute>(attrOrValue));
   }
   return builder.createOrFold<arith::ConstantIndexOp>(
       loc, attr.getValue().getSExtValue());
diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
index d408c05..7cdd705 100644
--- a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp
@@ -145,9 +145,7 @@
               auto packOp = cast<tensor::PackOp>(op);
 
               // Do nothing if any of inner tile sizes is dynamic.
-              if (llvm::any_of(packOp.getMixedTiles(), [](OpFoldResult tile) {
-                    return tile.is<Value>();
-                  })) {
+              if (llvm::any_of(packOp.getMixedTiles(), llvm::IsaPred<Value>)) {
                 return {};
               }
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/TensorToVectorVectorizePad.cpp b/compiler/src/iree/compiler/Codegen/Common/TensorToVectorVectorizePad.cpp
index 3d68491..ed748f7 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TensorToVectorVectorizePad.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TensorToVectorVectorizePad.cpp
@@ -38,7 +38,7 @@
       return val;
     matchPattern(val, m_Constant(&attr));
   } else {
-    attr = llvm::cast<IntegerAttr>(attrOrValue.get<Attribute>());
+    attr = cast<IntegerAttr>(cast<Attribute>(attrOrValue));
   }
   return builder.createOrFold<arith::ConstantIndexOp>(loc, attr.getInt());
 }
@@ -101,12 +101,12 @@
 
     /// Return true if the given `attrOrValue` is a constant zero.
     auto isConstantZero = [](OpFoldResult attrOrValue) {
-      if (attrOrValue.is<Attribute>()) {
-        auto attr = llvm::dyn_cast<IntegerAttr>(attrOrValue.get<Attribute>());
-        return attr && attr.getValue().getZExtValue() == 0;
+      if (auto attr = dyn_cast<Attribute>(attrOrValue)) {
+        auto intAttr = dyn_cast<IntegerAttr>(attr);
+        return intAttr && intAttr.getValue().getZExtValue() == 0;
       }
       IntegerAttr attr;
-      return matchPattern(attrOrValue.get<Value>(), m_Constant(&attr)) &&
+      return matchPattern(cast<Value>(attrOrValue), m_Constant(&attr)) &&
              attr.getValue().getZExtValue() == 0;
     };
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp b/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp
index 99bcc96..a15d397 100644
--- a/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp
@@ -305,7 +305,7 @@
 void DistributionLayout::onUpdate(DataFlowSolver *solver) const {
   AnalysisState::onUpdate(solver);
 
-  Value value = anchor.get<Value>();
+  Value value = cast<Value>(anchor);
 
   if (propagation) {
     // Make propagation run again on all users of this value.
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_softmax.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_softmax.mlir
index 777ca50..43da04b 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_softmax.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_softmax.mlir
@@ -13,7 +13,7 @@
 // CHECK:      func.func @softmax(%[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x16x32xf32>) -> tensor<2x16x32xf32> {
 // CHECK:        %[[D0:.+]] = tensor.empty() : tensor<2x16x32xf32>
 // CHECK:        %[[D1:.+]] = tensor.empty() : tensor<2x16xf32>
-// CHECK:        %[[CST:.+]] = arith.constant -3.40282347E+38 : f32
+// CHECK:        %[[CST:.+]] = arith.constant 0xFFC00000 : f32
 // CHECK:        %[[D2:.+]] = linalg.fill ins(%[[CST]] : f32) outs(%[[D1]] : tensor<2x16xf32>) -> tensor<2x16xf32>
 // CHECK:        %[[D3:.+]] = linalg.generic {indexing_maps = [#[[MAP]], #[[MAP1]]], iterator_types = ["parallel",
 // CHECK-SAME:     "parallel", "reduction"]} ins(%[[ARG0]] : tensor<2x16x32xf32>) outs(%[[D2]] : tensor<2x16xf32>) {
@@ -49,7 +49,7 @@
 // CHECK-NO-FUSE:      func.func @softmax(%[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x16x32xf32>) -> tensor<2x16x32xf32> {
 // CHECK-NO-FUSE:        %[[D0:.+]] = tensor.empty() : tensor<2x16x32xf32>
 // CHECK-NO-FUSE:        %[[D1:.+]] = tensor.empty() : tensor<2x16xf32>
-// CHECK-NO-FUSE:        %[[CST:.+]] = arith.constant -3.40282347E+38 : f32
+// CHECK-NO-FUSE:        %[[CST:.+]] = arith.constant 0xFFC00000 : f32
 // CHECK-NO-FUSE:        %[[D2:.+]] = linalg.fill ins(%[[CST]] : f32) outs(%[[D1]] : tensor<2x16xf32>) -> tensor<2x16xf32>
 // CHECK-NO-FUSE:        %[[D3:.+]] = linalg.generic {indexing_maps = [#[[MAP]], #[[MAP1]]], iterator_types = ["parallel",
 // CHECK-NO-FUSE-SAME:     "parallel", "reduction"]} ins(%[[ARG0]] : tensor<2x16x32xf32>) outs(%[[D2]] : tensor<2x16xf32>) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 6045033..58f7fb3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -843,12 +843,12 @@
             currentStrideVal = builder.create<LLVM::ConstantOp>(
                 loc, llvmIndexType, currentStrideInt.value());
           } else {
-            currentStrideVal = currentStride.get<Value>();
+            currentStrideVal = cast<Value>(currentStride);
           }
           currentStride =
               builder.create<LLVM::MulOp>(loc, currentStrideVal, dim)
                   .getResult();
-          desc.setStride(builder, loc, i - 1, currentStride.get<Value>());
+          desc.setStride(builder, loc, i - 1, cast<Value>(currentStride));
         } else {
           currentStride = builder.getIndexAttr(strides[i - 1]);
           desc.setConstantStride(builder, loc, i - 1, strides[i - 1]);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index ecedd1f..0c52f1a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -1788,8 +1788,8 @@
   // backends prefer to not decompose the ops.
   DictionaryAttr pipelineConfig;
   auto target = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
-  bool hasDynamicInnerTile = llvm::any_of(
-      op.getMixedTiles(), [](OpFoldResult ofr) { return ofr.is<Value>(); });
+  bool hasDynamicInnerTile =
+      llvm::any_of(op.getMixedTiles(), llvm::IsaPred<Value>);
   if (!hasDynamicInnerTile && !isX86(target) && !isRISCV(target)) {
     pipelineConfig = getPipelineConfWithDecompositionAttr(op.getContext());
   }
@@ -1828,8 +1828,8 @@
   // backends prefer to not decompose the ops.
   DictionaryAttr pipelineConfig;
   auto target = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
-  bool hasDynamicInnerTile = llvm::any_of(
-      op.getMixedTiles(), [](OpFoldResult ofr) { return ofr.is<Value>(); });
+  bool hasDynamicInnerTile =
+      llvm::any_of(op.getMixedTiles(), llvm::IsaPred<Value>);
   if (!hasDynamicInnerTile && !isX86(target) && !isRISCV(target)) {
     pipelineConfig = getPipelineConfWithDecompositionAttr(op.getContext());
   }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp
index 4cbd304..993d247 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/VectorContractCustomKernels.cpp
@@ -684,7 +684,7 @@
   case MMTKernel::ScalarType::I32:
     return IntegerType::get(context, 32, IntegerType::Signless);
   case MMTKernel::ScalarType::F32:
-    return FloatType::getF32(context);
+    return Float32Type::get(context);
   }
   assert(false);
   return Type();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
index 2fef00a..df489c2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
@@ -433,12 +433,12 @@
               currentStrideVal = rewriter.create<LLVM::ConstantOp>(
                   loc, llvmIndexType, currentStrideInt.value());
             } else {
-              currentStrideVal = currentStride.get<Value>();
+              currentStrideVal = cast<Value>(currentStride);
             }
             currentStride =
                 rewriter.create<LLVM::MulOp>(loc, currentStrideVal, dim)
                     .getResult();
-            desc.setStride(rewriter, loc, i - 1, currentStride.get<Value>());
+            desc.setStride(rewriter, loc, i - 1, cast<Value>(currentStride));
           } else {
             currentStride = rewriter.getIndexAttr(strides[i - 1]);
             desc.setConstantStride(rewriter, loc, i - 1, strides[i - 1]);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
index 1a4ffe4..bc9465c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
@@ -38,7 +38,7 @@
       FailureOr<int64_t> upperBound =
           ValueBoundsConstraintSet::computeConstantBound(
               presburger::BoundType::UB,
-              {size.get<Value>(), /*dim=*/std::nullopt},
+              {cast<Value>(size), /*dim=*/std::nullopt},
               /*stopCondition=*/nullptr, /*closedUB=*/true);
       if (failed(upperBound))
         return failure();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
index cee1914..3376d29 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
@@ -195,7 +195,6 @@
 //         CHECK:    arith.maxnumf
 //         CHECK:    gpu.shuffle  xor
 //         CHECK:    arith.maxnumf
-//         CHECK:    arith.maxnumf
 //         CHECK:    vector.broadcast %{{.*}} : f32 to vector<4xf32>
 //         CHECK:    scf.for {{.*}} -> (vector<4xf32>) {
 //         CHECK:      vector.transfer_read
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
index 5aa9ccb..611cf08 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
@@ -1000,11 +1000,11 @@
     for (auto [idx, size] :
          llvm::zip_equal(extractOp.getMixedPosition(), maskOp.getOperands())) {
       Value idxVal;
-      if (idx.is<Attribute>()) {
+      if (auto attr = dyn_cast<Attribute>(idx)) {
         idxVal = rewriter.create<arith::ConstantIndexOp>(
-            loc, cast<IntegerAttr>(idx.get<Attribute>()).getInt());
+            loc, dyn_cast<IntegerAttr>(attr).getInt());
       } else {
-        idxVal = idx.get<Value>();
+        idxVal = dyn_cast<Value>(idx);
       }
       Value cmpIdx = rewriter.create<arith::CmpIOp>(
           loc, arith::CmpIPredicate::slt, idxVal, size);
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
index 55f2e2f..236d25f 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
@@ -229,7 +229,6 @@
 //         CHECK:    arith.maxnumf
 //         CHECK:    gpu.shuffle  xor
 //         CHECK:    arith.maxnumf
-//         CHECK:    arith.maxnumf
 //         CHECK:    vector.splat %{{.*}} : vector<4xf32>
 //         CHECK:    scf.for {{.*}} -> (vector<4xf32>) {
 //         CHECK:      vector.transfer_read
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
index 29c0bc0..ae334b8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
@@ -76,18 +76,18 @@
 
 //     CHECK-DAG: %[[CSTVEC4XI32_255:.+]] = spirv.Constant dense<255> : vector<4xi32>
 //     CHECK-DAG: %[[CSTVEC4XI32_0:.+]] = spirv.Constant dense<0> : vector<4xi32>
-//     CHECK-DAG: %[[CSTVEC4XI32_0_4:.+]] = spirv.Constant dense<[0, 4, 0, 4]> : vector<4xi32>
-//     CHECK-DAG: %[[CSTVEC4XI32_15__16:.+]] = spirv.Constant dense<[15, -16, 15, -16]> : vector<4xi32>
+//     CHECK-DAG: %[[CSTVEC2XI32_4:.+]] = spirv.Constant dense<4> : vector<2xi32>
+//     CHECK-DAG: %[[CSTVEC2XI32_15:.+]] = spirv.Constant dense<15> : vector<2xi32>
 
 //         CHECK: spirv.mlir.loop
 
 // Load the quantized weight and get 8xi4 out of it.
 //         CHECK:   %[[LOAD:.+]] = spirv.Load "StorageBuffer" %{{.+}} : vector<4xi32>
 //         CHECK:   %[[SHUF01:.+]] = spirv.VectorShuffle [0 : i32, 1 : i32] %[[LOAD]], %[[LOAD]] : vector<4xi32>, vector<4xi32> -> vector<2xi32>
-//         CHECK:   %[[SHUF0011:.+]] = spirv.VectorShuffle [0 : i32, 0 : i32, 1 : i32, 1 : i32] %[[SHUF01]], %[[SHUF01]] : vector<2xi32>, vector<2xi32> -> vector<4xi32>
-//         CHECK:   %[[MASKED:.+]] = spirv.BitwiseAnd %[[SHUF0011]], %[[CSTVEC4XI32_15__16]] : vector<4xi32>
-//         CHECK:   %[[SHIFTED:.+]] = spirv.ShiftRightLogical %[[MASKED]], %[[CSTVEC4XI32_0_4]] : vector<4xi32>, vector<4xi32>
-//         CHECK:   %[[LOW4HIGH4_ZEROUPPER:.+]] = spirv.BitwiseAnd %[[SHIFTED]], %[[CSTVEC4XI32_255]] : vector<4xi32>
+//         CHECK:   %[[MASKED:.+]] = spirv.BitwiseAnd %[[SHUF01]], %[[CSTVEC2XI32_15]] : vector<2xi32>
+//         CHECK:   %[[SHIFTED:.+]] = spirv.ShiftRightLogical %[[SHUF01]], %[[CSTVEC2XI32_4]] : vector<2xi32>, vector<2xi32>
+//         CHECK:   %[[SHUF0011:.+]] = spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32] %[[MASKED]], %[[SHIFTED]] : vector<2xi32>, vector<2xi32> -> vector<4xi32>
+//         CHECK:   %[[LOW4HIGH4_ZEROUPPER:.+]] = spirv.BitwiseAnd %[[SHUF0011]], %[[CSTVEC4XI32_255]] : vector<4xi32>
 
 //         CHECK:   %[[SHUF23:.+]] = spirv.VectorShuffle [2 : i32, 3 : i32] %[[LOAD:.+]], %[[LOAD:.+]] : vector<4xi32>, vector<4xi32> -> vector<2xi32>
 
@@ -186,8 +186,6 @@
 //     CHECK-DAG: %[[C0:.+]] = spirv.Constant 0 : i32
 //     CHECK-DAG: %[[CSTVEC4XF16_1:.+]] = spirv.Constant dense<1.000000e+00> : vector<4xf16>
 //     CHECK-DAG: %[[CSTVEC4XI32_255:.+]] = spirv.Constant dense<255> : vector<4xi32>
-//     CHECK-DAG: %[[CSTVEC2XI32_1:.+]] = spirv.Constant dense<[0, 4, 0, 4]> : vector<4xi32>
-//     CHECK-DAG: %[[CSTVEC2XI32_2:.+]] = spirv.Constant dense<[15, -16, 15, -16]> : vector<4xi32>
 
 //         CHECK: %[[WIDX:.+]] = spirv.CompositeExtract %{{.*}}[0 : i32] : vector<3xi32>
 //         CHECK: %[[PCPTR:.+]] = spirv.AccessChain %{{.*}}[{{.*}}, %[[C0]]] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
@@ -209,9 +207,6 @@
 //         CHECK:   %[[ACCESS:.+]] = spirv.AccessChain %[[RADDR]][{{.*}}, %[[OFFSET]]] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<i32, stride=4> [0])>, StorageBuffer>, i32, i32
 //         CHECK:   spirv.Load "StorageBuffer" %[[ACCESS]] : i32
 
-//         CHECK:   spirv.ShiftRightLogical %{{.*}}, %[[CSTVEC2XI32_1]] : vector<4xi32>, vector<4xi32>
-//         CHECK:   spirv.BitwiseAnd %{{.*}}, %[[CSTVEC4XI32_255]] : vector<4xi32>
-
 //         CHECK:   spirv.ConvertUToF %{{.+}} : vector<4xi32> to vector<4xf16>
 //         CHECK:   spirv.FSub %{{.+}}, %{{.+}} : vector<4xf16>
 // CHECK-COUNT-2:   spirv.FMul %{{.+}}, %{{.+}} : vector<4xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
index 40c26bd..452720e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
@@ -45,12 +45,16 @@
 //   CHECK-LABEL: spirv.func @i4_dequant()
 
 //         CHECK: %[[BYTE1:.+]] = spirv.VectorShuffle [0 : i32, 1 : i32] {{.*}} : vector<4xi32>, vector<4xi32> -> vector<2xi32>
-//         CHECK: %[[COPIED:.+]] = spirv.VectorShuffle [0 : i32, 0 : i32, 1 : i32, 1 : i32] %[[BYTE1]], %[[BYTE1]] : vector<2xi32>, vector<2xi32> -> vector<4xi32>
-//         CHECK: %[[MASKED:.+]] = spirv.BitwiseAnd %[[COPIED]]
-//         CHECK: %[[SHIFTED:.+]] = spirv.ShiftRightLogical %[[MASKED]]
-//         CHECK: %[[ZEROUPPER:.+]] = spirv.BitwiseAnd %[[SHIFTED]]
+//         CHECK: %[[MASKED:.+]] = spirv.BitwiseAnd %[[BYTE1]]
+//         CHECK: %[[SHIFTED:.+]] = spirv.ShiftRightLogical %[[BYTE1]]
+//         CHECK: %[[COPIED:.+]] = spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32] %[[MASKED]], %[[SHIFTED]] : vector<2xi32>, vector<2xi32> -> vector<4xi32>
+//         CHECK: %[[MASKED2:.+]] = spirv.BitwiseAnd %[[COPIED]]
 //         CHECK: spirv.VectorShuffle [2 : i32, 3 : i32] {{.*}} : vector<4xi32>, vector<4xi32> -> vector<2xi32>
-// CHECK-COUNT-3: spirv.VectorShuffle [0 : i32, 0 : i32, 1 : i32, 1 : i32]
+//         CHECK: spirv.VectorShuffle [0 : i32, 1 : i32]
+//         CHECK: spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32]
+//         CHECK: spirv.VectorShuffle [2 : i32, 3 : i32]
+//         CHECK: spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32]
+//     CHECK-NOT: spirv.VectorShuffle
 
 // CHECK-COUNT-4: spirv.ConvertUToF {{.+}} : vector<4xi32> to vector<4xf32>
 // CHECK-COUNT-4: spirv.FSub
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
index f63891c..d583c1f 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
@@ -67,10 +67,10 @@
     SmallVector<OpFoldResult> clonedOfrs;
     clonedOfrs.reserve(ofrs.size());
     for (auto ofr : ofrs) {
-      if (ofr.is<Attribute>()) {
+      if (isa<Attribute>(ofr)) {
         clonedOfrs.push_back(ofr);
       } else {
-        clonedOfrs.push_back(bvm.lookupOrDefault(ofr.get<Value>()));
+        clonedOfrs.push_back(bvm.lookupOrDefault(cast<Value>(ofr)));
       }
     }
     return clonedOfrs;
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
index 2666072..a10982b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -1013,7 +1013,7 @@
 
     /// Set the callee for this operation.
     void setCalleeFromCallable(CallInterfaceCallable callee) {
-      (*this)->setAttr("callee", callee.get<SymbolRefAttr>());
+      (*this)->setAttr("callee", cast<SymbolRefAttr>(callee));
     }
 
     ValueRange getOperandDynamicDims(unsigned idx) {
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp
index 4aa9142..86158fb 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.cpp
@@ -293,7 +293,7 @@
       return failure();
     for (int64_t i = 0; i < shapedType.getRank(); ++i)
       if (shapedType.isDynamicDim(i))
-        dynamicDims.push_back(dims[opResult.getResultNumber()][i].get<Value>());
+        dynamicDims.push_back(cast<Value>(dims[opResult.getResultNumber()][i]));
     return success();
   }
 
diff --git a/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/test/tiling.mlir b/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/test/tiling.mlir
index 6d2f057..403f5e2 100644
--- a/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/test/tiling.mlir
+++ b/compiler/src/iree/compiler/Dialect/LinalgExt/Transforms/test/tiling.mlir
@@ -2578,13 +2578,13 @@
     transform.yield
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<(d0, d1) -> (d0 + d1 * 4)>
+//      CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (d0 * 4 + s0)>
 //      CHECK: func @custom_op_index_handling(%[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xindex>,
 //      CHECK:   scf.forall (%[[IV:[a-zA-Z0-9]+]],
 //      CHECK:     %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
 //      CHECK:     iree_linalg_ext.custom_op
 // CHECK-SAME:         ins(%[[SLICE]]
 //      CHECK:       %[[NEW_INDEX:.+]] = iree_linalg_ext.index 0 : index
-//      CHECK:       %[[INDEX:.+]] = affine.apply #[[MAP]](%[[NEW_INDEX]], %[[IV]])
+//      CHECK:       %[[INDEX:.+]] = affine.apply #[[MAP]](%[[IV]])[%[[NEW_INDEX]]]
 //      CHECK:       linalg.generic
 // CHECK-SAME:           ins(%{{.+}}, %[[INDEX]] :
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
index 768cbde..5de4fee 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
@@ -2642,7 +2642,7 @@
 
     /// Set the callee for this operation.
     void setCalleeFromCallable(CallInterfaceCallable callee) {
-      (*this)->setAttr("callee", callee.get<SymbolRefAttr>());
+      (*this)->setAttr("callee", cast<SymbolRefAttr>(callee));
     }
 
     Value getOperandSize(unsigned idx) {
@@ -3322,7 +3322,7 @@
 
     /// Set the callee for this operation.
     void setCalleeFromCallable(CallInterfaceCallable callee) {
-      (*this)->setAttr("callee", callee.get<SymbolRefAttr>());
+      (*this)->setAttr("callee", cast<SymbolRefAttr>(callee));
     }
 
     Value getOperandSize(unsigned idx) {
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp
index 32c7819..aa98e64 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Explorer.cpp
@@ -164,7 +164,7 @@
 void Explorer::initializeInverseCallGraph() {
   forEachFunctionLikeOp([&](FunctionOpInterface parentOp) {
     parentOp->walk([&](CallOpInterface callOp) {
-      if (callOp.getCallableForCallee().is<Value>()) {
+      if (isa<Value>(callOp.getCallableForCallee())) {
         // Indirect calls can't be tracked in the call graph, so ensure we mark
         // the incomplete flag so that any call graph queries return
         // TraversalResult::INCOMPLETE.
@@ -777,7 +777,7 @@
     // Indirect calls would require us to perform an analysis to first see if we
     // can make them direct or annotate the call sites with the possible
     // targets.
-    if (callOp.getCallableForCallee().is<Value>()) {
+    if (isa<Value>(callOp.getCallableForCallee())) {
       LLVM_DEBUG({
         llvm::dbgs()
             << "  !! traversal incomplete due to unanalyzable indirect call: ";
@@ -786,7 +786,7 @@
       });
       return TraversalResult::INCOMPLETE;
     }
-    auto targetSymbol = callOp.getCallableForCallee().get<SymbolRefAttr>();
+    auto targetSymbol = cast<SymbolRefAttr>(callOp.getCallableForCallee());
     auto targetOp = symbolTables.lookupNearestSymbolFrom<CallableOpInterface>(
         callOp, targetSymbol);
     assert(targetOp && "call target not found");
@@ -1031,7 +1031,7 @@
   // Move across a call to the callee entry block.
   auto traverseCallOp = [&](CallOpInterface callOp, unsigned operandIdx) {
     auto callable = callOp.getCallableForCallee();
-    if (callable.is<Value>()) {
+    if (isa<Value>(callable)) {
       LLVM_DEBUG({
         llvm::dbgs()
             << "  !! traversal incomplete due to unanalyzable indirect call: ";
@@ -1040,7 +1040,7 @@
       });
       return TraversalResult::INCOMPLETE;
     }
-    auto targetSymbol = callable.get<SymbolRefAttr>();
+    auto targetSymbol = cast<SymbolRefAttr>(callable);
     auto targetOp = symbolTables.lookupNearestSymbolFrom<CallableOpInterface>(
         callOp, targetSymbol);
     assert(targetOp && "call target not found");
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
index 97eae87..0b09d2d 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
@@ -820,7 +820,7 @@
       return (*this)->getAttrOfType<SymbolRefAttr>("callee");
     }
     void setCalleeFromCallable(CallInterfaceCallable callee) {
-      (*this)->setAttr("callee", callee.get<SymbolRefAttr>());
+      (*this)->setAttr("callee", cast<SymbolRefAttr>(callee));
     }
 
     // Clones the call and potentially expands each operand and result.
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
index 97f3263..ed634da 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
@@ -66,7 +66,7 @@
     if (floatType.getIntOrFloatBitWidth() < 32) {
       if (targetOptions_.f32Extension) {
         // Promote f16 -> f32.
-        return FloatType::getF32(floatType.getContext());
+        return Float32Type::get(floatType.getContext());
       } else {
         // f32 is not supported; can't compile.
         return std::nullopt;
@@ -86,7 +86,7 @@
                  targetOptions_.truncateUnsupportedFloats) {
         // f64 is not supported and we still want to compile, so truncate to
         // f32 (unsafe if all bits are actually required!).
-        return FloatType::getF32(floatType.getContext());
+        return Float32Type::get(floatType.getContext());
       }
     }
     return std::nullopt;
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp b/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
index f227292..d9a5522 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
@@ -1531,7 +1531,7 @@
 
 OpFoldResult TruncF64F32Op::fold(FoldAdaptor operands) {
   return constFoldConversionOp<FloatAttr>(
-      FloatType::getF32(getContext()), operands.getOperand(),
+      Float32Type::get(getContext()), operands.getOperand(),
       [&](const APFloat &a) { return APFloat(a.convertToFloat()); });
 }
 
@@ -1597,7 +1597,7 @@
 
 OpFoldResult ExtF32F64Op::fold(FoldAdaptor operands) {
   return constFoldConversionOp<FloatAttr>(
-      FloatType::getF64(getContext()), operands.getOperand(),
+      Float64Type::get(getContext()), operands.getOperand(),
       [&](const APFloat &a) { return APFloat(a.convertToDouble()); });
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp
index 203a15b..edacfc5 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.cpp
@@ -611,11 +611,11 @@
 static FloatType getFloatType(int bitwidth, MLIRContext *context) {
   switch (bitwidth) {
   case 16:
-    return FloatType::getF16(context);
+    return Float16Type::get(context);
   case 32:
-    return FloatType::getF32(context);
+    return Float32Type::get(context);
   case 64:
-    return FloatType::getF64(context);
+    return Float64Type::get(context);
   default:
     assert(false && "unhandled floating point type");
     return {};
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
index f7e5944..a620f55 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
@@ -4223,7 +4223,7 @@
 
     /// Set the callee for this operation.
     void setCalleeFromCallable(CallInterfaceCallable callee) {
-      (*this)->setAttr("callee", callee.get<SymbolRefAttr>());
+      (*this)->setAttr("callee", cast<SymbolRefAttr>(callee));
     }
 
     /// Get the argument operands to the called function as a mutable range, this is
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 072c320..0bdbc1c 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 072c320ee174178efcbe420177b8c6f72858f4ef
+Subproject commit 0bdbc1cb2d2b59e9bfe2bfa080c1bade727ef0f6
diff --git a/third_party/torch-mlir b/third_party/torch-mlir
index a6ae057..ba00913 160000
--- a/third_party/torch-mlir
+++ b/third_party/torch-mlir
@@ -1 +1 @@
-Subproject commit a6ae057c1e0b21b14fd5387b50066a39e2ed0a31
+Subproject commit ba00913d21df03a2c269ef74e682441a2569aca2