Integrate at llvm/llvm-project@bf15f1e4 and bump dependencies (#11341)

* LLVM: bf15f1e489aa2f1ac13268c9081a992a8963eb5b
* MHLO: 312c01cb1221431c18103f31c68f2439928f7abe
* TF: 2116b012412b274f1471496ed15852175d90628b

Extra changes:
* move from I64ArrayAttr to DenseI64ArrayAttr as some interfaces have
changed
* Use the new clone helper instead of the linalg member that was removed
* Fix a few LLVM interface changes

Co-authored-by: Mahesh Ravishankar <ravishankarm@google.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPass.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPass.cpp
index 6780903..db69401 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPass.cpp
@@ -53,8 +53,8 @@
     return failure();
   }
 
-  SmallVector<OpFoldResult, 4> targetShape = getMixedValues(
-      builder.getIndexArrayAttr(dispatchTensorType.getShape()), dynamicDims);
+  SmallVector<OpFoldResult, 4> targetShape =
+      getMixedValues(dispatchTensorType.getShape(), dynamicDims, builder);
   SmallVector<OpFoldResult> innerTileSizes = llvm::to_vector(llvm::map_range(
       encodingInfo->innerTileSizes,
       [&](int64_t v) -> OpFoldResult { return builder.getIndexAttr(v); }));
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp b/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
index 6e8742f..1ada128 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TileAndDistributeToWorkgroupsPass.cpp
@@ -133,12 +133,8 @@
     encodingInfo.innerTileSizes.push_back(
         tileSize.get<Attribute>().cast<IntegerAttr>().getInt());
   }
-  encodingInfo.innerDimsPos = llvm::to_vector(llvm::map_range(
-      packOp.getInnerDimsPos(),
-      [](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); }));
-  encodingInfo.outerDimsPerm = llvm::to_vector(llvm::map_range(
-      packOp.getOuterDimsPerm(),
-      [](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); }));
+  encodingInfo.innerDimsPos = llvm::to_vector(packOp.getInnerDimsPos());
+  encodingInfo.outerDimsPerm = llvm::to_vector(packOp.getOuterDimsPerm());
   return encodingInfo;
 }
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
index 5074053..d2e3d2e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
@@ -482,15 +482,14 @@
   // bugs ensue.
   MLIRContext *ctx = builder.getContext();
   auto operationType = pdl::OperationType::get(ctx);
-  auto staticTileSizesAttr = builder.getI64ArrayAttr(staticTileSizes);
 
   build(builder, result,
         /*resultTypes=*/TypeRange{operationType, operationType},
         /*target=*/target,
         /*numThreads=*/ValueRange{},
         /*tileSizes=*/dynamicTileSizes,
-        /*staticNumThreads=*/ArrayAttr(),
-        /*staticTileSizes=*/staticTileSizesAttr,
+        /*staticNumThreads=*/ArrayRef<int64_t>(),
+        /*staticTileSizes=*/staticTileSizes,
         /*mapping=*/mappingAttr);
 }
 
@@ -520,14 +519,13 @@
   // bugs ensue.
   MLIRContext *ctx = builder.getContext();
   auto operationType = pdl::OperationType::get(ctx);
-  auto staticNumThreadsAttr = builder.getI64ArrayAttr(staticNumThreads);
   build(builder, result,
         /*resultTypes=*/TypeRange{operationType, operationType},
         /*target=*/target,
         /*numThreads=*/dynamicNumThreads,
         /*tileSizes=*/ValueRange{},
-        /*staticNumThreads=*/staticNumThreadsAttr,
-        /*staticTileSizes=*/ArrayAttr(),
+        /*staticNumThreads=*/staticNumThreads,
+        /*staticTileSizes=*/ArrayRef<int64_t>(),
         /*mapping=*/mappingAttr);
 }
 
@@ -616,12 +614,14 @@
 
 SmallVector<OpFoldResult> transform_dialect::
     TileToForeachThreadAndWorkgroupCountRegionOp::getMixedNumThreads() {
-  return getMixedValues(getStaticNumThreads(), getNumThreads());
+  Builder b(getContext());
+  return getMixedValues(getStaticNumThreads(), getNumThreads(), b);
 }
 
 SmallVector<OpFoldResult> transform_dialect::
     TileToForeachThreadAndWorkgroupCountRegionOp::getMixedTileSizes() {
-  return getMixedValues(getStaticTileSizes(), getTileSizes());
+  Builder b(getContext());
+  return getMixedValues(getStaticTileSizes(), getTileSizes(), b);
 }
 
 LogicalResult
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td
index 4230d77..c95535a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensionsOps.td
@@ -246,8 +246,8 @@
   let arguments = (ins PDL_Operation:$target,
                    Variadic<PDL_Operation>:$num_threads,
                    Variadic<PDL_Operation>:$tile_sizes,
-                   DefaultValuedAttr<I64ArrayAttr, "{}">:$static_num_threads,
-                   DefaultValuedAttr<I64ArrayAttr, "{}">:$static_tile_sizes,
+                   DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$static_num_threads,
+                   DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$static_tile_sizes,
                    OptionalAttr<DeviceMappingArrayAttr>:$mapping);
   let results = (outs PDL_Operation:$foreach_thread_op,
                       PDL_Operation:$tiled_op);
diff --git a/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp b/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp
index e59a47e..92ff157 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TypePropagationPass.cpp
@@ -175,8 +175,8 @@
 
     // 3. Create a clone of the operation without cloning its regions.
     auto linalgOp = cast<linalg::LinalgOp>(genericOp.getOperation());
-    auto modifiedOp = cast<linalg::LinalgOp>(linalgOp.cloneWithoutRegions(
-        rewriter, genericOp.getLoc(), resultTypes, adaptor.getOperands()));
+    auto modifiedOp = cast<linalg::LinalgOp>(mlir::cloneWithoutRegions(
+        rewriter, linalgOp, resultTypes, adaptor.getOperands()));
 
     if (genericOp->getNumRegions() != 1) {
       return genericOp.emitOpError("unhanled linalg op with numRegions != 1");
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
index f46827c..77fe735 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
@@ -288,8 +288,8 @@
   // Clone the op, but use the new operands. Move the existing block into the
   // new op. Since the new op does not have any tensor results, it does not
   // return anything.
-  auto newOp = cast<IREE::LinalgExt::LinalgExtOp>(dspOp.cloneWithoutRegions(
-      rewriter, op.getLoc(), /*resultTypes=*/TypeRange{}, newOperands));
+  auto newOp = cast<IREE::LinalgExt::LinalgExtOp>(mlir::cloneWithoutRegions(
+      rewriter, op, /*resultTypes=*/TypeRange{}, newOperands));
   int64_t numRegions = op->getNumRegions();
   for (int64_t i = 0; i < numRegions; ++i) {
     rewriter.inlineRegionBefore(op->getRegion(i), newOp->getRegion(i),
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index fac8530..b0aa226 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -1058,7 +1058,7 @@
 
   // Fixup for making tileSizes be multiple of inner_tile_sizes.
   SmallVector<int64_t> innerTiles = op.getStaticTiles();
-  SmallVector<int64_t> dimPos = extractFromI64ArrayAttr(op.getInnerDimsPos());
+  ArrayRef<int64_t> dimPos = op.getInnerDimsPos();
   for (auto it : llvm::zip(dimPos, innerTiles)) {
     int64_t pos = std::get<0>(it);
     int64_t size = std::get<1>(it);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
index 3682d86..4dc1b9f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp
@@ -76,7 +76,7 @@
   auto resultTensorTypes = ValueRange(paddedOperands)
                                .take_back(linalgOp.getNumDpsInits())
                                .getTypes();
-  paddedOp = linalgOp.clone(rewriter, loc, resultTensorTypes, paddedOperands);
+  paddedOp = mlir::clone(rewriter, linalgOp, resultTensorTypes, paddedOperands);
 
   // Slice out the original shape from the padded result to pass on to
   // consumers. The original linalg op is used to provide the dims for the reify
diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
index a53f5f2..bbd7cd8 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
@@ -666,9 +666,8 @@
     builder.setInsertionPoint(subviewUse);
     Type newType = memref::SubViewOp::inferRankReducedResultType(
         subviewUse.getType().getShape(), val.getType().cast<MemRefType>(),
-        extractFromI64ArrayAttr(subviewUse.getStaticOffsets()),
-        extractFromI64ArrayAttr(subviewUse.getStaticSizes()),
-        extractFromI64ArrayAttr(subviewUse.getStaticStrides()));
+        subviewUse.getStaticOffsets(), subviewUse.getStaticSizes(),
+        subviewUse.getStaticStrides());
     Value newSubview = builder.create<memref::SubViewOp>(
         subviewUse->getLoc(), newType.cast<MemRefType>(), val,
         subviewUse.getMixedOffsets(), subviewUse.getMixedSizes(),
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp b/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp
index 50bc697..492e3d0 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/VMVX/LowerLinalgMicrokernels.cpp
@@ -923,15 +923,17 @@
     }
 
     int64_t innerDimsPos[2] = {0, 1};
-    if (ArrayAttr innerDimsPosAttr = op.getInnerDimsPosAttr()) {
-      innerDimsPos[0] = innerDimsPosAttr[0].cast<IntegerAttr>().getInt();
-      innerDimsPos[1] = innerDimsPosAttr[1].cast<IntegerAttr>().getInt();
+    ArrayRef<int64_t> innerDimsPosArr = op.getInnerDimsPos();
+    if (!innerDimsPosArr.empty()) {
+      innerDimsPos[0] = innerDimsPosArr[0];
+      innerDimsPos[1] = innerDimsPosArr[1];
     }
 
     int64_t outerDimsPerm[2] = {0, 1};
-    if (ArrayAttr outerDimsPermAttr = op.getOuterDimsPermAttr()) {
-      outerDimsPerm[0] = outerDimsPermAttr[0].cast<IntegerAttr>().getInt();
-      outerDimsPerm[1] = outerDimsPermAttr[1].cast<IntegerAttr>().getInt();
+    ArrayRef<int64_t> outerDimsPosArr = op.getOuterDimsPerm();
+    if (!outerDimsPosArr.empty()) {
+      outerDimsPerm[0] = outerDimsPosArr[0];
+      outerDimsPerm[1] = outerDimsPosArr[1];
     }
 
     int flags = 0;
@@ -1059,15 +1061,17 @@
     }
 
     int64_t innerDimsPos[2] = {0, 1};
-    if (ArrayAttr innerDimsPosAttr = op.getInnerDimsPosAttr()) {
-      innerDimsPos[0] = innerDimsPosAttr[0].cast<IntegerAttr>().getInt();
-      innerDimsPos[1] = innerDimsPosAttr[1].cast<IntegerAttr>().getInt();
+    ArrayRef<int64_t> innerDimsPosArr = op.getInnerDimsPos();
+    if (!innerDimsPosArr.empty()) {
+      innerDimsPos[0] = innerDimsPosArr[0];
+      innerDimsPos[1] = innerDimsPosArr[1];
     }
 
     int64_t outerDimsPerm[2] = {0, 1};
-    if (ArrayAttr outerDimsPermAttr = op.getOuterDimsPermAttr()) {
-      outerDimsPerm[0] = outerDimsPermAttr[0].cast<IntegerAttr>().getInt();
-      outerDimsPerm[1] = outerDimsPermAttr[1].cast<IntegerAttr>().getInt();
+    ArrayRef<int64_t> outerDimsPosArr = op.getOuterDimsPerm();
+    if (!outerDimsPosArr.empty()) {
+      outerDimsPerm[0] = outerDimsPosArr[0];
+      outerDimsPerm[1] = outerDimsPosArr[1];
     }
 
     int flags = 0;
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
index a596b13..fb100b4 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
@@ -582,9 +582,7 @@
                        ShapedType::kDynamic);
 
   build(builder, state, returnType, source, sourceDynamicDims, offsets, sizes,
-        strides, builder.getI64ArrayAttr(staticOffsets),
-        builder.getI64ArrayAttr(staticSizes),
-        builder.getI64ArrayAttr(staticStrides));
+        strides, staticOffsets, staticSizes, staticStrides);
   state.addAttributes(attributes);
 }
 
@@ -689,9 +687,7 @@
                        ShapedType::kDynamic);
 
   build(builder, state, ArrayRef<Type>(), value, target, targetDynamicDims,
-        offsets, sizes, strides, builder.getI64ArrayAttr(staticOffsets),
-        builder.getI64ArrayAttr(staticSizes),
-        builder.getI64ArrayAttr(staticStrides));
+        offsets, sizes, strides, staticOffsets, staticSizes, staticStrides);
   state.addAttributes(attributes);
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
index 19b3db6..3299fdd 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -384,9 +384,9 @@
     Variadic<Index>:$offsets,
     Variadic<Index>:$sizes,
     Variadic<Index>:$strides,
-    I64ArrayAttr:$static_offsets,
-    I64ArrayAttr:$static_sizes,
-    I64ArrayAttr:$static_strides
+    DenseI64ArrayAttr:$static_offsets,
+    DenseI64ArrayAttr:$static_sizes,
+    DenseI64ArrayAttr:$static_strides
   );
   let results = (outs
     AnyRankedTensor:$result
@@ -448,11 +448,8 @@
     // Workaround for OffsetSizeAndStrideOpInterface being incompatible with
     // prefixed accessors.
     OperandRange offsets() { return getOffsets(); }
-    ArrayAttr static_offsets() { return getStaticOffsets(); }
     OperandRange sizes() { return getSizes(); }
-    ArrayAttr static_sizes() { return getStaticSizes(); }
     OperandRange strides() { return getStrides(); }
-    ArrayAttr static_strides() { return getStaticStrides(); }
 
     /// Returns the type of the result based on the sizes.
     static RankedTensorType inferResultType
@@ -506,9 +503,9 @@
     Variadic<Index>:$offsets,
     Variadic<Index>:$sizes,
     Variadic<Index>:$strides,
-    I64ArrayAttr:$static_offsets,
-    I64ArrayAttr:$static_sizes,
-    I64ArrayAttr:$static_strides
+    DenseI64ArrayAttr:$static_offsets,
+    DenseI64ArrayAttr:$static_sizes,
+    DenseI64ArrayAttr:$static_strides
   );
   let results = (outs);
 
@@ -568,11 +565,8 @@
     // Workaround for OffsetSizeAndStrideOpInterface being incompatible with
     // prefixed accessors.
     OperandRange offsets() { return getOffsets(); }
-    ArrayAttr static_offsets() { return getStaticOffsets(); }
     OperandRange sizes() { return getSizes(); }
-    ArrayAttr static_sizes() { return getStaticSizes(); }
     OperandRange strides() { return getStrides(); }
-    ArrayAttr static_strides() { return getStaticStrides(); }
 
     ValueRange getOperandDynamicDims(unsigned idx) {
       return idx == 0 ? getSizes() : getTargetDims();
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp
index a1a9e72..5544486 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp
@@ -113,8 +113,8 @@
     // Padding for K-dim doesn't change result size.
     if (paddingForM == 0 && paddingForN == 0) {
       auto paddedMatmulOp =
-          linalgOp.clone(rewriter, loc, {resultType},
-                         ArrayRef<Value>{paddedLhs, paddedRhs, result});
+          mlir::clone(rewriter, linalgOp, {resultType},
+                      ArrayRef<Value>{paddedLhs, paddedRhs, result});
       rewriter.replaceOp(linalgOp, paddedMatmulOp->getResults());
     } else {
       auto newResultType = RankedTensorType::get(
@@ -125,8 +125,8 @@
           loc, newResultType, result, createPadding({0, 0}),
           createPadding({paddingForM, paddingForN}), resultPaddingValue);
       auto paddedMatmulOp =
-          linalgOp.clone(rewriter, loc, {newResultType},
-                         ArrayRef<Value>{paddedLhs, paddedRhs, paddedResult});
+          mlir::clone(rewriter, linalgOp, {newResultType},
+                      ArrayRef<Value>{paddedLhs, paddedRhs, paddedResult});
 
       auto zero = rewriter.getI64IntegerAttr(0);
       auto one = rewriter.getI64IntegerAttr(1);
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp
index 69caaca..17a8a9c 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp
@@ -152,7 +152,7 @@
 
   llvm::PassInstrumentationCallbacks pic;
 
-  llvm::StandardInstrumentations si(false);
+  llvm::StandardInstrumentations si(module.getContext(), false);
   si.registerCallbacks(pic, &fam);
 
   llvm::PassBuilder pb(&targetMachine, pto, llvm::None, &pic);
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
index 9c83861..3414f05 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
@@ -52,6 +52,7 @@
 
   llvm::PassInstrumentationCallbacks passInstrumentationCallbacks;
   llvm::StandardInstrumentations standardInstrumentations(
+      module->getContext(),
       /*DebugLogging=*/false);
   standardInstrumentations.registerCallbacks(passInstrumentationCallbacks);
 
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
index 0857220..5dccf05 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
@@ -661,8 +661,8 @@
         auto resultIndex = result.cast<mlir::OpResult>().getResultNumber();
         auto operandIndex = cast<TiedOpInterface>($_op.getOperation())
             .getTiedResultOperandIndex(resultIndex);
-        return operandIndex.hasValue() ?
-            $_op.getOperand(operandIndex.getValue()) :
+        return operandIndex.has_value() ?
+            $_op.getOperand(operandIndex.value()) :
             nullptr;
       }]
     >,
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h b/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h
index 47c8d73..949afbf 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/ImportUtils.h
@@ -162,8 +162,8 @@
       ConversionPatternRewriter &rewriter) const override {
     auto results = rewriteToCall(op, adaptor, importOp,
                                  *this->getTypeConverter(), rewriter);
-    if (!results.hasValue()) return failure();
-    rewriter.replaceOp(op, results.getValue());
+    if (!results.has_value()) return failure();
+    rewriter.replaceOp(op, results.value());
     return success();
   }
 
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
index 9b6e560..66411ad 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
@@ -354,10 +354,10 @@
       if (isMutable) {
         $_state.addAttribute("is_mutable", $_builder.getUnitAttr());
       }
-      if (initialValue.hasValue() &&
-                 (initialValue.getValue().isa<IntegerAttr>() ||
-                  initialValue.getValue().isa<FloatAttr>())) {
-        $_state.addAttribute("initial_value", initialValue.getValue());
+      if (initialValue.has_value() &&
+                 (initialValue.value().isa<IntegerAttr>() ||
+                  initialValue.value().isa<FloatAttr>())) {
+        $_state.addAttribute("initial_value", initialValue.value());
       }
       $_state.addAttribute("type", TypeAttr::get(type));
       $_state.attributes.append(attrs.begin(), attrs.end());
diff --git a/compiler/src/iree/compiler/InputConversion/MHLO/ConvertMHLOToLinalgExt.cpp b/compiler/src/iree/compiler/InputConversion/MHLO/ConvertMHLOToLinalgExt.cpp
index 230496a..0908fbd 100644
--- a/compiler/src/iree/compiler/InputConversion/MHLO/ConvertMHLOToLinalgExt.cpp
+++ b/compiler/src/iree/compiler/InputConversion/MHLO/ConvertMHLOToLinalgExt.cpp
@@ -246,8 +246,7 @@
 
     auto scatterOp = rewriter.create<IREE::LinalgExt::ScatterOp>(
         op.getLoc(), op->getResultTypes(), ValueRange{updates, indices},
-        ValueRange{original}, rewriter.getI64ArrayAttr(scatterDimMap),
-        op.getUniqueIndices());
+        ValueRange{original}, scatterDimMap, op.getUniqueIndices());
 
     rewriter.inlineRegionBefore(op.getUpdateComputation(),
                                 scatterOp.getRegion(),
diff --git a/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp b/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp
index 07dcc53..62ef491 100644
--- a/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp
+++ b/compiler/src/iree/compiler/InputConversion/TMTensor/ConvertTMTensorToLinalgExt.cpp
@@ -59,7 +59,7 @@
 
     auto scatterOp = rewriter.create<IREE::LinalgExt::ScatterOp>(
         op.getLoc(), op->getResultTypes(), op.getInputs(), op.getOutputs(),
-        rewriter.getI64ArrayAttr(dimMap), op.getUniqueIndices());
+        dimMap, op.getUniqueIndices());
 
     rewriter.inlineRegionBefore(op.getRegion(), scatterOp.getRegion(),
                                 scatterOp.getRegion().begin());
diff --git a/integrations/tensorflow/WORKSPACE b/integrations/tensorflow/WORKSPACE
index e0dc7c8..71eb993 100644
--- a/integrations/tensorflow/WORKSPACE
+++ b/integrations/tensorflow/WORKSPACE
@@ -7,7 +7,7 @@
 
 load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
 
-TENSORFLOW_COMMIT = "2358d6bfee56c96924f0a2d85c7792e877de6ce2"
+TENSORFLOW_COMMIT = "2116b012412b274f1471496ed15852175d90628b"
 
 git_repository(
     name = "org_tensorflow",
diff --git a/integrations/tensorflow/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td b/integrations/tensorflow/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td
index 1ba20dc..502d207 100644
--- a/integrations/tensorflow/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td
+++ b/integrations/tensorflow/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td
@@ -108,7 +108,7 @@
   let arguments = (ins
       Variadic<AnyRankedTensorOrMemRefType>:$inputs,
       Variadic<AnyRankedTensorOrMemRefType>:$outputs,
-      I64ArrayAttr:$dimension_map,
+      DenseI64ArrayAttr:$dimension_map,
       DefaultValuedAttr<BoolAttr, "true">:$unique_indices
   );
   let results = (outs Variadic<AnyRankedTensor>:$results);
@@ -163,10 +163,6 @@
       return getUpdateSliceRank() == 0;
     }
 
-    SmallVector<int64_t> dimensionMap() {
-      return extractFromI64ArrayAttr(getDimensionMap());
-    }
-
     // Method to implement for specifying output range for
     // DestinationStyleOpInterface
     std::pair<int64_t, int64_t> getDpsInitsPositionRange() {
@@ -578,10 +574,10 @@
 
   let arguments = (ins Variadic<AnyShaped>:$inputs,
     Variadic<AnyShaped>:$outputs,
-    DefaultValuedOptionalAttr<I64ArrayAttr, "{}">:$outer_dims_perm,
-    I64ArrayAttr:$inner_dims_pos,
+    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
+    DenseI64ArrayAttr:$inner_dims_pos,
     Variadic<Index>:$inner_tiles,
-    I64ArrayAttr:$static_inner_tiles,
+    DenseI64ArrayAttr:$static_inner_tiles,
     Optional<AnyType>:$padding_value);
 
   let results = (outs Variadic<AnyRankedTensor>:$results);
@@ -732,10 +728,10 @@
 
   let arguments = (ins Variadic<AnyShaped>:$inputs,
     Variadic<AnyShaped>:$outputs,
-    DefaultValuedOptionalAttr<I64ArrayAttr, "{}">:$outer_dims_perm,
-    DefaultValuedAttr<I64ArrayAttr, "{}">:$inner_dims_pos,
+    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
+    DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$inner_dims_pos,
     Variadic<Index>:$inner_tiles,
-    I64ArrayAttr:$static_inner_tiles);
+    DenseI64ArrayAttr:$static_inner_tiles);
 
   let results = (outs Variadic<AnyRankedTensor>:$results);
   let assemblyFormat = [{
diff --git a/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp b/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp
index 436d2dc..6e0a36c 100644
--- a/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp
+++ b/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp
@@ -35,6 +35,7 @@
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Support/MathExtras.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -161,7 +162,7 @@
     return op->emitOpError("expected index depth is static");
   }
 
-  auto dimMap = dimensionMap();
+  ArrayRef<int64_t> dimMap = getDimensionMap();
   if (dimMap.size() != indexDepth) {
     return op->emitOpError("invalid number of dimension map entries ");
   }
@@ -329,9 +330,8 @@
     resultTypes.push_back(tiledOriginal.getType());
   }
   Operation *tiledScatterOp =
-      cast<DestinationStyleOpInterface>(getOperation())
-          .clone(builder, loc, resultTypes,
-                 ValueRange{tiledUpdate, tiledIndices, tiledOriginal});
+      mlir::clone(builder, getOperation(), resultTypes,
+                  ValueRange{tiledUpdate, tiledIndices, tiledOriginal});
   return {tiledScatterOp};
 }
 
@@ -378,7 +378,7 @@
     starts[it.index() + offset] = it.value();
   }
 
-  auto dimMap = dimensionMap();
+  ArrayRef<int64_t> dimMap = getDimensionMap();
 
   for (auto i : llvm::seq<unsigned>(0, indexDepth)) {
     loadIndices.back() = b.create<arith::ConstantIndexOp>(loc, i);
@@ -509,7 +509,6 @@
          sizes.size() == static_cast<size_t>(rank));
   auto oneAttr = builder.getI64IntegerAttr(1);
   SmallVector<OpFoldResult> strides(rank, oneAttr);
-  Location loc = getLoc();
   SmallVector<Value> tiledOperands(getOutputs().size());
   for (auto en : llvm::enumerate(getOutputs())) {
     tiledOperands[en.index()] =
@@ -521,8 +520,8 @@
     resultTypes = llvm::to_vector<4>(
         llvm::map_range(tiledOperands, [&](Value v) { return v.getType(); }));
   }
-  Operation *tiledSortOp = cast<DestinationStyleOpInterface>(getOperation())
-                               .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledSortOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledSortOp};
 }
 
@@ -830,7 +829,6 @@
                               ArrayRef<OpFoldResult> sizes) {
   int64_t rank = getOperandRank();
   SmallVector<OpFoldResult> strides(rank, builder.getI64IntegerAttr(1));
-  Location loc = getLoc();
   SmallVector<Value> tiledOperands(3);
   tiledOperands[0] = getStage();
   tiledOperands[1] = getRealCoeff();
@@ -844,8 +842,8 @@
       resultTypes.push_back(tiledOperands.back().getType());
     }
   }
-  Operation *tiledFftOp = cast<DestinationStyleOpInterface>(getOperation())
-                              .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledFftOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledFftOp};
 }
 
@@ -1033,7 +1031,6 @@
          sizes.size() == static_cast<size_t>(rank));
   auto oneAttr = builder.getI64IntegerAttr(1);
   SmallVector<OpFoldResult> strides(rank, oneAttr);
-  Location loc = getLoc();
   SmallVector<Value> tiledOperands;
   tiledOperands.emplace_back(
       getSlice(builder, getLoc(), input(), offsets, sizes, strides));
@@ -1059,8 +1056,8 @@
     resultTypes.push_back(tiledOperands[2].getType());
   }
 
-  Operation *tiledScanOp = cast<DestinationStyleOpInterface>(getOperation())
-                               .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledScanOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledScanOp};
 }
 
@@ -1208,8 +1205,8 @@
         getSlice(builder, loc, output(), mirrorOffsets, sizes, strides));
   }
 
-  Operation *tiledRevOp = cast<DestinationStyleOpInterface>(getOperation())
-                              .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledRevOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
 
   return {tiledRevOp};
 }
@@ -1485,8 +1482,8 @@
     resultTypes.push_back(tiledOperands[tiledOperands.size() - 1].getType());
   }
 
-  Operation *tiledTopkOp = cast<DestinationStyleOpInterface>(getOperation())
-                               .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledTopkOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledTopkOp};
 }
 
@@ -1549,10 +1546,10 @@
                 "applies to only pack or unpack operations");
   SmallVector<OpFoldResult> mixedInnerTiles;
   unsigned dynamicValIndex = 0;
-  for (Attribute attr : op.getStaticInnerTiles()) {
-    auto tileAttr = attr.cast<IntegerAttr>();
-    if (!ShapedType::isDynamic(tileAttr.getInt()))
-      mixedInnerTiles.push_back(tileAttr);
+  OpBuilder b(op.getContext());
+  for (int64_t tileSize : op.getStaticInnerTiles()) {
+    if (!ShapedType::isDynamic(tileSize))
+      mixedInnerTiles.push_back(b.getIndexAttr(tileSize));
     else
       mixedInnerTiles.push_back(op.getInnerTiles()[dynamicValIndex++]);
   }
@@ -1580,8 +1577,7 @@
   static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                 "applies to only pack or unpack operations");
   DenseMap<int64_t, OpFoldResult> dimAndTileMapping;
-  SmallVector<int64_t> dimsToBlock =
-      extractFromI64ArrayAttr(op.getInnerDimsPos());
+  ArrayRef<int64_t> dimsToBlock = op.getInnerDimsPos();
   SmallVector<OpFoldResult> tiles = op.getMixedTiles();
   assert(tiles.size() == dimsToBlock.size() &&
          "tiles must match indices of dimension to block");
@@ -1623,10 +1619,8 @@
                                 ? packOrUnPack.getInputType()
                                 : packOrUnPack.getOutputType();
   int64_t unpackedRank = unpackedType.getRank();
-  SmallVector<int64_t> innerDimsPos =
-      extractFromI64ArrayAttr(packOrUnPack.getInnerDimsPos());
-  SmallVector<int64_t> outerDimPerm =
-      extractFromI64ArrayAttr(packOrUnPack.getOuterDimsPerm());
+  ArrayRef<int64_t> innerDimsPos = packOrUnPack.getInnerDimsPos();
+  ArrayRef<int64_t> outerDimPerm = packOrUnPack.getOuterDimsPerm();
   // Verify tiles. Make sure each provided tile is non-zero.
   SmallVector<OpFoldResult> mixedTiles = packOrUnPack.getMixedTiles();
   if (hasZeros(mixedTiles))
@@ -1712,11 +1706,8 @@
   SmallVector<Value> dynamicTileSizes;
   dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes,
                              ShapedType::kDynamic);
-  build(builder, state, output.getType(), source, output,
-        outerDimsPerm.empty() ? nullptr
-                              : builder.getI64ArrayAttr(outerDimsPerm),
-        builder.getI64ArrayAttr(innerDimsPos), dynamicTileSizes,
-        builder.getI64ArrayAttr(staticTileSizes),
+  build(builder, state, output.getType(), source, output, outerDimsPerm,
+        innerDimsPos, dynamicTileSizes, staticTileSizes,
         (paddingValue ? paddingValue.value() : nullptr));
 }
 
@@ -1841,10 +1832,8 @@
   // the point loop? However, if we interchange `ivs` once more to go to the
   // canonical blocking format: ABCabc, this connection becomes trivial: Each
   // point loop is pointLoopsOffset + inputRank away from the tiled loop.
-  SmallVector<int64_t> dimsToInnerBlock =
-      extractFromI64ArrayAttr(packOp.getInnerDimsPos());
-  SmallVector<int64_t> dimsToOuterBlock =
-      extractFromI64ArrayAttr(packOp.getOuterDimsPerm());
+  ArrayRef<int64_t> dimsToInnerBlock = packOp.getInnerDimsPos();
+  ArrayRef<int64_t> dimsToOuterBlock = packOp.getOuterDimsPerm();
 
   SmallVector<Value> interchangedIvs = ivs;
   SmallVector<int64_t> interchangeVector =
@@ -1979,8 +1968,7 @@
 
   // The tiling is applied on interchanged dimensions. We have to undo the
   // interchange to map sizes and offsets to the original input.
-  SmallVector<int64_t> dimsToOuterBlock =
-      extractFromI64ArrayAttr(getOuterDimsPerm());
+  ArrayRef<int64_t> dimsToOuterBlock = getOuterDimsPerm();
   SmallVector<OpFoldResult> origOffsets(offsets.begin(), offsets.end());
   SmallVector<OpFoldResult> origSizes(sizes.begin(), sizes.end());
   if (!dimsToOuterBlock.empty()) {
@@ -2051,8 +2039,7 @@
   }
 
   Operation *tiledPackOp =
-      cast<DestinationStyleOpInterface>(getOperation())
-          .clone(builder, loc, tiledResultTypes, tiledOperands);
+      mlir::clone(builder, getOperation(), tiledResultTypes, tiledOperands);
 
   return {tiledPackOp};
 }
@@ -2109,11 +2096,8 @@
   SmallVector<Value> dynamicTileSizes;
   dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes,
                              ShapedType::kDynamic);
-  build(builder, state, output.getType(), source, output,
-        outerDimsPerm.empty() ? nullptr
-                              : builder.getI64ArrayAttr(outerDimsPerm),
-        builder.getI64ArrayAttr(innerDimsPos), dynamicTileSizes,
-        builder.getI64ArrayAttr(staticTileSizes));
+  build(builder, state, output.getType(), source, output, outerDimsPerm,
+        innerDimsPos, dynamicTileSizes, staticTileSizes);
 }
 
 SmallVector<OpFoldResult> UnPackOp::getMixedTiles() {
@@ -2167,14 +2151,14 @@
   assert(inputIvsPointLoops.size() + inputIvs.size() == getInputRank() &&
          "expect same number of iduction variables equals to input rank");
   // interchange the point loops induction variables based on `inner_dim_pos`.
-  SmallVector<int64_t> innerDims = extractFromI64ArrayAttr(getInnerDimsPos());
+  ArrayRef<int64_t> innerDims = getInnerDimsPos();
   SmallVector<int64_t> interchangeVector =
       computeInterchangeFromDimPos(innerDims, getOutputRank());
   SmallVector<Value> interchangedInputIvsPointLoops = inputIvsPointLoops;
   interchangedInputIvsPointLoops = interchange<Value>(
       interchangedInputIvsPointLoops, interchangeVector, /*offset=*/0);
   // interchange the tiled loops induction variables based on `outer_dims_perm`.
-  SmallVector<int64_t> outerDims = extractFromI64ArrayAttr(getOuterDimsPerm());
+  ArrayRef<int64_t> outerDims = getOuterDimsPerm();
   if (!outerDims.empty()) {
     inputIvs = interchange<Value>(inputIvs, outerDims, /*offset=*/0);
   }
@@ -2219,8 +2203,21 @@
   auto sub = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult {
     return makeComposedFoldedAffineApply(builder, loc, subMap, {v1, v2});
   };
+  auto ceilDiv = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult {
+    return makeComposedFoldedAffineApply(builder, loc, dim0.ceilDiv(dim1),
+                                         {v1, v2});
+  };
+  auto floorDiv = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult {
+    return makeComposedFoldedAffineApply(builder, loc, dim0.floorDiv(dim1),
+                                         {v1, v2});
+  };
 
-  int64_t inputRank = getInputRank();
+  // The perfect tiling case indicates that the tiling sizes are multiples of
+  // inner_tile_size. In this context, the indices of the input slice are all
+  // aligned to the head. No extra data is needed when representing the tiled
+  // unpack op.
+  bool isPerfectTilingCase = true;
+
   int64_t outputRank = getOutputRank();
   Attribute zeroAttr = builder.getIndexAttr(0);
   Attribute oneAttr = builder.getIndexAttr(1);
@@ -2228,39 +2225,62 @@
   SmallVector<OpFoldResult> inputIndices, inputSizes, outputNewOffsets,
       outputExpandedSizes;
   for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
-    if (dimAndTileMapping.count(dim)) {
-      DivModValue firstCoord =
-          getDivMod(builder, loc,
-                    getValueOrCreateConstantIndexOp(builder, loc, offsets[dim]),
-                    getValueOrCreateConstantIndexOp(builder, loc,
-                                                    dimAndTileMapping[dim]));
-      DivModValue lastCoord = getDivMod(
-          builder, loc,
-          getValueOrCreateConstantIndexOp(
-              builder, loc, sub(add(offsets[dim], sizes[dim]), oneAttr)),
-          getValueOrCreateConstantIndexOp(builder, loc,
-                                          dimAndTileMapping[dim]));
+    if (!dimAndTileMapping.count(dim)) {
+      inputIndices.push_back(offsets[dim]);
+      inputSizes.push_back(sizes[dim]);
+      outputNewOffsets.push_back(zeroAttr);
+      outputExpandedSizes.push_back(sizes[dim]);
+      continue;
+    }
 
-      inputIndices.push_back(firstCoord.quotient);
-
-      // Get the upper bound because it could be an extract_slice case. The
-      // sizes are determined by loop bound and step, where loop bound is the
-      // size of output shape.
-      // In incomplete tile cases, the input could have larger shape, it is safe
-      // to extend the boundary because they are pre-padded. I.e., the size of
-      // input dim is always aligned to inner_tile_size.
-      FailureOr<int64_t> cstSize = linalg::getConstantUpperBoundForIndex(
-          getValueOrCreateConstantIndexOp(builder, loc, sizes[dim]));
-      Optional<int64_t> cstInnerSize =
-          getConstantIntValue(dimAndTileMapping[dim]);
-      if (!failed(cstSize) && cstInnerSize &&
-          cstSize.value() % cstInnerSize.value() == 0) {
-        inputSizes.push_back(
-            builder.getIndexAttr(cstSize.value() / cstInnerSize.value()));
-      } else {
-        inputSizes.push_back(
-            add(sub(lastCoord.quotient, firstCoord.quotient), oneAttr));
+    FailureOr<int64_t> cstSize = linalg::getConstantUpperBoundForIndex(
+        getValueOrCreateConstantIndexOp(builder, loc, sizes[dim]));
+    Optional<int64_t> cstInnerSize =
+        getConstantIntValue(dimAndTileMapping[dim]);
+    bool isAlignedToInnerTileSize = false;
+    if (!failed(cstSize) && cstInnerSize) {
+      // If the tiling size equals the inner tiling size, the outer dims are
+      // always 1.
+      if (cstInnerSize.value() == cstSize.value()) {
+        inputIndices.push_back(floorDiv(offsets[dim], dimAndTileMapping[dim]));
+        inputSizes.push_back(builder.getIndexAttr(1));
+        outputNewOffsets.push_back(zeroAttr);
+        outputExpandedSizes.push_back(sizes[dim]);
+        continue;
       }
+      if (cstSize.value() % cstInnerSize.value() == 0)
+        isAlignedToInnerTileSize = true;
+    }
+
+    if (!isAlignedToInnerTileSize)
+      isPerfectTilingCase = false;
+
+    DivModValue firstCoord = getDivMod(
+        builder, loc,
+        getValueOrCreateConstantIndexOp(builder, loc, offsets[dim]),
+        getValueOrCreateConstantIndexOp(builder, loc, dimAndTileMapping[dim]));
+    DivModValue lastCoord = getDivMod(
+        builder, loc,
+        getValueOrCreateConstantIndexOp(
+            builder, loc, sub(add(offsets[dim], sizes[dim]), oneAttr)),
+        getValueOrCreateConstantIndexOp(builder, loc, dimAndTileMapping[dim]));
+
+    if (isAlignedToInnerTileSize) {
+      inputIndices.push_back(floorDiv(offsets[dim], dimAndTileMapping[dim]));
+      outputNewOffsets.push_back(zeroAttr);
+      outputExpandedSizes.push_back(sizes[dim]);
+
+      // The ceilDiv is needed here because there could be an incomplete tile
+      // even in perfect tiling cases. E.g.,
+      //   %0 = unpack tensor<33x2xf32> into tensor<64xf32>
+      // If the tiling size is 32, there will be three tiles. Two of them have
+      // size=32; one of them has size=2. The size is represented using an
+      // affine_min op; we need ceilDiv.
+      inputSizes.push_back(ceilDiv(sizes[dim], dimAndTileMapping[dim]));
+    } else {
+      inputIndices.push_back(firstCoord.quotient);
+      inputSizes.push_back(
+          add(sub(lastCoord.quotient, firstCoord.quotient), oneAttr));
       outputNewOffsets.push_back(firstCoord.remainder);
 
       AffineExpr i, tile;
@@ -2270,21 +2290,16 @@
           builder, loc, i * tile,
           ArrayRef<OpFoldResult>{inputSizes.back(), dimAndTileMapping[dim]});
       outputExpandedSizes.push_back(size);
-    } else {
-      inputIndices.push_back(offsets[dim]);
-      inputSizes.push_back(sizes[dim]);
-      outputNewOffsets.push_back(zeroAttr);
-      outputExpandedSizes.push_back(sizes[dim]);
     }
   }
 
   // The tiling is applied on output dimensions. We have to apply the
   // interchange on input dimensions if outer_dims_perm is set.
-  SmallVector<int64_t> dimsToOuterBlock =
-      extractFromI64ArrayAttr(getOuterDimsPerm());
+  int64_t inputRank = getInputRank();
+  ArrayRef<int64_t> dimsToOuterBlock = getOuterDimsPerm();
   if (!dimsToOuterBlock.empty()) {
     SmallVector<int64_t> vec =
-        computeInterchangeFromDimPos(dimsToOuterBlock, getInputRank());
+        computeInterchangeFromDimPos(dimsToOuterBlock, inputRank);
     inputIndices = interchange<OpFoldResult>(inputIndices, vec);
     inputSizes = interchange<OpFoldResult>(inputSizes, vec);
   }
@@ -2298,20 +2313,28 @@
   tiledOperands.push_back(getSlice(builder, loc, getInput(), inputIndices,
                                    inputSizes, inputStrides));
 
-  // The tiling is only avaiable on tensors. It's fine to create a tensor.empty
-  // instead of tensor.pad because the op is not a destination-style op.
-  auto empty = builder.create<tensor::EmptyOp>(
-      loc, outputExpandedSizes, getOutputType().getElementType());
-  tiledOperands.push_back(empty.getResult());
+  SmallVector<OpFoldResult> outputStrides(outputRank, oneAttr);
+  if (isPerfectTilingCase) {
+    tiledOperands.push_back(
+        getSlice(builder, loc, getOutput(), offsets, sizes, outputStrides));
+  } else {
+    // The tiling is only available on tensors. It's fine to create a
+    // tensor.empty instead of tensor.pad because the op is not a
+    // destination-style op.
+    auto empty = builder.create<tensor::EmptyOp>(
+        loc, outputExpandedSizes, getOutputType().getElementType());
+    tiledOperands.push_back(empty.getResult());
+  }
 
   SmallVector<Type, 4> tiledResultTypes;
   tiledResultTypes.push_back(tiledOperands[1].getType());
 
   Operation *tiledUnpackOp =
-      cast<DestinationStyleOpInterface>(getOperation())
-          .clone(builder, loc, tiledResultTypes, tiledOperands);
+      mlir::clone(builder, getOperation(), tiledResultTypes, tiledOperands);
 
-  SmallVector<OpFoldResult> outputStrides(outputRank, oneAttr);
+  if (isPerfectTilingCase)
+    return {tiledUnpackOp};
+
   Operation *extractSlice = builder.create<tensor::ExtractSliceOp>(
       loc, tiledUnpackOp->getResult(0), outputNewOffsets, sizes, outputStrides);
 
@@ -2340,6 +2363,174 @@
   return iteratorTypes;
 }
 
+//===----------------------------------------------------------------------===//
+// WinogradInputTransformOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult WinogradInputTransformOp::verify() {
+  Operation *op = getOperation();
+  if (getNumInputs() != 1) {
+    return op->emitOpError("expected one input operand");
+  }
+  if (getNumOutputs() != 1) {
+    return op->emitOpError("expected one output operand");
+  }
+  auto inputType = input().getType().cast<ShapedType>();
+  auto outputType = output().getType().cast<ShapedType>();
+  ArrayRef<int64_t> inputShape = inputType.getShape();
+  if (inputShape.size() != 4) {
+    return op->emitOpError("expected input operand to have rank 4");
+  }
+  ArrayRef<int64_t> outputShape = outputType.getShape();
+  if (outputType.getElementType() != inputType.getElementType()) {
+    return op->emitOpError(
+        "expected input/output element types to be identical");
+  }
+  if (getOutputOperandRank() != getInputOperandRank() + 2) {
+    return op->emitOpError(
+        "expected output rank to be equal to input rank + 2");
+  }
+  const SmallVector<int64_t> imageDims = imageDimensions();
+  const size_t numImageDims = imageDims.size();
+  llvm::SmallSetVector<int64_t, 2> imageDimsSet(imageDims.begin(),
+                                                imageDims.end());
+  if (imageDims.size() != 2) {
+    return op->emitOpError("expected only 2 image dimensions");
+  }
+  for (auto dim : imageDims) {
+    if ((dim < 0) || (dim > 3)) {
+      return op->emitOpError(
+          "expect image dimensions to be in the range: [0, 3]");
+    }
+  }
+  const int64_t outputTileSize = getOutputTileSize();
+  const int64_t kernelSize = getKernelSize();
+  const int64_t inputTileSize = getInputTileSize();
+  SmallVector<int64_t> expectedOutputShape(getOutputOperandRank(),
+                                           inputTileSize);
+  int outputIndex;
+  for (int i = 0; i < inputShape.size(); i++) {
+    outputIndex = i + numImageDims;
+    if (ShapedType::isDynamic(inputShape[i])) {
+      expectedOutputShape[outputIndex] = inputShape[i];
+      continue;
+    }
+    if (!imageDimsSet.contains(i)) {
+      expectedOutputShape[outputIndex] = inputShape[i];
+    } else {
+      expectedOutputShape[outputIndex] =
+          std::ceil((float)(inputShape[i] - kernelSize + 1) / outputTileSize);
+    }
+  }
+  if (!areShapesCompatible(expectedOutputShape, outputShape)) {
+    return op->emitOpError("incompatible output shape");
+  }
+  return success();
+}
+
+SmallVector<Range>
+WinogradInputTransformOp::getIterationDomain(OpBuilder &builder) {
+  Location loc = getLoc();
+  Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
+  Value one = builder.create<arith::ConstantIndexOp>(loc, 1);
+  Value source = input();
+  SmallVector<int64_t> imageDims = imageDimensions();
+  llvm::SmallSetVector<int64_t, 2> imageDimsSet(imageDims.begin(),
+                                                imageDims.end());
+  SmallVector<Range> loopBounds(imageDims.size());
+  int count = 0;
+  for (auto dim : llvm::seq<int64_t>(0, getInputOperandRank())) {
+    if (!imageDimsSet.contains(dim)) {
+      loopBounds[count].offset = zero;
+      loopBounds[count].size = getDimValue(builder, loc, source, dim);
+      loopBounds[count].stride = one;
+      count++;
+    }
+  }
+  return loopBounds;
+}
+
+SmallVector<utils::IteratorType>
+WinogradInputTransformOp::getLoopIteratorTypes() {
+  SmallVector<utils::IteratorType> iteratorTypes(getIterationDomainRank(),
+                                                 utils::IteratorType::parallel);
+  return iteratorTypes;
+}
+
+SmallVector<Operation *>
+WinogradInputTransformOp::getTiledImplementation(OpBuilder &builder,
+                                                 ArrayRef<OpFoldResult> offsets,
+                                                 ArrayRef<OpFoldResult> sizes) {
+
+  Location loc = getLoc();
+  auto one = builder.getIndexAttr(1);
+  auto zero = builder.getIndexAttr(0);
+
+  assert(offsets.size() == 2);
+  SmallVector<OpFoldResult> inputOffsets(getInputOperandRank(), zero);
+  SmallVector<OpFoldResult> outputOffsets(getOutputOperandRank(), zero);
+  outputOffsets[2] = inputOffsets[0] = offsets[0];
+  outputOffsets[5] = inputOffsets[3] = offsets[1];
+
+  SmallVector<OpFoldResult> inputStrides(getInputOperandRank(), one);
+  SmallVector<OpFoldResult> outputStrides(getOutputOperandRank(), one);
+
+  assert(sizes.size() == 2);
+  auto inputShape = input().getType().cast<ShapedType>().getShape();
+  auto outputShape = output().getType().cast<ShapedType>().getShape();
+  SmallVector<OpFoldResult> inputSizes =
+      getAsOpFoldResult(builder.getIndexArrayAttr(inputShape));
+  SmallVector<OpFoldResult> outputSizes =
+      getAsOpFoldResult(builder.getIndexArrayAttr(outputShape));
+  outputSizes[2] = inputSizes[0] = sizes[0];
+  outputSizes[5] = inputSizes[3] = sizes[1];
+
+  SmallVector<Value> tiledOperands;
+  tiledOperands.emplace_back(
+      getSlice(builder, loc, input(), inputOffsets, inputSizes, inputStrides));
+  tiledOperands.emplace_back(getSlice(builder, loc, output(), outputOffsets,
+                                      outputSizes, outputStrides));
+
+  SmallVector<Type, 4> resultTypes;
+  if (hasTensorSemantics()) {
+    resultTypes.push_back(tiledOperands[1].getType());
+  }
+
+  Operation *tiledOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
+
+  return {tiledOp};
+}
+
+LogicalResult WinogradInputTransformOp::getResultTilePosition(
+    OpBuilder &builder, unsigned resultNumber, ArrayRef<OpFoldResult> offsets,
+    ArrayRef<OpFoldResult> sizes, SmallVector<OpFoldResult> &resultOffsets,
+    SmallVector<OpFoldResult> &resultSizes) {
+  if (resultNumber == 0) {
+    auto resultShape = output().getType().cast<ShapedType>().getShape();
+    resultSizes = getAsOpFoldResult(builder.getIndexArrayAttr(resultShape));
+    resultOffsets = SmallVector<OpFoldResult>(getOutputOperandRank(),
+                                              builder.getIndexAttr(0));
+    resultOffsets[2] = offsets[0];
+    resultOffsets[5] = offsets[1];
+    resultSizes[2] = sizes[0];
+    resultSizes[5] = sizes[1];
+    return success();
+  }
+  return failure();
+}
+
+LogicalResult WinogradInputTransformOp::fold(ArrayRef<Attribute>,
+                                             SmallVectorImpl<OpFoldResult> &) {
+  return memref::foldMemRefCast(*this);
+}
+
+LogicalResult WinogradInputTransformOp::reifyResultShapes(
+    OpBuilder &b, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
+  return cast<LinalgExtOp>(getOperation())
+      .reifyResultShapes(b, reifiedReturnShapes);
+}
+
 #define DEFINE_OP_GET_EFFECTS(OP_NAME)                                         \
   void OP_NAME::getEffects(                                                    \
       SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>      \
@@ -2358,6 +2549,7 @@
 DEFINE_OP_GET_EFFECTS(TopkOp)
 DEFINE_OP_GET_EFFECTS(PackOp)
 DEFINE_OP_GET_EFFECTS(UnPackOp)
+DEFINE_OP_GET_EFFECTS(WinogradInputTransformOp)
 
 //===----------------------------------------------------------------------===//
 // iree_linalg_ext.set_encoding
@@ -2487,9 +2679,7 @@
                                 : opOperand->get());
     }
     // Clone op.
-    Operation *newOp =
-        cast<DestinationStyleOpInterface>(op.getOperation())
-            .clone(rewriter, op->getLoc(), newResultTypes, newOperands);
+    Operation *newOp = mlir::clone(rewriter, op, newResultTypes, newOperands);
     SmallVector<Value, 4> replacements;
     replacements.reserve(newOp->getNumResults());
     for (auto result : llvm::zip(op->getResults(), newOp->getResults())) {
diff --git a/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp b/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp
index 5e2f887..0713ce6 100644
--- a/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp
+++ b/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp
@@ -54,10 +54,8 @@
         sliceOp.getLoc(), sliceOp.getMixedSizes(), elementType);
     rewriter.replaceOpWithNewOp<UnPackOp>(
         sliceOp, output.getType(), unpackOp.getInput(), output,
-        unpackOp.getOuterDimsPerm().empty() ? nullptr
-                                            : unpackOp.getOuterDimsPerm(),
-        unpackOp.getInnerDimsPos(), unpackOp.getInnerTiles(),
-        unpackOp.getStaticInnerTiles());
+        unpackOp.getOuterDimsPerm(), unpackOp.getInnerDimsPos(),
+        unpackOp.getInnerTiles(), unpackOp.getStaticInnerTiles());
     return success();
   }
 };
diff --git a/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp b/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
index 24d5c43..58e892f 100644
--- a/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
+++ b/integrations/tensorflow/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
@@ -896,12 +896,11 @@
     }
     // The dimensions map in the order of output dimensions. Since the
     // interchange is applied, we have to undo it for input.
-    if (auto outerDims = packOp.getOuterDimsPerm()) {
-      inputExprs = undoInterchange<AffineExpr>(
-          inputExprs, extractFromI64ArrayAttr(outerDims));
+    if (!packOp.getOuterDimsPerm().empty()) {
+      inputExprs =
+          undoInterchange<AffineExpr>(inputExprs, packOp.getOuterDimsPerm());
     }
-    for (auto en :
-         llvm::enumerate(extractFromI64ArrayAttr(packOp.getInnerDimsPos()))) {
+    for (auto en : llvm::enumerate(packOp.getInnerDimsPos())) {
       inputExprs[en.value()] =
           rewriter.getAffineDimExpr(inputRank + en.index());
     }
@@ -967,8 +966,7 @@
         loc, readType, unpackOp.getInput(), readOffsets, readSizes,
         readStrides);
 
-    SmallVector<int64_t> innerDimsPos =
-        extractFromI64ArrayAttr(unpackOp.getInnerDimsPos());
+    ArrayRef<int64_t> innerDimsPos = unpackOp.getInnerDimsPos();
     auto interchangeVector =
         computeInterchangeFromDimPos(innerDimsPos, outputRank);
     SmallVector<int64_t> transpShape =
diff --git a/integrations/tensorflow/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir b/integrations/tensorflow/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
index 7d0a9a7..de014ea 100644
--- a/integrations/tensorflow/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
+++ b/integrations/tensorflow/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
@@ -26,6 +26,6 @@
   transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1 : (!pdl.operation) -> !pdl.operation
-    transform.structured.tile %0 [4, 4, 4] {pad = false}
+    transform.structured.tile %0 [4, 4, 4]
   }
 }
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td
index 549811a..8fe7383 100644
--- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td
+++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.td
@@ -108,7 +108,7 @@
   let arguments = (ins
       Variadic<AnyRankedTensorOrMemRefType>:$inputs,
       Variadic<AnyRankedTensorOrMemRefType>:$outputs,
-      I64ArrayAttr:$dimension_map,
+      DenseI64ArrayAttr:$dimension_map,
       DefaultValuedAttr<BoolAttr, "true">:$unique_indices
   );
   let results = (outs Variadic<AnyRankedTensor>:$results);
@@ -163,10 +163,6 @@
       return getUpdateSliceRank() == 0;
     }
 
-    SmallVector<int64_t> dimensionMap() {
-      return extractFromI64ArrayAttr(getDimensionMap());
-    }
-
     // Method to implement for specifying output range for
     // DestinationStyleOpInterface
     std::pair<int64_t, int64_t> getDpsInitsPositionRange() {
@@ -578,10 +574,10 @@
 
   let arguments = (ins Variadic<AnyShaped>:$inputs,
     Variadic<AnyShaped>:$outputs,
-    DefaultValuedOptionalAttr<I64ArrayAttr, "{}">:$outer_dims_perm,
-    I64ArrayAttr:$inner_dims_pos,
+    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
+    DenseI64ArrayAttr:$inner_dims_pos,
     Variadic<Index>:$inner_tiles,
-    I64ArrayAttr:$static_inner_tiles,
+    DenseI64ArrayAttr:$static_inner_tiles,
     Optional<AnyType>:$padding_value);
 
   let results = (outs Variadic<AnyRankedTensor>:$results);
@@ -731,10 +727,10 @@
 
   let arguments = (ins Variadic<AnyShaped>:$inputs,
     Variadic<AnyShaped>:$outputs,
-    DefaultValuedOptionalAttr<I64ArrayAttr, "{}">:$outer_dims_perm,
-    DefaultValuedAttr<I64ArrayAttr, "{}">:$inner_dims_pos,
+    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
+    DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$inner_dims_pos,
     Variadic<Index>:$inner_tiles,
-    I64ArrayAttr:$static_inner_tiles);
+    DenseI64ArrayAttr:$static_inner_tiles);
 
   let results = (outs Variadic<AnyRankedTensor>:$results);
   let assemblyFormat = [{
@@ -917,7 +913,7 @@
                        Variadic<AnyShaped>:$outputs,
                        I64Attr:$output_tile_size,
                        I64Attr:$kernel_size,
-                       I64ArrayAttr:$image_dimensions
+                       DenseI64ArrayAttr:$image_dimensions
   );
 
   let builders = [
@@ -961,7 +957,7 @@
       return getOutputTileSize() + getKernelSize() - 1;
     }
     SmallVector<int64_t> imageDimensions() {
-      return extractFromI64ArrayAttr(getImageDimensions());
+      return llvm::to_vector(getImageDimensions());
     }
     int64_t getIterationDomainRank() {
       SmallVector<int64_t> imageDims = imageDimensions();
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp
index 7c31937..d12c51e 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/IR/LinalgExtOps.cpp
@@ -162,7 +162,7 @@
     return op->emitOpError("expected index depth is static");
   }
 
-  auto dimMap = dimensionMap();
+  ArrayRef<int64_t> dimMap = getDimensionMap();
   if (dimMap.size() != indexDepth) {
     return op->emitOpError("invalid number of dimension map entries ");
   }
@@ -330,9 +330,8 @@
     resultTypes.push_back(tiledOriginal.getType());
   }
   Operation *tiledScatterOp =
-      cast<DestinationStyleOpInterface>(getOperation())
-          .clone(builder, loc, resultTypes,
-                 ValueRange{tiledUpdate, tiledIndices, tiledOriginal});
+      mlir::clone(builder, getOperation(), resultTypes,
+                  ValueRange{tiledUpdate, tiledIndices, tiledOriginal});
   return {tiledScatterOp};
 }
 
@@ -379,7 +378,7 @@
     starts[it.index() + offset] = it.value();
   }
 
-  auto dimMap = dimensionMap();
+  ArrayRef<int64_t> dimMap = getDimensionMap();
 
   for (auto i : llvm::seq<unsigned>(0, indexDepth)) {
     loadIndices.back() = b.create<arith::ConstantIndexOp>(loc, i);
@@ -510,7 +509,6 @@
          sizes.size() == static_cast<size_t>(rank));
   auto oneAttr = builder.getI64IntegerAttr(1);
   SmallVector<OpFoldResult> strides(rank, oneAttr);
-  Location loc = getLoc();
   SmallVector<Value> tiledOperands(getOutputs().size());
   for (auto en : llvm::enumerate(getOutputs())) {
     tiledOperands[en.index()] =
@@ -522,8 +520,8 @@
     resultTypes = llvm::to_vector<4>(
         llvm::map_range(tiledOperands, [&](Value v) { return v.getType(); }));
   }
-  Operation *tiledSortOp = cast<DestinationStyleOpInterface>(getOperation())
-                               .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledSortOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledSortOp};
 }
 
@@ -831,7 +829,6 @@
                               ArrayRef<OpFoldResult> sizes) {
   int64_t rank = getOperandRank();
   SmallVector<OpFoldResult> strides(rank, builder.getI64IntegerAttr(1));
-  Location loc = getLoc();
   SmallVector<Value> tiledOperands(3);
   tiledOperands[0] = getStage();
   tiledOperands[1] = getRealCoeff();
@@ -845,8 +842,8 @@
       resultTypes.push_back(tiledOperands.back().getType());
     }
   }
-  Operation *tiledFftOp = cast<DestinationStyleOpInterface>(getOperation())
-                              .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledFftOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledFftOp};
 }
 
@@ -1034,7 +1031,6 @@
          sizes.size() == static_cast<size_t>(rank));
   auto oneAttr = builder.getI64IntegerAttr(1);
   SmallVector<OpFoldResult> strides(rank, oneAttr);
-  Location loc = getLoc();
   SmallVector<Value> tiledOperands;
   tiledOperands.emplace_back(
       getSlice(builder, getLoc(), input(), offsets, sizes, strides));
@@ -1060,8 +1056,8 @@
     resultTypes.push_back(tiledOperands[2].getType());
   }
 
-  Operation *tiledScanOp = cast<DestinationStyleOpInterface>(getOperation())
-                               .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledScanOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledScanOp};
 }
 
@@ -1209,8 +1205,8 @@
         getSlice(builder, loc, output(), mirrorOffsets, sizes, strides));
   }
 
-  Operation *tiledRevOp = cast<DestinationStyleOpInterface>(getOperation())
-                              .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledRevOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
 
   return {tiledRevOp};
 }
@@ -1486,8 +1482,8 @@
     resultTypes.push_back(tiledOperands[tiledOperands.size() - 1].getType());
   }
 
-  Operation *tiledTopkOp = cast<DestinationStyleOpInterface>(getOperation())
-                               .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledTopkOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
   return {tiledTopkOp};
 }
 
@@ -1550,10 +1546,10 @@
                 "applies to only pack or unpack operations");
   SmallVector<OpFoldResult> mixedInnerTiles;
   unsigned dynamicValIndex = 0;
-  for (Attribute attr : op.getStaticInnerTiles()) {
-    auto tileAttr = attr.cast<IntegerAttr>();
-    if (!ShapedType::isDynamic(tileAttr.getInt()))
-      mixedInnerTiles.push_back(tileAttr);
+  OpBuilder b(op.getContext());
+  for (int64_t tileSize : op.getStaticInnerTiles()) {
+    if (!ShapedType::isDynamic(tileSize))
+      mixedInnerTiles.push_back(b.getIndexAttr(tileSize));
     else
       mixedInnerTiles.push_back(op.getInnerTiles()[dynamicValIndex++]);
   }
@@ -1581,8 +1577,7 @@
   static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                 "applies to only pack or unpack operations");
   DenseMap<int64_t, OpFoldResult> dimAndTileMapping;
-  SmallVector<int64_t> dimsToBlock =
-      extractFromI64ArrayAttr(op.getInnerDimsPos());
+  ArrayRef<int64_t> dimsToBlock = op.getInnerDimsPos();
   SmallVector<OpFoldResult> tiles = op.getMixedTiles();
   assert(tiles.size() == dimsToBlock.size() &&
          "tiles must match indices of dimension to block");
@@ -1624,10 +1619,8 @@
                                 ? packOrUnPack.getInputType()
                                 : packOrUnPack.getOutputType();
   int64_t unpackedRank = unpackedType.getRank();
-  SmallVector<int64_t> innerDimsPos =
-      extractFromI64ArrayAttr(packOrUnPack.getInnerDimsPos());
-  SmallVector<int64_t> outerDimPerm =
-      extractFromI64ArrayAttr(packOrUnPack.getOuterDimsPerm());
+  ArrayRef<int64_t> innerDimsPos = packOrUnPack.getInnerDimsPos();
+  ArrayRef<int64_t> outerDimPerm = packOrUnPack.getOuterDimsPerm();
   // Verify tiles. Make sure each provided tile is non-zero.
   SmallVector<OpFoldResult> mixedTiles = packOrUnPack.getMixedTiles();
   if (hasZeros(mixedTiles))
@@ -1715,9 +1708,9 @@
                              ShapedType::kDynamic);
   build(builder, state, output.getType(), source, output,
         outerDimsPerm.empty() ? nullptr
-                              : builder.getI64ArrayAttr(outerDimsPerm),
-        builder.getI64ArrayAttr(innerDimsPos), dynamicTileSizes,
-        builder.getI64ArrayAttr(staticTileSizes),
+                              : builder.getDenseI64ArrayAttr(outerDimsPerm),
+        builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes,
+        builder.getDenseI64ArrayAttr(staticTileSizes),
         (paddingValue ? paddingValue.value() : nullptr));
 }
 
@@ -1842,10 +1835,8 @@
   // the point loop? However, if we interchange `ivs` once more to go to the
   // canonical blocking format: ABCabc, this connection becomes trivial: Each
   // point loop is pointLoopsOffset + inputRank away from the tiled loop.
-  SmallVector<int64_t> dimsToInnerBlock =
-      extractFromI64ArrayAttr(packOp.getInnerDimsPos());
-  SmallVector<int64_t> dimsToOuterBlock =
-      extractFromI64ArrayAttr(packOp.getOuterDimsPerm());
+  ArrayRef<int64_t> dimsToInnerBlock = packOp.getInnerDimsPos();
+  ArrayRef<int64_t> dimsToOuterBlock = packOp.getOuterDimsPerm();
 
   SmallVector<Value> interchangedIvs = ivs;
   SmallVector<int64_t> interchangeVector =
@@ -1980,8 +1971,7 @@
 
   // The tiling is applied on interchanged dimensions. We have to undo the
   // interchange to map sizes and offsets to the original input.
-  SmallVector<int64_t> dimsToOuterBlock =
-      extractFromI64ArrayAttr(getOuterDimsPerm());
+  ArrayRef<int64_t> dimsToOuterBlock = getOuterDimsPerm();
   SmallVector<OpFoldResult> origOffsets(offsets.begin(), offsets.end());
   SmallVector<OpFoldResult> origSizes(sizes.begin(), sizes.end());
   if (!dimsToOuterBlock.empty()) {
@@ -2052,8 +2042,7 @@
   }
 
   Operation *tiledPackOp =
-      cast<DestinationStyleOpInterface>(getOperation())
-          .clone(builder, loc, tiledResultTypes, tiledOperands);
+      mlir::clone(builder, getOperation(), tiledResultTypes, tiledOperands);
 
   return {tiledPackOp};
 }
@@ -2112,9 +2101,9 @@
                              ShapedType::kDynamic);
   build(builder, state, output.getType(), source, output,
         outerDimsPerm.empty() ? nullptr
-                              : builder.getI64ArrayAttr(outerDimsPerm),
-        builder.getI64ArrayAttr(innerDimsPos), dynamicTileSizes,
-        builder.getI64ArrayAttr(staticTileSizes));
+                              : builder.getDenseI64ArrayAttr(outerDimsPerm),
+        builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes,
+        builder.getDenseI64ArrayAttr(staticTileSizes));
 }
 
 SmallVector<OpFoldResult> UnPackOp::getMixedTiles() {
@@ -2168,14 +2157,14 @@
   assert(inputIvsPointLoops.size() + inputIvs.size() == getInputRank() &&
          "expect same number of iduction variables equals to input rank");
   // interchange the point loops induction variables based on `inner_dim_pos`.
-  SmallVector<int64_t> innerDims = extractFromI64ArrayAttr(getInnerDimsPos());
+  ArrayRef<int64_t> innerDims = getInnerDimsPos();
   SmallVector<int64_t> interchangeVector =
       computeInterchangeFromDimPos(innerDims, getOutputRank());
   SmallVector<Value> interchangedInputIvsPointLoops = inputIvsPointLoops;
   interchangedInputIvsPointLoops = interchange<Value>(
       interchangedInputIvsPointLoops, interchangeVector, /*offset=*/0);
   // interchange the tiled loops induction variables based on `outer_dims_perm`.
-  SmallVector<int64_t> outerDims = extractFromI64ArrayAttr(getOuterDimsPerm());
+  ArrayRef<int64_t> outerDims = getOuterDimsPerm();
   if (!outerDims.empty()) {
     inputIvs = interchange<Value>(inputIvs, outerDims, /*offset=*/0);
   }
@@ -2313,8 +2302,7 @@
   // The tiling is applied on output dimensions. We have to apply the
   // interchange on input dimensions if outer_dims_perm is set.
   int64_t inputRank = getInputRank();
-  SmallVector<int64_t> dimsToOuterBlock =
-      extractFromI64ArrayAttr(getOuterDimsPerm());
+  ArrayRef<int64_t> dimsToOuterBlock = getOuterDimsPerm();
   if (!dimsToOuterBlock.empty()) {
     SmallVector<int64_t> vec =
         computeInterchangeFromDimPos(dimsToOuterBlock, inputRank);
@@ -2348,8 +2336,7 @@
   tiledResultTypes.push_back(tiledOperands[1].getType());
 
   Operation *tiledUnpackOp =
-      cast<DestinationStyleOpInterface>(getOperation())
-          .clone(builder, loc, tiledResultTypes, tiledOperands);
+      mlir::clone(builder, getOperation(), tiledResultTypes, tiledOperands);
 
   if (isPerfectTilingCase)
     return {tiledUnpackOp};
@@ -2515,8 +2502,8 @@
     resultTypes.push_back(tiledOperands[1].getType());
   }
 
-  Operation *tiledOp = cast<DestinationStyleOpInterface>(getOperation())
-                           .clone(builder, loc, resultTypes, tiledOperands);
+  Operation *tiledOp =
+      mlir::clone(builder, getOperation(), resultTypes, tiledOperands);
 
   return {tiledOp};
 }
@@ -2698,9 +2685,7 @@
                                 : opOperand->get());
     }
     // Clone op.
-    Operation *newOp =
-        cast<DestinationStyleOpInterface>(op.getOperation())
-            .clone(rewriter, op->getLoc(), newResultTypes, newOperands);
+    Operation *newOp = mlir::clone(rewriter, op, newResultTypes, newOperands);
     SmallVector<Value, 4> replacements;
     replacements.reserve(newOp->getNumResults());
     for (auto result : llvm::zip(op->getResults(), newOp->getResults())) {
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp
index 5e2f887..7477bc0 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Passes/FoldIntoPackAndUnpackOps.cpp
@@ -53,11 +53,8 @@
     Value output = rewriter.create<tensor::EmptyOp>(
         sliceOp.getLoc(), sliceOp.getMixedSizes(), elementType);
     rewriter.replaceOpWithNewOp<UnPackOp>(
-        sliceOp, output.getType(), unpackOp.getInput(), output,
-        unpackOp.getOuterDimsPerm().empty() ? nullptr
-                                            : unpackOp.getOuterDimsPerm(),
-        unpackOp.getInnerDimsPos(), unpackOp.getInnerTiles(),
-        unpackOp.getStaticInnerTiles());
+        sliceOp, unpackOp.getInput(), output, unpackOp.getInnerDimsPos(),
+        unpackOp.getMixedTiles(), unpackOp.getOuterDimsPerm());
     return success();
   }
 };
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
index f7e16e8..1283374 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Transforms.cpp
@@ -789,7 +789,7 @@
     // The size is less than or equal to tileSize because outer dims are all 1s.
     Optional<int64_t> tileSize =
         getConstantIntValue(tileAndPosMapping.lookup(dim));
-    assert(tileSize.hasValue() && "dynamic inner tile size is not supported");
+    assert(tileSize.has_value() && "dynamic inner tile size is not supported");
     paddedShape.push_back(tileSize.value());
   }
   auto resultType =
@@ -826,12 +826,11 @@
     }
     // The dimensions map in the order of output dimensions. Since the
     // interchange is applied, we have to undo it for input.
-    if (auto outerDims = packOp.getOuterDimsPerm()) {
-      inputExprs = undoInterchange<AffineExpr>(
-          inputExprs, extractFromI64ArrayAttr(outerDims));
+    if (!packOp.getOuterDimsPerm().empty()) {
+      inputExprs =
+          undoInterchange<AffineExpr>(inputExprs, packOp.getOuterDimsPerm());
     }
-    for (auto en :
-         llvm::enumerate(extractFromI64ArrayAttr(packOp.getInnerDimsPos()))) {
+    for (auto en : llvm::enumerate(packOp.getInnerDimsPos())) {
       inputExprs[en.value()] =
           rewriter.getAffineDimExpr(inputRank + en.index());
     }
@@ -897,8 +896,7 @@
         loc, readType, unpackOp.getInput(), readOffsets, readSizes,
         readStrides);
 
-    SmallVector<int64_t> innerDimsPos =
-        extractFromI64ArrayAttr(unpackOp.getInnerDimsPos());
+    ArrayRef<int64_t> innerDimsPos = unpackOp.getInnerDimsPos();
     auto interchangeVector =
         computeInterchangeFromDimPos(innerDimsPos, outputRank);
     SmallVector<int64_t> transpShape =
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
index 7d0a9a7..de014ea 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
@@ -26,6 +26,6 @@
   transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1 : (!pdl.operation) -> !pdl.operation
-    transform.structured.tile %0 [4, 4, 4] {pad = false}
+    transform.structured.tile %0 [4, 4, 4]
   }
 }
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 3c22b33..907e473 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 3c22b33821baf0d59ae4d947e0c6c900909b74cc
+Subproject commit 907e473569b40da44cbef1d353e05363651cc66f
diff --git a/third_party/mlir-hlo b/third_party/mlir-hlo
index 7d965dd..312c01c 160000
--- a/third_party/mlir-hlo
+++ b/third_party/mlir-hlo
@@ -1 +1 @@
-Subproject commit 7d965ddaad2f15278526d813d1b27b3a6ec3207a
+Subproject commit 312c01cb1221431c18103f31c68f2439928f7abe