Revert enabling Vulkan ASan tests, with cleanup. (#8550)
Attempt 2 at https://github.com/google/iree/pull/8545, with fewer forward-looking changes :/
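
Most of the churn in the diff below is mechanical fallout from the commit bump: upstream linalg.fill now takes explicit ins/outs operand lists instead of the old parenthesized form. A minimal before/after sketch (SSA names are illustrative, taken from the tests touched here):

  // Old form, removed upstream:
  %fill = linalg.fill(%cst, %init) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
  // New form used throughout this patch:
  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>

The FileCheck patterns follow suit (matching linalg.fill on one line and the outs operand via CHECK-SAME), and TypePropagationPass switches from the removed FillOp adaptor accessors value()/output() to adaptor.inputs().front() and adaptor.outputs().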
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py b/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
index 63c9133..aad06d4 100644
--- a/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
@@ -97,6 +97,22 @@
"MhloDialect",
"MLIRMhloUtils",
],
+ "@mlir-hlo//:hlo_legalize_shape_ops_to_standard": [
+ "tensorflow::external_mhlo_includes",
+ "MhloShapeOpsToStandard",
+ ],
+ "@mlir-hlo//:hlo_legalize_to_arithmetic": [
+ "tensorflow::external_mhlo_includes",
+ "MhloToArithmeticConversion",
+ ],
+ "@mlir-hlo//:hlo_legalize_to_lhlo": [
+ "tensorflow::external_mhlo_includes",
+ "MhloToLhloConversion",
+ ],
+ "@mlir-hlo//:hlo_legalize_to_memref": [
+ "tensorflow::external_mhlo_includes",
+ "MhloToMemrefConversion",
+ ],
"@mlir-hlo//:legalize_control_flow": [
"tensorflow::external_mhlo_includes",
"MhloToStandard",
diff --git a/integrations/tensorflow/WORKSPACE b/integrations/tensorflow/WORKSPACE
index 3cd4a16..410fd2f 100644
--- a/integrations/tensorflow/WORKSPACE
+++ b/integrations/tensorflow/WORKSPACE
@@ -7,7 +7,7 @@
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
-TENSORFLOW_COMMIT = "58b667263ba5734ba175a7640f1a1d572e52bd3d"
+TENSORFLOW_COMMIT = "fe3fd49d08db3174730123cbab2fed8bbec9cf1b"
git_repository(
name = "org_tensorflow",
diff --git a/integrations/tensorflow/build_tools/bazel/iree-tf.bazelrc b/integrations/tensorflow/build_tools/bazel/iree-tf.bazelrc
index 2c26542..e20c24d 100644
--- a/integrations/tensorflow/build_tools/bazel/iree-tf.bazelrc
+++ b/integrations/tensorflow/build_tools/bazel/iree-tf.bazelrc
@@ -13,3 +13,4 @@
# Flags specific for working around tensorflow warnings.
build:generic_clang --copt=-Wno-inconsistent-missing-override --host_copt=-Wno-inconsistent-missing-override
+build:generic_clang --copt=-Wno-c++11-narrowing --host_copt=-Wno-c++11-narrowing
diff --git a/iree/compiler/Codegen/Common/TypePropagationPass.cpp b/iree/compiler/Codegen/Common/TypePropagationPass.cpp
index 851e64c..f120f46 100644
--- a/iree/compiler/Codegen/Common/TypePropagationPass.cpp
+++ b/iree/compiler/Codegen/Common/TypePropagationPass.cpp
@@ -238,7 +238,7 @@
if (outputType == legalizedOutputType) {
return rewriter.notifyMatchFailure(fillOp, "op already legal");
}
- Value value = adaptor.value();
+ Value value = adaptor.inputs().front();
Optional<Type> legalizedElementType =
getLegalizedElementType(value.getType());
if (!legalizedElementType) {
@@ -246,8 +246,8 @@
}
Value legalizedValue = convertElementType(
rewriter, fillOp->getLoc(), legalizedElementType.getValue(), value);
- rewriter.replaceOpWithNewOp<linalg::FillOp>(fillOp, legalizedValue,
- adaptor.output());
+ rewriter.replaceOpWithNewOp<linalg::FillOp>(
+ fillOp, ValueRange{legalizedValue}, ValueRange{adaptor.outputs()});
return success();
}
};
diff --git a/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir b/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir
index 6f2493f..0907d4c 100644
--- a/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir
+++ b/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir
@@ -74,7 +74,7 @@
%lhs_tile = flow.dispatch.tensor.load %lhs, offsets = [%iv0, 0], sizes = [%tilesize_y, %k], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xf32>{%m, %k} -> tensor<?x?xf32>
%rhs_tile = flow.dispatch.tensor.load %rhs, offsets = [0, %iv1], sizes = [%k, %tilesize_x], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xf32>{%k, %n} -> tensor<?x?xf32>
%init_tile = linalg.init_tensor [%tilesize_y, %tilesize_x] : tensor<?x?xf32>
- %fill_tile = linalg.fill(%cst, %init_tile) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %fill_tile = linalg.fill ins(%cst : f32) outs(%init_tile : tensor<?x?xf32>) -> tensor<?x?xf32>
%matmul_tile = linalg.matmul ins(%lhs_tile, %rhs_tile : tensor<?x?xf32>, tensor<?x?xf32>) outs(%fill_tile : tensor<?x?xf32>) -> tensor<?x?xf32>
flow.dispatch.tensor.store %matmul_tile, %result, offsets = [%iv0, %iv1], sizes = [%tilesize_y, %tilesize_x], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32>{%m, %n}
}
@@ -90,7 +90,8 @@
// CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]]
// CHECK-DAG: %[[RHS_TILE:.+]] = flow.dispatch.tensor.load %[[RHS]]
// CHECK-DAG: %[[RESULT_TILE:.+]] = flow.dispatch.tensor.load %[[RESULT]]
-// CHECK: %[[FILL_TILE:.+]] = linalg.fill(%{{.+}}, %[[RESULT_TILE]])
+// CHECK: %[[FILL_TILE:.+]] = linalg.fill
+// CHECK-SAME: outs(%[[RESULT_TILE]] :
// CHECK: %[[MATMUL_TILE:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[FILL_TILE]] : tensor<?x?xf32>)
@@ -303,7 +304,7 @@
%7 = flow.dispatch.tensor.load %0, offsets = [%arg0, %arg1, 0], sizes = [%c1, %c32, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:4x32x1024xf32> -> tensor<?x?x1024xf32>
%8 = flow.dispatch.tensor.load %1, offsets = [%arg0, 0, %arg2], sizes = [%c1, 1024, %c32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:4x1024x64xf32> -> tensor<?x1024x?xf32>
%9 = linalg.init_tensor [1, 32, 32] : tensor<1x32x32xf32>
- %10 = linalg.fill(%cst, %9) {__internal_linalg_transform__ = "workgroup"} : f32, tensor<1x32x32xf32> -> tensor<1x32x32xf32>
+ %10 = linalg.fill {__internal_linalg_transform__ = "workgroup"} ins(%cst : f32) outs(%9 : tensor<1x32x32xf32>) -> tensor<1x32x32xf32>
%11 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup", is_root_op} ins(%7, %8 : tensor<?x?x1024xf32>, tensor<?x1024x?xf32>) outs(%10 : tensor<1x32x32xf32>) -> tensor<1x32x32xf32>
%12 = tensor.cast %11 : tensor<1x32x32xf32> to tensor<?x?x?xf32>
flow.dispatch.tensor.store %12, %2, offsets = [%arg0, %arg1, %arg2], sizes = [%c1, %c32, %c32], strides = [1, 1, 1] : tensor<?x?x?xf32> -> !flow.dispatch.tensor<writeonly:4x32x64xf32>
@@ -321,7 +322,8 @@
// CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]]
// CHECK-DAG: %[[RHS_TILE:.+]] = flow.dispatch.tensor.load %[[RHS]]
// CHECK-DAG: %[[RESULT_TILE:.+]] = flow.dispatch.tensor.load %[[RESULT]]
-// CHECK: %[[FILL_TILE:.+]] = linalg.fill(%{{.+}}, %[[RESULT_TILE]])
+// CHECK: %[[FILL_TILE:.+]] = linalg.fill
+// CHECK-SAME: outs(%[[RESULT_TILE]] :
// CHECK: %[[MATMUL_TILE:.+]] = linalg.batch_matmul
// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]]
// CHECK-SAME: outs(%[[FILL_TILE]]
@@ -497,7 +499,7 @@
%8 = tensor.extract_slice %cst_0[%arg1] [64] [1] : tensor<64xf32> to tensor<64xf32>
%9 = flow.dispatch.tensor.load %0, offsets = [%arg0, 0], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:6400x64xf32> -> tensor<64x64xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [64, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:64x64xf32> -> tensor<64x64xf32>
- %11 = linalg.fill(%cst_1, %7) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
+ %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<64x64xf32>) -> tensor<64x64xf32>
%12 = linalg.matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [8, 32, 0], [0, 0, 16]]>} ins(%9, %10 : tensor<64x64xf32>, tensor<64x64xf32>) outs(%11 : tensor<64x64xf32>) -> tensor<64x64xf32>
%13 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%8, %12 : tensor<64xf32>, tensor<64x64xf32>) outs(%7 : tensor<64x64xf32>) {
^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
@@ -522,7 +524,8 @@
// CHECK-NOT: linalg.init_tensor
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[OUTPUT]]
// CHECK-NOT: linalg.init_tensor
-// CHECK: linalg.fill(%{{.+}}, %[[LOAD]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[LOAD]] :
// CHECK: %[[MATMUL:.+]] = linalg.matmul
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: outs(%[[MATMUL]] :
@@ -557,7 +560,7 @@
%12 = affine.min affine_map<(d0) -> (-d0 + 49, 16)>(%arg1)
%13 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [16, %12], strides = [1, 1] : !flow.dispatch.tensor<readonly:16x49xf32> -> tensor<16x?xf32>
%14 = linalg.init_tensor [%10, %12] : tensor<?x?xf32>
- %15 = linalg.fill(%cst, %14) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %15 = linalg.fill ins(%cst : f32) outs(%14 : tensor<?x?xf32>) -> tensor<?x?xf32>
%16 = linalg.matmul ins(%11, %13 : tensor<?x16xf32>, tensor<16x?xf32>) outs(%15 : tensor<?x?xf32>) -> tensor<?x?xf32>
%17 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%16 : tensor<?x?xf32>) outs(%9 : tensor<?x?xf32>) {
^bb0(%arg2: f32, %arg3: f32):
diff --git a/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir b/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
index 5fb20f8..b9d84ee 100644
--- a/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
+++ b/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
@@ -99,7 +99,7 @@
%lhs_tile = flow.dispatch.tensor.load %lhs, offsets = [%iv0, 0], sizes = [%tilesize_y, %k], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xf32>{%m, %k} -> tensor<?x?xf32>
%rhs_tile = flow.dispatch.tensor.load %rhs, offsets = [0, %iv1], sizes = [%k, %tilesize_x], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xf32>{%k, %n} -> tensor<?x?xf32>
%init_tile = linalg.init_tensor [%tilesize_y, %tilesize_x] : tensor<?x?xf32>
- %fill_tile = linalg.fill(%cst, %init_tile) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %fill_tile = linalg.fill ins(%cst : f32) outs(%init_tile : tensor<?x?xf32>) -> tensor<?x?xf32>
%matmul_tile = linalg.matmul ins(%lhs_tile, %rhs_tile : tensor<?x?xf32>, tensor<?x?xf32>) outs(%fill_tile : tensor<?x?xf32>) -> tensor<?x?xf32>
flow.dispatch.tensor.store %matmul_tile, %result, offsets = [%iv0, %iv1], sizes = [%tilesize_y, %tilesize_x], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<readwrite:?x?xf32>{%m, %n}
}
@@ -136,7 +136,9 @@
// CHECK-DAG: %[[LHS_TILE:.+]] = memref.subview %[[LHS]][%[[IV0]], 0] [%[[TILESIZE_Y]], %[[K]]]
// CHECK-DAG: %[[RHS_TILE:.+]] = memref.subview %[[RHS]][0, %[[IV1]]] [%[[K]], %[[TILESIZE_X]]]
// CHECK-DAG: %[[RESULT_TILE:.+]] = memref.subview %[[RESULT]][%[[IV0]], %[[IV1]]] [%[[TILESIZE_Y]], %[[TILESIZE_X]]]
-// CHECK: linalg.fill(%[[CST]], %[[RESULT_TILE]])
+// CHECK: linalg.fill
+// CHECK-SAME: ins(%[[CST]] :
+// CHECK-SAME: outs(%[[RESULT_TILE]] :
// CHECK: linalg.matmul
// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]]
// CHECK-SAME: outs(%[[RESULT_TILE]]
diff --git a/iree/compiler/Codegen/Common/test/linalg_bufferize.mlir b/iree/compiler/Codegen/Common/test/linalg_bufferize.mlir
index a9eba4e..5e40783 100644
--- a/iree/compiler/Codegen/Common/test/linalg_bufferize.mlir
+++ b/iree/compiler/Codegen/Common/test/linalg_bufferize.mlir
@@ -736,7 +736,7 @@
%11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg1)[%workgroup_size_x]
%12 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c2, %11], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:2x3xf32> -> tensor<2x?xf32>
%13 = linalg.init_tensor [%9, %11] : tensor<?x?xf32>
- %14 = linalg.fill(%cst, %13) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %14 = linalg.fill ins(%cst : f32) outs(%13 : tensor<?x?xf32>) -> tensor<?x?xf32>
%15 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%10, %12 : tensor<?x2xf32>, tensor<2x?xf32>) outs(%14 : tensor<?x?xf32>) -> tensor<?x?xf32>
flow.dispatch.tensor.store %15, %2, offsets = [%arg0, %arg1], sizes = [%9, %11], strides = [%c1, %c1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:1x3xf32>
}
@@ -754,7 +754,8 @@
// CHECK-DAG: %[[LHS_TILE:.+]] = memref.subview %[[RESHAPE_LHS]][%[[IV0]], 0]
// CHECK-DAG: %[[RESULT_TILE:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]]
// CHECK-DAG: %[[RHS_TILE:.+]] = memref.subview %[[RHS]][0, %[[IV1]]]
-// CHECK: linalg.fill(%{{.+}}, %[[RESULT_TILE]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[RESULT_TILE]] :
// CHECK: linalg.matmul
// CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]]
// CHECK-SAME: outs(%[[RESULT_TILE]]
@@ -920,7 +921,7 @@
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32>
%4 = tensor.extract %3[] : tensor<i32>
- %5 = linalg.fill(%4, %2) : i32, tensor<3x9xi32> -> tensor<3x9xi32>
+ %5 = linalg.fill ins(%4 : i32) outs(%2 : tensor<3x9xi32>) -> tensor<3x9xi32>
flow.dispatch.tensor.store %5, %1, offsets = [0, 0], sizes = [3, 9], strides = [1, 1] : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32>
return
}
@@ -929,7 +930,9 @@
// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer)
// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer)
// CHECK: %[[LOAD:.+]] = memref.load %[[ARG0]]
-// CHECK: linalg.fill(%[[LOAD]], %[[RET0]])
+// CHECK: linalg.fill
+// CHECK-SAME: ins(%[[LOAD]] :
+// CHECK-SAME: outs(%[[RET0]] :
// -----
@@ -992,7 +995,7 @@
%10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 5)>(%arg1)[%workgroup_size_x]
%11 = tensor.extract_slice %cst[0, %arg1] [3, %10] [1, 1] : tensor<3x5xf32> to tensor<3x?xf32>
%12 = linalg.init_tensor [%8, %10] : tensor<?x?xf32>
- %13 = linalg.fill(%cst_0, %12) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %13 = linalg.fill ins(%cst_0 : f32) outs(%12 : tensor<?x?xf32>) -> tensor<?x?xf32>
%14 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%9, %11 : tensor<?x3xf32>, tensor<3x?xf32>) outs(%13 : tensor<?x?xf32>) -> tensor<?x?xf32>
flow.dispatch.tensor.store %14, %1, offsets = [%arg0, %arg1], sizes = [%8, %10], strides = [%c1, %c1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:5x5xf32>
}
@@ -1011,7 +1014,8 @@
// CHECK-DAG: %[[LHS_SUBVIEW:.+]] = memref.subview %[[LHS]][%[[IV0]], 0]
// CHECK-DAG: %[[RHS_SUBVIEW:.+]] = memref.subview %[[RHS]][0, %[[IV1]]]
// CHECK-DAG: %[[RESULT_SUBVIEW:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]]
-// CHECK: linalg.fill(%{{.+}}, %[[RESULT_SUBVIEW]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[RESULT_SUBVIEW]] :
// CHECK: linalg.matmul
// CHECK-SAME: ins(%[[LHS_SUBVIEW]], %[[RHS_SUBVIEW]]
// CHECK-SAME: outs(%[[RESULT_SUBVIEW]]
@@ -1067,7 +1071,7 @@
%5 = tensor.extract %4[] : tensor<f32>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [1, 4, 6, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x4x6x1xf32> -> tensor<1x4x6x1xf32>
%7 = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32>
- %8 = linalg.fill(%5, %7) : f32, tensor<1x2x2x1xf32> -> tensor<1x2x2x1xf32>
+ %8 = linalg.fill ins(%5 : f32) outs(%7 : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
%9 = linalg.pooling_nhwc_sum {
dilations = dense<1> : vector<2xi64>,
strides = dense<[2, 3]> : vector<2xi64>
@@ -1083,7 +1087,9 @@
// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<1x4x6x1xf32>
// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : memref<1x2x2x1xf32>
// CHECK: %[[INIT_VAL:.+]] = memref.load %[[INIT]][] : memref<f32>
-// CHECK: linalg.fill(%[[INIT_VAL]], %[[RET0]]) : f32, memref<1x2x2x1xf32>
+// CHECK: linalg.fill
+// CHECK-SAME: ins(%[[INIT_VAL]] :
+// CHECK-SAME: outs(%[[RET0]] :
// CHECK: linalg.pooling_nhwc_sum
// CHECK-SAME: dilations = dense<1> : vector<2xi64>
// CHECK-SAME: strides = dense<[2, 3]> : vector<2xi64>
@@ -1212,7 +1218,7 @@
%cst = arith.constant 0.0 : f32
%0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %1 = linalg.fill(%cst, %0) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%2 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>)
@@ -1238,7 +1244,8 @@
// CHECK-NOT: memref.alloc
// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan set(0) binding(3) type(storage_buffer)
-// CHECK: linalg.fill(%{{.+}}, %[[OUTPUT]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[OUTPUT]] :
// CHECK-NEXT: linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: outs(%[[OUTPUT]] : memref<1x112x112x32xf32>)
// CHECK-NEXT: linalg.generic
@@ -1262,13 +1269,13 @@
%cst0 = arith.constant 0.0 : f32
%cst1 = arith.constant 1.0 : f32
%0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %1 = linalg.fill(%cst0, %0) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %1 = linalg.fill ins(%cst0 : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%2 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>)
outs(%1 : tensor<1x112x112x32xf32>)
-> tensor<1x112x112x32xf32>
- %3 = linalg.fill(%cst1, %0) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %3 = linalg.fill ins(%cst1 : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%4 = linalg.generic {
indexing_maps = [
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
@@ -1289,10 +1296,12 @@
// CHECK-LABEL: func @dont_use_buffer_for_operand_when_output_tensor_used()
// CHECK: %[[ALLOC:.+]] = memref.alloc
// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan set(0) binding(3) type(storage_buffer)
-// CHECK: linalg.fill(%{{.+}}, %[[ALLOC]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[ALLOC]] :
// CHECK-NEXT: linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: outs(%[[ALLOC]] : memref<1x112x112x32xf32>)
-// CHECK-NEXT: linalg.fill(%{{.+}}, %[[OUTPUT]])
+// CHECK-NEXT: linalg.fill
+// CHECK-SAME: outs(%[[OUTPUT]] :
// CHECK-NEXT: linalg.generic
// CHECK-SAME: ins(%[[ALLOC]], %{{.+}} : memref<1x112x112x32xf32>, memref<32xf32>)
// CHECK-SAME: outs(%[[OUTPUT]] : memref<1x112x112x32xf32>)
@@ -1365,7 +1374,7 @@
%7 = flow.dispatch.tensor.load %0, offsets = [%arg0, %arg1, 0], sizes = [%c1, %c32, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:4x32x1024xf32> -> tensor<?x?x1024xf32>
%8 = flow.dispatch.tensor.load %1, offsets = [%arg0, 0, %arg2], sizes = [%c1, 1024, %c32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:4x1024x64xf32> -> tensor<?x1024x?xf32>
%9 = linalg.init_tensor [1, 32, 32] : tensor<1x32x32xf32>
- %10 = linalg.fill(%cst, %9) {__internal_linalg_transform__ = "workgroup"} : f32, tensor<1x32x32xf32> -> tensor<1x32x32xf32>
+ %10 = linalg.fill {__internal_linalg_transform__ = "workgroup"} ins(%cst : f32) outs(%9 : tensor<1x32x32xf32>) -> tensor<1x32x32xf32>
%11 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup", is_root_op} ins(%7, %8 : tensor<?x?x1024xf32>, tensor<?x1024x?xf32>) outs(%10 : tensor<1x32x32xf32>) -> tensor<1x32x32xf32>
%12 = tensor.cast %11 : tensor<1x32x32xf32> to tensor<?x?x?xf32>
flow.dispatch.tensor.store %12, %2, offsets = [%arg0, %arg1, %arg2], sizes = [%c1, %c32, %c32], strides = [1, 1, 1] : tensor<?x?x?xf32> -> !flow.dispatch.tensor<writeonly:4x32x64xf32>
@@ -1383,7 +1392,9 @@
// CHECK: %[[LHSV:.+]] = memref.subview %[[LHS]]
// CHECK: %[[RHSV:.+]] = memref.subview %[[RHS]]
// CHECK: %[[RESULTV:.+]] = memref.subview %[[RESULT]]
-// CHECK: linalg.fill(%[[ZERO]], %[[RESULTV]])
+// CHECK: linalg.fill
+// CHECK-SAME: ins(%[[ZERO]] :
+// CHECK-SAME: outs(%[[RESULTV]] :
// CHECK: linalg.batch_matmul {{.*}} ins(%[[LHSV]], %[[RHSV]] : {{.*}}) outs(%[[RESULTV]]
// -----
@@ -1621,7 +1632,7 @@
%7 = flow.dispatch.tensor.load %0, offsets = [%arg0, 0], sizes = [64, 27], strides = [1, 1] : !flow.dispatch.tensor<readonly:12544x27xf32> -> tensor<64x27xf32>
%8 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [27, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:27x16xf32> -> tensor<27x16xf32>
%9 = linalg.init_tensor [64, 16] : tensor<64x16xf32>
- %10 = linalg.fill(%cst, %9) {__internal_linalg_transform__ = "workgroup"} : f32, tensor<64x16xf32> -> tensor<64x16xf32>
+ %10 = linalg.fill {__internal_linalg_transform__ = "workgroup"} ins(%cst : f32) outs(%9 : tensor<64x16xf32>) -> tensor<64x16xf32>
%11 = tensor.pad %7 low[0, 0] high[0, 5] {
^bb0(%arg2: index, %arg3: index): // no predecessors
tensor.yield %cst : f32
@@ -1639,23 +1650,29 @@
}
// CHECK-LABEL: func @padded_matmul()
-// CHECK-DAG: %[[LHS_PADDED:.+]] = memref.alloc() : memref<64x32xf32>
-// CHECK-DAG: %[[RHS_PADDED:.+]] = memref.alloc() : memref<32x16xf32>
-// CHECK-DAG: %[[C0:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<12544x27xf32>
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<27x16xf32>
-// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : memref<12544x16xf32>
-// CHECK-DAG: %[[LHS_V:.+]] = memref.subview %[[LHS]][%{{.*}}, 0] [64, 27] [1, 1]
-// CHECK-DAG: %[[RHS_V:.+]] = memref.subview %[[RHS]][0, %{{.*}}] [27, 16] [1, 1]
-// CHECK-DAG: %[[DST_V:.+]] = memref.subview %[[DST]][%{{.*}}, %{{.*}}] [64, 16] [1, 1]
-// CHECK: linalg.fill(%[[C0]], %[[DST_V]])
-// CHECK: linalg.fill(%[[C0]], %[[LHS_PADDED]]) : f32, memref<64x32xf32>
-// CHECK: %[[LHS_PADDED_INTER:.+]] = memref.subview %[[LHS_PADDED]][0, 0] [64, 27] [1, 1]
-// CHECK: linalg.generic {{.*}} ins(%[[LHS_V]] {{.*}} outs(%[[LHS_PADDED_INTER]]
-// CHECK: linalg.fill(%[[C0]], %[[RHS_PADDED]]) : f32, memref<32x16xf32>
-// CHECK: %[[RHS_PADDED_INTER:.+]] = memref.subview %[[RHS_PADDED]][0, 0] [27, 16] [1, 1]
-// CHECK: linalg.generic {{.*}} ins(%[[RHS_V]] {{.*}} outs(%[[RHS_PADDED_INTER]]
-// CHECK: linalg.matmul ins(%[[LHS_PADDED]], %[[RHS_PADDED]] : memref<64x32xf32>, memref<32x16xf32>)
+// CHECK-DAG: %[[LHS_PADDED:.+]] = memref.alloc() : memref<64x32xf32>
+// CHECK-DAG: %[[RHS_PADDED:.+]] = memref.alloc() : memref<32x16xf32>
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<12544x27xf32>
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<27x16xf32>
+// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : memref<12544x16xf32>
+// CHECK-DAG: %[[LHS_V:.+]] = memref.subview %[[LHS]][%{{.*}}, 0] [64, 27] [1, 1]
+// CHECK-DAG: %[[RHS_V:.+]] = memref.subview %[[RHS]][0, %{{.*}}] [27, 16] [1, 1]
+// CHECK-DAG: %[[DST_V:.+]] = memref.subview %[[DST]][%{{.*}}, %{{.*}}] [64, 16] [1, 1]
+// CHECK: linalg.fill
+// CHECK-SAME: ins(%[[C0]] :
+// CHECK-SAME: outs(%[[DST_V]] :
+// CHECK: linalg.fill
+// CHECK-SAME: ins(%[[C0]] :
+// CHECK-SAME: outs(%[[LHS_PADDED]] :
+// CHECK: %[[LHS_PADDED_INTER:.+]] = memref.subview %[[LHS_PADDED]][0, 0] [64, 27] [1, 1]
+// CHECK: linalg.generic {{.*}} ins(%[[LHS_V]] {{.*}} outs(%[[LHS_PADDED_INTER]]
+// CHECK: linalg.fill
+// CHECK-SAME: ins(%[[C0]] :
+// CHECK-SAME: outs(%[[RHS_PADDED]] :
+// CHECK: %[[RHS_PADDED_INTER:.+]] = memref.subview %[[RHS_PADDED]][0, 0] [27, 16] [1, 1]
+// CHECK: linalg.generic {{.*}} ins(%[[RHS_V]] {{.*}} outs(%[[RHS_PADDED_INTER]]
+// CHECK: linalg.matmul ins(%[[LHS_PADDED]], %[[RHS_PADDED]] : memref<64x32xf32>, memref<32x16xf32>)
// -----
@@ -1695,7 +1712,7 @@
tensor.yield %cst : f32
} : tensor<2x?xf32> to tensor<4x4xf32>
%15 = linalg.init_tensor [4, 4] : tensor<4x4xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<4x4xf32>) -> tensor<4x4xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%13, %14 : tensor<4x4xf32>, tensor<4x4xf32>) outs(%16 : tensor<4x4xf32>) -> tensor<4x4xf32>
%18 = tensor.extract_slice %17[0, 0] [%7, %9] [1, 1] : tensor<4x4xf32> to tensor<?x?xf32>
flow.dispatch.tensor.store %18, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32>{%m, %n}
@@ -1720,12 +1737,15 @@
// CHECK-DAG: %[[TILE_N:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[N]]]
// CHECK-DAG: %[[ARG0_SV:.+]] = memref.subview %[[ARG0]]
// CHECK-DAG: %[[ARG1_SV:.+]] = memref.subview %[[ARG1]]
-// CHECK: linalg.fill(%{{.*}}, %[[ALLOC_ARG0]]
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[ALLOC_ARG0]] :
// CHECK: %[[ALLOC_ARG0_SV:.+]] = memref.subview %[[ALLOC_ARG0]]
// CHECK: linalg.generic {{.*}} ins(%[[ARG0_SV]] {{.*}} outs(%[[ALLOC_ARG0_SV]]
-// CHECK: linalg.fill(%{{.*}}, %[[ALLOC_ARG1]]
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[ALLOC_ARG1]] :
// CHECK: linalg.generic {{.*}} ins(%[[ARG1_SV]]
-// CHECK: linalg.fill(%{{.*}}, %[[ALLOC_RET0]]
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[ALLOC_RET0]] :
// CHECK: linalg.matmul
// CHECK-SAME: ins(%[[ALLOC_ARG0]], %[[ALLOC_ARG1]]
// CHECK-SAME: outs(%[[ALLOC_RET0]]
@@ -1756,8 +1776,8 @@
%7 = flow.dispatch.tensor.load %0, offsets = [0, %arg0], sizes = [%d0, %6], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xi32>{%d0, %d1} -> tensor<?x?xi32>
%9 = flow.dispatch.tensor.load %1, offsets = [0, %arg0], sizes = [%d0, %6], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xi32>{%d0, %d1} -> tensor<?x?xi32>
%13 = linalg.init_tensor [%6] : tensor<?xi32>
- %14 = linalg.fill(%c-2147483648_i32, %13) {__internal_linalg_transform__ = "workgroup", lowering_config = {tileSizes = [[128]]}} : i32, tensor<?xi32> -> tensor<?xi32>
- %17 = linalg.fill(%c0_i32, %13) {__internal_linalg_transform__ = "workgroup", lowering_config = {tileSizes = [[128]]}} : i32, tensor<?xi32> -> tensor<?xi32>
+ %14 = linalg.fill {__internal_linalg_transform__ = "workgroup", lowering_config = {tileSizes = [[128]]}} ins(%c-2147483648_i32 : i32) outs(%13 : tensor<?xi32>) -> tensor<?xi32>
+ %17 = linalg.fill {__internal_linalg_transform__ = "workgroup", lowering_config = {tileSizes = [[128]]}} ins(%c0_i32 : i32) outs(%13 : tensor<?xi32>) -> tensor<?xi32>
%18:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%7, %9 : tensor<?x?xi32>, tensor<?x?xi32>) outs(%14, %17 : tensor<?xi32>, tensor<?xi32>) attrs = {__internal_linalg_transform__ = "workgroup", lowering_config = {tileSizes = [[128]]}} {
^bb0(%arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32): // no predecessors
%19 = arith.cmpi sge, %arg1, %arg3 : i32
@@ -1784,9 +1804,11 @@
// CHECK-DAG: %[[ARG0_SV:.+]] = memref.subview %[[ARG0]]
// CHECK-DAG: %[[ARG1_SV:.+]] = memref.subview %[[ARG1]]
// CHECK-DAG: %[[RET0_SV:.+]] = memref.subview %[[RET0]]
-// CHECK-DAG: linalg.fill(%{{.*}}, %[[RET0_SV]]
+// CHECK-DAG: linalg.fill
+// CHECK-SAME: outs(%[[RET0_SV]] :
// CHECK-DAG: %[[RET1_SV:.+]] = memref.subview %[[RET1]]
-// CHECK-DAG: linalg.fill(%{{.*}}, %[[RET1_SV]]
+// CHECK-DAG: linalg.fill
+// CHECK-SAME: outs(%[[RET1_SV]] :
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0_SV]], %[[ARG1_SV]]
// CHECK-SAME: outs(%[[RET0_SV]], %[[RET1_SV]]
@@ -2020,7 +2042,7 @@
%9 = affine.min #map2(%arg1)
%10 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [144, %9], strides = [1, 1] : !flow.dispatch.tensor<readonly:144x370xf32> -> tensor<144x?xf32>
%11 = linalg.init_tensor [%7, %9] : tensor<?x?xf32>
- %12 = linalg.fill(%cst, %11) {__internal_linalg_transform__ = "workgroup", lowering_config = #config0} : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %12 = linalg.fill {__internal_linalg_transform__ = "workgroup", lowering_config = #config0} ins(%cst : f32) outs(%11 : tensor<?x?xf32>) -> tensor<?x?xf32>
%13 = scf.for %arg2 = %c0 to %c250 step %c32 iter_args(%arg3 = %12) -> (tensor<?x?xf32>) {
%14 = scf.for %arg4 = %c0 to %c370 step %c32 iter_args(%arg5 = %arg3) -> (tensor<?x?xf32>) {
%15 = scf.for %arg6 = %c0 to %c144 step %c24 iter_args(%arg7 = %arg5) -> (tensor<?x?xf32>) {
@@ -2309,7 +2331,7 @@
%13 = flow.dispatch.tensor.load %0, offsets = [%arg0, 0], sizes = [%9, %k], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xf32>{%m, %k} -> tensor<?x?xf32>
%15 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [%k, %10], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xf32>{%k, %n} -> tensor<?x?xf32>
%16 = linalg.init_tensor [%9, %10] : tensor<?x?xf32>
- %17 = linalg.fill(%cst, %16) {__internal_linalg_transform__ = "workgroup"} : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %17 = linalg.fill {__internal_linalg_transform__ = "workgroup"} ins(%cst : f32) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
%18 = scf.for %arg2 = %c0 to %9 step %c4 iter_args(%arg3 = %17) -> (tensor<?x?xf32>) {
%20 = scf.for %arg4 = %c0 to %10 step %c4 iter_args(%arg5 = %arg3) -> (tensor<?x?xf32>) {
%21 = affine.min affine_map<(d0, d1) -> (4, d0 - d1)>(%9, %arg2)
@@ -2360,7 +2382,8 @@
// CHECK-DAG: %[[LHS_SUBVIEW1:.+]] = memref.subview %[[LHS]]
// CHECK-DAG: %[[RHS_SUBVIEW1:.+]] = memref.subview %[[RHS]]
// CHECK-DAG: %[[OUT_SUBVIEW1:.+]] = memref.subview %[[OUT]]
-// CHECK: linalg.fill(%{{.+}}, %[[OUT_SUBVIEW1]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[OUT_SUBVIEW1]] :
// CHECK: scf.for
// CHECK: scf.for
// CHECK: %[[OUT_SUBVIEW2:.+]] = memref.subview %[[OUT_SUBVIEW1]]
@@ -2410,7 +2433,7 @@
%14 = affine.min affine_map<(d0) -> (-d0 + 1, 4)>(%arg1)
%15 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [3, %14], strides = [1, 1] : !flow.dispatch.tensor<readonly:?x?xf32>{%k, %n} -> tensor<?x?xf32>
%16 = linalg.init_tensor [%12, %14] : tensor<?x?xf32>
- %17 = linalg.fill(%cst, %16) {__internal_linalg_transform__ = "workgroup"} : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %17 = linalg.fill {__internal_linalg_transform__ = "workgroup"} ins(%cst : f32) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
%18 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%13, %15 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%17: tensor<?x?xf32>) -> tensor<?x?xf32>
@@ -2438,7 +2461,8 @@
// CHECK-DAG: %[[LHS_SUBVIEW1:.+]] = memref.subview %[[LHS]]
// CHECK-DAG: %[[RHS_SUBVIEW1:.+]] = memref.subview %[[RHS]]
// CHECK-DAG: %[[OUT_SUBVIEW1:.+]] = memref.subview %[[OUT]]
-// CHECK: linalg.fill(%{{.+}}, %[[OUT_SUBVIEW1]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[OUT_SUBVIEW1]] :
// CHECK: linalg.matmul
// CHECK-SAME: ins(%[[LHS_SUBVIEW1]], %[[RHS_SUBVIEW1]] :
// CHECK-SAME: outs(%[[OUT_SUBVIEW1]] :
@@ -2486,7 +2510,7 @@
%ts_2_m = affine.min #map_min(%iv2)[%c16, %ts_m]
%ts_2_n = affine.min #map_min(%iv3)[%c8, %ts_n]
%tile_init_2 = linalg.init_tensor [%ts_2_m, %ts_2_n] : tensor<?x?xf32>
- %fill_tile_2 = linalg.fill(%cst, %tile_init_2) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %fill_tile_2 = linalg.fill ins(%cst : f32) outs(%tile_init_2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%lhs_tile_2 = tensor.extract_slice %lhs_tile[%iv2, 0] [%ts_2_m, %k] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%rhs_tile_2 = tensor.extract_slice %rhs_tile[0, %iv3] [%k, %ts_2_n] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%matmul_tile_2 = linalg.matmul
@@ -2528,7 +2552,8 @@
// CHECK: scf.for %[[IV2:[a-zA-Z0-9]+]] =
// CHECK: scf.for %[[IV3:[a-zA-Z0-9]+]] =
// CHECK: %[[RESULT_TILE_2:.+]] = memref.subview %[[RESULT_TILE]][%[[IV2]], %[[IV3]]]
-// CHECK: linalg.fill(%{{.+}}, %[[RESULT_TILE_2]])
+// CHECK: linalg.fill
+// CHECK-SAME: outs(%[[RESULT_TILE_2]] :
// CHECK-DAG: %[[LHS_TILE_2:.+]] = memref.subview %[[LHS_TILE]][%[[IV2]], 0]
// CHECK-DAG: %[[RHS_TILE_2:.+]] = memref.subview %[[RHS_TILE]][0, %[[IV3]]]
// CHECK: linalg.matmul
diff --git a/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
index bb8498b..8aef8cb 100644
--- a/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
+++ b/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
@@ -266,7 +266,7 @@
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0], sizes = [%0, %3, %2], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:?x?x?xf32>{%0, %3, %2} -> tensor<?x?x?xf32>
%9 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
- %10 = linalg.fill(%cst, %9) : f32, tensor<?x?x?xf32> -> tensor<?x?x?xf32>
+ %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
%11 = linalg.batch_matmul {lowering_config = #config}
ins(%7, %8 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%10 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
flow.dispatch.tensor.store %11, %6, offsets = [0, 0, 0], sizes = [%0, %1, %2], strides = [1, 1, 1]
@@ -323,7 +323,7 @@
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 512], strides = [1, 1]
: !flow.dispatch.tensor<readonly:256x512xf32> -> tensor<256x512xf32>
%5 = linalg.init_tensor [128, 512] : tensor<128x512xf32>
- %6 = linalg.fill(%cst, %5) : f32, tensor<128x512xf32> -> tensor<128x512xf32>
+ %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<128x512xf32>) -> tensor<128x512xf32>
%7 = linalg.matmul {lowering_config = #config}
ins(%3, %4 : tensor<128x256xf32>, tensor<256x512xf32>) outs(%6 : tensor<128x512xf32>) -> tensor<128x512xf32>
flow.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [128, 512], strides = [1, 1]
@@ -352,7 +352,8 @@
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %{{.+}}, offsets = [%[[IV0]], 0], sizes = [32, 256]
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %{{.+}}, offsets = [0, %[[IV1]]], sizes = [256, 16]
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [32, 16]
-// CHECK-DAG: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
+// CHECK-DAG: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK-DAG: %[[GEMM:.+]] = linalg.matmul
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: flow.dispatch.tensor.store %[[GEMM]]
@@ -787,7 +788,7 @@
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 96], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x96xf32> -> tensor<3x3x96xf32>
%5 = linalg.init_tensor [1, 80, 80, 96] : tensor<1x80x80x96xf32>
- %6 = linalg.fill(%cst, %5) : f32, tensor<1x80x80x96xf32> -> tensor<1x80x80x96xf32>
+ %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<1x80x80x96xf32>) -> tensor<1x80x80x96xf32>
%7 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, lowering_config = #config, strides = dense<2> : tensor<2xi64>}
ins(%3, %4 : tensor<1x161x161x96xf32>, tensor<3x3x96xf32>) outs(%6 : tensor<1x80x80x96xf32>) -> tensor<1x80x80x96xf32>
flow.dispatch.tensor.store %7, %2, offsets = [0, 0, 0, 0], sizes = [1, 80, 80, 96], strides = [1, 1, 1, 1]
@@ -816,7 +817,8 @@
// CHECK: scf.for %[[IV1:.+]] =
// CHECK: scf.for %[[IV2:.+]] =
// CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 20, 40, 48]
-// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[RESULT:.+]] = linalg.depthwise_conv_2d_nhwc_hwc
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: flow.dispatch.tensor.store %[[RESULT]], %{{.+}}, offsets = [0, %[[IV0]], %[[IV1]], %[[IV2]]], sizes = [1, 20, 40, 48]
@@ -913,7 +915,7 @@
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [240, 40], strides = [1, 1]
: !flow.dispatch.tensor<readonly:240x40xf32> -> tensor<240x40xf32>
%5 = linalg.init_tensor [196, 40] : tensor<196x40xf32>
- %6 = linalg.fill(%cst, %5) : f32, tensor<196x40xf32> -> tensor<196x40xf32>
+ %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<196x40xf32>) -> tensor<196x40xf32>
%7 = linalg.matmul {lowering_config = #config}
ins(%3, %4 : tensor<196x240xf32>, tensor<240x40xf32>) outs(%6 : tensor<196x40xf32>) -> tensor<196x40xf32>
flow.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [196, 40], strides = [1, 1]
@@ -969,7 +971,7 @@
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [5, 5, 576], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:5x5x576xf32> -> tensor<5x5x576xf32>
%5 = linalg.init_tensor [1, 7, 7, 576] : tensor<1x7x7x576xf32>
- %6 = linalg.fill(%cst, %5) : f32, tensor<1x7x7x576xf32> -> tensor<1x7x7x576xf32>
+ %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
%7 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, lowering_config = #config, strides = dense<1> : tensor<2xi64>}
ins(%3, %4 : tensor<1x11x11x576xf32>, tensor<5x5x576xf32>) outs(%6 : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
flow.dispatch.tensor.store %7, %2, offsets = [0, 0, 0, 0], sizes = [1, 7, 7, 576], strides = [1, 1, 1, 1]
@@ -1021,7 +1023,7 @@
%0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0, 0], sizes = [7, 7, 2048], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:7x7x2048xf32> -> tensor<7x7x2048xf32>
%1 = linalg.init_tensor [7] : tensor<7xf32>
- %2 = linalg.fill(%cst, %1) : f32, tensor<7xf32> -> tensor<7xf32>
+ %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<7xf32>) -> tensor<7xf32>
%3 = linalg.generic {
indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction", "reduction"]}
ins(%0 : tensor<7x7x2048xf32>) outs(%2 : tensor<7xf32>) attrs = {lowering_config = #config} {
@@ -1058,7 +1060,8 @@
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: %[[INIT0:.+]] = linalg.init_tensor
// CHECK: %[[INIT:.+]] = linalg.init_tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[REDUCE:.+]] = linalg.generic
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: %[[GENERIC:.+]] = linalg.generic
diff --git a/iree/compiler/Codegen/Common/test/type_propagation.mlir b/iree/compiler/Codegen/Common/test/type_propagation.mlir
index 75ea3d6..a56c31e 100644
--- a/iree/compiler/Codegen/Common/test/type_propagation.mlir
+++ b/iree/compiler/Codegen/Common/test/type_propagation.mlir
@@ -225,7 +225,7 @@
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : !flow.dispatch.tensor<writeonly:?xi8>{%d}
%1 = linalg.init_tensor [%d] : tensor<?xi1>
%false = arith.constant false
- %2 = linalg.fill(%false, %1) : i1, tensor<?xi1> -> tensor<?xi1>
+ %2 = linalg.fill ins(%false : i1) outs(%1 : tensor<?xi1>) -> tensor<?xi1>
%3 = arith.extui %2 : tensor<?xi1> to tensor<?xi8>
flow.dispatch.tensor.store %3, %0, offsets=[0], sizes=[%d], strides=[1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:?xi8>{%d}
return
@@ -235,5 +235,7 @@
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor
// CHECK-DAG: %[[FALSE:.+]] = arith.constant false
// CHECK-DAG: %[[EXT_SCALAR:.+]] = arith.extui %[[FALSE]]
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[EXT_SCALAR]], %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[EXT_SCALAR]] :
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: flow.dispatch.tensor.store %[[FILL]], %[[OUT]]
diff --git a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
index 593810f..3445c60 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
@@ -193,7 +193,7 @@
%rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0, 0], sizes = [%B, %K, %N], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:?x?x?xf32>{%B, %K, %N} -> tensor<?x?x?xf32>
%init = linalg.init_tensor [%B, %M, %N] : tensor<?x?x?xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<?x?x?xf32> -> tensor<?x?x?xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
%batch_gemm = linalg.batch_matmul
ins(%lhs, %rhs : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%fill : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
flow.dispatch.tensor.store %batch_gemm, %result_binding, offsets = [0, 0, 0], sizes = [%B, %M, %N], strides = [1, 1, 1]
@@ -240,7 +240,7 @@
%rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [256, 512], strides = [1, 1]
: !flow.dispatch.tensor<readonly:256x512xf32> -> tensor<256x512xf32>
%init = linalg.init_tensor [128, 512] : tensor<128x512xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<128x512xf32> -> tensor<128x512xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<128x512xf32>) -> tensor<128x512xf32>
%gemm = linalg.matmul {compilation_info = #compilation}
ins(%lhs, %rhs : tensor<128x256xf32>, tensor<256x512xf32>)
outs(%fill : tensor<128x512xf32>) -> tensor<128x512xf32>
@@ -570,7 +570,7 @@
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:1x225x225x3xf32> -> tensor<1x225x225x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x16xf32> -> tensor<3x3x3x16xf32>
%5 = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
- %6 = linalg.fill(%cst, %5) : f32, tensor<1x112x112x16xf32> -> tensor<1x112x112x16xf32>
+ %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32>
%7 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%3, %4 : tensor<1x225x225x3xf32>, tensor<3x3x3x16xf32>) outs(%6 : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32>
flow.dispatch.tensor.store %7, %2, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 16], strides = [1, 1, 1, 1] : tensor<1x112x112x16xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x16xf32>
return
@@ -615,7 +615,7 @@
%filter = flow.dispatch.tensor.load %filter_binding, offsets = [0, 0, 0], sizes = [3, 3, 96], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x96xf32> -> tensor<3x3x96xf32>
%init = linalg.init_tensor [1, 80, 80, 96] : tensor<1x80x80x96xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<1x80x80x96xf32> -> tensor<1x80x80x96xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x80x80x96xf32>) -> tensor<1x80x80x96xf32>
%conv = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x161x161x96xf32>, tensor<3x3x96xf32>) outs(%fill : tensor<1x80x80x96xf32>) -> tensor<1x80x80x96xf32>
flow.dispatch.tensor.store %conv, %result_binding, offsets = [0, 0, 0, 0], sizes = [1, 80, 80, 96], strides = [1, 1, 1, 1]
@@ -707,7 +707,7 @@
%rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [240, 40], strides = [1, 1]
: !flow.dispatch.tensor<readonly:240x40xf32> -> tensor<240x40xf32>
%init = linalg.init_tensor [196, 40] : tensor<196x40xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<196x40xf32> -> tensor<196x40xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<196x40xf32>) -> tensor<196x40xf32>
%gemm = linalg.matmul ins(%lhs, %rhs : tensor<196x240xf32>, tensor<240x40xf32>)
outs(%fill : tensor<196x40xf32>) -> tensor<196x40xf32>
flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [196, 40], strides = [1, 1]
@@ -755,7 +755,7 @@
%filter = flow.dispatch.tensor.load %filter_binding, offsets = [0, 0, 0], sizes = [5, 5, 576], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:5x5x576xf32> -> tensor<5x5x576xf32>
%init = linalg.init_tensor [1, 7, 7, 576] : tensor<1x7x7x576xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<1x7x7x576xf32> -> tensor<1x7x7x576xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
%conv = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
ins(%input, %filter : tensor<1x11x11x576xf32>, tensor<5x5x576xf32>)
outs(%fill : tensor<1x7x7x576xf32>) -> tensor<1x7x7x576xf32>
@@ -803,7 +803,7 @@
%rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [512, 128], strides = [1, 1]
: !flow.dispatch.tensor<readonly:512x128xf32> -> tensor<512x128xf32>
%init = linalg.init_tensor [384, 128] : tensor<384x128xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<384x128xf32> -> tensor<384x128xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<384x128xf32>) -> tensor<384x128xf32>
%gemm = linalg.matmul ins(%lhs, %rhs : tensor<384x512xf32>, tensor<512x128xf32>)
outs(%fill : tensor<384x128xf32>) -> tensor<384x128xf32>
flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [384, 128], strides = [1, 1]
@@ -851,7 +851,7 @@
%rhs = flow.dispatch.tensor.load %rhs_binding, offsets = [0, 0], sizes = [512, 128], strides = [1, 1]
: !flow.dispatch.tensor<readonly:512x128xf32> -> tensor<512x128xf32>
%init = linalg.init_tensor [384, 128] : tensor<384x128xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<384x128xf32> -> tensor<384x128xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<384x128xf32>) -> tensor<384x128xf32>
%gemm = linalg.matmul ins(%lhs, %rhs : tensor<384x512xf32>, tensor<512x128xf32>)
outs(%fill : tensor<384x128xf32>) -> tensor<384x128xf32>
flow.dispatch.tensor.store %gemm, %result_binding, offsets = [0, 0], sizes = [384, 128], strides = [1, 1]
@@ -896,7 +896,7 @@
%input = flow.dispatch.tensor.load %arg0, offsets = [0, 0, 0], sizes = [7, 7, 2048], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:7x7x2048xf32> -> tensor<7x7x2048xf32>
%init = linalg.init_tensor [7] : tensor<7xf32>
- %fill = linalg.fill(%cst, %init) : f32, tensor<7xf32> -> tensor<7xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<7xf32>) -> tensor<7xf32>
%reduce = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0)>],
iterator_types = ["parallel", "reduction", "reduction"]}
@@ -1110,7 +1110,7 @@
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 49], strides = [1, 1] : !flow.dispatch.tensor<readonly:16x49xf32> -> tensor<16x49xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [33, 49], strides = [1, 1] : !flow.dispatch.tensor<readonly:33x49xf32> -> tensor<33x49xf32>
%7 = linalg.init_tensor [33, 49] : tensor<33x49xf32>
- %8 = linalg.fill(%cst, %7) : f32, tensor<33x49xf32> -> tensor<33x49xf32>
+ %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<33x49xf32>) -> tensor<33x49xf32>
%9 = linalg.matmul ins(%4, %5 : tensor<33x16xf32>, tensor<16x49xf32>) outs(%8 : tensor<33x49xf32>) -> tensor<33x49xf32>
flow.dispatch.tensor.store %9, %3, offsets = [0, 0], sizes = [33, 49], strides = [1, 1] : tensor<33x49xf32> -> !flow.dispatch.tensor<writeonly:33x49xf32>
return
diff --git a/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir b/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir
index a9175c3..44ce48f 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/tile_fuse_and_vectorize.mlir
@@ -26,7 +26,7 @@
%7 = linalg.init_tensor [64, 64] : tensor<64x64xf32>
%8 = flow.dispatch.tensor.load %0, offsets = [%arg0, 0], sizes = [64, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:384x512xf32> -> tensor<64x512xf32>
%9 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [512, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:512x128xf32> -> tensor<512x64xf32>
- %10 = linalg.fill(%cst, %7) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
+ %10 = linalg.fill ins(%cst : f32) outs(%7 : tensor<64x64xf32>) -> tensor<64x64xf32>
%11 = linalg.matmul {lowering_config = #config} ins(%8, %9 : tensor<64x512xf32>, tensor<512x64xf32>) outs(%10 : tensor<64x64xf32>) -> tensor<64x64xf32>
%12 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%11 : tensor<64x64xf32>) outs(%7 : tensor<64x64xf32>) {
^bb0(%arg2: f32, %arg3: f32): // no predecessors
@@ -109,7 +109,7 @@
%14 = flow.dispatch.tensor.load %2, offsets = [%arg0, 0], sizes = [64, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:384x384xf32> -> tensor<64x384xf32>
%15 = flow.dispatch.tensor.load %3, offsets = [0, %arg1], sizes = [384, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:384x512xf32> -> tensor<384x64xf32>
%16 = linalg.init_tensor [64, 64] : tensor<64x64xf32>
- %17 = linalg.fill(%cst, %16) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
+ %17 = linalg.fill ins(%cst : f32) outs(%16 : tensor<64x64xf32>) -> tensor<64x64xf32>
%18 = linalg.matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[], [32, 32, 32], [16, 16, 16]], native_vector_size = [16, 16, 16]>} ins(%14, %15 : tensor<64x384xf32>, tensor<384x64xf32>) outs(%17 : tensor<64x64xf32>) -> tensor<64x64xf32>
%19 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%18, %11, %12 : tensor<64x64xf32>, tensor<64xi32>, tensor<64x64xf32>) outs(%13 : tensor<64x64xf32>) {
^bb0(%arg2: f32, %arg3: i32, %arg4: f32, %arg5: f32): // no predecessors
@@ -178,7 +178,7 @@
%16 = flow.dispatch.tensor.load %4, offsets = [%arg0, 0], sizes = [16, 24], strides = [1, 1] : !flow.dispatch.tensor<readonly:784x24xf32> -> tensor<16x24xf32>
%17 = flow.dispatch.tensor.load %5, offsets = [0, %arg1], sizes = [24, 49], strides = [1, 1] : !flow.dispatch.tensor<readonly:24x96xf32> -> tensor<24x49xf32>
%18 = linalg.init_tensor [16, 49] : tensor<16x49xf32>
- %19 = linalg.fill(%cst, %18) : f32, tensor<16x49xf32> -> tensor<16x49xf32>
+ %19 = linalg.fill ins(%cst : f32) outs(%18 : tensor<16x49xf32>) -> tensor<16x49xf32>
%20 = linalg.matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[], [16, 16, 32], [16, 16, 16]], native_vector_size = [16, 16, 16]>} ins(%16, %17 : tensor<16x24xf32>, tensor<24x49xf32>) outs(%19 : tensor<16x49xf32>) -> tensor<16x49xf32>
%21 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%20, %11, %12, %13, %14 : tensor<16x49xf32>, tensor<49xf32>, tensor<49xf32>, tensor<49xf32>, tensor<49xf32>) outs(%15 : tensor<16x49xf32>) {
^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32, %arg7: f32): // no predecessors
diff --git a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
index 847e138..726c24a 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
@@ -47,8 +47,7 @@
: memref<1024x1024xf32> to memref<1024x256xf32, #map4>
%11 = memref.subview %2[%arg0, %arg1] [2, 256] [1, 1]
: memref<1024x1024xf32> to memref<2x256xf32, #map4>
- linalg.fill(%cst, %11) {lowering_config = #config}
- : f32, memref<2x256xf32, #map4>
+ linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%11 : memref<2x256xf32, #map4>)
linalg.matmul {lowering_config = #config}
ins(%8, %10 : memref<2x1024xf32, #map4>, memref<1024x256xf32, #map4>)
outs(%11 : memref<2x256xf32, #map4>)
@@ -130,7 +129,7 @@
%7 = memref.subview %0[%arg0, %arg1, 0] [1, 8, 1024] [1, 1, 1] : memref<4x32x1024xf32> to memref<1x8x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 32768 + s0 + d1 * 1024 + d2)>>
%8 = memref.subview %1[%arg0, 0, %arg2] [1, 1024, 32] [1, 1, 1] : memref<4x1024x64xf32> to memref<1x1024x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 65536 + s0 + d1 * 64 + d2)>>
%9 = memref.subview %2[%arg0, %arg1, %arg2] [1, 8, 32] [1, 1, 1] : memref<4x32x64xf32> to memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>
- linalg.fill(%cst, %9) {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 8, 32, 32]]>} : f32, memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>
+ linalg.fill {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 8, 32, 32]]>} ins(%cst : f32) outs(%9 : memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>)
linalg.batch_matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 8, 32, 32]]>} ins(%7, %8 : memref<1x8x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 32768 + s0 + d1 * 1024 + d2)>>, memref<1x1024x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 65536 + s0 + d1 * 64 + d2)>>) outs(%9 : memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>)
}
}
@@ -207,8 +206,7 @@
: memref<1024x1024xf32> to memref<1024x32xf32, #map4>
%11 = memref.subview %2[%arg0, %arg1] [2, 32] [1, 1]
: memref<1024x1024xf32> to memref<2x32xf32, #map4>
- linalg.fill(%cst, %11) {lowering_config = #config}
- : f32, memref<2x32xf32, #map4>
+ linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%11 : memref<2x32xf32, #map4>)
linalg.matmul {lowering_config = #config}
ins(%8, %10 : memref<2x1024xf32, #map4>, memref<1024x32xf32, #map4>)
outs(%11 : memref<2x32xf32, #map4>)
@@ -271,7 +269,7 @@
%cst_0 = arith.constant 0xFF800000 : f32
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<1000xf32>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<f32>
- linalg.fill(%cst_0, %1) {lowering_config = #config} : f32, memref<f32>
+ linalg.fill {lowering_config = #config} ins(%cst_0 : f32) outs(%1 : memref<f32>)
linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["reduction"]} ins(%0 : memref<1000xf32>) outs(%1 : memref<f32>) attrs = {lowering_config = #config} {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
%2 = arith.cmpf ogt, %arg0, %arg1 : f32
diff --git a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
index 62ea000..25370ff 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
@@ -61,9 +61,9 @@
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<2x3xf32>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<3x4xf32>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : memref<2x4xf32>
- linalg.fill(%cst, %2) : f32, memref<2x4xf32>
- linalg.matmul ins(%0, %1 : memref<2x3xf32>, memref<3x4xf32>) outs(%2 : memref<2x4xf32>)
- return
+ linalg.fill ins(%cst : f32) outs(%2 : memref<2x4xf32>)
+ linalg.matmul ins(%0, %1 : memref<2x3xf32>, memref<3x4xf32>) outs(%2 : memref<2x4xf32>)
+ return
}
}
}
@@ -98,7 +98,7 @@
%cst_0 = arith.constant 0xFF800000 : f32
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<1000xf32>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<f32>
- linalg.fill(%cst_0, %1) : f32, memref<f32>
+ linalg.fill ins(%cst_0 : f32) outs(%1 : memref<f32>)
linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["reduction"]} ins(%0 : memref<1000xf32>) outs(%1 : memref<f32>) {
^bb0(%arg0: f32, %arg1: f32): // no predecessors
%2 = arith.cmpf ogt, %arg0, %arg1 : f32
@@ -309,7 +309,7 @@
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:256x1024xf32> -> tensor<256x1024xf32>
%15 = linalg.init_tensor [128, 1024] : tensor<128x1024xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<128x1024xf32> -> tensor<128x1024xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<128x1024xf32>) -> tensor<128x1024xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", compilation_info = #compilation}
ins(%3, %4 : tensor<128x256xf32>, tensor<256x1024xf32>) outs(%16 : tensor<128x1024xf32>) -> tensor<128x1024xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [128, 1024], strides = [1, 1] : tensor<128x1024xf32> -> !flow.dispatch.tensor<writeonly:128x1024xf32>
diff --git a/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir b/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
index 59735b5..58e9859 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
@@ -301,7 +301,7 @@
%7 = memref.subview %0[%arg0, %arg1, 0] [1, 8, 1024] [1, 1, 1] : memref<4x32x1024xf32> to memref<1x8x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 32768 + s0 + d1 * 1024 + d2)>>
%8 = memref.subview %1[%arg0, 0, %arg2] [1, 1024, 32] [1, 1, 1] : memref<4x1024x64xf32> to memref<1x1024x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 65536 + s0 + d1 * 64 + d2)>>
%9 = memref.subview %2[%arg0, %arg1, %arg2] [1, 8, 32] [1, 1, 1] : memref<4x32x64xf32> to memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>
- linalg.fill(%cst, %9) {lowering_config = #config} : f32, memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>
+ linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%9 : memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>)
// expected-error @+1 {{Received first tile dimension of 2 instead of 0 for LLVMGPUMatmulTensorCore}}
linalg.batch_matmul {lowering_config = #config} ins(%7, %8 : memref<1x8x1024xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 32768 + s0 + d1 * 1024 + d2)>>, memref<1x1024x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 65536 + s0 + d1 * 64 + d2)>>) outs(%9 : memref<1x8x32xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 2048 + s0 + d1 * 64 + d2)>>)
}
diff --git a/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir b/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
index 2379836..0e2cf86 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
@@ -68,7 +68,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:1024x1024xf32> -> tensor<1024x1024xf32>
%15 = linalg.init_tensor [1024, 1024] : tensor<1024x1024xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<1024x1024xf32> -> tensor<1024x1024xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
%17 = linalg.matmul ins(%8, %10 : tensor<1024x1024xf32>, tensor<1024x1024xf32>)
outs(%16 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
@@ -135,7 +135,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:1024x1024xf32> -> tensor<1024x1024xf32>
%15 = linalg.init_tensor [1024, 1024] : tensor<1024x1024xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<1024x1024xf32> -> tensor<1024x1024xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
%17 = linalg.generic #matmul_trait
ins(%8, %10 : tensor<1024x1024xf32>, tensor<1024x1024xf32>) outs(%16 : tensor<1024x1024xf32>) {
^bb(%a: f32, %b: f32, %c: f32) :
@@ -185,7 +185,7 @@
%13 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 2, 2, 1], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x2x2x1xf32> -> tensor<3x2x2x1xf32>
%20 = linalg.init_tensor [1, 2, 3, 1] : tensor<1x2x3x1xf32>
- %21 = linalg.fill(%cst, %20) : f32, tensor<1x2x3x1xf32> -> tensor<1x2x3x1xf32>
+ %21 = linalg.fill ins(%cst : f32) outs(%20 : tensor<1x2x3x1xf32>) -> tensor<1x2x3x1xf32>
%22 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
ins(%11, %13 : tensor<1x4x4x2xf32>, tensor<3x2x2x1xf32>) outs(%21 : tensor<1x2x3x1xf32>) -> tensor<1x2x3x1xf32>
flow.dispatch.tensor.store %22, %2, offsets = [0, 0, 0, 0], sizes = [1, 2, 3, 1], strides = [1, 1, 1, 1]
@@ -260,7 +260,7 @@
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [14, 14, 96], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:14x14x96xf32> -> tensor<14x14x96xf32>
%8 = linalg.init_tensor [96] : tensor<96xf32>
- %9 = linalg.fill(%cst, %8) : f32, tensor<96xf32> -> tensor<96xf32>
+ %9 = linalg.fill ins(%cst : f32) outs(%8 : tensor<96xf32>) -> tensor<96xf32>
%10 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2, d0)>, affine_map<(d0, d1, d2) -> (d0)>],
iterator_types = ["parallel", "reduction", "reduction"]}
@@ -352,7 +352,7 @@
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 16384], strides = [1, 1]
: !flow.dispatch.tensor<readonly:512x16384xf32> -> tensor<512x16384xf32>
%8 = linalg.init_tensor [16384] : tensor<16384xf32>
- %9 = linalg.fill(%cst, %8) : f32, tensor<16384xf32> -> tensor<16384xf32>
+ %9 = linalg.fill ins(%cst : f32) outs(%8 : tensor<16384xf32>) -> tensor<16384xf32>
%10 = linalg.generic {
indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]}
ins(%5 : tensor<512x16384xf32>) outs(%9 : tensor<16384xf32>) {
@@ -402,7 +402,7 @@
%d = flow.dispatch.tensor.load %di, offsets = [0, 0], sizes = [2048, 512], strides = [1, 1]
: !flow.dispatch.tensor<readonly:2048x512xf32> -> tensor<2048x512xf32>
%init = linalg.init_tensor [2048, 512] : tensor<2048x512xf32>
- %f = linalg.fill(%cst, %init) : f32, tensor<2048x512xf32> -> tensor<2048x512xf32>
+ %f = linalg.fill ins(%cst : f32) outs(%init : tensor<2048x512xf32>) -> tensor<2048x512xf32>
%m = linalg.matmul ins(%3, %4 : tensor<2048x1024xf32>, tensor<1024x512xf32>) outs(%f : tensor<2048x512xf32>) -> tensor<2048x512xf32>
%init2 = linalg.init_tensor [2048, 512] : tensor<2048x512xf32>
%a = linalg.generic {
@@ -511,7 +511,7 @@
%13 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 1024, 64], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:4x1024x64xf32> -> tensor<4x1024x64xf32>
%17 = linalg.init_tensor [4, 32, 64] : tensor<4x32x64xf32>
- %18 = linalg.fill(%cst, %17) : f32, tensor<4x32x64xf32> -> tensor<4x32x64xf32>
+ %18 = linalg.fill ins(%cst : f32) outs(%17 : tensor<4x32x64xf32>) -> tensor<4x32x64xf32>
%19 = linalg.batch_matmul ins(%11, %13 : tensor<4x32x1024xf32>, tensor<4x1024x64xf32>)
outs(%18 : tensor<4x32x64xf32>) -> tensor<4x32x64xf32>
flow.dispatch.tensor.store %19, %2, offsets = [0, 0, 0], sizes = [4, 32, 64], strides = [1, 1, 1]
diff --git a/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir b/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
index ad61567..3badaf2 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
@@ -67,7 +67,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:1024x1024xf32> -> tensor<1024x1024xf32>
%15 = linalg.init_tensor [1024, 1024] : tensor<1024x1024xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<1024x1024xf32> -> tensor<1024x1024xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
%17 = linalg.matmul ins(%8, %10 : tensor<1024x1024xf32>, tensor<1024x1024xf32>)
outs(%16 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
diff --git a/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir b/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir
index 3acdb83..eac1c9e 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir
@@ -20,7 +20,7 @@
%10 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%9]
%11 = affine.apply affine_map<(d0) -> ((d0 floordiv 32) * 32)>(%8)
%12 = memref.subview %7[%10, %11] [32, 32] [1, 1] : memref<64x64xf32, affine_map<(d0, d1)[s0] -> (d0 * 512 + s0 + d1)>> to memref<32x32xf32, affine_map<(d0, d1)[s0] -> (d0 * 512 + s0 + d1)>>
- linalg.fill(%cst, %12) {__internal_linalg_transform__ = "vectorize"} : f32, memref<32x32xf32, affine_map<(d0, d1)[s0] -> (d0 * 512 + s0 + d1)>>
+ linalg.fill {__internal_linalg_transform__ = "vectorize"} ins(%cst : f32) outs(%12 : memref<32x32xf32, affine_map<(d0, d1)[s0] -> (d0 * 512 + s0 + d1)>>)
scf.for %arg0 = %c0 to %c1024 step %c16 {
%13 = memref.subview %5[0, %arg0] [64, 16] [1, 1] : memref<64x1024xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> to memref<64x16xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
%14 = memref.subview %6[%arg0, 0] [16, 64] [1, 1] : memref<1024x64xf32, affine_map<(d0, d1)[s0] -> (d0 * 512 + s0 + d1)>> to memref<16x64xf32, affine_map<(d0, d1)[s0] -> (d0 * 512 + s0 + d1)>>
diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
index c13e167..5d43eeb 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
@@ -32,7 +32,7 @@
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 512], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x3x512xf32> -> tensor<3x3x3x512xf32>
%22 = linalg.init_tensor [1, 112, 112, 512] : tensor<1x112x112x512xf32>
- %23 = linalg.fill(%cst, %22) : f32, tensor<1x112x112x512xf32> -> tensor<1x112x112x512xf32>
+ %23 = linalg.fill ins(%cst : f32) outs(%22 : tensor<1x112x112x512xf32>) -> tensor<1x112x112x512xf32>
%24 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%13, %15 : tensor<1x225x225x3xf32>, tensor<3x3x3x512xf32>) outs(%23 : tensor<1x112x112x512xf32>) -> tensor<1x112x112x512xf32>
flow.dispatch.tensor.store %24, %2, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 512], strides = [1, 1, 1, 1]
@@ -86,7 +86,7 @@
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x32xf32>
%22 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %23 = linalg.fill(%cst, %22) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %23 = linalg.fill ins(%cst : f32) outs(%22 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%24 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%13, %15 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%23 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
flow.dispatch.tensor.store %24, %2, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1]
@@ -139,7 +139,7 @@
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x3x16xf32> -> tensor<3x3x3x16xf32>
%22 = linalg.init_tensor [1, 16, 16, 16] : tensor<1x16x16x16xf32>
- %23 = linalg.fill(%cst, %22) : f32, tensor<1x16x16x16xf32> -> tensor<1x16x16x16xf32>
+ %23 = linalg.fill ins(%cst : f32) outs(%22 : tensor<1x16x16x16xf32>) -> tensor<1x16x16x16xf32>
%24 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%13, %15 : tensor<1x33x33x3xf32>, tensor<3x3x3x16xf32>) outs(%23 : tensor<1x16x16x16xf32>) -> tensor<1x16x16x16xf32>
flow.dispatch.tensor.store %24, %2, offsets = [0, 0, 0, 0], sizes = [1, 16, 16, 16], strides = [1, 1, 1, 1]
@@ -194,7 +194,7 @@
%16 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 144], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x144xf32> -> tensor<3x3x144xf32>
%23 = linalg.init_tensor [1, 28, 28, 144] : tensor<1x28x28x144xf32>
- %24 = linalg.fill(%cst, %23) : f32, tensor<1x28x28x144xf32> -> tensor<1x28x28x144xf32>
+ %24 = linalg.fill ins(%cst : f32) outs(%23 : tensor<1x28x28x144xf32>) -> tensor<1x28x28x144xf32>
%25 = linalg.depthwise_conv_2d_nhwc_hwc {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%14, %16 : tensor<1x57x57x144xf32>, tensor<3x3x144xf32>) outs(%24 : tensor<1x28x28x144xf32>) -> tensor<1x28x28x144xf32>
flow.dispatch.tensor.store %25, %2, offsets = [0, 0, 0, 0], sizes = [1, 28, 28, 144], strides = [1, 1, 1, 1]
@@ -248,7 +248,7 @@
%16 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 8], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x8xf32> -> tensor<3x3x8xf32>
%23 = linalg.init_tensor [1, 4, 4, 8] : tensor<1x4x4x8xf32>
- %24 = linalg.fill(%cst, %23) : f32, tensor<1x4x4x8xf32> -> tensor<1x4x4x8xf32>
+ %24 = linalg.fill ins(%cst : f32) outs(%23 : tensor<1x4x4x8xf32>) -> tensor<1x4x4x8xf32>
%25 = linalg.depthwise_conv_2d_nhwc_hwc {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%14, %16 : tensor<1x9x9x8xf32>, tensor<3x3x8xf32>) outs(%24 : tensor<1x4x4x8xf32>) -> tensor<1x4x4x8xf32>
flow.dispatch.tensor.store %25, %2, offsets = [0, 0, 0, 0], sizes = [1, 4, 4, 8], strides = [1, 1, 1, 1]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
index a3a08a9..59d7b3b 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
@@ -32,7 +32,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1]
: !flow.dispatch.tensor<readonly:512x2048xf32> -> tensor<512x2048xf32>
%15 = linalg.init_tensor [1024, 2048] : tensor<1024x2048xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<1024x2048xf32> -> tensor<1024x2048xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<1024x2048xf32>) -> tensor<1024x2048xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<1024x512xf32>, tensor<512x2048xf32>) outs(%16 : tensor<1024x2048xf32>) -> tensor<1024x2048xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [1024, 2048], strides = [1, 1]
@@ -86,7 +86,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [96, 24], strides = [1, 1]
: !flow.dispatch.tensor<readonly:96x24xf32> -> tensor<96x24xf32>
%15 = linalg.init_tensor [3136, 24] : tensor<3136x24xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<3136x24xf32> -> tensor<3136x24xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<3136x24xf32>) -> tensor<3136x24xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<3136x96xf32>, tensor<96x24xf32>) outs(%16 : tensor<3136x24xf32>) -> tensor<3136x24xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [3136, 24], strides = [1, 1]
@@ -140,7 +140,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [192, 64], strides = [1, 1]
: !flow.dispatch.tensor<readonly:192x64xf32> -> tensor<192x64xf32>
%15 = linalg.init_tensor [196, 64] : tensor<196x64xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<196x64xf32> -> tensor<196x64xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<196x64xf32>) -> tensor<196x64xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<196x192xf32>, tensor<192x64xf32>) outs(%16 : tensor<196x64xf32>) -> tensor<196x64xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [196, 64], strides = [1, 1]
@@ -189,7 +189,7 @@
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<12544x16xf32>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<16x96xf32>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : memref<12544x96xf32>
- linalg.fill(%cst, %2) : f32, memref<12544x96xf32>
+ linalg.fill ins(%cst : f32) outs(%2 : memref<12544x96xf32>)
linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%0, %1 : memref<12544x16xf32>, memref<16x96xf32>) outs(%2 : memref<12544x96xf32>)
return
}
@@ -238,7 +238,7 @@
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [49, 576], strides = [1, 1] : !flow.dispatch.tensor<readonly:49x576xf32> -> tensor<49x576xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 160], strides = [1, 1] : !flow.dispatch.tensor<readonly:576x160xf32> -> tensor<576x160xf32>
%15 = linalg.init_tensor [49, 160] : tensor<49x160xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<49x160xf32> -> tensor<49x160xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<49x160xf32>) -> tensor<49x160xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<49x576xf32>, tensor<576x160xf32>) outs(%16 : tensor<49x160xf32>) -> tensor<49x160xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [49, 160], strides = [1, 1]
@@ -292,7 +292,7 @@
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 384], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:4x32x384xf32> -> tensor<4x32x384xf32>
%21 = linalg.init_tensor [4, 384, 384] : tensor<4x384x384xf32>
- %22 = linalg.fill(%cst, %21) : f32, tensor<4x384x384xf32> -> tensor<4x384x384xf32>
+ %22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<4x384x384xf32>) -> tensor<4x384x384xf32>
%23 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"}
ins(%11, %14 : tensor<4x384x32xf32>, tensor<4x32x384xf32>) outs(%22 : tensor<4x384x384xf32>) -> tensor<4x384x384xf32>
flow.dispatch.tensor.store %23, %2, offsets = [0, 0, 0], sizes = [4, 384, 384], strides = [1, 1, 1]
@@ -346,7 +346,7 @@
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 8], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:4x32x8xf32> -> tensor<4x32x8xf32>
%21 = linalg.init_tensor [4, 8, 8] : tensor<4x8x8xf32>
- %22 = linalg.fill(%cst, %21) : f32, tensor<4x8x8xf32> -> tensor<4x8x8xf32>
+ %22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<4x8x8xf32>) -> tensor<4x8x8xf32>
%23 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"}
ins(%11, %14 : tensor<4x8x32xf32>, tensor<4x32x8xf32>) outs(%22 : tensor<4x8x8xf32>) -> tensor<4x8x8xf32>
flow.dispatch.tensor.store %23, %2, offsets = [0, 0, 0], sizes = [4, 8, 8], strides = [1, 1, 1]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
index 74373dc..42ee057 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
@@ -45,7 +45,7 @@
%21 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x32xf32>
%24 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %25 = linalg.fill(%cst, %24) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %25 = linalg.fill ins(%cst : f32) outs(%24 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%26 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%19, %21 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%25 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%27 = linalg.generic {indexing_maps = [#map7, #map7, #map7], iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
diff --git a/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
index 7db31c6..dd29936 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
@@ -117,7 +117,7 @@
%14 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 24, 24, 8], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:1x24x24x8xf32> -> tensor<1x24x24x8xf32>
%20 = linalg.init_tensor [1, 2, 2, 8] : tensor<1x2x2x8xf32>
- %21 = linalg.fill(%cst, %20) : f32, tensor<1x2x2x8xf32> -> tensor<1x2x2x8xf32>
+ %21 = linalg.fill ins(%cst : f32) outs(%20 : tensor<1x2x2x8xf32>) -> tensor<1x2x2x8xf32>
%22 = linalg.pooling_nhwc_sum {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : vector<2xi64>, strides = dense<12> : vector<2xi64>}
ins(%14, %2 : tensor<1x24x24x8xf32>, tensor<12x12xf32>)
outs(%21 : tensor<1x2x2x8xf32>) -> tensor<1x2x2x8xf32>
@@ -168,7 +168,7 @@
%13 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 76, 1, 1], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:1x76x1x1xf32> -> tensor<1x76x1x1xf32>
%18 = linalg.init_tensor [1, 38, 1, 1] : tensor<1x38x1x1xf32>
- %19 = linalg.fill(%cst, %18) : f32, tensor<1x38x1x1xf32> -> tensor<1x38x1x1xf32>
+ %19 = linalg.fill ins(%cst : f32) outs(%18 : tensor<1x38x1x1xf32>) -> tensor<1x38x1x1xf32>
%20 = linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<[2, 1]> : vector<2xi64>}
ins(%13, %2 : tensor<1x76x1x1xf32>, tensor<2x1xf32>)
outs(%19 : tensor<1x38x1x1xf32>) -> tensor<1x38x1x1xf32>
@@ -278,7 +278,7 @@
%14 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 21, 20, 1], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:1x21x20x1xf32> -> tensor<1x21x20x1xf32>
%18 = linalg.init_tensor [1, 19, 18, 1, 4] : tensor<1x19x18x1x4xf32>
- %19 = linalg.fill(%cst_9, %18) : f32, tensor<1x19x18x1x4xf32> -> tensor<1x19x18x1x4xf32>
+ %19 = linalg.fill ins(%cst_9 : f32) outs(%18 : tensor<1x19x18x1x4xf32>) -> tensor<1x19x18x1x4xf32>
%20 = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
ins(%14, %cst : tensor<1x21x20x1xf32>, tensor<3x3x1x4xf32>) outs(%19 : tensor<1x19x18x1x4xf32>) -> tensor<1x19x18x1x4xf32>
%21 = linalg.generic {
diff --git a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
index 1887d47..f8b6fea 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
@@ -33,7 +33,7 @@
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 3, 32], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:1x3x32xf32> -> tensor<1x3x32xf32>
%21 = linalg.init_tensor [1, 3, 32] : tensor<1x3x32xf32>
- %22 = linalg.fill(%cst, %21) : f32, tensor<1x3x32xf32> -> tensor<1x3x32xf32>
+ %22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<1x3x32xf32>) -> tensor<1x3x32xf32>
%23 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"}
ins(%11, %14 : tensor<1x3x3xf32>, tensor<1x3x32xf32>) outs(%22 : tensor<1x3x32xf32>) -> tensor<1x3x32xf32>
flow.dispatch.tensor.store %23, %2, offsets = [0, 0, 0], sizes = [1, 3, 32], strides = [1, 1, 1]
@@ -87,7 +87,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 16], strides = [1, 1]
: !flow.dispatch.tensor<readonly:32x16xi8> -> tensor<32x16xi8>
%15 = linalg.init_tensor [64, 16] : tensor<64x16xi32>
- %16 = linalg.fill(%c0_i32, %15) : i32, tensor<64x16xi32> -> tensor<64x16xi32>
+ %16 = linalg.fill ins(%c0_i32 : i32) outs(%15 : tensor<64x16xi32>) -> tensor<64x16xi32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<64x32xi8>, tensor<32x16xi8>) outs(%16 : tensor<64x16xi32>) -> tensor<64x16xi32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [64, 16], strides = [1, 1]
@@ -145,7 +145,7 @@
%15 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [576, 273], strides = [1, 1]
: !flow.dispatch.tensor<readonly:576x273xf32> -> tensor<576x273xf32>
%16 = linalg.init_tensor [400, 273] : tensor<400x273xf32>
- %17 = linalg.fill(%cst, %16) : f32, tensor<400x273xf32> -> tensor<400x273xf32>
+ %17 = linalg.fill ins(%cst : f32) outs(%16 : tensor<400x273xf32>) -> tensor<400x273xf32>
%18 = linalg.matmul ins(%13, %15 : tensor<400x576xf32>, tensor<576x273xf32>) outs(%17 : tensor<400x273xf32>) -> tensor<400x273xf32>
%19 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
@@ -211,7 +211,7 @@
%15 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [512, 546], strides = [1, 1]
: !flow.dispatch.tensor<readonly:512x546xf32> -> tensor<512x546xf32>
%16 = linalg.init_tensor [25, 546] : tensor<25x546xf32>
- %17 = linalg.fill(%cst, %16) : f32, tensor<25x546xf32> -> tensor<25x546xf32>
+ %17 = linalg.fill ins(%cst : f32) outs(%16 : tensor<25x546xf32>) -> tensor<25x546xf32>
%18 = linalg.matmul ins(%13, %15 : tensor<25x512xf32>, tensor<512x546xf32>) outs(%17 : tensor<25x546xf32>) -> tensor<25x546xf32>
%19 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
@@ -287,7 +287,7 @@
%17 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [128, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:128x1024xf16> -> tensor<128x1024xf16>
%18 = linalg.init_tensor [256, 1024] : tensor<256x1024xf16>
- %19 = linalg.fill(%cst, %18) : f16, tensor<256x1024xf16> -> tensor<256x1024xf16>
+ %19 = linalg.fill ins(%cst : f16) outs(%18 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
%20 = linalg.matmul ins(%15, %17 : tensor<256x128xf16>, tensor<128x1024xf16>) outs(%19 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
%21 = linalg.generic {
indexing_maps = [#map5, #map5, #map5, #map5], iterator_types = ["parallel", "parallel"]}
diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
index 17257ae..75dbf88 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
@@ -32,7 +32,7 @@
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 512], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x3x512xf32> -> tensor<3x3x3x512xf32>
%22 = linalg.init_tensor [1, 112, 112, 512] : tensor<1x112x112x512xf32>
- %23 = linalg.fill(%cst, %22) : f32, tensor<1x112x112x512xf32> -> tensor<1x112x112x512xf32>
+ %23 = linalg.fill ins(%cst : f32) outs(%22 : tensor<1x112x112x512xf32>) -> tensor<1x112x112x512xf32>
%24 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%13, %15 : tensor<1x225x225x3xf32>, tensor<3x3x3x512xf32>)
outs(%23 : tensor<1x112x112x512xf32>) -> tensor<1x112x112x512xf32>
@@ -87,7 +87,7 @@
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x32xf32>
%22 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %23 = linalg.fill(%cst, %22) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %23 = linalg.fill ins(%cst : f32) outs(%22 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%24 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%13, %15 : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%23 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
flow.dispatch.tensor.store %24, %2, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1]
@@ -140,7 +140,7 @@
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x3x16xf32> -> tensor<3x3x3x16xf32>
%22 = linalg.init_tensor [1, 16, 16, 16] : tensor<1x16x16x16xf32>
- %23 = linalg.fill(%cst, %22) : f32, tensor<1x16x16x16xf32> -> tensor<1x16x16x16xf32>
+ %23 = linalg.fill ins(%cst : f32) outs(%22 : tensor<1x16x16x16xf32>) -> tensor<1x16x16x16xf32>
%24 = linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%13, %15 : tensor<1x33x33x3xf32>, tensor<3x3x3x16xf32>) outs(%23 : tensor<1x16x16x16xf32>) -> tensor<1x16x16x16xf32>
flow.dispatch.tensor.store %24, %2, offsets = [0, 0, 0, 0], sizes = [1, 16, 16, 16], strides = [1, 1, 1, 1]
@@ -194,7 +194,7 @@
%16 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 144], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x144xf32> -> tensor<3x3x144xf32>
%23 = linalg.init_tensor [1, 28, 28, 144] : tensor<1x28x28x144xf32>
- %24 = linalg.fill(%cst, %23) : f32, tensor<1x28x28x144xf32> -> tensor<1x28x28x144xf32>
+ %24 = linalg.fill ins(%cst : f32) outs(%23 : tensor<1x28x28x144xf32>) -> tensor<1x28x28x144xf32>
%25 = linalg.depthwise_conv_2d_nhwc_hwc {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%14, %16 : tensor<1x57x57x144xf32>, tensor<3x3x144xf32>) outs(%24 : tensor<1x28x28x144xf32>) -> tensor<1x28x28x144xf32>
flow.dispatch.tensor.store %25, %2, offsets = [0, 0, 0, 0], sizes = [1, 28, 28, 144], strides = [1, 1, 1, 1]
@@ -249,7 +249,7 @@
%16 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 8], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:3x3x8xf32> -> tensor<3x3x8xf32>
%23 = linalg.init_tensor [1, 1, 2, 8] : tensor<1x1x2x8xf32>
- %24 = linalg.fill(%cst, %23) : f32, tensor<1x1x2x8xf32> -> tensor<1x1x2x8xf32>
+ %24 = linalg.fill ins(%cst : f32) outs(%23 : tensor<1x1x2x8xf32>) -> tensor<1x1x2x8xf32>
%25 = linalg.depthwise_conv_2d_nhwc_hwc {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%14, %16 : tensor<1x3x5x8xf32>, tensor<3x3x8xf32>) outs(%24 : tensor<1x1x2x8xf32>) -> tensor<1x1x2x8xf32>
flow.dispatch.tensor.store %25, %2, offsets = [0, 0, 0, 0], sizes = [1, 1, 2, 8], strides = [1, 1, 1, 1]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
index d46e6be..ebddd9f 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
@@ -30,7 +30,7 @@
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:1024x512xf32> -> tensor<1024x512xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:512x2048xf32> -> tensor<512x2048xf32>
%15 = linalg.init_tensor [1024, 2048] : tensor<1024x2048xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<1024x2048xf32> -> tensor<1024x2048xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<1024x2048xf32>) -> tensor<1024x2048xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<1024x512xf32>, tensor<512x2048xf32>) outs(%16 : tensor<1024x2048xf32>) -> tensor<1024x2048xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [1024, 2048], strides = [1, 1]
@@ -82,7 +82,7 @@
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3136, 96], strides = [1, 1] : !flow.dispatch.tensor<readonly:3136x96xf32> -> tensor<3136x96xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [96, 24], strides = [1, 1] : !flow.dispatch.tensor<readonly:96x24xf32> -> tensor<96x24xf32>
%15 = linalg.init_tensor [3136, 24] : tensor<3136x24xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<3136x24xf32> -> tensor<3136x24xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<3136x24xf32>) -> tensor<3136x24xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<3136x96xf32>, tensor<96x24xf32>)
outs(%16 : tensor<3136x24xf32>) -> tensor<3136x24xf32>
@@ -137,7 +137,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [192, 64], strides = [1, 1]
: !flow.dispatch.tensor<readonly:192x64xf32> -> tensor<192x64xf32>
%15 = linalg.init_tensor [196, 64] : tensor<196x64xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<196x64xf32> -> tensor<196x64xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<196x64xf32>) -> tensor<196x64xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<196x192xf32>, tensor<192x64xf32>) outs(%16 : tensor<196x64xf32>) -> tensor<196x64xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [196, 64], strides = [1, 1]
@@ -186,7 +186,7 @@
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) : memref<12544x16xf32>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) : memref<16x96xf32>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) : memref<12544x96xf32>
- linalg.fill(%cst, %2) : f32, memref<12544x96xf32>
+ linalg.fill ins(%cst : f32) outs(%2 : memref<12544x96xf32>)
linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%0, %1 : memref<12544x16xf32>, memref<16x96xf32>) outs(%2 : memref<12544x96xf32>)
return
@@ -238,7 +238,7 @@
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 160], strides = [1, 1]
: !flow.dispatch.tensor<readonly:576x160xf32> -> tensor<576x160xf32>
%15 = linalg.init_tensor [49, 160] : tensor<49x160xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<49x160xf32> -> tensor<49x160xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<49x160xf32>) -> tensor<49x160xf32>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<49x576xf32>, tensor<576x160xf32>) outs(%16 : tensor<49x160xf32>) -> tensor<49x160xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [49, 160], strides = [1, 1]
@@ -292,7 +292,7 @@
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 384], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:4x32x384xf32> -> tensor<4x32x384xf32>
%21 = linalg.init_tensor [4, 384, 384] : tensor<4x384x384xf32>
- %22 = linalg.fill(%cst, %21) : f32, tensor<4x384x384xf32> -> tensor<4x384x384xf32>
+ %22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<4x384x384xf32>) -> tensor<4x384x384xf32>
%23 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"}
ins(%11, %14 : tensor<4x384x32xf32>, tensor<4x32x384xf32>)
outs(%22 : tensor<4x384x384xf32>) -> tensor<4x384x384xf32>
@@ -348,7 +348,7 @@
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 8], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:4x32x8xf32> -> tensor<4x32x8xf32>
%21 = linalg.init_tensor [4, 2, 8] : tensor<4x2x8xf32>
- %22 = linalg.fill(%cst, %21) : f32, tensor<4x2x8xf32> -> tensor<4x2x8xf32>
+ %22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<4x2x8xf32>) -> tensor<4x2x8xf32>
%23 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup"}
ins(%11, %14 : tensor<4x2x32xf32>, tensor<4x32x8xf32>) outs(%22 : tensor<4x2x8xf32>) -> tensor<4x2x8xf32>
flow.dispatch.tensor.store %23, %2, offsets = [0, 0, 0], sizes = [4, 2, 8], strides = [1, 1, 1]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
index d7b8532..ab22b21 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
@@ -56,7 +56,7 @@
%21 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [128, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:128x1024xf16> -> tensor<128x1024xf16>
%24 = linalg.init_tensor [256, 1024] : tensor<256x1024xf16>
- %25 = linalg.fill(%cst, %24) : f16, tensor<256x1024xf16> -> tensor<256x1024xf16>
+ %25 = linalg.fill ins(%cst : f16) outs(%24 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
%26 = linalg.matmul ins(%19, %21 : tensor<256x128xf16>, tensor<128x1024xf16>) outs(%25 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
%27 = linalg.generic {
indexing_maps = [#map5, #map5, #map5, #map5], iterator_types = ["parallel", "parallel"]}
@@ -133,7 +133,7 @@
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 8], strides = [1, 1] : !flow.dispatch.tensor<readonly:256x8xf16> -> tensor<256x8xf16>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [8, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:8x1024xf16> -> tensor<8x1024xf16>
%15 = linalg.init_tensor [256, 1024] : tensor<256x1024xf16>
- %16 = linalg.fill(%cst, %15) : f16, tensor<256x1024xf16> -> tensor<256x1024xf16>
+ %16 = linalg.fill ins(%cst : f16) outs(%15 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
%17 = linalg.matmul {__internal_linalg_transform__ = "workgroup"}
ins(%8, %10 : tensor<256x8xf16>, tensor<8x1024xf16>) outs(%16 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1]
diff --git a/iree/compiler/Codegen/SPIRV/test/create_fast_slow_path.mlir b/iree/compiler/Codegen/SPIRV/test/create_fast_slow_path.mlir
index 008d001..a318d01 100644
--- a/iree/compiler/Codegen/SPIRV/test/create_fast_slow_path.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/create_fast_slow_path.mlir
@@ -38,7 +38,7 @@
tensor.yield %cst : f32
} : tensor<1x?x?x3xf32> to tensor<1x?x?x3xf32>
%20 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, %arg2], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:3x3x3x32xf32> -> tensor<3x3x3x32xf32>
- %21 = linalg.fill(%cst, %9) {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 4, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4]]>} : f32, tensor<1x1x4x32xf32> -> tensor<1x1x4x32xf32>
+ %21 = linalg.fill {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 4, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4]]>} ins(%cst : f32) outs(%9 : tensor<1x1x4x32xf32>) -> tensor<1x1x4x32xf32>
%22 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 4, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4]]>, strides = dense<2> : tensor<2xi64>} ins(%19, %20 : tensor<1x?x?x3xf32>, tensor<3x3x3x32xf32>) outs(%21 : tensor<1x1x4x32xf32>) -> tensor<1x1x4x32xf32>
%23 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%22, %8 : tensor<1x1x4x32xf32>, tensor<1x1x4x32xf32>) outs(%9 : tensor<1x1x4x32xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 4, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4]]>} {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
diff --git a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
index 3dfe104..5c568c2 100644
--- a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
@@ -47,7 +47,7 @@
%19 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [256, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:256x128xf16> -> tensor<256x128xf16>
 %21 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [128, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:128x1024xf16> -> tensor<128x1024xf16>
%24 = linalg.init_tensor [256, 1024] : tensor<256x1024xf16>
- %25 = linalg.fill(%cst, %24) : f16, tensor<256x1024xf16> -> tensor<256x1024xf16>
+ %25 = linalg.fill ins(%cst : f16) outs(%24 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
%26 = linalg.matmul ins(%19, %21 : tensor<256x128xf16>, tensor<128x1024xf16>) outs(%25 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
%27 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
diff --git a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
index aa3e66f..cf8ceae 100644
--- a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
@@ -27,7 +27,7 @@
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:4096x4096xf32> -> tensor<4096x4096xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:4096x4096xf32> -> tensor<4096x4096xf32>
%15 = linalg.init_tensor [4096, 4096] : tensor<4096x4096xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<4096x4096xf32> -> tensor<4096x4096xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
%17 = linalg.matmul ins(%8, %10 : tensor<4096x4096xf32>, tensor<4096x4096xf32>) outs(%16 : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
flow.dispatch.tensor.store %17, %2, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : tensor<4096x4096xf32> -> !flow.dispatch.tensor<writeonly:4096x4096xf32>
return
@@ -78,7 +78,7 @@
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:1024x512xf32> -> tensor<1024x512xf32>
%17 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:512x256xf32> -> tensor<512x256xf32>
%20 = linalg.init_tensor [1024, 256] : tensor<1024x256xf32>
- %21 = linalg.fill(%cst, %20) : f32, tensor<1024x256xf32> -> tensor<1024x256xf32>
+ %21 = linalg.fill ins(%cst : f32) outs(%20 : tensor<1024x256xf32>) -> tensor<1024x256xf32>
%22 = linalg.matmul ins(%15, %17 : tensor<1024x512xf32>, tensor<512x256xf32>) outs(%21 : tensor<1024x256xf32>) -> tensor<1024x256xf32>
%23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %10 : tensor<1024x256xf32>, tensor<1024x256xf32>) outs(%13 : tensor<1024x256xf32>) {
^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
index 980ae2d..aaabd0c 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
@@ -51,7 +51,7 @@
%15 = affine.min affine_map<(d0)[s0] -> (-d0 + 1024, s0)>(%arg1)[%workgroup_size_y]
%16 = affine.min affine_map<(d0)[s0] -> (-d0 + 1024, s0)>(%arg2)[%workgroup_size_x]
%17 = linalg.init_tensor [%14, %15, %16] : tensor<?x?x?xf32>
- %18 = linalg.fill(%cst, %17) : f32, tensor<?x?x?xf32> -> tensor<?x?x?xf32>
+ %18 = linalg.fill ins(%cst : f32) outs(%17 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
%19 = linalg.batch_matmul {lowering_config = #config} ins(%11, %13 : tensor<?x?x1024xf32>, tensor<?x1024x?xf32>) outs(%18 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
flow.dispatch.tensor.store %19, %2, offsets = [%arg0, %arg1, %arg2], sizes = [%9, %10, %12], strides = [1, 1, 1] : tensor<?x?x?xf32> -> !flow.dispatch.tensor<writeonly:4x1024x1024xf32>
}
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
index e4ba66d..631ce5e 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
@@ -55,7 +55,7 @@
%19 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg1)[%workgroup_size_y]
%20 = affine.min affine_map<(d0)[s0] -> (-d0 + 16, s0)>(%arg2)[%workgroup_size_x]
%21 = linalg.init_tensor [1, %18, %19, %20] : tensor<1x?x?x?xf32>
- %22 = linalg.fill(%cst, %21) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
+ %22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
%23 = linalg.conv_2d_nhwc_hwcf {lowering_config = #config, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%13, %15 : tensor<1x?x?x8xf32>, tensor<3x3x8x?xf32>)
outs(%22 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
@@ -146,7 +146,7 @@
%19 = affine.min affine_map<(d0)[s0] -> (-d0 + 56, s0)>(%arg1)[%workgroup_size_y]
%20 = affine.min affine_map<(d0)[s0] -> (-d0 + 96, s0)>(%arg2)[%workgroup_size_x]
%21 = linalg.init_tensor [1, %18, %19, %20] : tensor<1x?x?x?xf32>
- %22 = linalg.fill(%cst, %21) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
+ %22 = linalg.fill ins(%cst : f32) outs(%21 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
%23 = linalg.depthwise_conv_2d_nhwc_hwc {lowering_config = #config, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%14, %15 : tensor<1x?x?x?xf32>, tensor<3x3x?xf32>)
outs(%22 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
@@ -257,7 +257,7 @@
%36 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg1)[%workgroup_size_y]
%37 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg2)[%workgroup_size_x]
%38 = linalg.init_tensor [1, %35, %36, %37] : tensor<1x?x?x?xf32>
- %39 = linalg.fill(%cst, %38) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
+ %39 = linalg.fill ins(%cst : f32) outs(%38 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
%40 = linalg.conv_2d_nhwc_hwcf {lowering_config = #config, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%32, %34 : tensor<1x?x?x3xf32>, tensor<3x3x3x?xf32>) outs(%39 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
%41 = linalg.generic {lowering_config = #config, indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%40, %16 : tensor<1x?x?x?xf32>, tensor<1x?x?x?xf32>) outs(%20 : tensor<1x?x?x?xf32>) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
@@ -386,7 +386,7 @@
%44 = affine.min affine_map<(d0)[s0] -> (-d0 + 112, s0)>(%arg1)[%workgroup_size_y]
%45 = affine.min affine_map<(d0)[s0] -> (-d0 + 32, s0)>(%arg2)[%workgroup_size_x]
%46 = linalg.init_tensor [1, %43, %44, %45] : tensor<1x?x?x?xf32>
- %47 = linalg.fill(%cst, %46) : f32, tensor<1x?x?x?xf32> -> tensor<1x?x?x?xf32>
+ %47 = linalg.fill ins(%cst : f32) outs(%46 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
%48 = linalg.depthwise_conv_2d_nhwc_hwc {lowering_config = #config, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%40, %42 : tensor<1x?x?x?xf32>, tensor<3x3x?xf32>) outs(%47 : tensor<1x?x?x?xf32>) -> tensor<1x?x?x?xf32>
%49 = linalg.generic {lowering_config = #config, indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%12, %48 : tensor<?xf32>, tensor<1x?x?x?xf32>) outs(%18 : tensor<1x?x?x?xf32>) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
index bc18016..816824b 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
@@ -42,7 +42,7 @@
%11 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg0)[%workgroup_size_y]
%12 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg1)[%workgroup_size_x]
%13 = linalg.init_tensor [%11, %12] : tensor<?x?xf16>
- %14 = linalg.fill(%cst, %13) : f16, tensor<?x?xf16> -> tensor<?x?xf16>
+ %14 = linalg.fill ins(%cst : f16) outs(%13 : tensor<?x?xf16>) -> tensor<?x?xf16>
%15 = linalg.matmul {lowering_config = #config} ins(%8, %10 : tensor<?x4096xf16>, tensor<4096x?xf16>) outs(%14 : tensor<?x?xf16>) -> tensor<?x?xf16>
flow.dispatch.tensor.store %15, %2, offsets = [%arg0, %arg1], sizes = [%7, %9], strides = [1, 1] : tensor<?x?xf16> -> !flow.dispatch.tensor<writeonly:4096x4096xf16>
}
@@ -106,7 +106,7 @@
%11 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg0)[%workgroup_size_y]
%12 = affine.min affine_map<(d0)[s0] -> (-d0 + 4096, s0)>(%arg1)[%workgroup_size_x]
%13 = linalg.init_tensor [%11, %12] : tensor<?x?xf32>
- %14 = linalg.fill(%cst, %13) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %14 = linalg.fill ins(%cst : f32) outs(%13 : tensor<?x?xf32>) -> tensor<?x?xf32>
%15 = linalg.matmul {lowering_config = #config} ins(%8, %10 : tensor<?x4096xf32>, tensor<4096x?xf32>) outs(%14 : tensor<?x?xf32>) -> tensor<?x?xf32>
flow.dispatch.tensor.store %15, %2, offsets = [%arg0, %arg1], sizes = [%7, %9], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:4096x4096xf32>
}
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
index 6b8bb0a..e770765 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
@@ -66,7 +66,7 @@
%11 = memref.subview %2[%arg0, 0] [16, 128] [1, 1] : memref<256x128xf16> to memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>
%12 = memref.subview %3[0, %arg1] [128, 16] [1, 1] : memref<128x1024xf16> to memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
%13 = memref.subview %4[%arg0, %arg1] [16, 16] [1, 1] : memref<256x1024xf16> to memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
- linalg.fill(%cst, %13) {lowering_config = #config} : f16, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
+ linalg.fill {lowering_config = #config} ins(%cst : f16) outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>)
linalg.matmul {lowering_config = #config}
ins(%11, %12 : memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>, memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>)
outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>)
diff --git a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir
index d1433c6..6c7ed93 100644
--- a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir
@@ -28,7 +28,7 @@
%10 = scf.for %arg2 = %c0 to %c2 step %c1 iter_args(%arg3 = %9) -> (tensor<2x128xf32>) {
%11 = scf.for %arg4 = %c0 to %c128 step %c4 iter_args(%arg5 = %arg3) -> (tensor<2x128xf32>) {
%12 = tensor.extract_slice %arg5[%arg2, %arg4] [1, 4] [1, 1] : tensor<2x128xf32> to tensor<1x4xf32>
- %13 = linalg.fill(%cst, %12) {lowering_config = #config} : f32, tensor<1x4xf32> -> tensor<1x4xf32>
+ %13 = linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%12 : tensor<1x4xf32>) -> tensor<1x4xf32>
%14 = tensor.extract_slice %7[%arg2, 0] [1, 4] [1, 1] : tensor<2x4xf32> to tensor<1x4xf32>
%15 = tensor.extract_slice %8[0, %arg4] [4, 4] [1, 1] : tensor<4x128xf32> to tensor<4x4xf32>
%16 = linalg.matmul {lowering_config = #config} ins(%14, %15 : tensor<1x4xf32>, tensor<4x4xf32>) outs(%13 : tensor<1x4xf32>) -> tensor<1x4xf32>
diff --git a/iree/compiler/Codegen/Sandbox/test/fusion_expert.mlir b/iree/compiler/Codegen/Sandbox/test/fusion_expert.mlir
index f2cfcdd..beb6930 100644
--- a/iree/compiler/Codegen/Sandbox/test/fusion_expert.mlir
+++ b/iree/compiler/Codegen/Sandbox/test/fusion_expert.mlir
@@ -7,7 +7,7 @@
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
%init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
- %0 = linalg.fill(%cst, %init) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %0 = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
%1 = linalg.matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[10, 20, 30]]>}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
@@ -40,7 +40,7 @@
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%init = linalg.init_tensor [20, 120] : tensor<20x120xf32>
- %0 = linalg.fill(%cst, %init) : f32, tensor<20x120xf32> -> tensor<20x120xf32>
+ %0 = linalg.fill ins(%cst : f32) outs(%init : tensor<20x120xf32>) -> tensor<20x120xf32>
%1 = linalg.matmul {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[10, 20, 30]]>}
ins(%arg0, %arg1 : tensor<20x60xf32>, tensor<60x120xf32>)
outs(%0 : tensor<20x120xf32>) -> tensor<20x120xf32>
diff --git a/iree/compiler/Codegen/Sandbox/test/outline_one_parent_loop.mlir b/iree/compiler/Codegen/Sandbox/test/outline_one_parent_loop.mlir
index f6b671d..f774ec1 100644
--- a/iree/compiler/Codegen/Sandbox/test/outline_one_parent_loop.mlir
+++ b/iree/compiler/Codegen/Sandbox/test/outline_one_parent_loop.mlir
@@ -34,7 +34,7 @@
%13 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg7, %5)
%14 = scf.for %arg9 = %c0 to %c16 step %c8 iter_args(%arg10 = %arg8) -> (tensor<?x16xf32>) {
%15 = tensor.extract_slice %arg10[%arg7, %arg9] [%13, 8] [1, 1] : tensor<?x16xf32> to tensor<?x8xf32>
- %16 = linalg.fill(%cst, %15) : f32, tensor<?x8xf32> -> tensor<?x8xf32>
+ %16 = linalg.fill ins(%cst : f32) outs(%15 : tensor<?x8xf32>) -> tensor<?x8xf32>
%17 = tensor.insert_slice %16 into %arg10[%arg7, %arg9] [%13, 8] [1, 1] : tensor<?x8xf32> into tensor<?x16xf32>
scf.yield %17 : tensor<?x16xf32>
}
diff --git a/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir b/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
index 17a7944..6f8e598 100644
--- a/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
+++ b/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
@@ -113,8 +113,8 @@
%0 = flow.dispatch.tensor.load %arg0, offsets=[0], sizes=[9], strides = [1] : !flow.dispatch.tensor<readonly:9xi32> -> tensor<9xi32>
%1 = flow.dispatch.tensor.load %arg1, offsets=[0], sizes=[9], strides = [1] : !flow.dispatch.tensor<readonly:9xi32> -> tensor<9xi32>
%2 = linalg.init_tensor [] : tensor<i32>
- %3 = linalg.fill(%c-2147483648_i32, %2) : i32, tensor<i32> -> tensor<i32>
- %4 = linalg.fill(%c0_i32, %2) : i32, tensor<i32> -> tensor<i32>
+ %3 = linalg.fill ins(%c-2147483648_i32 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
+ %4 = linalg.fill ins(%c0_i32 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
flow.dispatch.tensor.store %3, %arg2, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<writeonly:i32>
flow.dispatch.tensor.store %4, %arg3, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<writeonly:i32>
flow.return
@@ -137,14 +137,14 @@
// Used as a result; should remain after canonicalization.
%c-2147483648_i32 = arith.constant -2147483648 : i32
%ret0_init = linalg.init_tensor [] : tensor<i32>
- %ret0_value = linalg.fill(%c-2147483648_i32, %ret0_init) : i32, tensor<i32> -> tensor<i32>
+ %ret0_value = linalg.fill ins(%c-2147483648_i32 : i32) outs(%ret0_init : tensor<i32>) -> tensor<i32>
flow.dispatch.tensor.store %ret0_value, %ret0, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<writeonly:i32>
// Unused as a result; should be stripped entirely.
%c0_i32 = arith.constant 0 : i32
%ret1_shaped = flow.dispatch.tie_shape %ret1 : !flow.dispatch.tensor<writeonly:?xi32>{%dim}
%ret1_init = linalg.init_tensor [%dim] : tensor<?xi32>
- %ret1_value = linalg.fill(%c0_i32, %ret1_init) : i32, tensor<?xi32> -> tensor<?xi32>
+ %ret1_value = linalg.fill ins(%c0_i32 : i32) outs(%ret1_init : tensor<?xi32>) -> tensor<?xi32>
flow.dispatch.tensor.store %ret1_value, %ret1_shaped, offsets = [0], sizes = [%dim], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:?xi32>{%dim}
flow.return
}
@@ -167,8 +167,8 @@
%0 = flow.dispatch.tensor.load %arg0, offsets=[0], sizes=[9], strides = [1] : !flow.dispatch.tensor<readonly:9xi32> -> tensor<9xi32>
%1 = flow.dispatch.tensor.load %arg1, offsets=[0], sizes=[9], strides = [1] : !flow.dispatch.tensor<readonly:9xi32> -> tensor<9xi32>
%2 = linalg.init_tensor [] : tensor<i32>
- %3 = linalg.fill(%c-2147483648_i32, %2) : i32, tensor<i32> -> tensor<i32>
- %4 = linalg.fill(%c0_i32, %2) : i32, tensor<i32> -> tensor<i32>
+ %3 = linalg.fill ins(%c-2147483648_i32 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
+ %4 = linalg.fill ins(%c0_i32 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
flow.dispatch.tensor.store %3, %arg2, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<writeonly:i32>
flow.dispatch.tensor.store %4, %arg3, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<readwrite:i32>
flow.return
@@ -190,8 +190,8 @@
%val = tensor.extract %0[] : tensor<i32>
%1 = flow.dispatch.tensor.load %arg1, offsets=[0], sizes=[9], strides = [1] : !flow.dispatch.tensor<readonly:9xi32> -> tensor<9xi32>
%2 = linalg.init_tensor [] : tensor<i32>
- %3 = linalg.fill(%c-2147483648_i32, %2) : i32, tensor<i32> -> tensor<i32>
- %4 = linalg.fill(%val, %2) : i32, tensor<i32> -> tensor<i32>
+ %3 = linalg.fill ins(%c-2147483648_i32 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
+ %4 = linalg.fill ins(%val : i32) outs(%2 : tensor<i32>) -> tensor<i32>
flow.dispatch.tensor.store %3, %arg2, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<writeonly:i32>
flow.dispatch.tensor.store %4, %arg3, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<readwrite:i32>
flow.return
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/convert_linalg_tensor_ops_after.mlir b/iree/compiler/Dialect/Flow/Transforms/test/convert_linalg_tensor_ops_after.mlir
index 3d8ccde..4dda3a4 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/convert_linalg_tensor_ops_after.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/convert_linalg_tensor_ops_after.mlir
@@ -9,7 +9,7 @@
%3 = affine.apply affine_map<(d0)[s0, s1] -> (d0 + s0 + s1)>(%1)[%arg2, %arg4]
%4 = affine.apply affine_map<(d0)[s0, s1] -> (d0 + s0 + s1)>(%2)[%arg3, %arg5]
%5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
- %6 = linalg.fill(%0, %5) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %6 = linalg.fill ins(%0 : f32) outs(%5 : tensor<?x?xf32>) -> tensor<?x?xf32>
%7 = flow.tensor.update %arg0, %6[%arg2, %arg3] : tensor<?x?xf32>{%1, %2} -> %6 as tensor<?x?xf32>{%3, %4}
return %7 : tensor<?x?xf32>
}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
index 60ff4c6..0755c49 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
@@ -90,7 +90,7 @@
%M = tensor.dim %A, %c0 : tensor<?x?xf32>
%N = tensor.dim %B, %c1 : tensor<?x?xf32>
%0 = linalg.init_tensor [%M, %N] : tensor<?x?xf32>
- %1 = linalg.fill(%zero, %0) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %1 = linalg.fill ins(%zero : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = linalg.matmul ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %2 : tensor<?x?xf32>
@@ -115,7 +115,9 @@
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[ARG0_CAPTURE]], {{.*}} : !flow.dispatch.tensor<readonly:?x?xf32>{%[[ARG0_DIM0_CAPTURE]], %[[ARG0_DIM1_CAPTURE]]}
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[ARG1_CAPTURE]], {{.*}} : !flow.dispatch.tensor<readonly:?x?xf32>{%[[ARG1_DIM0_CAPTURE]], %[[ARG1_DIM1_CAPTURE]]}
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[ZERO]] :
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[RESULT:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[LHS]], %[[RHS]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[FILL]] : tensor<?x?xf32>)
@@ -133,7 +135,7 @@
%N = tensor.dim %B, %c1 : tensor<?x?xf32>
%K = tensor.dim %A, %c1 : tensor<?x?xf32>
%0 = linalg.init_tensor [%M, %N] : tensor<?x?xf32>
- %1 = linalg.fill(%zero, %0) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %1 = linalg.fill ins(%zero : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = linalg.init_tensor [%M, %K] : tensor<?x?xf32>
%3 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
@@ -172,7 +174,9 @@
// CHECK: flow.dispatch.workgroups[%[[N]], %[[M]], %[[C1]]]
// CHECK: %[[ZERO:.+]] = arith.constant 0.0
// CHECK: %[[INIT:.+]] = linalg.init_tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[ZERO]] :
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: linalg.matmul
// CHECK: outs(%[[FILL]] : tensor<?x?xf32>)
@@ -230,13 +234,13 @@
%m = tensor.dim %0, %c0 : tensor<?x4xf32>
%n1 = tensor.dim %rhs1, %c1 : tensor<4x?xf32>
%init1 = linalg.init_tensor [%m, %n1] : tensor<?x?xf32>
- %fill1 = linalg.fill(%cst, %init1) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor<?x?xf32>) -> tensor<?x?xf32>
%1 = linalg.matmul
ins(%0, %rhs1 : tensor<?x4xf32>, tensor<4x?xf32>)
outs(%fill1 : tensor<?x?xf32>) -> tensor<?x?xf32>
%n2 = tensor.dim %rhs2, %c1 : tensor<4x?xf32>
%init2 = linalg.init_tensor [%m, %n2] : tensor<?x?xf32>
- %fill2 = linalg.fill(%cst, %init2) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %fill2 = linalg.fill ins(%cst : f32) outs(%init2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2= linalg.matmul
ins(%0, %rhs2 : tensor<?x4xf32>, tensor<4x?xf32>)
outs(%fill2 : tensor<?x?xf32>) -> tensor<?x?xf32>
@@ -275,7 +279,7 @@
%3 = affine.apply affine_map<(d0)[s0, s1] -> (d0 + s0 + s1)>(%1)[%arg2, %arg4]
%4 = affine.apply affine_map<(d0)[s0, s1] -> (d0 + s0 + s1)>(%2)[%arg3, %arg5]
%5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
- %6 = linalg.fill(%0, %5) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %6 = linalg.fill ins(%0 : f32) outs(%5 : tensor<?x?xf32>) -> tensor<?x?xf32>
%7 = flow.tensor.update %arg0, %6[%arg2, %arg3] : tensor<?x?xf32>{%1, %2} -> %6 as tensor<?x?xf32>{%3, %4}
return %7 : tensor<?x?xf32>
}
@@ -365,7 +369,7 @@
func @conv2d(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>) -> tensor<1x112x112x32xf32> {
%0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill(%cst, %0) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%2 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>)
@@ -387,7 +391,7 @@
func @depthwise_conv2d(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> {
%cst = arith.constant 0.000000e+00 : f32
%1 = linalg.init_tensor [1, 56, 56, 96] : tensor<1x56x56x96xf32>
- %2 = linalg.fill(%cst, %1) : f32, tensor<1x56x56x96xf32> -> tensor<1x56x56x96xf32>
+ %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
%4 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) outs(%2 : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
return %4 : tensor<1x56x56x96xf32>
}
@@ -448,7 +452,7 @@
func @fuse_non_tiled_reduction_fill(%input1: tensor<1000xf32>, %input2: tensor<1000xf32>, %offset: tensor<f32>) -> tensor<f32> {
%zero = arith.constant 0.0 : f32
%init = linalg.init_tensor [] : tensor<f32>
- %fill = linalg.fill(%zero, %init) : f32, tensor<f32> -> tensor<f32>
+ %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<f32>) -> tensor<f32>
%reduce = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
iterator_types = ["reduction"]}
@@ -701,8 +705,8 @@
%c0 = arith.constant 0 : index
%0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
%1 = linalg.init_tensor [%0] : tensor<?xi32>
- %2 = linalg.fill(%cmin, %1) : i32, tensor<?xi32> -> tensor<?xi32>
- %3 = linalg.fill(%c0_i32, %1) : i32, tensor<?xi32> -> tensor<?xi32>
+ %2 = linalg.fill ins(%cmin : i32) outs(%1 : tensor<?xi32>) -> tensor<?xi32>
+ %3 = linalg.fill ins(%c0_i32 : i32) outs(%1 : tensor<?xi32>) -> tensor<?xi32>
%4:2 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
affine_map<(d0, d1) -> (d1, d0)>,
@@ -788,7 +792,7 @@
%2 = tensor.dim %0, %c0 : tensor<?x?xf32>
%3 = tensor.dim %1, %c1 : tensor<?x?xf32>
%4 = linalg.init_tensor [%2, %3] : tensor<?x?xf32>
- %5 = linalg.fill(%cst, %4) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+ %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
%6 = linalg.matmul ins(%0, %1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%5 : tensor<?x?xf32>) -> tensor<?x?xf32>
%7 = tensor.dim %6, %c0 : tensor<?x?xf32>
%8 = tensor.dim %6, %c1 : tensor<?x?xf32>
@@ -927,7 +931,7 @@
func @pooling_nwhc_sum_static(%input: tensor<1x33x33x160xf32>) -> tensor<1x3x3x160xf32> {
%cst = arith.constant 0.0 : f32
%1 = linalg.init_tensor [1, 3, 3, 160] : tensor<1x3x3x160xf32>
- %2 = linalg.fill(%cst, %1) : f32, tensor<1x3x3x160xf32> -> tensor<1x3x3x160xf32>
+ %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x3x3x160xf32>) -> tensor<1x3x3x160xf32>
%3 = linalg.init_tensor [11, 11] : tensor<11x11xf32>
%4 = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<11> : vector<2xi64>} ins(%input, %3 : tensor<1x33x33x160xf32>, tensor<11x11xf32>) outs(%2 : tensor<1x3x3x160xf32>) -> tensor<1x3x3x160xf32>
return %4 : tensor<1x3x3x160xf32>
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion.mlir b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion.mlir
index 6da28d8..d24826e 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion.mlir
@@ -3,7 +3,7 @@
func @fuse_conv2d_elementwise(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %1 = linalg.fill(%cst, %0) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%2 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>)
@@ -33,7 +33,8 @@
// CHECK: flow.dispatch.workgroups
// CHECK: %[[INIT:.+]] = linalg.init_tensor
-// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: linalg.generic
@@ -46,7 +47,7 @@
-> (tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %1 = linalg.fill(%cst, %0) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%2 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>)
@@ -80,7 +81,7 @@
func @dont_fuse_conv2d_with_non_identity_map(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
- %1 = linalg.fill(%cst, %0) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
+ %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%2 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>)
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/infer_numeric_narrowing.mlir b/iree/compiler/Dialect/Flow/Transforms/test/infer_numeric_narrowing.mlir
index ac21741..ef440ec 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/infer_numeric_narrowing.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/infer_numeric_narrowing.mlir
@@ -18,7 +18,7 @@
[[3.900000e+01], [0.000000e+00], [1.270000e+02]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [5, 1] : tensor<5x1xf32>
- %1 = linalg.fill(%init_value, %0) : f32, tensor<5x1xf32> -> tensor<5x1xf32>
+ %1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
return %2 : tensor<5x1xf32>
}
@@ -30,7 +30,7 @@
[[-3.900000e+01], [0.000000e+00], [1.270000e+02]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [5, 1] : tensor<5x1xf32>
- %1 = linalg.fill(%init_value, %0) : f32, tensor<5x1xf32> -> tensor<5x1xf32>
+ %1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
return %2 : tensor<5x1xf32>
}
@@ -43,7 +43,7 @@
[[0.000000e+00], [0.000000e+00], [-1.000000e+00]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [5, 1] : tensor<5x1xf32>
- %1 = linalg.fill(%init_value, %0) : f32, tensor<5x1xf32> -> tensor<5x1xf32>
+ %1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
return %2 : tensor<5x1xf32>
}
@@ -56,7 +56,7 @@
[[1.000000e+00], [1.000000e+00], [2.000000e+00]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [5, 1] : tensor<5x1xf32>
- %1 = linalg.fill(%init_value, %0) : f32, tensor<5x1xf32> -> tensor<5x1xf32>
+ %1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
return %2 : tensor<5x1xf32>
}
@@ -69,7 +69,7 @@
[[-1.000000e+00], [-1.000000e+00], [-2.000000e+00]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = linalg.init_tensor [5, 1] : tensor<5x1xf32>
- %1 = linalg.fill(%init_value, %0) : f32, tensor<5x1xf32> -> tensor<5x1xf32>
+ %1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
return %2 : tensor<5x1xf32>
}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/matmul_to_mmt4d.mlir b/iree/compiler/Dialect/Flow/Transforms/test/matmul_to_mmt4d.mlir
index 39a5eb8..fa5b8ea 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/matmul_to_mmt4d.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/matmul_to_mmt4d.mlir
@@ -71,7 +71,7 @@
func @check_mmt4d_with_init_tensor_and_fill(%arg0: tensor<24x8xf32>, %arg1: tensor<8x32xf32>) -> tensor<24x32xf32> {
%c0 = arith.constant 0.0 : f32
%0 = linalg.init_tensor [24, 32] : tensor<24x32xf32>
- %1 = linalg.fill(%c0, %0) : f32, tensor<24x32xf32> -> tensor<24x32xf32>
+ %1 = linalg.fill ins(%c0 : f32) outs(%0 : tensor<24x32xf32>) -> tensor<24x32xf32>
%2 = linalg.matmul ins(%arg0, %arg1 : tensor<24x8xf32>, tensor<8x32xf32>) outs(%1 : tensor<24x32xf32>) -> tensor<24x32xf32>
return %2 : tensor<24x32xf32>
}
@@ -85,7 +85,8 @@
// CHECK: %[[RHS4D:.+]] = tensor.expand_shape %[[RHS]]
// CHECK-SAME: tensor<8x32xf32> into tensor<4x2x8x4xf32>
// CHECK: %[[DST_INIT:.+]] = linalg.init_tensor [3, 8, 8, 4] : tensor<3x8x8x4xf32>
-// CHECK: [[DST:.+]] linalg.fill(%[[ZERO:.+]], %[[DST_INIT]])
+// CHECK: [[DST:.+]] linalg.fill
+// CHECK-SAME: outs(%[[DST_INIT]] :
// -----
func @check_mmt4d_i8_static_pad(%arg0: tensor<3x5xi8>, %arg1: tensor<5x2xi8>, %arg2: tensor<3x2xi32>) -> tensor<3x2xi32> {
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/optimize_numerics.mlir b/iree/compiler/Dialect/Flow/Transforms/test/optimize_numerics.mlir
index a39ebe0..ae14cd0 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/optimize_numerics.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/optimize_numerics.mlir
@@ -60,9 +60,9 @@
func @cast_fill(%arg0 : f32, %arg1 : tensor<3xf32>) -> tensor<3xi8> {
// CHECK: %[[SCALAR:.*]] = arith.fptosi %arg0 : f32 to i8
// CHECK: %[[INIT:.*]] = arith.fptosi %arg1 : tensor<3xf32> to tensor<3xi8>
- // CHECK: %[[RESULT:.*]] = linalg.fill(%[[SCALAR]], %[[INIT]]) : i8, tensor<3xi8> -> tensor<3xi8>
+ // CHECK: %[[RESULT:.*]] = linalg.fill ins(%[[SCALAR]] : i8) outs(%[[INIT]] : tensor<3xi8>) -> tensor<3xi8>
// CHECK: return %[[RESULT]]
- %0 = linalg.fill(%arg0, %arg1) : f32, tensor<3xf32> -> tensor<3xf32>
+ %0 = linalg.fill ins(%arg0 : f32) outs(%arg1 : tensor<3xf32>) -> tensor<3xf32>
%1 = arith.fptosi %0 : tensor<3xf32> to tensor<3xi8>
return %1 : tensor<3xi8>
}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/pad_tensor_to_tensor.mlir b/iree/compiler/Dialect/Flow/Transforms/test/pad_tensor_to_tensor.mlir
index 316023c..b3cf275 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/pad_tensor_to_tensor.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/pad_tensor_to_tensor.mlir
@@ -28,7 +28,9 @@
// CHECK-DAG: %[[RD0:.+]] = affine.apply #[[MAP0]]()[%[[ARG3]], %[[D0]]]
// CHECK-DAG: %[[RD1:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[D1]]]
// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[RD0]], %[[RD1]]]
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[VAL]], %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[VAL]] :
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][4, %[[ARG2]]] [%[[D0]], %[[D1]]] [1, 1]
// CHECK: return %[[RESULT]]
@@ -53,6 +55,8 @@
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<f32>
// CHECK-DAG: %[[VAL:.+]] = tensor.extract %[[ARG1]]
// CHECK: %[[INIT:.+]] = linalg.init_tensor [18, 12]
-// CHECK: %[[FILL:.+]] = linalg.fill(%[[VAL]], %[[INIT]])
+// CHECK: %[[FILL:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[VAL]] :
+// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][4, 5] [12, 4] [1, 1]
// CHECK: return %[[RESULT]]
diff --git a/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir b/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir
index 6f417c8..a147963 100644
--- a/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir
+++ b/iree/compiler/Dialect/Stream/Builtins/fill_i64.mlir
@@ -20,7 +20,7 @@
scf.for %i = %1 to %count step %2 {
%3 = affine.min affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)>(%i)[%count, %workgroup_size_0]
%4 = linalg.init_tensor [%3] : tensor<?xi64>
- %5 = linalg.fill(%value, %4) : i64, tensor<?xi64> -> tensor<?xi64>
+ %5 = linalg.fill ins(%value : i64) outs(%4 : tensor<?xi64>) -> tensor<?xi64>
flow.dispatch.tensor.store %5, %out, offsets = [%i], sizes = [%3], strides = [1] : tensor<?xi64> -> !flow.dispatch.tensor<writeonly:?xi64>{%count}
}
return
diff --git a/iree/compiler/Dialect/Stream/Builtins/splat_i64.mlir b/iree/compiler/Dialect/Stream/Builtins/splat_i64.mlir
index 4daad1e..a701eae 100644
--- a/iree/compiler/Dialect/Stream/Builtins/splat_i64.mlir
+++ b/iree/compiler/Dialect/Stream/Builtins/splat_i64.mlir
@@ -21,7 +21,7 @@
scf.for %i = %1 to %count step %2 {
%3 = affine.min affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)>(%i)[%count, %workgroup_size_0]
%4 = linalg.init_tensor [%3] : tensor<?xi64>
- %5 = linalg.fill(%value, %4) : i64, tensor<?xi64> -> tensor<?xi64>
+ %5 = linalg.fill ins(%value : i64) outs(%4 : tensor<?xi64>) -> tensor<?xi64>
flow.dispatch.tensor.store %5, %out, offsets = [%i], sizes = [%3], strides = [1] : tensor<?xi64> -> !flow.dispatch.tensor<writeonly:?xi64>{%count}
}
return
diff --git a/iree/compiler/InputConversion/Common/test/linalg_quantized_matmul_to_matmul.mlir b/iree/compiler/InputConversion/Common/test/linalg_quantized_matmul_to_matmul.mlir
index 1ef6e39..67a0c07 100644
--- a/iree/compiler/InputConversion/Common/test/linalg_quantized_matmul_to_matmul.mlir
+++ b/iree/compiler/InputConversion/Common/test/linalg_quantized_matmul_to_matmul.mlir
@@ -30,7 +30,9 @@
// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[LHS]], %[[RHS]] : tensor<?x?xi8>, tensor<?x?xi8>) outs(%[[ACC]] : tensor<?x?xi32>)
// CHECK-DAG: %[[INIT_RESULT:.+]] = linalg.init_tensor
// CHECK-DAG: %[[INIT_LHS_SUMS_ACC:.+]] = linalg.init_tensor
-// CHECK: %[[ZERO_LHS_SUMS_ACC:.+]] = linalg.fill(%[[C0_I32]], %[[INIT_LHS_SUMS_ACC]])
+// CHECK: %[[ZERO_LHS_SUMS_ACC:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[C0_I32]] :
+// CHECK-SAME: outs(%[[INIT_LHS_SUMS_ACC]] :
// CHECK: %[[LHS_SUMS:.+]] = linalg.generic
// CHECK-SAME: "parallel", "reduction"
// CHECK-SAME: ins(%[[LHS]] : tensor<?x?xi8>)
@@ -55,7 +57,9 @@
// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[LHS]], %[[RHS]] : tensor<?x?xi8>, tensor<?x?xi8>) outs(%[[ACC]] : tensor<?x?xi32>)
// CHECK-DAG: %[[INIT_RESULT:.+]] = linalg.init_tensor
// CHECK-DAG: %[[INIT_RHS_SUMS_ACC:.+]] = linalg.init_tensor
-// CHECK: %[[ZERO_RHS_SUMS_ACC:.+]] = linalg.fill(%[[C0_I32]], %[[INIT_RHS_SUMS_ACC]])
+// CHECK: %[[ZERO_RHS_SUMS_ACC:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[C0_I32]] :
+// CHECK-SAME: outs(%[[INIT_RHS_SUMS_ACC]] :
// CHECK: %[[RHS_SUMS:.+]] = linalg.generic
// CHECK-SAME: "reduction", "parallel"
// CHECK-SAME: ins(%[[RHS]] : tensor<?x?xi8>)
@@ -80,13 +84,17 @@
// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[LHS]], %[[RHS]] : tensor<?x?xi8>, tensor<?x?xi8>) outs(%[[ACC]] : tensor<?x?xi32>)
// CHECK-DAG: %[[INIT_RESULT:.+]] = linalg.init_tensor
// CHECK-DAG: %[[INIT_LHS_SUMS_ACC:.+]] = linalg.init_tensor
-// CHECK: %[[ZERO_LHS_SUMS_ACC:.+]] = linalg.fill(%[[C0_I32]], %[[INIT_LHS_SUMS_ACC]])
+// CHECK: %[[ZERO_LHS_SUMS_ACC:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[C0_I32]] :
+// CHECK-SAME: outs(%[[INIT_LHS_SUMS_ACC]] :
// CHECK: %[[LHS_SUMS:.+]] = linalg.generic
// CHECK-SAME: "parallel", "reduction"
// CHECK-SAME: ins(%[[LHS]] : tensor<?x?xi8>)
// CHECK-SAME: outs(%[[ZERO_LHS_SUMS_ACC]] : tensor<?xi32>)
// CHECK: %[[INIT_RHS_SUMS_ACC:.+]] = linalg.init_tensor
-// CHECK: %[[ZERO_RHS_SUMS_ACC:.+]] = linalg.fill(%[[C0_I32]], %[[INIT_RHS_SUMS_ACC]])
+// CHECK: %[[ZERO_RHS_SUMS_ACC:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[C0_I32]] :
+// CHECK-SAME: outs(%[[INIT_RHS_SUMS_ACC]] :
// CHECK: %[[RHS_SUMS:.+]] = linalg.generic
// CHECK-SAME: "reduction", "parallel"
// CHECK-SAME: ins(%[[RHS]] : tensor<?x?xi8>)
@@ -115,13 +123,17 @@
// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[LHS]], %[[RHS]] : tensor<3x4xi8>, tensor<4x5xi8>) outs(%[[ACC]] : tensor<3x5xi32>)
// CHECK-DAG: %[[INIT_RESULT:.+]] = linalg.init_tensor
// CHECK-DAG: %[[INIT_LHS_SUMS_ACC:.+]] = linalg.init_tensor
-// CHECK: %[[ZERO_LHS_SUMS_ACC:.+]] = linalg.fill(%[[C0_I32]], %[[INIT_LHS_SUMS_ACC]])
+// CHECK: %[[ZERO_LHS_SUMS_ACC:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[C0_I32]] :
+// CHECK-SAME: outs(%[[INIT_LHS_SUMS_ACC]] :
// CHECK: %[[LHS_SUMS:.+]] = linalg.generic
// CHECK-SAME: "parallel", "reduction"
// CHECK-SAME: ins(%[[LHS]] : tensor<3x4xi8>)
// CHECK-SAME: outs(%[[ZERO_LHS_SUMS_ACC]] : tensor<3xi32>)
// CHECK: %[[INIT_RHS_SUMS_ACC:.+]] = linalg.init_tensor
-// CHECK: %[[ZERO_RHS_SUMS_ACC:.+]] = linalg.fill(%[[C0_I32]], %[[INIT_RHS_SUMS_ACC]])
+// CHECK: %[[ZERO_RHS_SUMS_ACC:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[C0_I32]] :
+// CHECK-SAME: outs(%[[INIT_RHS_SUMS_ACC]] :
// CHECK: %[[RHS_SUMS:.+]] = linalg.generic
// CHECK-SAME: "reduction", "parallel"
// CHECK-SAME: ins(%[[RHS]] : tensor<4x5xi8>)
diff --git a/iree/compiler/InputConversion/MHLO/BUILD b/iree/compiler/InputConversion/MHLO/BUILD
index 00b5217..e8093bc 100644
--- a/iree/compiler/InputConversion/MHLO/BUILD
+++ b/iree/compiler/InputConversion/MHLO/BUILD
@@ -93,6 +93,10 @@
"@llvm-project//mlir:Transforms",
"@mlir-hlo//:chlo_legalize_to_hlo",
"@mlir-hlo//:hlo",
+ "@mlir-hlo//:hlo_legalize_shape_ops_to_standard",
+ "@mlir-hlo//:hlo_legalize_to_arithmetic",
+ "@mlir-hlo//:hlo_legalize_to_lhlo",
+ "@mlir-hlo//:hlo_legalize_to_memref",
"@mlir-hlo//:legalize_control_flow",
"@mlir-hlo//:legalize_einsum_to_dot_general",
"@mlir-hlo//:legalize_gather_to_torch_index_select",
diff --git a/iree/compiler/InputConversion/MHLO/CMakeLists.txt b/iree/compiler/InputConversion/MHLO/CMakeLists.txt
index ccd8640..c153e45 100644
--- a/iree/compiler/InputConversion/MHLO/CMakeLists.txt
+++ b/iree/compiler/InputConversion/MHLO/CMakeLists.txt
@@ -83,7 +83,11 @@
MLIRTransforms
MhloDialect
MhloPasses
+ MhloShapeOpsToStandard
+ MhloToArithmeticConversion
+ MhloToLhloConversion
MhloToLinalg
+ MhloToMemrefConversion
MhloToStandard
iree::compiler::Dialect::Flow::IR
iree::compiler::Dialect::Flow::Transforms
diff --git a/iree/compiler/InputConversion/MHLO/MHLOToLinalgOnTensors.cpp b/iree/compiler/InputConversion/MHLO/MHLOToLinalgOnTensors.cpp
index 99fab42..ebc4504 100644
--- a/iree/compiler/InputConversion/MHLO/MHLOToLinalgOnTensors.cpp
+++ b/iree/compiler/InputConversion/MHLO/MHLOToLinalgOnTensors.cpp
@@ -84,7 +84,7 @@
rewriter.createOrFold<arith::AddIOp>(loc, resultDimSize, size);
}
sizes[dim] = resultDimSize;
- auto initTensor = rewriter.create<linalg::InitTensorOp>(
+ Value initTensor = rewriter.create<linalg::InitTensorOp>(
loc, resultType.getShape(), resultType.getElementType());
auto zeroAttr = rewriter.getZeroAttr(resultType.getElementType());
Value zero = rewriter.create<arith::ConstantOp>(loc, zeroAttr);
@@ -146,7 +146,7 @@
Value rhs) {
Value zero = b.create<arith::ConstantOp>(
loc, b.getZeroAttr(resultType.getElementType()));
- auto initTensor = b.create<linalg::InitTensorOp>(
+ Value initTensor = b.create<linalg::InitTensorOp>(
loc, /*dyn_size=*/ValueRange{}, resultType.getShape(),
resultType.getElementType());
Value zeroTensor =
diff --git a/iree/compiler/InputConversion/MHLO/test/fft.mlir b/iree/compiler/InputConversion/MHLO/test/fft.mlir
index b5ec7b6..367269b 100644
--- a/iree/compiler/InputConversion/MHLO/test/fft.mlir
+++ b/iree/compiler/InputConversion/MHLO/test/fft.mlir
@@ -14,12 +14,16 @@
// CHECK-DAG: %[[ImagMatrix:.+]] = arith.constant dense<"0x00000080{{.*}}"> : tensor<32x17xf32>
// CHECK-DAG: %[[Zero:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[RealInit:.+]] = linalg.init_tensor [17] : tensor<17xf32>
-// CHECK: %[[RealFill:.+]] = linalg.fill(%[[Zero]], %[[RealInit]])
+// CHECK: %[[RealFill:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[Zero]] :
+// CHECK-SAME: outs(%[[RealInit]] :
// CHECK: %[[RealRes:.+]] = linalg.vecmat
// CHECK-SAME: ins(%[[Arg0]], %[[RealMatrix]] : tensor<32xf32>, tensor<32x17xf32>)
// CHECK-SAME: outs(%[[RealFill]] : tensor<17xf32>) -> tensor<17xf32>
// CHECK: %[[ImagInit:.+]] = linalg.init_tensor [17] : tensor<17xf32>
-// CHECK: %[[ImagFill:.+]] = linalg.fill(%[[Zero]], %[[ImagInit]])
+// CHECK: %[[ImagFill:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[Zero]] :
+// CHECK-SAME: outs(%[[ImagInit]] :
// CHECK: %[[ImagRes:.+]] = linalg.vecmat
// CHECK-SAME: ins(%[[Arg0]], %[[ImagMatrix]] : tensor<32xf32>, tensor<32x17xf32>)
// CHECK-SAME: outs(%[[ImagFill]] : tensor<17xf32>) -> tensor<17xf32>
@@ -41,12 +45,16 @@
// CHECK-DAG: %[[ImagMatrix:.+]] = arith.constant dense<"0x00000080{{.*}}"> : tensor<32x17xf32>
// CHECK-DAG: %[[Zero:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[RealInit:.+]] = linalg.init_tensor [1, 17] : tensor<1x17xf32>
-// CHECK: %[[RealFill:.+]] = linalg.fill(%[[Zero]], %[[RealInit]])
+// CHECK: %[[RealFill:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[Zero]] :
+// CHECK-SAME: outs(%[[RealInit]] :
// CHECK: %[[RealRes:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[Arg0]], %[[RealMatrix]] : tensor<1x32xf32>, tensor<32x17xf32>)
// CHECK-SAME: outs(%[[RealFill]] : tensor<1x17xf32>) -> tensor<1x17xf32>
// CHECK: %[[ImagInit:.+]] = linalg.init_tensor [1, 17] : tensor<1x17xf32>
-// CHECK: %[[ImagFill:.+]] = linalg.fill(%[[Zero]], %[[ImagInit]])
+// CHECK: %[[ImagFill:.+]] = linalg.fill
+// CHECK-SAME: ins(%[[Zero]] :
+// CHECK-SAME: outs(%[[ImagInit]] :
// CHECK: %[[ImagRes:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[Arg0]], %[[ImagMatrix]] : tensor<1x32xf32>, tensor<32x17xf32>)
// CHECK-SAME: outs(%[[ImagFill]] : tensor<1x17xf32>) -> tensor<1x17xf32>
diff --git a/iree/test/e2e/regression/fill_i64.mlir b/iree/test/e2e/regression/fill_i64.mlir
index 3bd8565..1bc2d21 100644
--- a/iree/test/e2e/regression/fill_i64.mlir
+++ b/iree/test/e2e/regression/fill_i64.mlir
@@ -10,12 +10,12 @@
%cv0 = arith.constant -1 : i64
%v0_init = linalg.init_tensor [%0, %1] : tensor<?x?xi64>
- %v0 = linalg.fill(%cv0, %v0_init) : i64, tensor<?x?xi64> -> tensor<?x?xi64>
+ %v0 = linalg.fill ins(%cv0 : i64) outs(%v0_init : tensor<?x?xi64>) -> tensor<?x?xi64>
// CHECK: 2x3xi64=[-1 -1 -1][-1 -1 -1]
%cv1 = arith.constant 9223372036854775807 : i64
%v1_init = linalg.init_tensor [%0, %1] : tensor<?x?xi64>
- %v1 = linalg.fill(%cv1, %v1_init) : i64, tensor<?x?xi64> -> tensor<?x?xi64>
+ %v1 = linalg.fill ins(%cv1 : i64) outs(%v1_init : tensor<?x?xi64>) -> tensor<?x?xi64>
// CHECK: 2x3xi64=[9223372036854775807 9223372036854775807 9223372036854775807][9223372036854775807 9223372036854775807 9223372036854775807]
return %v0, %v1 : tensor<?x?xi64>, tensor<?x?xi64>
diff --git a/iree/test/e2e/regression/linalg_ops.mlir b/iree/test/e2e/regression/linalg_ops.mlir
index 98173af..c9ec6e1 100644
--- a/iree/test/e2e/regression/linalg_ops.mlir
+++ b/iree/test/e2e/regression/linalg_ops.mlir
@@ -38,7 +38,7 @@
%bias = util.unfoldable_constant dense<1.0> : tensor<16xf32>
%init = linalg.init_tensor [1, 112, 112, 16] : tensor<1x112x112x16xf32>
%cst = arith.constant 0.0 : f32
- %fill = linalg.fill(%cst, %init) : f32, tensor<1x112x112x16xf32> -> tensor<1x112x112x16xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32>
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x16xf32>)
diff --git a/iree/test/e2e/regression/linalg_quantized_matmul_vs_linalg_matmul.mlir b/iree/test/e2e/regression/linalg_quantized_matmul_vs_linalg_matmul.mlir
index 3378d41..58dfe83 100644
--- a/iree/test/e2e/regression/linalg_quantized_matmul_vs_linalg_matmul.mlir
+++ b/iree/test/e2e/regression/linalg_quantized_matmul_vs_linalg_matmul.mlir
@@ -21,7 +21,7 @@
// compute the sums along rows of %lhs.
%lhs_i32 = arith.extsi %lhs : tensor<3x4xi8> to tensor<3x4xi32>
%init_lhs_sums_uninitialized = linalg.init_tensor [3] : tensor<3xi32>
- %zero_lhs_sums = linalg.fill(%c_0, %init_lhs_sums_uninitialized) : i32, tensor<3xi32> -> tensor<3xi32>
+ %zero_lhs_sums = linalg.fill ins(%c_0 : i32) outs(%init_lhs_sums_uninitialized : tensor<3xi32>) -> tensor<3xi32>
%lhs_sums = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
@@ -36,7 +36,7 @@
// compute the sums along columns of %rhs.
%rhs_i32 = arith.extsi %rhs : tensor<4x5xi8> to tensor<4x5xi32>
%init_rhs_sums_uninitialized = linalg.init_tensor [5] : tensor<5xi32>
- %zero_rhs_sums = linalg.fill(%c_0, %init_rhs_sums_uninitialized) : i32, tensor<5xi32> -> tensor<5xi32>
+ %zero_rhs_sums = linalg.fill ins(%c_0 : i32) outs(%init_rhs_sums_uninitialized : tensor<5xi32>) -> tensor<5xi32>
%rhs_sums = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d1)>],
@@ -93,7 +93,7 @@
// compute the sums along rows of %lhs.
%lhs_i32 = arith.extsi %lhs : tensor<?x?xi8> to tensor<?x?xi32>
%init_lhs_sums_uninitialized = linalg.init_tensor [%m_size] : tensor<?xi32>
- %zero_lhs_sums = linalg.fill(%c_0, %init_lhs_sums_uninitialized) : i32, tensor<?xi32> -> tensor<?xi32>
+ %zero_lhs_sums = linalg.fill ins(%c_0 : i32) outs(%init_lhs_sums_uninitialized : tensor<?xi32>) -> tensor<?xi32>
%lhs_sums = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
@@ -108,7 +108,7 @@
// compute the sums along columns of %rhs.
%rhs_i32 = arith.extsi %rhs : tensor<?x?xi8> to tensor<?x?xi32>
%init_rhs_sums_uninitialized = linalg.init_tensor [%n_size] : tensor<?xi32>
- %zero_rhs_sums = linalg.fill(%c_0, %init_rhs_sums_uninitialized) : i32, tensor<?xi32> -> tensor<?xi32>
+ %zero_rhs_sums = linalg.fill ins(%c_0 : i32) outs(%init_rhs_sums_uninitialized : tensor<?xi32>) -> tensor<?xi32>
%rhs_sums = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d1)>],
@@ -192,7 +192,7 @@
%c_plus127 = arith.constant 127 : i32
%init_acc_uninitialized = linalg.init_tensor [3, 5] : tensor<3x5xi32>
- %zero_acc = linalg.fill(%c_0, %init_acc_uninitialized) : i32, tensor<3x5xi32> -> tensor<3x5xi32>
+ %zero_acc = linalg.fill ins(%c_0 : i32) outs(%init_acc_uninitialized : tensor<3x5xi32>) -> tensor<3x5xi32>
// Test special case: both zero points are 0
call @check_one_quantized_matmul_as_matmul_3x4x5(%lhs_3x4_1, %rhs_4x5_1, %c_0, %c_0, %zero_acc) : (tensor<3x4xi8>, tensor<4x5xi8>, i32, i32, tensor<3x5xi32>) -> ()
// Test special cases: one of the zero points is 0
diff --git a/third_party/llvm-project b/third_party/llvm-project
index c38fadb..8361c5d 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit c38fadb7f97e8aca077df9c2a01ec43f8cb1f805
+Subproject commit 8361c5da30588d3d4a48eae648f53be1feb5cfad
diff --git a/third_party/mlir-hlo b/third_party/mlir-hlo
index f52e7dc..7727bff 160000
--- a/third_party/mlir-hlo
+++ b/third_party/mlir-hlo
@@ -1 +1 @@
-Subproject commit f52e7dc1210dead4d3ff416696cad1c794332959
+Subproject commit 7727bfff1a219c9cd60087a1ae0a4b7e52916f57