[NFC] Remove trailing whitespace from *.mlir files. (#15757)
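
A minimal sketch of how this kind of cleanup can be reproduced (not the script
actually used for this change; the "compiler" root directory and the *.mlir
filter are assumptions, any file tree works):

import pathlib

# Rewrite every *.mlir file under the chosen root with per-line trailing
# whitespace (spaces/tabs before the newline) stripped. Note: joining with
# "\n" also normalizes CRLF line endings to LF.
for path in pathlib.Path("compiler").rglob("*.mlir"):
    text = path.read_text()
    cleaned = "\n".join(line.rstrip() for line in text.splitlines())
    if text.endswith("\n"):
        # splitlines() drops the final newline; put it back.
        cleaned += "\n"
    if cleaned != text:
        path.write_text(cleaned)

Running this from the repository root and then checking `git diff -w` (which
ignores whitespace changes) should show an empty diff, confirming the change
is NFC.
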
diff --git a/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/canonicalization.mlir b/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/canonicalization.mlir
index af6df8e..df6839f 100644
--- a/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/canonicalization.mlir
+++ b/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/canonicalization.mlir
@@ -655,7 +655,7 @@
func.return %0 : tensor<5x0xi32>
}
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<5x0xi32>
-// CHECK: return %[[EMPTY]]
+// CHECK: return %[[EMPTY]]
// -----
@@ -689,7 +689,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x0xi32>
// CHECK: %[[SCATTER:.+]] = "stablehlo.scatter"(%arg0, %0, %arg2)
-// CHECK: return %[[SCATTER]]
+// CHECK: return %[[SCATTER]]
// -----
diff --git a/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/stablehlo_to_stablehlo.mlir b/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/stablehlo_to_stablehlo.mlir
index ce1feda..8e36b84 100644
--- a/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/stablehlo_to_stablehlo.mlir
+++ b/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/Preprocessing/test/stablehlo_to_stablehlo.mlir
@@ -211,7 +211,7 @@
// -----
-// CHECK-LABEL: @rng_bitcast_f32
+// CHECK-LABEL: @rng_bitcast_f32
// CHECK-SAME: (%[[ARG0:.*]]: tensor<4xi32>)
func.func @rng_bitcast_f32(%arg0: tensor<4xi32>) -> (tensor<4xi32>, tensor<8xf32>) {
// CHECK: %[[OUT_STATE:.*]], %[[OUT_INT:.*]] = stablehlo.rng_bit_generator %[[ARG0]]
@@ -413,7 +413,7 @@
%7 = "stablehlo.compare"(%arg0, %arg1) {comparison_direction = #stablehlo<comparison_direction GT>} : (tensor<f32>, tensor<f32>) -> tensor<i1>
"stablehlo.return"(%7) : (tensor<i1>) -> ()
}) {dimension = 1 : i64, is_stable = true} : (tensor<16x16xf32>, tensor<16x16xi32>) -> (tensor<16x16xf32>, tensor<16x16xi32>)
- %1 = "stablehlo.slice"(%0#0) { start_indices = dense<[0, 0]> : tensor<2xi64>, limit_indices = dense<[16, 8]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64> } : (tensor<16x16xf32>) -> tensor<16x8xf32>
+ %1 = "stablehlo.slice"(%0#0) { start_indices = dense<[0, 0]> : tensor<2xi64>, limit_indices = dense<[16, 8]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64> } : (tensor<16x16xf32>) -> tensor<16x8xf32>
%2 = "stablehlo.slice"(%0#1) { start_indices = dense<[0, 0]> : tensor<2xi64>, limit_indices = dense<[16, 8]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64> } : (tensor<16x16xi32>) -> tensor<16x8xi32>
return %1, %2 : tensor<16x8xf32>, tensor<16x8xi32>
}
@@ -434,7 +434,7 @@
%7 = "stablehlo.compare"(%arg0, %arg1) {comparison_direction = #stablehlo<comparison_direction GT>} : (tensor<f32>, tensor<f32>) -> tensor<i1>
"stablehlo.return"(%7) : (tensor<i1>) -> ()
}) {dimension = 2 : i64, is_stable = true} : (tensor<16x16x16xf32>, tensor<16x16x16xi32>) -> (tensor<16x16x16xf32>, tensor<16x16x16xi32>)
- %1 = "stablehlo.slice"(%0#0) { start_indices = dense<[0, 0, 0]> : tensor<3xi64>, limit_indices = dense<[16, 16, 8]> : tensor<3xi64>, strides = dense<[1, 1, 1]> : tensor<3xi64> } : (tensor<16x16x16xf32>) -> tensor<16x16x8xf32>
+ %1 = "stablehlo.slice"(%0#0) { start_indices = dense<[0, 0, 0]> : tensor<3xi64>, limit_indices = dense<[16, 16, 8]> : tensor<3xi64>, strides = dense<[1, 1, 1]> : tensor<3xi64> } : (tensor<16x16x16xf32>) -> tensor<16x16x8xf32>
%2 = "stablehlo.slice"(%0#1) { start_indices = dense<[0, 0, 0]> : tensor<3xi64>, limit_indices = dense<[16, 16, 8]> : tensor<3xi64>, strides = dense<[1, 1, 1]> : tensor<3xi64> } : (tensor<16x16x16xi32>) -> tensor<16x16x8xi32>
return %1, %2 : tensor<16x16x8xf32>, tensor<16x16x8xi32>
}
@@ -455,7 +455,7 @@
%7 = "stablehlo.compare"(%arg0, %arg1) {comparison_direction = #stablehlo<comparison_direction GT>} : (tensor<f32>, tensor<f32>) -> tensor<i1>
"stablehlo.return"(%7) : (tensor<i1>) -> ()
}) {dimension = 2 : i64, is_stable = true} : (tensor<16x16x16xf32>, tensor<16x16x16xi32>) -> (tensor<16x16x16xf32>, tensor<16x16x16xi32>)
- %1 = "stablehlo.slice"(%0#0) { start_indices = dense<[0, 0, 0]> : tensor<3xi64>, limit_indices = dense<[16, 16, 8]> : tensor<3xi64>, strides = dense<[1, 1, 1]> : tensor<3xi64> } : (tensor<16x16x16xf32>) -> tensor<16x16x8xf32>
+ %1 = "stablehlo.slice"(%0#0) { start_indices = dense<[0, 0, 0]> : tensor<3xi64>, limit_indices = dense<[16, 16, 8]> : tensor<3xi64>, strides = dense<[1, 1, 1]> : tensor<3xi64> } : (tensor<16x16x16xf32>) -> tensor<16x16x8xf32>
%2 = "stablehlo.slice"(%0#1) { start_indices = dense<[0, 0, 0]> : tensor<3xi64>, limit_indices = dense<[16, 16, 8]> : tensor<3xi64>, strides = dense<[1, 1, 1]> : tensor<3xi64> } : (tensor<16x16x16xi32>) -> tensor<16x16x8xi32>
return %1, %2 : tensor<16x16x8xf32>, tensor<16x16x8xi32>
}
@@ -478,8 +478,8 @@
func.func private @top_k_gt_f32_comparator(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<i1> {
%0 = stablehlo.compare GT, %arg0, %arg1 : (tensor<f32>, tensor<f32>) -> tensor<i1>
- stablehlo.return %0 : tensor<i1>
-}
+ stablehlo.return %0 : tensor<i1>
+}
// CHECK-LABEL: @custom_call_topk
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x160xf32>
@@ -496,8 +496,8 @@
func.func private @bottom_k_gt_f32_comparator(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<i1> {
%0 = stablehlo.compare LT, %arg0, %arg1 : (tensor<f32>, tensor<f32>) -> tensor<i1>
- stablehlo.return %0 : tensor<i1>
-}
+ stablehlo.return %0 : tensor<i1>
+}
// CHECK-LABEL: @custom_call_bottomk
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x160xf32>
diff --git a/compiler/plugins/input/Torch/torch-iree/InputConversion/test/torch_to_iree.mlir b/compiler/plugins/input/Torch/torch-iree/InputConversion/test/torch_to_iree.mlir
index e89142b..493965a 100644
--- a/compiler/plugins/input/Torch/torch-iree/InputConversion/test/torch_to_iree.mlir
+++ b/compiler/plugins/input/Torch/torch-iree/InputConversion/test/torch_to_iree.mlir
@@ -31,7 +31,7 @@
// -----
// Verify we can decompose complex ops
-// CHECK: func @main(%arg0: tensor<2x3x4xf32>) -> (tensor<2x3x4xf32>, tensor<2x3x4xf32>)
+// CHECK: func @main(%arg0: tensor<2x3x4xf32>) -> (tensor<2x3x4xf32>, tensor<2x3x4xf32>)
// CHECK: tensor.empty
module {
func.func @main(%arg0: !torch.vtensor<[2,3,4],f32>) -> (!torch.vtensor<[2,3,4],f32>, !torch.vtensor<[2,3,4],f32>) {
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir
index c6aee63..5e769aa 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir
@@ -621,8 +621,8 @@
%dim_2 = tensor.dim %arg0, %c3 : tensor<?x?x?x?xi8>
%0 = tensor.empty(%dim, %dim_0, %dim_1, %dim_2) : tensor<?x?x?x?xi32>
%1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
- iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<?x?x?x?xi8>) outs(%0 : tensor<?x?x?x?xi32>) {
^bb0(%in: i8, %out: i32):
%5 = arith.extsi %in : i8 to i32
@@ -634,8 +634,8 @@
%dim_6 = tensor.dim %arg1, %c3 : tensor<?x?x?x?xi8>
%2 = tensor.empty(%dim_3, %dim_4, %dim_5, %dim_6) : tensor<?x?x?x?xi32>
%3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
- iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
ins(%arg1 : tensor<?x?x?x?xi8>) outs(%2 : tensor<?x?x?x?xi32>) {
^bb0(%in: i8, %out: i32):
%5 = arith.extsi %in : i8 to i32
@@ -683,8 +683,8 @@
%dim_2 = tensor.dim %arg0, %c3 : tensor<?x?x?x?xi16>
%0 = tensor.empty(%dim, %dim_0, %dim_1, %dim_2) : tensor<?x?x?x?xi32>
%1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
- iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<?x?x?x?xi16>) outs(%0 : tensor<?x?x?x?xi32>) {
^bb0(%in: i16, %out: i32):
%5 = arith.extsi %in : i16 to i32
@@ -696,8 +696,8 @@
%dim_6 = tensor.dim %arg1, %c3 : tensor<?x?x?x?xi4>
%2 = tensor.empty(%dim_3, %dim_4, %dim_5, %dim_6) : tensor<?x?x?x?xi32>
%3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
- iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
ins(%arg1 : tensor<?x?x?x?xi4>) outs(%2 : tensor<?x?x?x?xi32>) {
^bb0(%in: i4, %out: i32):
%5 = arith.extui %in : i4 to i32
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir
index 60cc518..1f274c4 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir
@@ -192,7 +192,7 @@
// CHECK: scf.yield %{{.+}} : tensor<1x2x64xf32>
// CHECK: scf.yield %{{.+}} : tensor<1x2x64xf32>
-// CHECK: linalg.generic
+// CHECK: linalg.generic
// CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]
// CHECK-SAME: ins(%[[LN]] : tensor<1x2x64xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<1xf32>)
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir
index 1982183..af39659 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir
@@ -33,7 +33,7 @@
%variant_op: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.iree.workgroup_swizzle %0 { log_tile = 3 } : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_batch_mmt4d_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_batch_mmt4d_ops.mlir
index d6cab0d..8c2a5e4 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_batch_mmt4d_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_batch_mmt4d_ops.mlir
@@ -43,9 +43,9 @@
func.func @batch_mmt4d_with_extened_inputs(%arg0: tensor<1x10x32x8x1xi8>, %arg1: tensor<1x80x32x4x1xi8>, %arg2: tensor<1x10x80x8x4xi32>) -> tensor<1x10x80x8x4xi32> {
%c0_i32 = arith.constant 0 : i32
%0 = tensor.empty() : tensor<1x10x32x8x1xi32>
- %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>,
- affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>],
- iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
+ %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>,
+ affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<1x10x32x8x1xi8>) outs(%0 : tensor<1x10x32x8x1xi32>) {
^bb0(%in: i8, %out: i32):
%6 = arith.extsi %in : i8 to i32
@@ -53,7 +53,7 @@
} -> tensor<1x10x32x8x1xi32>
%2 = tensor.empty() : tensor<1x80x32x4x1xi32>
%3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>,
- affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>],
+ affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
ins(%arg1 : tensor<1x80x32x4x1xi8>) outs(%2 : tensor<1x80x32x4x1xi32>) {
^bb0(%in: i8, %out: i32):
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir
index a506df4..ce8fa5a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir
@@ -12,7 +12,7 @@
// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[BASE_BUFFER:.+]], %[[BASE_OFFSET:.+]], %[[BASE_SIZES:.+]]:2, %[[BASE_STRIDES:.+]]:2 = memref.extract_strided_metadata %arg0
-// CHECK: %[[SUB_OFFSET:.+]] = affine.apply #[[MAP]]()[%arg1, %arg2]
+// CHECK: %[[SUB_OFFSET:.+]] = affine.apply #[[MAP]]()[%arg1, %arg2]
// CHECK: return %[[BASE_BUFFER]], %[[SUB_OFFSET]], %[[C64]], %[[C64]], %[[C128]], %[[C1]]
// -----
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reductions.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reductions.mlir
index 3459164..aeefd04 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/reductions.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/reductions.mlir
@@ -114,7 +114,7 @@
indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- ins(%5 : !out_tensor_t) outs(%6 : !out_tensor_t) {
+ ins(%5 : !out_tensor_t) outs(%6 : !out_tensor_t) {
^bb0(%arg3: f32, %arg4: f32):
%4 = math.sqrt %arg3 : f32
linalg.yield %4 : f32
@@ -174,7 +174,7 @@
indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- ins(%6 : !out_tensor_t) outs(%7 : !out_tensor_t) {
+ ins(%6 : !out_tensor_t) outs(%7 : !out_tensor_t) {
^bb0(%arg3: f32, %arg4: f32):
%4 = math.sqrt %arg3 : f32
linalg.yield %4 : f32
@@ -236,7 +236,7 @@
indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- ins(%6 : !out_tensor_t) outs(%7 : !out_tensor_t) {
+ ins(%6 : !out_tensor_t) outs(%7 : !out_tensor_t) {
^bb0(%arg3: f32, %arg4: f32):
%4 = math.sqrt %arg3 : f32
linalg.yield %4 : f32
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
index d2a67aa..91df2fd 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
@@ -7,7 +7,7 @@
%maybe_leading, %original_fill, %reduction, %maybe_trailing_0 =
transform.iree.match_callback failures(propagate) "reduction"(%root)
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-
+
%_, %more_parallel_fill, %parallel_reduction, %combiner_op =
transform.structured.split_reduction %reduction { split_factor = 2, insert_split_dimension = 1 }
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
@@ -18,7 +18,7 @@
%outer_tiled, %grid_loop = transform.structured.tile_using_forall %fusion_root_1 tile_sizes [1]
( mapping = [#gpu.block<x>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-
+
%func = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.iree.bubble_expand
@@ -50,7 +50,7 @@
tile_sizes [1] ( mapping = [#gpu.thread<z>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.structured.fuse_into_containing_op %fusion_group_22_full into %block_loop_22 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-
+
%fusion_group_21 = transform.merge_handles %maybe_leading_2, %more_parallel_fill_2
: !transform.any_op
@@ -59,7 +59,7 @@
tile_sizes [1, 1] ( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.structured.fuse_into_containing_op %fusion_group_21 into %block_loop_21 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-
+
// Step 3. Rank-reduce.
// ===========================================================================
transform.apply_patterns to %func {
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/repeated_matcher_use.mlir b/compiler/src/iree/compiler/Codegen/Common/test/repeated_matcher_use.mlir
index 3193304..f376ebc 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/repeated_matcher_use.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/repeated_matcher_use.mlir
@@ -26,7 +26,7 @@
%dummy3 = tensor.empty() : tensor<10xf32>
%c0 = arith.constant 0.0 : f32
%operand = linalg.fill ins(%c0 : f32) outs(%dummy1 : tensor<10xf32>) -> tensor<10xf32>
-
+
// expected-remark @below {{first}}
%first = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
@@ -82,7 +82,7 @@
%c5 = arith.constant 5.0 : f32
%operand5 = linalg.fill ins(%c5 : f32) outs(%dummy5 : tensor<10xf32>) -> tensor<10xf32>
%operand = linalg.fill ins(%c0 : f32) outs(%dummy1 : tensor<10xf32>) -> tensor<10xf32>
-
+
%first = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]
@@ -134,7 +134,7 @@
%dummy5 = tensor.empty() : tensor<10xf32>
%c0 = arith.constant 0.0 : f32
%operand = linalg.fill ins(%c0 : f32) outs(%dummy1 : tensor<10xf32>) -> tensor<10xf32>
-
+
%first = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]
@@ -231,7 +231,7 @@
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]
- } ins(%arg0: tensor<42x10xf32>)
+ } ins(%arg0: tensor<42x10xf32>)
outs(%init: tensor<10x42xf32>) {
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_match_partial_reduction.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_match_partial_reduction.mlir
index 3e4e546..a7bf5cd 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/transform_match_partial_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/transform_match_partial_reduction.mlir
@@ -11,7 +11,7 @@
%result = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
- iterator_types = ["parallel", "reduction"]}
+ iterator_types = ["parallel", "reduction"]}
ins(%arg0 : tensor<8x479xf32>)
outs(%fill : tensor<8xf32>) {
^bb0(%in: f32, %out: f32):
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_ops_invalid.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_ops_invalid.mlir
index e8be084..5453916 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/transform_ops_invalid.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/transform_ops_invalid.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt %s --split-input-file --iree-transform-dialect-interpreter --verify-diagnostics
+// RUN: iree-opt %s --split-input-file --iree-transform-dialect-interpreter --verify-diagnostics
module attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir
index 194644c..7a6176b 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir
@@ -460,7 +460,7 @@
// CHECK-DAG: %[[A_TENSOR:.+]] = flow.dispatch.tensor.load %[[A]]
// CHECK-DAG: %[[B_TENSOR:.+]] = flow.dispatch.tensor.load %[[B]]
// CHECK: %[[SORT:.+]]:2 = iree_linalg_ext.sort dimension(0)
-// CHECK-SAME: outs(%[[A_TENSOR]], %[[B_TENSOR]] : tensor<1xi32>, tensor<1xi8>)
+// CHECK-SAME: outs(%[[A_TENSOR]], %[[B_TENSOR]] : tensor<1xi32>, tensor<1xi8>)
// CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: i32, %[[ARG1:[a-zA-Z0-9]+]]: i32, %[[ARG2:[a-zA-Z0-9]+]]: i8, %[[ARG3:[a-zA-Z0-9]+]]: i8)
// CHECK-DAG: %[[CMPI:.+]] = arith.cmpi ult, %[[ARG0]], %[[ARG1]] : i32
// CHECK: iree_linalg_ext.yield %[[CMPI]]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir
index 97c795a..a48ac2a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir
@@ -17,7 +17,7 @@
// CHECK: %[[OFFSET_PTR0:.+]] = llvm.getelementptr %[[BASE_PTR]][18]
// CHECK: %[[OFFSET_D0:.+]] = llvm.mul %[[C5]], %[[C2]]
// CHECK: %[[INDEX1:.+]] = llvm.add %[[OFFSET_D0]], %[[C1]]
- // CHECK: %[[OFFSET_PTR1:.+]] = llvm.getelementptr %[[OFFSET_PTR0]][%[[INDEX1]]]
+ // CHECK: %[[OFFSET_PTR1:.+]] = llvm.getelementptr %[[OFFSET_PTR0]][%[[INDEX1]]]
// CHECK: %[[VALUE:.+]] = llvm.load %[[OFFSET_PTR1]]
%c1 = arith.constant 1 : index
%c5 = arith.constant 5 : index
@@ -105,7 +105,7 @@
// CHECK: %[[OFFSET_ZEXT:.+]] = llvm.zext %[[OFFSET]]
%offset = hal.interface.constant.load[0] : index
%dim0 = hal.interface.constant.load[1]: index
-
+
// CHECK: %[[STATE3:.+]] = llvm.load %arg1
// CHECK: %[[BINDING_PTRS:.+]] = llvm.extractvalue %[[STATE3]][10]
// CHECK: %[[ARRAY_PTR:.+]] = llvm.getelementptr %[[BINDING_PTRS]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.ptr
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
index 273543b..466c259 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
@@ -40,6 +40,6 @@
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> (!transform.any_op)
%func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
- transform.yield
+ transform.yield
}
} // module
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_transform_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_transform_spec.mlir
index e33e07e..17b2157 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_transform_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_transform_spec.mlir
@@ -145,6 +145,6 @@
} : !transform.any_op
transform.iree.apply_cse %func_8 : !transform.any_op
transform.memref.erase_dead_alloc_and_stores %func_8 : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} //// module
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/cast_address_space_function.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/cast_address_space_function.mlir
index 59eb11d..1f1274f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/cast_address_space_function.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/cast_address_space_function.mlir
@@ -1,9 +1,9 @@
// RUN: iree-opt --iree-codegen-lower-ukernel-ops-to-calls --iree-llvmgpu-cast-address-space-function %s --split-input-file | FileCheck %s
module {
- func.func private @foo(memref<f32>, memref<f32, #gpu.address_space<workgroup>>, memref<f32, #gpu.address_space<workgroup>>)
+ func.func private @foo(memref<f32>, memref<f32, #gpu.address_space<workgroup>>, memref<f32, #gpu.address_space<workgroup>>)
func.func @bar() {
- %alloc_1 = memref.alloc() : memref<110xf32, #gpu.address_space<workgroup>>
+ %alloc_1 = memref.alloc() : memref<110xf32, #gpu.address_space<workgroup>>
%alloc_2 = memref.alloc() : memref<128xf32>
%alloc_3 = memref.alloc() : memref<128xf32, #gpu.address_space<workgroup>>
%a1:4 = memref.extract_strided_metadata %alloc_1 : memref<110xf32, #gpu.address_space<workgroup>> -> memref<f32, #gpu.address_space<workgroup>>, index, index, index
@@ -14,12 +14,12 @@
}
}
-// CHECK: func.func private @foo(memref<f32>, memref<f32>, memref<f32>)
+// CHECK: func.func private @foo(memref<f32>, memref<f32>, memref<f32>)
// CHECK-LABEL: func.func @bar
-// CHECK: %[[a1:.+]] = memref.alloc() : memref<110xf32, #gpu.address_space<workgroup>>
-// CHECK: %[[a2:.+]] = memref.alloc() : memref<128xf32, #gpu.address_space<workgroup>>
-// CHECK: %[[b1:.+]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata %[[a1]] : memref<110xf32, #gpu.address_space<workgroup>> -> memref<f32, #gpu.address_space<workgroup>>, index, index, index
+// CHECK: %[[a1:.+]] = memref.alloc() : memref<110xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[a2:.+]] = memref.alloc() : memref<128xf32, #gpu.address_space<workgroup>>
+// CHECK: %[[b1:.+]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata %[[a1]] : memref<110xf32, #gpu.address_space<workgroup>> -> memref<f32, #gpu.address_space<workgroup>>, index, index, index
// CHECK: %[[b2:.+]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata %[[a2]] : memref<128xf32, #gpu.address_space<workgroup>> -> memref<f32, #gpu.address_space<workgroup>>, index, index, index
// CHECK: %[[C1:.+]] = memref.memory_space_cast %[[b1]] : memref<f32, #gpu.address_space<workgroup>> to memref<f32>
// CHECK: %[[C2:.+]] = memref.memory_space_cast %[[b2]] : memref<f32, #gpu.address_space<workgroup>> to memref<f32>
@@ -29,11 +29,11 @@
module {
func.func @bar() {
- %alloc_1 = memref.alloc() : memref<110xf32, #gpu.address_space<workgroup>>
+ %alloc_1 = memref.alloc() : memref<110xf32, #gpu.address_space<workgroup>>
%alloc_2 = memref.alloc() : memref<128xf32>
%alloc_3 = memref.alloc() : memref<128xf32, #gpu.address_space<workgroup>>
- iree_codegen.ukernel.generic "fastfunction" ins(%alloc_1, %alloc_2 : memref<110xf32, #gpu.address_space<workgroup>>, memref<128xf32>)
- outs(%alloc_3 : memref<128xf32, #gpu.address_space<workgroup>>)
+ iree_codegen.ukernel.generic "fastfunction" ins(%alloc_1, %alloc_2 : memref<110xf32, #gpu.address_space<workgroup>>, memref<128xf32>)
+ outs(%alloc_3 : memref<128xf32, #gpu.address_space<workgroup>>)
return
}
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/create_async_groups.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/create_async_groups.mlir
index 1fc2285..099764b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/create_async_groups.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/create_async_groups.mlir
@@ -14,7 +14,7 @@
vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = [true]} : vector<4xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
// CHECK-NOT: nvgpu.device_async_create_group
- // CHECK: %[[CP1:.*]] = nvgpu.device_async_copy {{.*}}, {{.*}}, 1
+ // CHECK: %[[CP1:.*]] = nvgpu.device_async_copy {{.*}}, {{.*}}, 1
%2 = vector.transfer_read %a[%c0, %c4], %cst_0 {in_bounds = [true]} : memref<1024x1024xf32>, vector<1xf32>
vector.transfer_write %2, %0[%c0, %c4, %c0] {in_bounds = [true]} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
// CHECK: %[[G:.*]] = nvgpu.device_async_create_group %[[CP0]], %[[CP1]]
@@ -27,7 +27,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.iree.create_async_groups %top_level_func {use_mma_sync} : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
@@ -61,7 +61,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.iree.create_async_groups %top_level_func : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
@@ -92,7 +92,7 @@
%vector_transfer = transform.structured.match ops{["memref.alloc"]} in %top_level_func : (!transform.any_op) -> !transform.any_op
// expected-error@below {{transform applied to the wrong op kind}}
transform.iree.create_async_groups %vector_transfer : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
@@ -124,7 +124,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.iree.create_async_groups %top_level_func : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
@@ -149,7 +149,7 @@
// CHECK-NOT: nvgpu.device_async_create_group
// CHECK-NOT: nvgpu.device_async_copy
- // CHECK: vector.load
+ // CHECK: vector.load
// CHECK: vector.store
%2 = vector.load %b[%c0, %c4] : memref<1024x1024xf16>, vector<1xf16>
vector.store %2, %alloc_1[%c0, %c4, %c0] : memref<4x32x16xf16, #gpu.address_space<workgroup>>, vector<1xf16>
@@ -162,6 +162,6 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.iree.create_async_groups %top_level_func : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/layout_analysis_and_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/layout_analysis_and_distribution.mlir
index bc34439..aff4f69 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/layout_analysis_and_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/layout_analysis_and_distribution.mlir
@@ -21,7 +21,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%transformed_func = transform.iree.layout_analysis_and_distribution %top_level_func : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
@@ -154,7 +154,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%transformed_func = transform.iree.layout_analysis_and_distribution %top_level_func : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
@@ -354,7 +354,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%transformed_func = transform.iree.layout_analysis_and_distribution %top_level_func : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
@@ -542,7 +542,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%transformed_func = transform.iree.layout_analysis_and_distribution %top_level_func : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
@@ -710,7 +710,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%transformed_func = transform.iree.layout_analysis_and_distribution %top_level_func : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
@@ -860,7 +860,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%transformed_func = transform.iree.layout_analysis_and_distribution %top_level_func : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
@@ -983,7 +983,7 @@
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%reordered_func = transform.iree.reorder_transpose %top_level_func : (!transform.any_op) -> !transform.any_op
transform.iree.apply_cse %reordered_func : !transform.any_op
- transform.yield
+ transform.yield
}
} // module
@@ -1108,7 +1108,7 @@
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%transformed_func = transform.iree.layout_analysis_and_distribution %top_level_func : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_shared_memory_alloc.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_shared_memory_alloc.mlir
index 2c120d8..3c36ee0 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_shared_memory_alloc.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_shared_memory_alloc.mlir
@@ -21,7 +21,7 @@
// CHECK: memref.view %[[PACKED]][%[[C0]]][] : memref<1024xi8, #gpu.address_space<workgroup>> to memref<128xf32, #gpu.address_space<workgroup>>
// CHECK: %[[C512:.+]] = arith.constant 512 : index
// CHECK: memref.view %[[PACKED]][%[[C512]]][] : memref<1024xi8, #gpu.address_space<workgroup>> to memref<128xf32, #gpu.address_space<workgroup>>
-// CHECK: nvgpu.device_async_create_group
+// CHECK: nvgpu.device_async_create_group
// CHECK: nvgpu.device_async_wait %0 {numGroups = 0 : i32}
// CHECK: gpu.barrier
// CHECK: memref.view %[[PACKED]][%[[C0]]][] : memref<1024xi8, #gpu.address_space<workgroup>> to memref<32xf32, #gpu.address_space<workgroup>>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
index 839d389..46517b7 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
@@ -113,7 +113,7 @@
// CHECK: %[[RHS:.+]] = transform.get_producer_of_operand %[[PADDED]][1]
// CHECK: %[[RHS_DPS:.+]] = transform.structured.rewrite_in_destination_passing_style %[[RHS]]
-// CHECK: transform.structured.tile_using_forall %[[LHS]]
+// CHECK: transform.structured.tile_using_forall %[[LHS]]
// DEFAULT: num_threads [1, 32, 4](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [1, 64, 2](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: apply_patterns
@@ -122,10 +122,10 @@
// CHECK: transform.structured.match ops{["scf.if"]}
// CHECK: transform.scf.take_assumed_branch %{{.*}} take_else_branch
-// CHECK: transform.structured.tile_using_forall %[[RHS_DPS]]
+// CHECK: transform.structured.tile_using_forall %[[RHS_DPS]]
// DEFAULT: num_threads [8, 16, 1](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [2, 8, 8](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
-// CHECK: apply_patterns
+// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse
@@ -139,7 +139,7 @@
// CHECK: transform.structured.tile_using_forall
// DEFAULT: num_threads [1, 2, 64](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// OPTIONS: num_threads [1, 4, 32](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
-// CHECK: apply_patterns
+// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse
@@ -211,7 +211,7 @@
// OPTIONS: factor = 3
// CHECK: apply_patterns
// CHECK: transform.apply_patterns.vector.transfer_to_scf full_unroll = true
-// CHECK: apply_patterns
+// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse
// CHECK: transform.iree.create_async_groups
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
index 1769c4b..4468fd0 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
@@ -24,7 +24,7 @@
%6 = linalg.matmul ins(%p, %4 : tensor<250x500xf32>, tensor<500x1020xf32>) outs(%5 : tensor<250x1020xf32>) -> tensor<250x1020xf32>
flow.dispatch.tensor.store %6, %2, offsets=[0, 0], sizes=[250, 1020], strides=[1, 1] : tensor<250x1020xf32> -> !flow.dispatch.tensor<readwrite:tensor<250x1020xf32>>
- return
+ return
}
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
index ebf730c..4a2276d 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
@@ -2,7 +2,7 @@
transform.named_sequence @__transform_main(
%variant_op: !transform.any_op {transform.consumed}) {
%0 = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- %forall, %tiled_fill = transform.structured.tile_using_forall %0 num_threads [5, 1]
+ %forall, %tiled_fill = transform.structured.tile_using_forall %0 num_threads [5, 1]
( mapping = [#gpu.thread<y>, #gpu.thread<x>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
@@ -28,7 +28,7 @@
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> (!transform.any_op)
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
- transform.iree.map_nested_forall_to_gpu_threads %memref_func
+ transform.iree.map_nested_forall_to_gpu_threads %memref_func
workgroup_dims = [10, 11, 1] : (!transform.any_op) -> ()
// Late canonicalizations to cleanup and pass the checks
@@ -40,7 +40,7 @@
} : !transform.any_op
transform.iree.apply_licm %memref_func : !transform.any_op
transform.iree.apply_cse %memref_func : !transform.any_op
- transform.yield
+ transform.yield
}
} // module
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
index 04474cd..74d7e52 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
@@ -1,11 +1,11 @@
module attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op
: (!transform.any_op) -> !transform.any_op
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
: (!transform.any_op) -> !transform.any_op
- %isolated = transform.get_parent_op %warp {isolated_from_above}
+ %isolated = transform.get_parent_op %warp {isolated_from_above}
: (!transform.any_op) -> !transform.any_op
transform.iree.vector.warp_distribute %isolated
: (!transform.any_op) -> ()
@@ -22,6 +22,6 @@
transform.iree.apply_licm %func_op : !transform.any_op
transform.iree.apply_cse %func_op : !transform.any_op
- transform.yield
+ transform.yield
}
} // module
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
index 33fa29e..29540c2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
@@ -1,7 +1,7 @@
module attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op
: (!transform.any_op) -> !transform.any_op
transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
: (!transform.any_op) -> !transform.any_op
@@ -14,6 +14,6 @@
} : !transform.any_op
transform.iree.apply_licm %func_op : !transform.any_op
transform.iree.apply_cse %func_op : !transform.any_op
- transform.yield
+ transform.yield
}
} // module
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_pack_shared_memory_alloc.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_pack_shared_memory_alloc.mlir
index 504950e..2654797 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_pack_shared_memory_alloc.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_pack_shared_memory_alloc.mlir
@@ -7,7 +7,7 @@
// CHECK: memref.view %[[PACKED]][%[[C0]]][] : memref<1024xi8, #gpu.address_space<workgroup>> to memref<128xf32, #gpu.address_space<workgroup>>
// CHECK: %[[C512:.+]] = arith.constant 512 : index
// CHECK: memref.view %[[PACKED]][%[[C512]]][] : memref<1024xi8, #gpu.address_space<workgroup>> to memref<128xf32, #gpu.address_space<workgroup>>
-// CHECK: nvgpu.device_async_create_group
+// CHECK: nvgpu.device_async_create_group
// CHECK: nvgpu.device_async_wait %0 {numGroups = 0 : i32}
// CHECK: gpu.barrier
// CHECK: memref.view %[[PACKED]][%[[C0]]][] : memref<1024xi8, #gpu.address_space<workgroup>> to memref<32xf32, #gpu.address_space<workgroup>>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir
index 757baf4..3d1ce7f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir
@@ -33,7 +33,7 @@
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op
: (!transform.any_op) -> !transform.any_op
%promoted_matmul, %alloc_0, %alloc_1 =
- transform.iree.promote_operands %matmul [0, 1]
+ transform.iree.promote_operands %matmul [0, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
// Late canonicalizations to cleanup and pass the checks.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
index 6bfe345..82334e9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
@@ -57,7 +57,7 @@
} : !transform.any_op
transform.iree.vector.vector_to_mma_conversion %func { use_wmma } : (!transform.any_op) -> ()
- // Apply canonicalization post-hoc to trigger DCE and pass the test
+ // Apply canonicalization post-hoc to trigger DCE and pass the test
// (i.e. all vector.contract are dead).
// TODO: consider having the vector_to_mma_conversion do the DCE automatically.
transform.apply_patterns to %func {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir
index 1558636..140bf1b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/vector_to_gpu.mlir
@@ -170,12 +170,12 @@
%mask1 = vector.create_mask %i : vector<1xi1>
// CHECK: %[[CP1:.*]] = nvgpu.device_async_copy {{.*}}, {{.*}}, 1, %[[I]]
%2 = vector.transfer_read %a[%c0, %c4], %cst_0, %mask1 {in_bounds = [true]} : memref<1024x1024xf32>, vector<1xf32>
- vector.transfer_write %2, %0[%c0, %c4, %c0] {in_bounds = [true]} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
+ vector.transfer_write %2, %0[%c0, %c4, %c0] {in_bounds = [true]} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
// CHECK: %[[G:.*]] = nvgpu.device_async_create_group %[[CP0]], %[[CP1]]
// CHECK: nvgpu.device_async_wait %[[G]]
%3 = vector.transfer_read %a[%c0, %c4], %cst_0, %mask1 {in_bounds = [true]} : memref<1024x1024xf32>, vector<1xf32>
- vector.transfer_write %3, %0[%c0, %c4, %c0], %mask1 {in_bounds = [true]} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
+ vector.transfer_write %3, %0[%c0, %c4, %c0], %mask1 {in_bounds = [true]} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
// We cannot generate async copy if the write is masked.
// CHECK-NOT: nvgpu.device_async_copy
return
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
index bd435a5..8aec774 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
@@ -136,13 +136,13 @@
func.func @subgroup_reduce_dynamic() {
%c32_i64 = arith.constant 32 : i64
%cst = arith.constant 0.000000e+00 : f32
- %cst_0 = arith.constant 2.000000e+00 : f32
+ %cst_0 = arith.constant 2.000000e+00 : f32
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load[0] : i32
- %1 = hal.interface.constant.load[1] : i32
+ %1 = hal.interface.constant.load[1] : i32
%2 = arith.extui %0 : i32 to i64
%3 = arith.extui %1 : i32 to i64
- %4 = arith.shli %3, %c32_i64 : i64
+ %4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
%7 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8xf32>>
@@ -155,7 +155,7 @@
^bb0(%in: f32, %out: f32):
%14 = math.powf %in, %cst_0 : f32
%15 = arith.addf %14, %out : f32
- linalg.yield %15 : f32
+ linalg.yield %15 : f32
} -> tensor<8xf32>
flow.dispatch.tensor.store %13, %7, offsets = [0], sizes = [8], strides = [1] : tensor<8xf32> -> !flow.dispatch.tensor<writeonly:tensor<8xf32>>
return
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
index 74fc9b0..3ec11fb 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
@@ -173,7 +173,7 @@
flow.dispatch.tensor.store %empty, %binding, offsets = [0, 0], sizes = [%wl0, %wl1], strides = [1, 1]
: tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%wl0, %wl1}
flow.return
- }
+ }
return %result : tensor<?x?xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir
index 5e4ec9b..6ebe9e4 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir
@@ -8,10 +8,10 @@
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
%input = util.global.load.indirect %input_ptr : !util.ptr<!type> -> !type
%output = tensor.empty() : !type
-
+
// Can collapse All (d0, d1, d2, d3, d4, d5)
- %6 = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>,
+ %6 = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>,
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
}
@@ -43,10 +43,10 @@
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
%input = util.global.load.indirect %input_ptr : !util.ptr<!type> -> !type
%output = tensor.empty() : !type
-
+
// Can collapse (d0, d1) and (d5, d6)
- %6 = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d4, d3, d5, d6)>,
+ %6 = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d4, d3, d5, d6)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>],
iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "parallel", "parallel"]
}
@@ -78,10 +78,10 @@
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
%input = util.global.load.indirect %input_ptr : !util.ptr<!type> -> !type
%output = tensor.empty() : !type
-
+
// Can collapse (d0, d1) and (d3, d4, d5, d6, d7)
- %result = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
+ %result = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>],
iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "parallel", "parallel", "parallel"]
}
@@ -112,10 +112,10 @@
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
%input = util.global.load.indirect %input_ptr : !util.ptr<!type> -> !type
%output = tensor.empty() : !type
-
+
// Can collapse (d0, d1) and (d6, d7)
- %result = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
+ %result = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d5, d4, d6, d7)>],
iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "parallel", "parallel", "parallel"]
}
@@ -149,16 +149,16 @@
%input2 = util.global.load.indirect %input_ptr : !util.ptr<!type> -> !type
%input3 = util.global.load.indirect %input_ptr : !util.ptr<!type> -> !type
%output = tensor.empty() : !type
-
+
// Can collapse (d0, d1) and (d6, d7)
- %result = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d4, d3, d5, d6, d7)>,
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d4, d3, d2, d5, d6, d7)>,
+ %result = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
+ affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d4, d3, d5, d6, d7)>,
+ affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d4, d3, d2, d5, d6, d7)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "parallel", "parallel"]
}
- ins(%input, %input2, %input3 : !type, !type, !type)
+ ins(%input, %input2, %input3 : !type, !type, !type)
outs(%output : !type) {
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
linalg.yield %arg1 : f32
@@ -190,10 +190,10 @@
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
%input = util.global.load.indirect %input_ptr : !util.ptr<!type> -> !type
%output = tensor.empty() : !type
-
+
// Can collapse (d2, d3) and (d6, d7)
- %result = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
+ %result = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d5, d4, d6, d7)>],
iterator_types = ["parallel", "reduction", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
}
@@ -226,9 +226,9 @@
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type_in>
%input = util.global.load.indirect %input_ptr : !util.ptr<!type_in> -> !type_in
%output = tensor.empty() : !type_out
-
- %result = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>,
+
+ %result = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>,
affine_map<(d0, d1, d2, d3) -> (d1, d2, d3, d0)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]
}
@@ -258,8 +258,8 @@
%c0 = arith.constant 0 : index
%output = tensor.empty() : !type_out
// Can collapse (d3, d0, d1)
- %6 = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d0, d1, d5)>,
+ %6 = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d0, d1, d5)>,
affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3, d0, d1, d4, d5)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
}
@@ -291,8 +291,8 @@
%c0 = arith.constant 0 : index
%input = tensor.empty() : !type_in
%output = tensor.empty() : !type_out
- %6 = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2) -> (d0, d2)>,
+ %6 = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2) -> (d0, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel"]
}
@@ -318,10 +318,10 @@
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type_in>
%input = util.global.load.indirect %input_ptr : !util.ptr<!type_in> -> !type_in
%output = tensor.empty() : !type_out
-
+
// Can collapse (d0, d1) and (d6, d7)
- %result = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
+ %result = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>,
affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d3, d5, d4, d6, d7)>],
iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "parallel", "parallel", "parallel"]
}
@@ -352,7 +352,7 @@
// Can collapse (d1, d3, d0)
%result = linalg.generic {
- indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d1, d3, d0)>,
+ indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d1, d3, d0)>,
affine_map<(d0, d1, d2, d3, d4) -> (d2, d1, d3, d0, d4)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
ins(%input : !type_in) outs(%output : !type_out) {
@@ -378,9 +378,9 @@
%output = tensor.empty() : !type_out
// Can collapse (d1, d0)
- %result = linalg.generic {
+ %result = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d1, d0)>],
- iterator_types = ["parallel", "parallel"] }
+ iterator_types = ["parallel", "parallel"] }
ins(%input : !type_in) outs(%output : !type_out) {
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
@@ -432,12 +432,12 @@
%output1 = tensor.empty() : !type
%output2 = tensor.empty() : !type
%output3 = tensor.empty() : !type
-
- %6, %7, %8, %9 = linalg.generic { indexing_maps = [
- affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
- affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
- affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
- affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
+
+ %6, %7, %8, %9 = linalg.generic { indexing_maps = [
+ affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
+ affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
+ affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
+ affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>,
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir
index d5c0ea0..966d54a 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir
@@ -425,7 +425,7 @@
return %0#1 : tensor<?xf32>
}
// CHECK: func @no_yield_dead_results
-// CHECK: %[[RESULT:.+]] = flow.dispatch.region
+// CHECK: %[[RESULT:.+]] = flow.dispatch.region
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK: flow.return %[[GENERIC]]#1
// CHECK: return %[[RESULT]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir
index 85f263b..adabc66 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir
@@ -22,10 +22,10 @@
%0 = flow.dispatch.region -> (tensor<10x20xf32>) {
%cst_0 = arith.constant dense<1.000000e+00> : tensor<10x20xf32>
cf.cond_br %arg0, ^bb1, ^bb2
- ^bb1: // pred: ^bb0
+ ^bb1: // pred: ^bb0
%2 = tensor.empty() : tensor<10x20xf32>
flow.return %2 : tensor<10x20xf32>
- ^bb2: // pred: ^bb0
+ ^bb2: // pred: ^bb0
flow.return %cst_0 : tensor<10x20xf32>
}
return %0 : tensor<10x20xf32>
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/raise_special_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/raise_special_ops.mlir
index f4a3b70..802c855 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/raise_special_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/raise_special_ops.mlir
@@ -87,7 +87,7 @@
} -> tensor<10x4096x4096xf16>
%229 = tensor.empty() : tensor<10x4096xf16>
%230 = linalg.fill ins(%cst_121 : f16) outs(%229 : tensor<10x4096xf16>) -> tensor<10x4096xf16>
- %231 = linalg.generic
+ %231 = linalg.generic
{indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
iterator_types = ["parallel", "parallel", "reduction"]}
ins(%228 : tensor<10x4096x4096xf16>) outs(%230 : tensor<10x4096xf16>) {
@@ -95,7 +95,7 @@
%5290 = arith.addf %in, %out : f16
linalg.yield %5290 : f16
} -> tensor<10x4096xf16>
- %232 = linalg.generic
+ %232 = linalg.generic
{indexing_maps = [
affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1)>,
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
index cab7e8b..b34db57 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
@@ -114,7 +114,7 @@
%cst = complex.constant [3.000000e+00 : f32, 1.000000e+01 : f32] : complex<f32>
%0 = stream.tensor.sizeof tensor<6xcomplex<f32>> : index
// CHECK: %[[I64NUMBER:.+]] = complex.constant [3.000000e+00 : f32, 1.000000e+01 : f32] : complex<f32>
- // CHECK: %[[BITCAST:.+]] = complex.bitcast %[[I64NUMBER]] : complex<f32> to i64
+ // CHECK: %[[BITCAST:.+]] = complex.bitcast %[[I64NUMBER]] : complex<f32> to i64
// CHECK: %[[SPLAT_RES:.+]] = stream.async.splat %[[BITCAST]]
%1 = stream.tensor.splat %cst : complex<f32> -> tensor<6xcomplex<f32>> in !stream.resource<*>{%0}
// CHECK: return %[[SPLAT_RES]]
@@ -126,7 +126,7 @@
// CHECK-LABEL: @denseTensorSplatDynamicComplexF32
func.func @denseTensorSplatDynamicComplexF32(%arg0: !stream.resource<*>, %arg1: complex<f32>) -> (!stream.resource<*>) {
%0 = stream.tensor.sizeof tensor<6xcomplex<f32>> : index
- // CHECK: %[[BITCAST:.+]] = complex.bitcast %arg1 : complex<f32> to i64
+ // CHECK: %[[BITCAST:.+]] = complex.bitcast %arg1 : complex<f32> to i64
// CHECK: %[[SPLAT_RES:.+]] = stream.async.splat %[[BITCAST]]
%1 = stream.tensor.splat %arg1 : complex<f32> -> tensor<6xcomplex<f32>> in !stream.resource<*>{%0}
// CHECK: return %[[SPLAT_RES]]
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
index 188c211..23934c4 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
@@ -170,7 +170,7 @@
cf.switch %flag : i32, [
default: ^bb1(%resource0, %resource1 : !stream.resource<external>, !stream.resource<transient>),
0: ^bb1(%resource0, %resource1 : !stream.resource<external>, !stream.resource<transient>)
- ]
+ ]
// CHECK: ^bb1(%[[BB1_RESOURCE0:.+]]: !stream.resource<external>, %[[BB1_STORAGE_SIZE0:.+]]: index, %[[BB1_OFFSET0:.+]]: index, %[[BB1_LENGTH0:.+]]: index, %[[BB1_RESOURCE1:.+]]: !stream.resource<transient>, %[[BB1_STORAGE_SIZE1:.+]]: index, %[[BB1_OFFSET1:.+]]: index, %[[BB1_LENGTH1:.+]]: index):
^bb1(%bb1_resource0: !stream.resource<external>, %bb1_resource1: !stream.resource<transient>):
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir
index af4585f..191992d 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir
@@ -258,7 +258,7 @@
// CHECK: %[[CST:.+]] = arith.constant
// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[CST]]
// CHECK: %[[RESULT:.+]] = flow.dispatch.region
-// CHECK: %[[ADD:.+]] = linalg.generic
+// CHECK: %[[ADD:.+]] = linalg.generic
// CHECK-SAME: %[[EXPANDED]]
// CHECK: flow.return %[[ADD]]
// CHECK: return %[[RESULT]]
@@ -277,7 +277,7 @@
func.func @main() -> (tensor<128xi8>) {
%0 = arith.constant dense<0> : tensor<32xi8>
%1 = arith.constant dense<0> : tensor<32xi8>
- %2 = "iree_unregistered.const_expr"(%0, %1)
+ %2 = "iree_unregistered.const_expr"(%0, %1)
: (tensor<32xi8>, tensor<32xi8>) -> tensor<128xi8>
return %2 : tensor<128xi8>
}
@@ -294,7 +294,7 @@
func.func @main() -> (tensor<129xi8>) {
%0 = arith.constant dense<0> : tensor<32xi8>
%1 = arith.constant dense<0> : tensor<32xi8>
- %2 = "iree_unregistered.const_expr"(%0, %1)
+ %2 = "iree_unregistered.const_expr"(%0, %1)
: (tensor<32xi8>, tensor<32xi8>) -> tensor<129xi8>
return %2 : tensor<129xi8>
}
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir
index a33639a..225a908 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir
@@ -8,7 +8,7 @@
%base_buffer, %offset, %sizes:2, %strides:2 = vmvx.get_buffer_descriptor %0 : memref<64x64xf32, #map0> -> !util.buffer, index, index, index, index, index
return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : !util.buffer, index, index, index, index, index
}
-// CHECK: #[[MAP:.+]] = affine_map<()[s0, s1, s2, s3, s4] -> (s0 + s1 * s2 + s3 * s4)>
+// CHECK: #[[MAP:.+]] = affine_map<()[s0, s1, s2, s3, s4] -> (s0 + s1 * s2 + s3 * s4)>
// CHECK: func @resolve_subview(
// CHECK-DAG: %[[BASE_BUFFER:.+]], %[[BASE_OFFSET:.+]], %[[BASE_SIZES:.+]]:2, %[[BASE_STRIDES:.+]]:2 = vmvx.get_buffer_descriptor %arg0
// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/expand_vectors.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/expand_vectors.mlir
index bad1503..c6dfcd8 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/expand_vectors.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/expand_vectors.mlir
@@ -133,10 +133,10 @@
%c0 = arith.constant 0 : index
%dim = tensor.dim %arg0, %c0 : tensor<?xbf16>
%0 = tensor.empty(%dim) : tensor<?xf32>
- %casted0 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
- affine_map<(d0) -> (d0)>],
- iterator_types = ["parallel"]}
- ins(%arg0 : tensor<?xbf16>)
+ %casted0 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
+ affine_map<(d0) -> (d0)>],
+ iterator_types = ["parallel"]}
+ ins(%arg0 : tensor<?xbf16>)
outs(%0 : tensor<?xf32>) {
^bb0(%in: bf16, %out: f32):
%2 = arith.extf %in : bf16 to f32
@@ -169,10 +169,10 @@
%c0 = arith.constant 0 : index
%dim = tensor.dim %arg1, %c0 : tensor<?xi8>
%0 = tensor.empty(%dim) : tensor<?xi32>
- %casted1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
- affine_map<(d0) -> (d0)>],
- iterator_types = ["parallel"]}
- ins(%arg1 : tensor<?xi8>)
+ %casted1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
+ affine_map<(d0) -> (d0)>],
+ iterator_types = ["parallel"]}
+ ins(%arg1 : tensor<?xi8>)
outs(%0 : tensor<?xi32>) {
^bb0(%in: i8, %out: i32):
%2 = arith.extsi %in : i8 to i32
@@ -206,10 +206,10 @@
%c1 = arith.constant 1 : index
%dim = tensor.dim %arg0, %c1 : tensor<3x?xf16>
%0 = tensor.empty(%dim) : tensor<3x?xf32>
- %casted0 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor<3x?xf16>)
+ %casted0 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel"]}
+ ins(%arg0 : tensor<3x?xf16>)
outs(%0 : tensor<3x?xf32>) {
^bb0(%in: f16, %out: f32):
%2 = arith.extf %in : f16 to f32
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir
index 3d0d010..fd8d196 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir
@@ -7,11 +7,11 @@
%1 = tensor.empty() : tensor<11008x32x128xf32>
%2 = linalg.fill ins(%cst : f32) outs(%0 : tensor<11008xf32>) -> tensor<11008xf32>
%3 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
- iterator_types = ["parallel", "parallel", "parallel"]}
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+ iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0, %arg2, %arg3 : tensor<11008x32x128xi4>, tensor<11008x32xf32>, tensor<11008x32xf32>) outs(%1 : tensor<11008x32x128xf32>) {
^bb0(%in: i4, %in_0: f32, %in_1: f32, %out: f32):
%5 = arith.extui %in : i4 to i32
@@ -21,10 +21,10 @@
linalg.yield %8 : f32
} -> tensor<11008x32x128xf32>
%4 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0)>],
- iterator_types = ["parallel", "reduction", "reduction"]}
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0)>],
+ iterator_types = ["parallel", "reduction", "reduction"]}
ins(%arg1, %3 : tensor<32x128xf32>, tensor<11008x32x128xf32>) outs(%2 : tensor<11008xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%5 = arith.mulf %in, %in_0 : f32
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/lift_generic_to_transpose_batch_matmul.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/lift_generic_to_transpose_batch_matmul.mlir
index e8eb5ee..4854bdd 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/lift_generic_to_transpose_batch_matmul.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/lift_generic_to_transpose_batch_matmul.mlir
@@ -5,11 +5,11 @@
%c0_i32 = arith.constant 0 : i32
%0 = tensor.empty() : tensor<11008x32xi32>
%1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<11008x32xi32>) -> tensor<11008x32xi32>
- %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%arg0, %arg1 : tensor<32x128xi16>, tensor<11008x32x128xi4>)
+ %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%arg0, %arg1 : tensor<32x128xi16>, tensor<11008x32x128xi4>)
outs(%1 : tensor<11008x32xi32>) {
^bb0(%in: i16, %in_0: i4, %out: i32):
%3 = arith.extsi %in : i16 to i32
@@ -52,11 +52,11 @@
%c0_i32 = arith.constant 0 : i32
%0 = tensor.empty() : tensor<11008x32xi32>
%1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<11008x32xi32>) -> tensor<11008x32xi32>
- %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d2, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%arg0, %arg1 : tensor<11008x32x128xi4>, tensor<128x32xi16>)
+ %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d2, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%arg0, %arg1 : tensor<11008x32x128xi4>, tensor<128x32xi16>)
outs(%1 : tensor<11008x32xi32>) {
^bb0(%in: i4, %in_0: i16, %out: i32):
%3 = arith.extui %in : i4 to i32
@@ -101,11 +101,11 @@
%c0_i32 = arith.constant 0 : i32
%0 = tensor.empty() : tensor<11008x32x8xi32>
%1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<11008x32x8xi32>) -> tensor<11008x32x8xi32>
- %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>],
- iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
- ins(%arg0, %arg1 : tensor<8x32x128xi16>, tensor<11008x32x128xi4>)
+ %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>,
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>],
+ iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
+ ins(%arg0, %arg1 : tensor<8x32x128xi16>, tensor<11008x32x128xi4>)
outs(%1 : tensor<11008x32x8xi32>) {
^bb0(%in: i16, %in_0: i4, %out: i32):
%3 = arith.extsi %in : i16 to i32
@@ -151,11 +151,11 @@
%dim = tensor.dim %arg0, %c1 : tensor<8x?x128xi16>
%0 = tensor.empty(%dim) : tensor<11008x?x8xi32>
%1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<11008x?x8xi32>) -> tensor<11008x?x8xi32>
- %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>],
- iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
- ins(%arg0, %arg1 : tensor<8x?x128xi16>, tensor<11008x?x128xi4>)
+ %2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>,
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>],
+ iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
+ ins(%arg0, %arg1 : tensor<8x?x128xi16>, tensor<11008x?x128xi4>)
outs(%1 : tensor<11008x?x8xi32>) {
^bb0(%in: i16, %in_0: i4, %out: i32):
%3 = arith.extsi %in : i16 to i32
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir
index dffb238..289f372 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir
@@ -24,7 +24,7 @@
return %1 : tensor<?x?xf32>
}
// CHECK: func @zero_sized_tensor_insert(%[[ARG0:.+]]: tensor<?x?xf32>
-// CHECK: return %[[ARG0]]
+// CHECK: return %[[ARG0]]
// -----
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir
index 2deef49..aac9f7f 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir
@@ -676,10 +676,10 @@
%arg2 : tensor<64x100x500xi32>) -> tensor<64x100x500xi32> {
%0 = tensor.empty() : tensor<64x250x500xi32>
%casted0 = arith.extui %arg0 : tensor<64x100x250xi8> to tensor<64x100x250xi32>
- %casted1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
- iterator_types = ["parallel", "parallel", "parallel"]}
- ins(%arg1 : tensor<64x250x500xi8>)
+ %casted1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+ iterator_types = ["parallel", "parallel", "parallel"]}
+ ins(%arg1 : tensor<64x250x500xi8>)
outs(%0 : tensor<64x250x500xi32>) {
^bb0(%in: i8, %out: i32):
%2 = arith.extsi %in : i8 to i32
@@ -845,7 +845,7 @@
}
// CHECK: func @batch_matmul_f32f32f32_narrow_MN(
-// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x4x250xf32, #iree_linalg_ext.encoding<user = BATCH_MATMUL, role = LHS, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index>>
+// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x4x250xf32, #iree_linalg_ext.encoding<user = BATCH_MATMUL, role = LHS, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index>>
// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x250x2xf32, #iree_linalg_ext.encoding<user = BATCH_MATMUL, role = RHS, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index>>
-// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x4x2xf32, #iree_linalg_ext.encoding<user = BATCH_MATMUL, role = RESULT, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index>>
+// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x4x2xf32, #iree_linalg_ext.encoding<user = BATCH_MATMUL, role = RESULT, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index>>
// CHECK: linalg.batch_matmul
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/convert_to_loops.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/convert_to_loops.mlir
index 5bc4e3a..43d9ad6 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/convert_to_loops.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/convert_to_loops.mlir
@@ -1417,7 +1417,7 @@
func.func @unpack(%arg0: memref<1x4x6x6x2xf32>, %arg1: memref<1x6x6x8xf32>) {
iree_linalg_ext.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %arg1 : (memref<1x4x6x6x2xf32> memref<1x6x6x8xf32>)
- return
+ return
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tile_and_decompose_attention.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tile_and_decompose_attention.mlir
index 322f8ab..4691721 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tile_and_decompose_attention.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tile_and_decompose_attention.mlir
@@ -170,8 +170,8 @@
// TILING-SAME: [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x?xf32>
// TILING: %[[EXTRACTED_SLICE_4:.+]] = tensor.extract_slice %[[QUERY]][0, 0, 0] [1, %[[DIM]], %[[DIM_0]]] [1, 1,
// TILING-SAME: 1] : tensor<?x?x?xf32> to tensor<?x?xf32>
-// TILING: %[[TILED_ATTENTION]]:3 = iree_linalg_ext.attention ins(%[[EXTRACTED_SLICE_4]], %[[EXTRACTED_SLICE]], %[[EXTRACTED_SLICE_3]] :
-// TILING-SAME: outs(%[[ARG7]], %[[ARG8]], %[[ARG9]] :
+// TILING: %[[TILED_ATTENTION]]:3 = iree_linalg_ext.attention ins(%[[EXTRACTED_SLICE_4]], %[[EXTRACTED_SLICE]], %[[EXTRACTED_SLICE_3]] :
+// TILING-SAME: outs(%[[ARG7]], %[[ARG8]], %[[ARG9]] :
// TILING-SAME: -> tensor<?x?xf32>, tensor<?xf32>, tensor<?xf32>
// TILING: scf.yield %[[TILED_ATTENTION]]#0, %[[TILED_ATTENTION]]#1, %[[TILED_ATTENTION]]#2 : tensor<?x?xf32>, tensor<?xf32>, tensor<?xf32>
// TILING: }
diff --git a/tests/e2e/stablehlo_ops/philox.mlir b/tests/e2e/stablehlo_ops/philox.mlir
index 119d8b7..24e5d4c 100644
--- a/tests/e2e/stablehlo_ops/philox.mlir
+++ b/tests/e2e/stablehlo_ops/philox.mlir
@@ -1,5 +1,5 @@
func.func @philox_i32() {
- %inp = util.unfoldable_constant dense<[1, 2, 3, 4]> : tensor<4xi32>
+ %inp = util.unfoldable_constant dense<[1, 2, 3, 4]> : tensor<4xi32>
%0:2 = "stablehlo.rng_bit_generator"(%inp) {rng_algorithm = #stablehlo<rng_algorithm PHILOX>} : (tensor<4xi32>) -> (tensor<4xi32>, tensor<8xi32>)
check.expect_eq_const(%0#1, dense<[-1788415499, 854201270, -855525523, 2043148971, 110723240, 146396481, -1258660138, -1968502964]> : tensor<8xi32>) : tensor<8xi32>
return
diff --git a/tests/e2e/stablehlo_ops/three_fry.mlir b/tests/e2e/stablehlo_ops/three_fry.mlir
index b06aef7..789476f 100644
--- a/tests/e2e/stablehlo_ops/three_fry.mlir
+++ b/tests/e2e/stablehlo_ops/three_fry.mlir
@@ -1,5 +1,5 @@
func.func @three_fry_i32() {
- %inp = util.unfoldable_constant dense<[1, 2, 3, 4]> : tensor<4xi32>
+ %inp = util.unfoldable_constant dense<[1, 2, 3, 4]> : tensor<4xi32>
%0:2 = "stablehlo.rng_bit_generator"(%inp) {rng_algorithm = #stablehlo<rng_algorithm THREE_FRY>} : (tensor<4xi32>) -> (tensor<4xi32>, tensor<8xi32>)
check.expect_eq_const(%0#1, dense<[-1997982863, -261361928, -1008514867, 1226850200, 1419974734, -277475325, 1033030661, -1926332264]> : tensor<8xi32>) : tensor<8xi32>
return
diff --git a/tests/transform_dialect/cpu/attention_codegen_spec.mlir b/tests/transform_dialect/cpu/attention_codegen_spec.mlir
index 73ffab9..c011025 100644
--- a/tests/transform_dialect/cpu/attention_codegen_spec.mlir
+++ b/tests/transform_dialect/cpu/attention_codegen_spec.mlir
@@ -65,9 +65,9 @@
transform.memref.erase_dead_alloc_and_stores %func_8 : (!transform.any_op) -> ()
transform.yield
} // codegen
-
+
// Find `hal.executable.variant`.
- transform.named_sequence @match_variant_for_codegen(%root: !transform.any_op {transform.readonly})
+ transform.named_sequence @match_variant_for_codegen(%root: !transform.any_op {transform.readonly})
-> !transform.any_op {
transform.match.operation_name %root ["hal.executable.variant"] : !transform.any_op
transform.yield %root : !transform.any_op
@@ -78,7 +78,7 @@
transform.foreach_match in %root
@match_variant_for_codegen -> @codegen
: (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir b/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir
index f8ab046..622db6a 100644
--- a/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir
+++ b/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir
@@ -59,6 +59,6 @@
matmul_packed_sizes = [8, 16, 32]
matmul_inner_dims_order = [0, 1, 2]
: (!transform.any_op) -> !transform.op<"linalg.generic">
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cpu/contraction-packing.mlir b/tests/transform_dialect/cpu/contraction-packing.mlir
index d103303..d8105de 100644
--- a/tests/transform_dialect/cpu/contraction-packing.mlir
+++ b/tests/transform_dialect/cpu/contraction-packing.mlir
@@ -26,9 +26,9 @@
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
// CHECK: linalg.generic
- // CHECK-SAME: indexing_maps = [#[[$map_lhs]], #[[$map_rhs]], #[[$map_res]]]
+ // CHECK-SAME: indexing_maps = [#[[$map_lhs]], #[[$map_rhs]], #[[$map_res]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]}
- // CHECK-SAME: ins(%{{.*}} : tensor<155x18x8x32xf32>, tensor<18x56x16x32xf32>)
+ // CHECK-SAME: ins(%{{.*}} : tensor<155x18x8x32xf32>, tensor<18x56x16x32xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<155x56x8x16xf32>)
// CHECK: tensor.unpack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
%0 = linalg.matmul
@@ -54,9 +54,9 @@
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
// CHECK: linalg.generic
- // CHECK-SAME: indexing_maps = [#[[$map_tlhs]], #[[$map_rhs]], #[[$map_res]]]
+ // CHECK-SAME: indexing_maps = [#[[$map_tlhs]], #[[$map_rhs]], #[[$map_res]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]}
- // CHECK-SAME: ins(%{{.*}} : tensor<18x155x8x32xf32>, tensor<18x56x16x32xf32>)
+ // CHECK-SAME: ins(%{{.*}} : tensor<18x155x8x32xf32>, tensor<18x56x16x32xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<155x56x8x16xf32>)
// CHECK: tensor.unpack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
%0 = linalg.generic #matmul_tnn_trait
@@ -87,9 +87,9 @@
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [16, 32]
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
// CHECK: linalg.generic
- // CHECK-SAME: indexing_maps = [#[[$map_lhs]], #[[$map_trhs]], #[[$map_res]]]
+ // CHECK-SAME: indexing_maps = [#[[$map_lhs]], #[[$map_trhs]], #[[$map_res]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]}
- // CHECK-SAME: ins(%{{.*}} : tensor<155x18x8x32xf32>, tensor<56x18x16x32xf32>)
+ // CHECK-SAME: ins(%{{.*}} : tensor<155x18x8x32xf32>, tensor<56x18x16x32xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<155x56x8x16xf32>)
// CHECK: tensor.unpack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16]
%0 = linalg.generic #matmul_ntn_trait
@@ -120,9 +120,9 @@
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 32]
// CHECK: tensor.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [8, 16]
// CHECK: linalg.generic
- // CHECK-SAME: indexing_maps = [#[[$map_lhs]], #[[$map_rhs]], #[[$map_tres]]]
+ // CHECK-SAME: indexing_maps = [#[[$map_lhs]], #[[$map_rhs]], #[[$map_tres]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]}
- // CHECK-SAME: ins(%{{.*}} : tensor<155x18x8x32xf32>, tensor<18x56x16x32xf32>)
+ // CHECK-SAME: ins(%{{.*}} : tensor<155x18x8x32xf32>, tensor<18x56x16x32xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<56x155x8x16xf32>)
// CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [8, 16]
%0 = linalg.generic #matmul_nnt_trait
@@ -140,15 +140,15 @@
transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
%matmul = transform.structured.match interface{LinalgOp} in %module_op
: (!transform.any_op) -> (!transform.any_op)
-
- // Generalized packing rewrite extracts a gemm from any linalg op that contains
+
+ // Generalized packing rewrite extracts a gemm from any linalg op that contains
// one. This acts as a powerful normalization step: after this point, we have a
// gemm (i.e. 3-D contraction with (m,n,k)=(8,16,32) ) on the 3 most minor
// dimensions.
transform.structured.pack_greedily %matmul
matmul_packed_sizes = [8, 16, 32] matmul_inner_dims_order = [0, 1, 2]
: (!transform.any_op) -> !transform.op<"linalg.generic">
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cpu/fold_tensor_slice_into_transfer.mlir b/tests/transform_dialect/cpu/fold_tensor_slice_into_transfer.mlir
index 18c675a..2fb784a 100644
--- a/tests/transform_dialect/cpu/fold_tensor_slice_into_transfer.mlir
+++ b/tests/transform_dialect/cpu/fold_tensor_slice_into_transfer.mlir
@@ -105,6 +105,6 @@
transform.apply_patterns to %func_op {
transform.apply_patterns.iree.fold_tensor_slice_into_transfer
} : !transform.op<"func.func">
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir b/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir
index df5231d..34eaa2f 100644
--- a/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir
+++ b/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir
@@ -22,6 +22,6 @@
// =========================================================
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cpu/transform_library.mlir b/tests/transform_dialect/cpu/transform_library.mlir
index b390561..3754c47 100644
--- a/tests/transform_dialect/cpu/transform_library.mlir
+++ b/tests/transform_dialect/cpu/transform_library.mlir
@@ -1,18 +1,18 @@
module attributes { transform.with_named_sequence } {
- transform.named_sequence @custom_matmul(%variant_op: !transform.any_op {transform.consumed}) {
+ transform.named_sequence @custom_matmul(%variant_op: !transform.any_op {transform.consumed}) {
%0 = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op
-
+
%tiled_generic, %forall =
- transform.structured.tile_using_forall %0 num_threads [2]
+ transform.structured.tile_using_forall %0 num_threads [2]
// TODO: IREE needs own workgroup mapping attribute.
( mapping = [#gpu.block<x>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall
: (!transform.any_op) -> ()
-
+
// Canonicalization/CSE is needed before bufferization otherwise unnecessary
// allocs will be created.
- %func_op = transform.structured.match ops{["func.func"]} in %variant_op
+ %func_op = transform.structured.match ops{["func.func"]} in %variant_op
: (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func_op {
transform.apply_patterns.iree.fold_fill_into_pad
@@ -22,10 +22,10 @@
} : !transform.any_op
transform.iree.apply_cse %func_op : !transform.any_op
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> (!transform.any_op)
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
: (!transform.any_op) -> !transform.any_op
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> ()
-
+
// CSE is needed on the workgroup_count region to pass this particular test.
transform.iree.apply_cse %variant_op_3 : !transform.any_op
%exports = transform.structured.match ops{["hal.executable.export"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
diff --git a/tests/transform_dialect/cuda/double_mma_layout_analysis_codegen_spec.mlir b/tests/transform_dialect/cuda/double_mma_layout_analysis_codegen_spec.mlir
index 9982106..a14398d 100644
--- a/tests/transform_dialect/cuda/double_mma_layout_analysis_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/double_mma_layout_analysis_codegen_spec.mlir
@@ -68,6 +68,6 @@
%func_10 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
%func_11 = transform.iree.layout_analysis_and_distribution %func_10 : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cuda/mma.mlir b/tests/transform_dialect/cuda/mma.mlir
index 2093e1c..42aed57 100644
--- a/tests/transform_dialect/cuda/mma.mlir
+++ b/tests/transform_dialect/cuda/mma.mlir
@@ -37,7 +37,7 @@
} : !transform.any_op
transform.iree.vector.vector_to_mma_conversion %func { use_wmma } : (!transform.any_op) -> ()
- // Apply canonicalization post-hoc to trigger DCE and pass the test
+ // Apply canonicalization post-hoc to trigger DCE and pass the test
// (i.e. all vector.contract are dead).
// TODO: consider having the vector_to_mma_conversion do the DCE automatically.
transform.apply_patterns to %func {
@@ -85,7 +85,7 @@
} : !transform.any_op
transform.iree.vector.vector_to_mma_conversion %func { use_mma_sync } : (!transform.any_op) -> ()
- // Apply canonicalization post-hoc to trigger DCE and pass the test
+ // Apply canonicalization post-hoc to trigger DCE and pass the test
// (i.e. all vector.contract are dead).
// TODO: consider having the vector_to_mma_conversion do the DCE automatically.
transform.apply_patterns to %func {
diff --git a/tests/transform_dialect/cuda/mma_using_layout_analysis_codegen_spec.mlir b/tests/transform_dialect/cuda/mma_using_layout_analysis_codegen_spec.mlir
index cea3833..9c1c8bf 100644
--- a/tests/transform_dialect/cuda/mma_using_layout_analysis_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/mma_using_layout_analysis_codegen_spec.mlir
@@ -19,7 +19,7 @@
// Promote operands in order to test loading from shared memory.
%matmul_2 = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%promoted_matmul, %alloc_0, %alloc_1 =
- transform.iree.promote_operands %matmul_2 [0, 1]
+ transform.iree.promote_operands %matmul_2 [0, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
@@ -70,6 +70,6 @@
// ===========================================================================
%func_10 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
%func_11 = transform.iree.layout_analysis_and_distribution %func_10 : (!transform.any_op) -> (!transform.any_op)
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cuda/reduction.mlir b/tests/transform_dialect/cuda/reduction.mlir
index 500eba6..2642b03 100644
--- a/tests/transform_dialect/cuda/reduction.mlir
+++ b/tests/transform_dialect/cuda/reduction.mlir
@@ -72,6 +72,6 @@
// CHECK: vector.reduction <add>
// CHECK: vector.transfer_write
// CHECK: gpu.barrier
-
+
// EXEC: result[0]: hal.buffer_view
// EXEC-NEXT: 8xf32=64 64 64 64 64 64 64 64
diff --git a/tests/transform_dialect/cuda/reduction_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
index 0ab6d6a..13dc3f2 100644
--- a/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
@@ -26,16 +26,16 @@
// Step 3. Second level of tiling + fusion parallelizes to threads.
// ===========================================================================
- %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op
+ %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op
: (!transform.any_op) -> !transform.any_op
%block_combiner_op, %forall_block_combiner_op =
- transform.structured.tile_using_forall %grid_combiner_op tile_sizes [1]
+ transform.structured.tile_using_forall %grid_combiner_op tile_sizes [1]
( mapping = [#gpu.thread<z>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.structured.fuse_into_containing_op %fill_1d into %forall_block_combiner_op : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
// Canonicalizations.
- %func_op = transform.structured.match ops{["func.func"]} in %variant_op
+ %func_op = transform.structured.match ops{["func.func"]} in %variant_op
: (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func_op {
transform.apply_patterns.iree.fold_fill_into_pad
@@ -45,20 +45,20 @@
transform.iree.apply_licm %func_op : !transform.any_op
transform.iree.apply_cse %func_op : !transform.any_op
- %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op
+ %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op
: (!transform.any_op) -> !transform.any_op
%grid_more_parallel_op = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
: (!transform.any_op) -> !transform.any_op
%block_more_parallel_op, %forall_block_more_parallel_op =
- transform.structured.tile_using_forall %grid_more_parallel_op tile_sizes [1, 1]
+ transform.structured.tile_using_forall %grid_more_parallel_op tile_sizes [1, 1]
( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.structured.fuse_into_containing_op %fill_2d into %forall_block_more_parallel_op : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
// Step 4. Rank-reduce and vectorize.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op
: (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.iree.fold_reshape_into_tensor_hal_interface
@@ -74,12 +74,12 @@
} : !transform.any_op
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op : (!transform.any_op) -> !transform.any_op
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
: (!transform.any_op) -> !transform.any_op
// Step 6. Post-bufferization mapping to blocks and threads.
// ===========================================================================
- %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_3
: (!transform.any_op) -> !transform.any_op
transform.iree.forall_to_workgroup %func_5 : (!transform.any_op) -> ()
transform.iree.map_nested_forall_to_gpu_threads %func_5
@@ -93,7 +93,7 @@
transform.apply_patterns.memref.fold_memref_alias_ops
transform.apply_patterns.vector.cast_away_vector_leading_one_dim
} : !transform.any_op
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
: (!transform.any_op) -> !transform.any_op
// Don't complain about unsupported if (threadIdx.x == 0 && threadIdx.y == 0)
// at this point.
@@ -115,6 +115,6 @@
transform.iree.apply_licm %func_op_3 : !transform.any_op
transform.iree.apply_cse %func_op_3 : !transform.any_op
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir
index fb4bcd6..b036039 100644
--- a/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir
@@ -154,7 +154,7 @@
transform.iree.apply_licm %func_op_3 : !transform.any_op
transform.iree.apply_cse %func_op_3 : !transform.any_op
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
index b1479a0..956ff0e 100644
--- a/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
@@ -18,7 +18,7 @@
// Step 2. Split the reduction to get meatier parallelism.
// ===========================================================================
- %block_more_parallel_fill_op_2, %block_more_parallel_op_2, %block_combiner_op_2, %forall =
+ %block_more_parallel_fill_op_2, %block_more_parallel_op_2, %block_combiner_op_2, %forall =
transform.structured.tile_reduction_using_for %grid_reduction by tile_sizes = [0, 128]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%_1:2 =
@@ -36,7 +36,7 @@
// 2nd op is [parallel, reduction] of 1x128, map the 1-dim to threadIdx.y to
// trigger mapping of the reduction to threadIdx.x via predication via `if (x==0)`.
%_3:2 =
- transform.structured.tile_using_forall %block_combiner_op_2 tile_sizes [1]
+ transform.structured.tile_using_forall %block_combiner_op_2 tile_sizes [1]
( mapping = [#gpu.thread<y>] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
@@ -87,7 +87,7 @@
transform.apply_patterns.memref.fold_memref_alias_ops
transform.apply_patterns.vector.cast_away_vector_leading_one_dim
} : !transform.any_op
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
: (!transform.any_op) -> !transform.any_op
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 } : (!transform.any_op) -> !transform.any_op
transform.iree.vector.warp_distribute %func_7
@@ -102,7 +102,7 @@
} : !transform.any_op
transform.iree.apply_licm %func_7 : !transform.any_op
transform.iree.apply_cse %func_7 : !transform.any_op
-
- transform.yield
+
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
index 345be1f..a255f23 100644
--- a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
@@ -109,6 +109,6 @@
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 } : (!transform.any_op) -> !transform.any_op
transform.iree.vector.warp_distribute %end_func : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
index 7c1564f..d62558f 100644
--- a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
@@ -92,6 +92,6 @@
: (!transform.any_op) -> !transform.any_op
transform.iree.vector.warp_distribute %end_func : (!transform.any_op) -> ()
- transform.yield
+ transform.yield
}
} // module
diff --git a/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir
index 67e3cb3..aefcd43 100644
--- a/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir
@@ -137,7 +137,7 @@
} : !transform.any_op
transform.iree.apply_licm %func_op_3 : !transform.any_op
transform.iree.apply_cse %func_op_3 : !transform.any_op
-
- transform.yield
+
+ transform.yield
}
} // module