| // RUN: iree-opt %s --iree-codegen-linalg-bufferize --canonicalize --cse --split-input-file | IreeFileCheck %s |
| |
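| // Tests that when the matmul init comes from a read-only binding, the init tile |
| // is copied into the output buffer before the matmul updates it in place. |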
| func @tile_from_tensor_load() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %8 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x1xf32> |
| %9 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %9, %3, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| } |
| } |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @tile_from_tensor_load() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK-DAG: %[[INIT:.+]] = memref.subview %[[TENSOR_INIT]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.copy(%[[INIT]], %[[RESULT]]) |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS]], %[[RHS]] |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
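| // Tests that loading the init from and storing the result to the same read-write |
| // binding bufferizes to an in-place matmul with no copy. |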
| func @tile_from_tensor_load_inplace() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readwrite:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %8 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:?x?xf32> -> tensor<1x1xf32> |
| %9 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %9, %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<readwrite:?x?xf32> |
| } |
| } |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-LABEL: func @tile_from_tensor_load_inplace() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS]], %[[RHS]] |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
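| // Tests that storing the result both in place to the read-write init binding and |
| // to a second write-only binding adds a copy into the second buffer after the matmul. |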
| func @tile_from_tensor_load_inplace_and_copy() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readwrite:?x?xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %8 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:?x?xf32> -> tensor<1x1xf32> |
| %9 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %9, %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<readwrite:?x?xf32> |
| flow.dispatch.tensor.store %9, %3, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| } |
| } |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read|Write" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @tile_from_tensor_load_inplace_and_copy() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK-DAG: %[[RESULT1:.+]] = memref.subview %[[RETURN1]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS]], %[[RHS]] |
| // CHECK-SAME: outs(%[[RESULT1]] |
| // CHECK: %[[RESULT2:.+]] = memref.subview %[[RETURN2]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.copy(%[[RESULT1]], %[[RESULT2]]) |
| |
| // ----- |
| |
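| // Tests that a pointwise op feeding the matmul LHS gets a temporary allocation |
| // for its result (the intermediate tensor is not backed by any binding). |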
| #map = affine_map<(d0, d1) -> (d0, d1)> |
| func @tile_from_pointwise_lhs() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %shape = linalg.init_tensor [1, 3] : tensor<1x3xf32> |
| %8 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} |
| ins(%6 : tensor<1x3xf32>) outs(%shape : tensor<1x3xf32>) { |
| ^bb0(%arg2: f32, %s: f32): // no predecessors |
| linalg.yield %arg2 : f32 |
| } -> tensor<1x3xf32> |
| %9 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x1xf32> |
| %10 = linalg.matmul ins(%8, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%9 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %10, %3, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| } |
| } |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @tile_from_pointwise_lhs() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<1x3xf32> |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[LHS]] : |
| // CHECK-SAME: outs(%[[ALLOC]] |
| // CHECK-DAG: %[[INIT:.+]] = memref.subview %[[TENSOR_INIT]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.copy(%[[INIT]], %[[RESULT]]) |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[ALLOC]], %[[RHS]] |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
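| // Same as above, but with the matmul result written in place to the read-write |
| // init binding. |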
| #map = affine_map<(d0, d1) -> (d0, d1)> |
| func @tile_from_pointwise_lhs_inplace() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readwrite:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %shape = linalg.init_tensor [1, 3] : tensor<1x3xf32> |
| %8 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} |
| ins(%6 : tensor<1x3xf32>) outs(%shape : tensor<1x3xf32>) { |
| ^bb0(%arg2: f32, %s: f32): // no predecessors |
| linalg.yield %arg2 : f32 |
| } -> tensor<1x3xf32> |
| %9 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:?x?xf32> -> tensor<1x1xf32> |
| %10 = linalg.matmul ins(%8, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%9 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %10, %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<readwrite:?x?xf32> |
| } |
| } |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-LABEL: func @tile_from_pointwise_lhs_inplace() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<1x3xf32> |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[LHS]] : |
| // CHECK-SAME: outs(%[[ALLOC]] |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[ALLOC]], %[[RHS]] |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
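| // Tests that a pointwise op producing the matmul init writes directly into the |
| // output buffer, which the matmul then updates in place. |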
| #map = affine_map<(d0, d1) -> (d0, d1)> |
| func @tile_from_pointwise_outs() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %8 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x1xf32> |
| %shape = linalg.init_tensor [1, 1] : tensor<1x1xf32> |
| %9 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} |
| ins(%8 : tensor<1x1xf32>) outs(%shape : tensor<1x1xf32>) { |
| ^bb0(%arg2: f32, %s: f32): // no predecessors |
| linalg.yield %arg2 : f32 |
| } -> tensor<1x1xf32> |
| %10 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%9 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %10, %3, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @tile_from_pointwise_outs() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK-DAG: %[[INIT:.+]] = memref.subview %[[TENSOR_INIT]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[INIT]] : |
| // CHECK-SAME: outs(%[[RESULT]] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS]], %[[RHS]] |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
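| // Same as above, but reading the init from and writing the result to a single |
| // read-write binding. |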
| #map = affine_map<(d0, d1) -> (d0, d1)> |
| func @tile_from_pointwise_outs_inplace() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readwrite:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %8 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:?x?xf32> -> tensor<1x1xf32> |
| %shape = linalg.init_tensor [1, 1] : tensor<1x1xf32> |
| %9 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} |
| ins(%8 : tensor<1x1xf32>) outs(%shape : tensor<1x1xf32>) { |
| ^bb0(%arg2: f32, %s: f32): // no predecessors |
| linalg.yield %arg2 : f32 |
| } -> tensor<1x1xf32> |
| %10 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%9 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %10, %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<readwrite:?x?xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-LABEL: func @tile_from_pointwise_outs_inplace() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK: linalg.generic |
| // CHECK-SAME: outs(%[[RESULT]] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS]], %[[RHS]] |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
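| // Tests that chained matmuls reusing the previous result as init become two |
| // in-place matmuls on the same output subview, after an initial copy of the init tile. |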
| func @tile_from_matmul_outs() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %8 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x1xf32> |
| %9 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| %10 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%9 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %10, %3, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @tile_from_matmul_outs() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK-DAG: %[[INIT:.+]] = memref.subview %[[TENSOR_INIT]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK: linalg.copy(%[[INIT]], %[[RESULT]]) |
| // CHECK: linalg.matmul |
| // CHECK-SAME: outs(%[[RESULT]] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
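| // Same as above, rooted at a read-write binding, so no initial copy is needed. |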
| func @tile_from_matmul_outs_inplace() { |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c4 = constant 4 : index |
| %c1 = constant 1 : index |
| %c3 = constant 3 : index |
| %0 = hal.interface.binding.subspan @io::@TENSOR_LHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@TENSOR_RHS[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@TENSOR_INIT[%c0] : !flow.dispatch.tensor<readwrite:?x?xf32> |
| %4 = hal.interface.workgroup.id[0] : index |
| %5 = hal.interface.workgroup.id[1] : index |
| scf.for %arg0 = %5 to %c2 step %c2 { |
| scf.for %arg1 = %4 to %c4 step %c4 { |
| %6 = flow.dispatch.tensor.load %0, offsets = [%arg0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<1x3xf32> |
| %7 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<3x1xf32> |
| %8 = flow.dispatch.tensor.load %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:?x?xf32> -> tensor<1x1xf32> |
| %9 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%8 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| %10 = linalg.matmul ins(%6, %7 : tensor<1x3xf32>, tensor<3x1xf32>) outs(%9 : tensor<1x1xf32>) -> tensor<1x1xf32> |
| flow.dispatch.tensor.store %10, %2, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<1x1xf32> -> !flow.dispatch.tensor<readwrite:?x?xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @TENSOR_LHS, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_RHS, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @TENSOR_INIT, set=0, binding=2, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-LABEL: func @tile_from_matmul_outs_inplace() |
| // CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_LHS |
| // CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan @io::@TENSOR_RHS |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@TENSOR_INIT |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] |
| // CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1] |
| // CHECK-DAG: %[[RHS:.+]] = memref.subview %[[TENSOR_RHS]][0, %[[IV1]]] [3, 1] [1, 1] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: outs(%[[RESULT]] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: outs(%[[RESULT]] |
| |
| // ----- |
| |
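| // Tests bufferization with dynamic shapes carried by push constants and tie_shape |
| // ops: tile sizes come from affine.min, and the init tile is copied into the output tile. |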
| func @bufferize_dynamic() { |
| %c0 = constant 0 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@arg1[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@arg2[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %4 = hal.interface.load.constant offset = 0 : index |
| %5 = hal.interface.load.constant offset = 1 : index |
| %6 = hal.interface.load.constant offset = 2 : index |
| %7 = hal.interface.load.constant offset = 3 : index |
| %8 = hal.interface.load.constant offset = 4 : index |
| %9 = hal.interface.load.constant offset = 5 : index |
| %10 = hal.interface.load.constant offset = 6 : index |
| %11 = hal.interface.load.constant offset = 7 : index |
| %12 = shapex.make_ranked_shape %4, %5 : (index, index) -> !shapex.ranked_shape<[?,?]> |
| %13 = flow.dispatch.tie_shape %0, %12 : (!flow.dispatch.tensor<readonly:?x?xf32>, !shapex.ranked_shape<[?,?]>) -> !flow.dispatch.tensor<readonly:?x?xf32> |
| %14 = shapex.make_ranked_shape %6, %7 : (index, index) -> !shapex.ranked_shape<[?,?]> |
| %15 = flow.dispatch.tie_shape %1, %14 : (!flow.dispatch.tensor<readonly:?x?xf32>, !shapex.ranked_shape<[?,?]>) -> !flow.dispatch.tensor<readonly:?x?xf32> |
| %16 = shapex.make_ranked_shape %8, %9 : (index, index) -> !shapex.ranked_shape<[?,?]> |
| %17 = flow.dispatch.tie_shape %2, %16 : (!flow.dispatch.tensor<readonly:?x?xf32>, !shapex.ranked_shape<[?,?]>) -> !flow.dispatch.tensor<readonly:?x?xf32> |
| %18 = shapex.make_ranked_shape %10, %11 : (index, index) -> !shapex.ranked_shape<[?,?]> |
| %19 = flow.dispatch.tie_shape %3, %18 : (!flow.dispatch.tensor<writeonly:?x?xf32>, !shapex.ranked_shape<[?,?]>) -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| %workgroup_size_x = hal.interface.workgroup.size[0] : index |
| %workgroup_size_y = hal.interface.workgroup.size[1] : index |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index |
| %20 = muli %workgroup_size_y, %workgroup_id_y : index |
| %21 = muli %workgroup_size_y, %workgroup_count_y : index |
| scf.for %arg0 = %20 to %4 step %21 { |
| %22 = muli %workgroup_size_x, %workgroup_id_x : index |
| %23 = muli %workgroup_size_x, %workgroup_count_x : index |
| scf.for %arg1 = %22 to %7 step %23 { |
| %24 = affine.min affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)>(%arg0)[%4, %workgroup_size_y] |
| %25 = flow.dispatch.tensor.load %13, offsets = [%arg0, %c0], sizes = [%24, %5], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %26 = affine.min affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)>(%arg1)[%7, %workgroup_size_x] |
| %27 = flow.dispatch.tensor.load %15, offsets = [%c0, %arg1], sizes = [%6, %26], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %28 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg0)[%workgroup_size_y, %8] |
| %29 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg1)[%workgroup_size_x, %9] |
| %30 = flow.dispatch.tensor.load %17, offsets = [%arg0, %arg1], sizes = [%28, %29], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %31 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%25, %27 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%30 : tensor<?x?xf32>) -> tensor<?x?xf32> |
| flow.dispatch.tensor.store %31, %19, offsets = [%arg0, %arg1], sizes = [%28, %29], strides = [%c1, %c1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg2, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)> |
| // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)> |
| // CHECK: func @bufferize_dynamic() |
| // CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan @io::@arg1 |
| // CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan @io::@arg2 |
| // CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK-DAG: %[[DIM0:.+]] = hal.interface.load.constant offset = 0 : index |
| // CHECK-DAG: %[[DIM1:.+]] = hal.interface.load.constant offset = 1 : index |
| // CHECK-DAG: %[[DIM2:.+]] = hal.interface.load.constant offset = 2 : index |
| // CHECK-DAG: %[[DIM3:.+]] = hal.interface.load.constant offset = 3 : index |
| // CHECK-DAG: %[[DIM4:.+]] = hal.interface.load.constant offset = 4 : index |
| // CHECK-DAG: %[[DIM5:.+]] = hal.interface.load.constant offset = 5 : index |
| // CHECK-DAG: %[[DIM6:.+]] = hal.interface.load.constant offset = 6 : index |
| // CHECK-DAG: %[[DIM7:.+]] = hal.interface.load.constant offset = 7 : index |
| // CHECK: %[[SHAPE_LHS:.+]] = shapex.make_ranked_shape %[[DIM0]], %[[DIM1]] |
| // CHECK: %[[LHS_SHAPED:.+]] = shapex.tie_shape %[[LHS]], %[[SHAPE_LHS]] |
| // CHECK: %[[SHAPE_RHS:.+]] = shapex.make_ranked_shape %[[DIM2]], %[[DIM3]] |
| // CHECK: %[[RHS_SHAPED:.+]] = shapex.tie_shape %[[RHS]], %[[SHAPE_RHS]] |
| // CHECK: %[[SHAPE_INIT:.+]] = shapex.make_ranked_shape %[[DIM4]], %[[DIM5]] |
| // CHECK: %[[INIT_SHAPED:.+]] = shapex.tie_shape %[[INIT]], %[[SHAPE_INIT]] |
| // CHECK: %[[SHAPE_RESULT:.+]] = shapex.make_ranked_shape %[[DIM6]], %[[DIM7]] |
| // CHECK: %[[RESULT_SHAPED:.+]] = shapex.tie_shape %[[RESULT]], %[[SHAPE_RESULT]] |
| // CHECK-DAG: %[[WGSIZE_X:.+]] = hal.interface.workgroup.size[0] |
| // CHECK-DAG: %[[WGSIZE_Y:.+]] = hal.interface.workgroup.size[1] |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM0]], %[[WGSIZE_Y]]] |
| // CHECK: %[[LHS_TILE:.+]] = memref.subview %[[LHS_SHAPED]][%[[IV0]], 0] [%[[TILE_M]], %[[DIM1]]] |
| // CHECK: %[[TILE_N:.+]] = affine.min #[[MAP0]](%[[IV1]])[%[[DIM3]], %[[WGSIZE_X]]] |
| // CHECK-DAG: %[[RHS_TILE:.+]] = memref.subview %[[RHS_SHAPED]][0, %[[IV1]]] [%[[DIM2]], %[[TILE_N]]] |
| // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP2]](%[[IV0]])[%[[WGSIZE_Y]], %[[DIM4]]] |
| // CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[WGSIZE_X]], %[[DIM5]]] |
| // CHECK-DAG: %[[INIT_TILE:.+]] = memref.subview %[[INIT_SHAPED]][%[[IV0]], %[[IV1]]] [%[[TILE_M_2]], %[[TILE_N_2]]] |
| // CHECK-DAG: %[[RESULT_TILE:.+]] = memref.subview %[[RESULT_SHAPED]][%[[IV0]], %[[IV1]]] [%[[TILE_M_2]], %[[TILE_N_2]]] |
| // CHECK: linalg.copy(%[[INIT_TILE]], %[[RESULT_TILE]]) |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] |
| // CHECK-SAME: outs(%[[RESULT_TILE]] |
| |
| // ----- |
| |
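| // Same as above, but updating the read-write init binding in place. |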
| func @bufferize_dynamic_inplace() { |
| %c0 = constant 0 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@arg1[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = hal.interface.binding.subspan @io::@arg2[%c0] : !flow.dispatch.tensor<readwrite:?x?xf32> |
| %4 = hal.interface.load.constant offset = 0 : index |
| %5 = hal.interface.load.constant offset = 1 : index |
| %6 = hal.interface.load.constant offset = 2 : index |
| %7 = hal.interface.load.constant offset = 3 : index |
| %8 = hal.interface.load.constant offset = 4 : index |
| %9 = hal.interface.load.constant offset = 5 : index |
| %12 = shapex.make_ranked_shape %4, %5 : (index, index) -> !shapex.ranked_shape<[?,?]> |
| %13 = flow.dispatch.tie_shape %0, %12 : (!flow.dispatch.tensor<readonly:?x?xf32>, !shapex.ranked_shape<[?,?]>) -> !flow.dispatch.tensor<readonly:?x?xf32> |
| %14 = shapex.make_ranked_shape %6, %7 : (index, index) -> !shapex.ranked_shape<[?,?]> |
| %15 = flow.dispatch.tie_shape %1, %14 : (!flow.dispatch.tensor<readonly:?x?xf32>, !shapex.ranked_shape<[?,?]>) -> !flow.dispatch.tensor<readonly:?x?xf32> |
| %16 = shapex.make_ranked_shape %8, %9 : (index, index) -> !shapex.ranked_shape<[?,?]> |
| %17 = flow.dispatch.tie_shape %2, %16 : (!flow.dispatch.tensor<readwrite:?x?xf32>, !shapex.ranked_shape<[?,?]>) -> !flow.dispatch.tensor<readwrite:?x?xf32> |
| %workgroup_size_x = hal.interface.workgroup.size[0] : index |
| %workgroup_size_y = hal.interface.workgroup.size[1] : index |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index |
| %20 = muli %workgroup_size_y, %workgroup_id_y : index |
| %21 = muli %workgroup_size_y, %workgroup_count_y : index |
| scf.for %arg0 = %20 to %4 step %21 { |
| %22 = muli %workgroup_size_x, %workgroup_id_x : index |
| %23 = muli %workgroup_size_x, %workgroup_count_x : index |
| scf.for %arg1 = %22 to %7 step %23 { |
| %24 = affine.min affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)>(%arg0)[%4, %workgroup_size_y] |
| %25 = flow.dispatch.tensor.load %13, offsets = [%arg0, %c0], sizes = [%24, %5], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %26 = affine.min affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)>(%arg1)[%7, %workgroup_size_x] |
| %27 = flow.dispatch.tensor.load %15, offsets = [%c0, %arg1], sizes = [%6, %26], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %28 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg0)[%workgroup_size_y, %8] |
| %29 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg1)[%workgroup_size_x, %9] |
| %30 = flow.dispatch.tensor.load %17, offsets = [%arg0, %arg1], sizes = [%28, %29], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:?x?xf32> -> tensor<?x?xf32> |
| %31 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%25, %27 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%30 : tensor<?x?xf32>) -> tensor<?x?xf32> |
| flow.dispatch.tensor.store %31, %17, offsets = [%arg0, %arg1], sizes = [%28, %29], strides = [%c1, %c1] : tensor<?x?xf32> -> !flow.dispatch.tensor<readwrite:?x?xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg2, set=0, binding=2, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (s1, -d0 + s0)> |
| // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)> |
| // CHECK: func @bufferize_dynamic_inplace() |
| // CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan @io::@arg1 |
| // CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan @io::@arg2 |
| // CHECK-DAG: %[[DIM0:.+]] = hal.interface.load.constant offset = 0 : index |
| // CHECK-DAG: %[[DIM1:.+]] = hal.interface.load.constant offset = 1 : index |
| // CHECK-DAG: %[[DIM2:.+]] = hal.interface.load.constant offset = 2 : index |
| // CHECK-DAG: %[[DIM3:.+]] = hal.interface.load.constant offset = 3 : index |
| // CHECK-DAG: %[[DIM4:.+]] = hal.interface.load.constant offset = 4 : index |
| // CHECK-DAG: %[[DIM5:.+]] = hal.interface.load.constant offset = 5 : index |
| // CHECK: %[[SHAPE_LHS:.+]] = shapex.make_ranked_shape %[[DIM0]], %[[DIM1]] |
| // CHECK: %[[LHS_SHAPED:.+]] = shapex.tie_shape %[[LHS]], %[[SHAPE_LHS]] |
| // CHECK: %[[SHAPE_RHS:.+]] = shapex.make_ranked_shape %[[DIM2]], %[[DIM3]] |
| // CHECK: %[[RHS_SHAPED:.+]] = shapex.tie_shape %[[RHS]], %[[SHAPE_RHS]] |
| // CHECK: %[[SHAPE_RESULT:.+]] = shapex.make_ranked_shape %[[DIM4]], %[[DIM5]] |
| // CHECK: %[[RESULT_SHAPED:.+]] = shapex.tie_shape %[[RESULT]], %[[SHAPE_RESULT]] |
| // CHECK-DAG: %[[WGSIZE_X:.+]] = hal.interface.workgroup.size[0] |
| // CHECK-DAG: %[[WGSIZE_Y:.+]] = hal.interface.workgroup.size[1] |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM0]], %[[WGSIZE_Y]]] |
| // CHECK: %[[LHS_TILE:.+]] = memref.subview %[[LHS_SHAPED]][%[[IV0]], 0] [%[[TILE_M]], %[[DIM1]]] |
| // CHECK: %[[TILE_N:.+]] = affine.min #[[MAP0]](%[[IV1]])[%[[DIM3]], %[[WGSIZE_X]]] |
| // CHECK-DAG: %[[RHS_TILE:.+]] = memref.subview %[[RHS_SHAPED]][0, %[[IV1]]] [%[[DIM2]], %[[TILE_N]]] |
| // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP2]](%[[IV0]])[%[[WGSIZE_Y]], %[[DIM4]]] |
| // CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[WGSIZE_X]], %[[DIM5]]] |
| // CHECK-DAG: %[[RESULT_TILE:.+]] = memref.subview %[[RESULT_SHAPED]][%[[IV0]], %[[IV1]]] [%[[TILE_M_2]], %[[TILE_N_2]]] |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] |
| // CHECK-SAME: outs(%[[RESULT_TILE]] |
| |
| // ----- |
| |
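| // Tests that a standalone tensor_reshape becomes a linalg.reshape of the source |
| // buffer followed by a copy into the output. |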
| func @reshape_simple() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:12xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:3x4xi32> |
| %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:12xi32> -> tensor<12xi32> |
| %3 = linalg.tensor_reshape %2 [affine_map<(d0, d1) -> (d0, d1)>] : tensor<12xi32> into tensor<3x4xi32> |
| flow.dispatch.tensor.store %3, %1, offsets = [], sizes = [], strides = [] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:3x4xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK: #[[MAP:.+]] = affine_map<(d0, d1) -> (d0, d1)> |
| // CHECK: func @reshape_simple() |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: %[[RESHAPE:.+]] = linalg.reshape %[[ARG0]] [#[[MAP]]] |
| // CHECK: linalg.copy(%[[RESHAPE]], %[[RET0]]) |
| |
| // ----- |
| |
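| // Tests that a tensor_reshape feeding a pointwise op folds into a linalg.reshape |
| // of the input buffer, with the op writing straight to the output. |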
| func @reshape_fused_source() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:12xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:3x4xi32> |
| %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:12xi32> -> tensor<12xi32> |
| %3 = linalg.tensor_reshape %2 [affine_map<(d0, d1) -> (d0, d1)>] : tensor<12xi32> into tensor<3x4xi32> |
| %4 = linalg.init_tensor [3, 4] : tensor<3x4xi32> |
| %5 = linalg.generic { |
| indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], |
| iterator_types = ["parallel", "parallel"]} |
| ins(%3 : tensor<3x4xi32>) outs(%4 : tensor<3x4xi32>) { |
| ^bb0(%arg0 : i32, %arg1 : i32): |
| %6 = addi %arg0, %arg0 : i32 |
| linalg.yield %6 : i32 |
| } -> tensor<3x4xi32> |
| flow.dispatch.tensor.store %5, %1, offsets = [], sizes = [], strides = [] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:3x4xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1) -> (d0, d1)> |
| // CHECK: func @reshape_fused_source() |
| // CHECK: %[[C0:.+]] = constant 0 |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0[%[[C0]]] : memref<12xi32> |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0[%[[C0]]] : memref<3x4xi32> |
| // CHECK: %[[RESHAPE:.+]] = linalg.reshape %[[ARG0]] [#[[MAP]]] |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[RESHAPE]] : memref<3x4xi32>) |
| // CHECK-SAME: outs(%[[RET0]] : memref<3x4xi32>) |
| |
| // ----- |
| |
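| // Tests that a reshape consumed by both a pointwise op and a direct store gets a |
| // single linalg.reshape, reused for the op input and the extra copy-out. |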
| func @reshape_fused_source_and_copyout() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:12xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:3x4xi32> |
| %2 = hal.interface.binding.subspan @io::@ret1[%c0] : !flow.dispatch.tensor<writeonly:3x4xi32> |
| %3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:12xi32> -> tensor<12xi32> |
| %4 = linalg.tensor_reshape %3 [affine_map<(d0, d1) -> (d0, d1)>] : tensor<12xi32> into tensor<3x4xi32> |
| %5 = linalg.init_tensor [3, 4] : tensor<3x4xi32> |
| %6 = linalg.generic { |
| indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], |
| iterator_types = ["parallel", "parallel"]} |
| ins(%4 : tensor<3x4xi32>) outs(%5 : tensor<3x4xi32>) { |
| ^bb0(%arg0 : i32, %arg1 : i32): |
| %7 = addi %arg0, %arg0 : i32 |
| linalg.yield %7 : i32 |
| } -> tensor<3x4xi32> |
| flow.dispatch.tensor.store %6, %1, offsets = [], sizes = [], strides = [] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:3x4xi32> |
| flow.dispatch.tensor.store %4, %2, offsets = [], sizes = [], strides = [] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:3x4xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| hal.interface.binding @ret1, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1) -> (d0, d1)> |
| // CHECK: func @reshape_fused_source_and_copyout() |
| // CHECK: %[[C0:.+]] = constant 0 |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0[%[[C0]]] : memref<12xi32> |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0[%[[C0]]] : memref<3x4xi32> |
| // CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan @io::@ret1[%[[C0]]] : memref<3x4xi32> |
| // CHECK: %[[RESHAPE:.+]] = linalg.reshape %[[ARG0]] [#[[MAP]]] |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[RESHAPE]] : memref<3x4xi32>) |
| // CHECK-SAME: outs(%[[RET0]] : memref<3x4xi32>) |
| // CHECK: linalg.copy(%[[RESHAPE]], %[[RET1]]) |
| |
| // ----- |
| |
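| // Tests that a tensor_reshape of a pointwise result folds into a linalg.reshape |
| // of the destination buffer that the op writes into. |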
| func @reshape_fused_target() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:3x4xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:12xi32> |
| %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:3x4xi32> -> tensor<3x4xi32> |
| %3 = linalg.init_tensor [3, 4] : tensor<3x4xi32> |
| %4 = linalg.generic { |
| indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], |
| iterator_types = ["parallel", "parallel"]} |
| ins(%2 : tensor<3x4xi32>) outs(%3 : tensor<3x4xi32>) { |
| ^bb0(%arg0 : i32, %arg1 : i32): |
| %5 = addi %arg0, %arg0 : i32 |
| linalg.yield %5 : i32 |
| } -> tensor<3x4xi32> |
| %5 = linalg.tensor_reshape %4 [affine_map<(d0, d1) -> (d0, d1)>] : tensor<3x4xi32> into tensor<12xi32> |
| flow.dispatch.tensor.store %5, %1, offsets = [], sizes = [], strides = [] : tensor<12xi32> -> !flow.dispatch.tensor<writeonly:12xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1) -> (d0, d1)> |
| // CHECK: func @reshape_fused_target() |
| // CHECK: %[[C0:.+]] = constant 0 |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0[%[[C0]]] : memref<3x4xi32> |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0[%[[C0]]] : memref<12xi32> |
| // CHECK-DAG: %[[RESHAPE:.+]] = linalg.reshape %[[RET0]] [#[[MAP]]] |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[ARG0]] : memref<3x4xi32>) |
| // CHECK-SAME: outs(%[[RESHAPE]] : memref<3x4xi32>) |
| |
| // ----- |
| |
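| // Tests a dot_general-style lowering: the LHS reshape becomes a buffer reshape, |
| // and fill and matmul run in place on the result subview. |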
| func @dot_general_lowering() { |
| %cst = constant 0.000000e+00 : f32 |
| %c3 = constant 3 : index |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:1x1x2xf32> |
| %1 = hal.interface.binding.subspan @io::@arg1[%c0] : !flow.dispatch.tensor<readonly:2x3xf32> |
| %2 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:1x3xf32> |
| %3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:1x1x2xf32> -> tensor<1x1x2xf32> |
| %4 = linalg.tensor_reshape %3 [affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d2)>] : tensor<1x1x2xf32> into tensor<1x2xf32> |
| %workgroup_size_x = hal.interface.workgroup.size[0] : index |
| %workgroup_size_y = hal.interface.workgroup.size[1] : index |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index |
| %5 = muli %workgroup_size_y, %workgroup_id_y : index |
| %6 = muli %workgroup_size_y, %workgroup_count_y : index |
| scf.for %arg0 = %5 to %c1 step %6 { |
| %7 = muli %workgroup_size_x, %workgroup_id_x : index |
| %8 = muli %workgroup_size_x, %workgroup_count_x : index |
| scf.for %arg1 = %7 to %c3 step %8 { |
| %9 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 1)>(%arg0)[%workgroup_size_y] |
| %10 = subtensor %4[%arg0, 0] [%9, 2] [1, 1] : tensor<1x2xf32> to tensor<?x2xf32> |
| %11 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 3)>(%arg1)[%workgroup_size_x] |
| %12 = flow.dispatch.tensor.load %1, offsets = [%c0, %arg1], sizes = [%c2, %11], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:2x3xf32> -> tensor<2x?xf32> |
| %13 = linalg.init_tensor [%9, %11] : tensor<?x?xf32> |
| %14 = linalg.fill(%13, %cst) : tensor<?x?xf32>, f32 -> tensor<?x?xf32> |
| %15 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%10, %12 : tensor<?x2xf32>, tensor<2x?xf32>) outs(%14 : tensor<?x?xf32>) -> tensor<?x?xf32> |
| flow.dispatch.tensor.store %15, %2, offsets = [%arg0, %arg1], sizes = [%9, %11], strides = [%c1, %c1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:1x3xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @dot_general_lowering() |
| // CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan @io::@arg1 |
| // CHECK-DAG: %[[RESHAPE_LHS:.+]] = linalg.reshape %[[LHS]] |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: scf.for %[[IV0:.+]] = {{.+}} { |
| // CHECK: scf.for %[[IV1:.+]] = {{.+}} { |
| // CHECK-DAG: %[[LHS_TILE:.+]] = memref.subview %[[RESHAPE_LHS]][%[[IV0]], 0] |
| // CHECK-DAG: %[[RESULT_TILE:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] |
| // CHECK-DAG: %[[RHS_TILE:.+]] = memref.subview %[[RHS]][0, %[[IV1]]] |
| // CHECK: linalg.fill(%[[RESULT_TILE]], %{{.+}}) |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] |
| // CHECK-SAME: outs(%[[RESULT_TILE]] |
| |
| // ----- |
| |
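| // Tests that a subtensor op bufferizes to a subview plus a copy into the output. |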
| func @slice() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xi32> |
| %2 = hal.interface.load.constant offset = 0 : index |
| %3 = hal.interface.load.constant offset = 1 : index |
| %4 = hal.interface.load.constant offset = 2 : index |
| %5 = hal.interface.load.constant offset = 3 : index |
| %6 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xi32> -> tensor<?x?xi32> |
| %7 = subtensor %6[%2, %3] [%4, %5] [1, 1] : tensor<?x?xi32> to tensor<?x?xi32> |
| flow.dispatch.tensor.store %7, %1, offsets = [], sizes = [], strides = [] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:?x?xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @slice() |
| // CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]] |
| // CHECK: linalg.copy(%[[SUBVIEW]], %[[RETURN]]) |
| |
| // ----- |
| |
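| // Tests that a rank-reducing subtensor bufferizes to a (rank-reducing) subview |
| // plus a copy. |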
| func @slice_rank_reducing() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?x?xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xi32> |
| %2 = hal.interface.load.constant offset = 0 : index |
| %3 = hal.interface.load.constant offset = 1 : index |
| %4 = hal.interface.load.constant offset = 2 : index |
| %5 = hal.interface.load.constant offset = 3 : index |
| %6 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?x?xi32> -> tensor<?x?x?xi32> |
| %7 = subtensor %6[%2, %2, %3] [%4, 1, %5] [1, 1, 1] : tensor<?x?x?xi32> to tensor<?x?xi32> |
| flow.dispatch.tensor.store %7, %1, offsets = [], sizes = [], strides = [] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:?x?xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @slice_rank_reducing() |
| // CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]] |
| // CHECK: linalg.copy(%[[SUBVIEW]], %[[RETURN]]) |
| |
| // ----- |
| |
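| // Tests that multiple subtensors of one source become multiple subviews, each |
| // copied to its destination; the offset store goes through a subview of the output. |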
| func @slice_multiple_copy() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?x?xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?x?xi32> |
| %2 = hal.interface.binding.subspan @io::@ret1[%c0] : !flow.dispatch.tensor<writeonly:?x?xi32> |
| %3 = hal.interface.load.constant offset = 0 : index |
| %4 = hal.interface.load.constant offset = 1 : index |
| %5 = hal.interface.load.constant offset = 2 : index |
| %6 = hal.interface.load.constant offset = 3 : index |
| %7 = hal.interface.load.constant offset = 4 : index |
| %8 = hal.interface.load.constant offset = 5 : index |
| %9 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?x?xi32> -> tensor<?x?x?xi32> |
| %10 = subtensor %9[%3, %4, %5] [%6, %7, %8] [1, 1, 1] : tensor<?x?x?xi32> to tensor<?x?x?xi32> |
| %11 = subtensor %9[%3, %4, %5] [%6, 1, %8] [1, 1, 1] : tensor<?x?x?xi32> to tensor<?x?xi32> |
| flow.dispatch.tensor.store %10, %1, offsets = [], sizes = [], strides = [] : tensor<?x?x?xi32> -> !flow.dispatch.tensor<writeonly:?x?x?xi32> |
| flow.dispatch.tensor.store %11, %2, offsets = [%3, %5], sizes = [%6, %8], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:?x?xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| hal.interface.binding @ret1, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @slice_multiple_copy() |
| // CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan @io::@ret1 |
| // CHECK: %[[SUBVIEW1:.+]] = memref.subview %[[ARG]] |
| // CHECK: %[[SUBVIEW2:.+]] = memref.subview %[[ARG]] |
| // CHECK: linalg.copy(%[[SUBVIEW1]], %[[RETURN1]]) |
| // CHECK: %[[RETURNVIEW:.+]] = memref.subview %[[RETURN2]] |
| // CHECK: linalg.copy(%[[SUBVIEW2]], %[[RETURNVIEW]]) |
| |
| // ----- |
| |
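| // Loading from and storing back to the same readwrite binding is an identity; no copy should be emitted. |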
| func @slice_in_place() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readwrite:?x?xi32> |
| %6 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readwrite:?x?xi32> -> tensor<?x?xi32> |
| flow.dispatch.tensor.store %6, %0, offsets = [], sizes = [], strides = [] : tensor<?x?xi32> -> !flow.dispatch.tensor<readwrite:?x?xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-LABEL: func @slice_in_place() |
| // CHECK-NOT: linalg.copy |
| |
| |
| // ----- |
| |
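| // A subtensor with static offsets and sizes should still bufferize to a subview plus a copy into the output buffer. |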
| func @slice_whole_stride_dispatch_0() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?xi32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xi32> |
| %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xi32> -> tensor<?x?xi32> |
| %3 = subtensor %2[1, 0] [1, 4] [1, 1] : tensor<?x?xi32> to tensor<1x4xi32> |
| flow.dispatch.tensor.store %3, %1, offsets = [], sizes = [], strides = [] : tensor<1x4xi32> -> !flow.dispatch.tensor<writeonly:?x?xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @slice_whole_stride_dispatch_0() |
| // CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[INPUT]] |
| // CHECK: linalg.copy(%[[SUBVIEW]], %[[OUTPUT]]) |
| |
| // ----- |
| |
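| // subtensor_insert should bufferize to a copy of the destination tensor into the output buffer, followed by a copy of the inserted tensor into a subview of it. |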
| func @subtensor_insert() { |
| %c0 = constant 0 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?xi32> |
| %1 = hal.interface.binding.subspan @io::@arg1[%c0] : !flow.dispatch.tensor<readonly:?x?xi32> |
| %2 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xi32> |
| %3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xi32> -> tensor<?x?xi32> |
| %4 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xi32> -> tensor<?x?xi32> |
| %5 = memref.dim %3, %c0 : tensor<?x?xi32> |
| %6 = memref.dim %3, %c1 : tensor<?x?xi32> |
| %7 = subtensor_insert %3 into %4[3, 4] [%5, %6] [1, 1] : tensor<?x?xi32> into tensor<?x?xi32> |
| flow.dispatch.tensor.store %7, %2, offsets = [], sizes = [], strides = [] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:?x?xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @subtensor_insert() |
| // CHECK-DAG: %[[C0:.+]] = constant 0 |
| // CHECK-DAG: %[[C1:.+]] = constant 1 |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan @io::@arg1 |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK-DAG: %[[D0:.+]] = memref.dim %[[ARG0]], %[[C0]] |
| // CHECK-DAG: %[[D1:.+]] = memref.dim %[[ARG0]], %[[C1]] |
| // CHECK: linalg.copy(%[[ARG1]], %[[RET0]]) |
| // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[RET0]][3, 4] [%[[D0]], %[[D1]]] [1, 1] |
| // CHECK: linalg.copy(%[[ARG0]], %[[SUBVIEW]]) |
| |
| // ----- |
| |
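| // tensor.extract from a zero-rank tensor should lower to a memref.load of the corresponding buffer. |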
| func @tensor_extract() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:i32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> |
| %2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> |
| %3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> |
| %4 = tensor.extract %3[] : tensor<i32> |
| %5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> |
| flow.dispatch.tensor.store %5, %1, offsets = [], sizes = [], strides = [] : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @tensor_extract() |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: %[[LOAD:.+]] = memref.load %[[ARG0]] |
| // CHECK: linalg.fill(%[[RET0]], %[[LOAD]]) |
| |
| // ----- |
| |
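| // A tensor loaded from one binding and stored unchanged to another should become a single linalg.copy between the two buffers. |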
| func @load_to_store() { |
| %c0 = constant 0 : index |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:3x4xi32> |
| %2 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:3x4xi32> |
| %3 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:3x4xi32> -> tensor<3x4xi32> |
| flow.dispatch.tensor.store %3, %1, offsets = [], sizes = [], strides = [] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:3x4xi32> |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| |
| // CHECK-LABEL: func @load_to_store() |
| // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan @io::@ret0[%c0] : memref<3x4xi32> |
| // CHECK: %[[IN:.+]] = hal.interface.binding.subspan @io::@arg0[%c0] : memref<3x4xi32> |
| // CHECK: linalg.copy(%[[IN]], %[[OUT]]) : memref<3x4xi32>, memref<3x4xi32> |
| |
| // ----- |
| |
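| // Storing a constant should materialize it with memref.buffer_cast and copy it into the output buffer. |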
| func @constant() { |
| %c0 = constant 0 : index |
| %cst = constant dense<[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]> : tensor<2x2x3xi32> |
| %0 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:2x2x3xi32> |
| flow.dispatch.tensor.store %cst, %0, offsets = [], sizes = [], strides = [] : tensor<2x2x3xi32> -> !flow.dispatch.tensor<writeonly:2x2x3xi32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @ret0, set=0, binding=0, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @constant() |
| // CHECK: %[[CST:.+]] = constant {{.+}} : tensor<2x2x3xi32> |
| // CHECK: %[[MEMREF:.+]] = memref.buffer_cast %[[CST]] : memref<2x2x3xi32> |
| // CHECK: %[[RESULT:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: linalg.copy(%[[MEMREF]], %[[RESULT]]) |
| |
| // ----- |
| |
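| // A non-splat constant used as a matmul operand should be buffer_cast once, outside the loops, and sliced with subviews inside them. |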
| func @rhs_non_splat_constant() { |
| %c0 = constant 0 : index |
| %cst = constant dense<[[0.706495285, -0.567672312, 0.483717591, 0.522725761, 0.7563259], [-0.0899272263, -0.283501834, -0.350822538, -0.351515919, -0.337136656], [-0.451804549, 0.372324884, -0.620518147, 0.235451385, 0.851095855]]> : tensor<3x5xf32> |
| %cst_0 = constant 0.000000e+00 : f32 |
| %c5 = constant 5 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:1x5x3x1xf32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:5x5xf32> |
| %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:1x5x3x1xf32> -> tensor<1x5x3x1xf32> |
| %3 = linalg.tensor_reshape %2 [affine_map<(d0, d1, d2, d3) -> (d0, d1)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>] : tensor<1x5x3x1xf32> into tensor<5x3xf32> |
| %workgroup_size_x = hal.interface.workgroup.size[0] : index |
| %workgroup_size_y = hal.interface.workgroup.size[1] : index |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index |
| %4 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_y, %workgroup_size_y] |
| %5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_y, %workgroup_size_y] |
| scf.for %arg0 = %4 to %c5 step %5 { |
| %6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_x, %workgroup_size_x] |
| %7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_x, %workgroup_size_x] |
| scf.for %arg1 = %6 to %c5 step %7 { |
| %8 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 5)>(%arg0)[%workgroup_size_y] |
| %9 = subtensor %3[%arg0, 0] [%8, 3] [1, 1] : tensor<5x3xf32> to tensor<?x3xf32> |
| %10 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + 5)>(%arg1)[%workgroup_size_x] |
| %11 = subtensor %cst[0, %arg1] [3, %10] [1, 1] : tensor<3x5xf32> to tensor<3x?xf32> |
| %12 = linalg.init_tensor [%8, %10] : tensor<?x?xf32> |
| %13 = linalg.fill(%12, %cst_0) : tensor<?x?xf32>, f32 -> tensor<?x?xf32> |
| %14 = linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%9, %11 : tensor<?x3xf32>, tensor<3x?xf32>) outs(%13 : tensor<?x?xf32>) -> tensor<?x?xf32> |
| flow.dispatch.tensor.store %14, %1, offsets = [%arg0, %arg1], sizes = [%8, %10], strides = [%c1, %c1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:5x5xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @rhs_non_splat_constant |
| // CHECK-DAG: %[[CONSTANT:.+]] = constant {{.+}} : tensor<3x5xf32> |
| // CHECK-DAG: %[[RHS:.+]] = memref.buffer_cast %[[CONSTANT]] |
| // CHECK-DAG: %[[LHS_INPUT:.+]] = hal.interface.binding.subspan @io::@arg0[%{{.+}}] : memref<1x5x3x1xf32> |
| // CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan @io::@ret0[%{{.+}}] : memref<5x5xf32> |
| // CHECK: %[[LHS:.+]] = linalg.reshape %[[LHS_INPUT]] |
| // CHECK: scf.for %[[IV0:.+]] = |
| // CHECK: scf.for %[[IV1:.+]] = |
| // CHECK-DAG: %[[LHS_SUBVIEW:.+]] = memref.subview %[[LHS]][%[[IV0]], 0] |
| // CHECK-DAG: %[[RHS_SUBVIEW:.+]] = memref.subview %[[RHS]][0, %[[IV1]]] |
| // CHECK-DAG: %[[RESULT_SUBVIEW:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] |
| // CHECK: linalg.fill(%[[RESULT_SUBVIEW]], %{{.+}}) |
| // CHECK: linalg.matmul |
| // CHECK-SAME: ins(%[[LHS_SUBVIEW]], %[[RHS_SUBVIEW]] |
| // CHECK-SAME: outs(%[[RESULT_SUBVIEW]] |
| |
| // ----- |
| |
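| // A tensor.extract inside the body of an indexed_generic (a gather) should become a memref.load from the bufferized input. |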
| func @gather() { |
| %c0 = constant 0 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@arg1[%c0] : !flow.dispatch.tensor<readonly:?xi32> |
| %2 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %4 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %5 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?xi32> -> tensor<?xi32> |
| %d0 = memref.dim %5, %c0 : tensor<?xi32> |
| %d1 = memref.dim %4, %c1 : tensor<?x?xf32> |
| %3 = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32> |
| %7 = linalg.indexed_generic {indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%5 : tensor<?xi32>) outs(%3 : tensor<?x?xf32>) { |
| ^bb0(%arg0: index, %arg1: index, %arg2: i32, %arg3: f32): // no predecessors |
| %8 = index_cast %arg2 : i32 to index |
| %9 = tensor.extract %4[%8, %arg1] : tensor<?x?xf32> |
| linalg.yield %9 : f32 |
| } -> tensor<?x?xf32> |
| flow.dispatch.tensor.store %7, %2, offsets = [], sizes = [], strides = [] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @gather() |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan @io::@arg1 |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: linalg.indexed_generic |
| // CHECK: %[[VAL:.+]] = memref.load %[[ARG0]] |
| // CHECK: linalg.yield %[[VAL]] |
| |
| // ----- |
| |
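| // The pooling window is an init_tensor with no backing binding, so bufferization must materialize it with a memref.alloc. |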
| func @pooling_nhwc_sum() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@ro0[%c0] : !flow.dispatch.tensor<readonly:f32> |
| %1 = hal.interface.binding.subspan @io::@ro1[%c0] : !flow.dispatch.tensor<readonly:1x4x6x1xf32> |
| %2 = hal.interface.binding.subspan @io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:1x2x2x1xf32> |
| %3 = linalg.init_tensor [2, 3] : tensor<2x3xf32> |
| %4 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:f32> -> tensor<f32> |
| %5 = tensor.extract %4[] : tensor<f32> |
| %6 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:1x4x6x1xf32> -> tensor<1x4x6x1xf32> |
| %7 = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xf32> |
| %8 = linalg.fill(%7, %5) : tensor<1x2x2x1xf32>, f32 -> tensor<1x2x2x1xf32> |
| %9 = linalg.pooling_nhwc_sum { |
| dilations = dense<1> : vector<2xi64>, |
| strides = dense<[2, 3]> : vector<2xi64> |
| } ins(%6, %3 : tensor<1x4x6x1xf32>, tensor<2x3xf32>) |
| outs(%8 : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> |
| flow.dispatch.tensor.store %9, %2, offsets = [], sizes = [], strides = [] : tensor<1x2x2x1xf32> -> !flow.dispatch.tensor<writeonly:1x2x2x1xf32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @pooling_nhwc_sum |
| // CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan @io::@ro1[%c0] : memref<1x4x6x1xf32> |
| // CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan @io::@ro0[%c0] : memref<f32> |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@wo2[%c0] : memref<1x2x2x1xf32> |
| // CHECK: %[[INIT_VAL:.+]] = memref.load %[[INIT]][] : memref<f32> |
| // CHECK: linalg.fill(%[[RET0]], %[[INIT_VAL]]) : memref<1x2x2x1xf32>, f32 |
| // CHECK: %[[WINDOW:.+]] = memref.alloc() : memref<2x3xf32> |
| // CHECK: linalg.pooling_nhwc_sum |
| // CHECK-SAME: dilations = dense<1> : vector<2xi64> |
| // CHECK-SAME: strides = dense<[2, 3]> : vector<2xi64> |
| // CHECK-SAME: ins(%[[INPUT]], %[[WINDOW]] : memref<1x4x6x1xf32>, memref<2x3xf32>) |
| // CHECK-SAME: outs(%[[RET0]] : memref<1x2x2x1xf32>) |
| |
| // ----- |
| |
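| // Repeated identical subtensors of a read-only tensor should CSE into a single subview that is reused for all operands. |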
| func @read_only_subtensor() { |
| %c0 = constant 0 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@ro0[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %2 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %3 = hal.interface.binding.subspan @io::@ro1[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %4 = flow.dispatch.tensor.load %3, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %workgroup_size_x = hal.interface.workgroup.size[0] : index |
| %workgroup_size_y = hal.interface.workgroup.size[1] : index |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index |
| %5 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_y, %workgroup_size_y] |
| %6 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_y, %workgroup_size_y] |
| %dim0 = memref.dim %2, %c0 : tensor<?x?xf32> |
| %dim1 = memref.dim %2, %c1 : tensor<?x?xf32> |
| scf.for %arg0 = %5 to %dim0 step %6 { |
| %7 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_id_x, %workgroup_size_x] |
| %8 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%workgroup_count_x, %workgroup_size_x] |
| scf.for %arg1 = %7 to %dim1 step %8 { |
| %9 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg0)[%workgroup_size_y, %dim0] |
| %10 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg1)[%workgroup_size_x, %dim1] |
| %11 = subtensor %2[%arg0, %arg1] [%9, %10] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| %12 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg0)[%workgroup_size_y, %dim0] |
| %13 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg1)[%workgroup_size_x, %dim1] |
| %14 = subtensor %2[%arg0, %arg1] [%12, %13] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| %15 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg0)[%workgroup_size_y, %dim0] |
| %16 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg1)[%workgroup_size_x, %dim1] |
| %17 = subtensor %4[%arg0, %arg1] [%15, %16] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| %18 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg0)[%workgroup_size_y, %dim0] |
| %19 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg1)[%workgroup_size_x, %dim1] |
| %20 = subtensor %4[%arg0, %arg1] [%18, %19] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> |
| %21 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg0)[%workgroup_size_y, %dim0] |
| %22 = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%arg1)[%workgroup_size_x, %dim1] |
| %23 = linalg.init_tensor [%21, %22] : tensor<?x?xf32> |
| %24 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%11, %14, %17, %20 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) outs(%23 : tensor<?x?xf32>) attrs = {__internal_linalg_transform__ = "workgroup"} { |
| ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): // no predecessors |
| %25 = mulf %arg4, %arg5 : f32 |
| %26 = mulf %arg2, %arg3 : f32 |
| %27 = addf %26, %25 : f32 |
| %28 = math.sqrt %27 : f32 |
| linalg.yield %28 : f32 |
| } -> tensor<?x?xf32> |
| flow.dispatch.tensor.store %24, %0, offsets = [%arg0, %arg1], sizes = [%21, %22], strides = [%c1, %c1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:?x?xf32> |
| } |
| } |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @read_only_subtensor |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@ro0[%c0] : memref<?x?xf32> |
| // CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan @io::@ro1[%c0] : memref<?x?xf32> |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@wo2[%c0] : memref<?x?xf32> |
| // CHECK: scf.for |
| // CHECK: scf.for |
| // CHECK-DAG: %[[SV1:.+]] = memref.subview %[[ARG0]] |
| // CHECK-DAG: %[[SV2:.+]] = memref.subview %[[ARG1]] |
| // CHECK-DAG: %[[SV3:.+]] = memref.subview %[[RET0]] |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[SV1]], %[[SV1]], %[[SV2]], %[[SV2]] : |
| // CHECK-SAME: outs(%[[SV3]] : |
| |
| // ----- |
| |
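| // A tensor_reshape of a read-only input should bufferize to linalg.reshape on the input buffer, with no intermediate allocation. |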
| func @reshape_read_only() { |
| %c0 = constant 0 : index |
| %0 = hal.interface.binding.subspan @io::@ro0[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@wo0[%c0] : !flow.dispatch.tensor<writeonly:?xf32> |
| %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %3 = linalg.tensor_reshape %2 [affine_map<(d0, d1) -> (d0, d1)>] |
| : tensor<?x?xf32> into tensor<?xf32> |
| %4 = memref.dim %3, %c0 : tensor<?xf32> |
| %5 = linalg.init_tensor [%4] : tensor<?xf32> |
| %6 = linalg.generic { |
| indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], |
| iterator_types = ["parallel"]} |
| ins(%3 : tensor<?xf32>) outs(%5 : tensor<?xf32>) { |
| ^bb0(%arg0 : f32, %arg1 : f32): |
| %7 = addf %arg0, %arg0 : f32 |
| linalg.yield %7 : f32 |
| } -> tensor<?xf32> |
| flow.dispatch.tensor.store %6, %1, offsets = [], sizes = [], strides = [] : tensor<?xf32> -> !flow.dispatch.tensor<writeonly:?xf32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @wo0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @reshape_read_only |
| // CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan @io::@ro0 |
| // CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan @io::@wo0 |
| // CHECK: %[[RESHAPE:.+]] = linalg.reshape %[[INPUT]] |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[RESHAPE]] : memref<?xf32>) |
| // CHECK-SAME: outs(%[[OUTPUT]] : memref<?xf32>) |
| |
| // ----- |
| |
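| // When the final generic never reads its output tensor, the output buffer can hold the intermediate conv result directly and no allocation is needed. |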
| func @use_buffer_for_operand_when_output_tensor_not_used() { |
| %c0 = constant 0 : index |
| |
| %input_subspan = hal.interface.binding.subspan @interface_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:1x225x225x16xf32> |
| %filter_subspan = hal.interface.binding.subspan @interface_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x3x16x32xf32> |
| %offset_subspan = hal.interface.binding.subspan @interface_io::@ro2[%c0] : !flow.dispatch.tensor<readonly:32xf32> |
| %output_subspan = hal.interface.binding.subspan @interface_io::@wo3[%c0] : !flow.dispatch.tensor<writeonly:1x112x112x32xf32> |
| |
| %input = flow.dispatch.tensor.load %input_subspan, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:1x225x225x16xf32> -> tensor<1x225x225x16xf32> |
| %filter = flow.dispatch.tensor.load %filter_subspan, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:3x3x16x32xf32> -> tensor<3x3x16x32xf32> |
| %offset = flow.dispatch.tensor.load %offset_subspan, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:32xf32> -> tensor<32xf32> |
| |
| %cst = constant 0.0 : f32 |
| %0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32> |
| %1 = linalg.fill(%0, %cst) : tensor<1x112x112x32xf32>, f32 -> tensor<1x112x112x32xf32> |
| %2 = linalg.conv_2d_input_nhwc_filter_hwcf |
| {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} |
| ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>) |
| outs(%1 : tensor<1x112x112x32xf32>) |
| -> tensor<1x112x112x32xf32> |
| %3 = linalg.generic { |
| indexing_maps = [ |
| affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, |
| affine_map<(d0, d1, d2, d3) -> (d3)>, |
| affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], |
| iterator_types = ["parallel", "parallel", "parallel", "parallel"]} |
| ins(%2, %offset: tensor<1x112x112x32xf32>, tensor<32xf32>) |
| outs(%0 : tensor<1x112x112x32xf32>) { |
| ^bb0(%a: f32, %b: f32, %c: f32): |
| %sub = subf %a, %b : f32 |
| linalg.yield %sub : f32 |
| } -> tensor<1x112x112x32xf32> |
| flow.dispatch.tensor.store %3, %output_subspan, offsets = [], sizes = [], strides = [] : tensor<1x112x112x32xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32> |
| return |
| } |
| |
| hal.interface @interface_io attributes {sym_visibility = "private"} { |
| hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @ro2, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @wo3, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| |
| // CHECK: func @use_buffer_for_operand_when_output_tensor_not_used() |
| |
| // CHECK-NOT: memref.alloc |
| // CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan @interface_io::@wo3 |
| // CHECK: linalg.fill(%[[OUTPUT]], %{{.+}}) |
| // CHECK-NEXT: linalg.conv_2d_input_nhwc_filter_hwcf |
| // CHECK-SAME: outs(%[[OUTPUT]] : memref<1x112x112x32xf32>) |
| // CHECK-NEXT: linalg.generic |
| // CHECK-SAME: ins(%[[OUTPUT]], %{{.+}} : memref<1x112x112x32xf32>, memref<32xf32>) |
| // CHECK-SAME: outs(%[[OUTPUT]] : memref<1x112x112x32xf32>) |
| |
| // ----- |
| |
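| // Here the final generic reads its output tensor (the second fill), so the conv result cannot alias the output buffer and needs a memref.alloc. |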
| func @dont_use_buffer_for_operand_when_output_tensor_used() { |
| %c0 = constant 0 : index |
| |
| %input_subspan = hal.interface.binding.subspan @interface_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:1x225x225x16xf32> |
| %filter_subspan = hal.interface.binding.subspan @interface_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x3x16x32xf32> |
| %offset_subspan = hal.interface.binding.subspan @interface_io::@ro2[%c0] : !flow.dispatch.tensor<readonly:32xf32> |
| %output_subspan = hal.interface.binding.subspan @interface_io::@wo3[%c0] : !flow.dispatch.tensor<writeonly:1x112x112x32xf32> |
| |
| %input = flow.dispatch.tensor.load %input_subspan, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:1x225x225x16xf32> -> tensor<1x225x225x16xf32> |
| %filter = flow.dispatch.tensor.load %filter_subspan, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:3x3x16x32xf32> -> tensor<3x3x16x32xf32> |
| %offset = flow.dispatch.tensor.load %offset_subspan, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:32xf32> -> tensor<32xf32> |
| |
| %cst0 = constant 0.0 : f32 |
| %cst1 = constant 1.0 : f32 |
| %0 = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32> |
| %1 = linalg.fill(%0, %cst0) : tensor<1x112x112x32xf32>, f32 -> tensor<1x112x112x32xf32> |
| %2 = linalg.conv_2d_input_nhwc_filter_hwcf |
| {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} |
| ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>) |
| outs(%1 : tensor<1x112x112x32xf32>) |
| -> tensor<1x112x112x32xf32> |
| %3 = linalg.fill(%0, %cst1) : tensor<1x112x112x32xf32>, f32 -> tensor<1x112x112x32xf32> |
| %4 = linalg.generic { |
| indexing_maps = [ |
| affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, |
| affine_map<(d0, d1, d2, d3) -> (d3)>, |
| affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], |
| iterator_types = ["parallel", "parallel", "parallel", "parallel"]} |
| ins(%2, %offset: tensor<1x112x112x32xf32>, tensor<32xf32>) |
| outs(%3 : tensor<1x112x112x32xf32>) { |
| ^bb0(%a: f32, %b: f32, %c: f32): |
| %sub = subf %a, %b : f32 |
| %add = addf %sub, %c : f32 |
| linalg.yield %add : f32 |
| } -> tensor<1x112x112x32xf32> |
| flow.dispatch.tensor.store %4, %output_subspan, offsets = [], sizes = [], strides = [] : tensor<1x112x112x32xf32> -> !flow.dispatch.tensor<writeonly:1x112x112x32xf32> |
| return |
| } |
| |
| hal.interface @interface_io attributes {sym_visibility = "private"} { |
| hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @ro2, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @wo3, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| |
| // CHECK-LABEL: func @dont_use_buffer_for_operand_when_output_tensor_used() |
| |
| // CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan @interface_io::@wo3 |
| // CHECK: %[[ALLOC:.+]] = memref.alloc |
| // CHECK-NEXT: linalg.fill(%[[ALLOC]], %{{.+}}) |
| // CHECK-NEXT: linalg.conv_2d_input_nhwc_filter_hwcf |
| // CHECK-SAME: outs(%[[ALLOC]] : memref<1x112x112x32xf32>) |
| // CHECK-NEXT: linalg.fill(%[[OUTPUT]], %{{.+}}) |
| // CHECK-NEXT: linalg.generic |
| // CHECK-SAME: ins(%[[ALLOC]], %{{.+}} : memref<1x112x112x32xf32>, memref<32xf32>) |
| // CHECK-SAME: outs(%[[OUTPUT]] : memref<1x112x112x32xf32>) |
| |
| // ----- |
| |
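| // A dense constant used as the reduction init should be buffer_cast and copied into the output buffer, which the generic then updates in place. |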
| func @bufferize_cst_output_tensor() { |
| %c0 = constant 0 : index |
| %cst1 = constant dense<-2147483648> : tensor<i32> |
| %zero = constant 0.000000e+00 : f32 |
| %cst5 = constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32> |
| %input = hal.interface.binding.subspan @io::@ro0[%c0] : !flow.dispatch.tensor<readonly:5xf32> |
| %output = hal.interface.binding.subspan @io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:i32> |
| %1 = flow.dispatch.tensor.load %input, offsets=[], sizes=[], strides=[] : !flow.dispatch.tensor<readonly:5xf32> -> tensor<5xf32> |
| %2 = linalg.generic { |
| indexing_maps = [affine_map<(d0) -> (-d0 + 4)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], |
| iterator_types = ["reduction"]} |
| ins(%1, %cst5 : tensor<5xf32>, tensor<5xi32>) |
| outs(%cst1 : tensor<i32>) { |
| ^bb0(%arg0: f32, %arg1: i32, %arg2: i32): |
| %8 = cmpf oeq, %arg0, %zero : f32 |
| %9 = zexti %8 : i1 to i32 |
| %10 = muli %9, %arg1 : i32 |
| %11 = cmpi sgt, %10, %arg2 : i32 |
| %12 = select %11, %10, %arg2 : i32 |
| linalg.yield %12 : i32 |
| } -> tensor<i32> |
| flow.dispatch.tensor.store %2, %output, offsets=[], sizes=[], strides=[] : tensor<i32> -> !flow.dispatch.tensor<writeonly:i32> |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" |
| } |
| |
| // CHECK-LABEL: func @bufferize_cst_output_tensor() |
| |
| // CHECK: %[[CST1:.+]] = constant dense<-2147483648> : tensor<i32> |
| // CHECK: %[[CST5:.+]] = constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32> |
| // CHECK: %[[CAST1:.+]] = memref.buffer_cast %[[CST1]] : memref<i32> |
| // CHECK: %[[CAST5:.+]] = memref.buffer_cast %[[CST5]] : memref<5xi32> |
| // CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan @io::@ro0[%c0] : memref<5xf32> |
| // CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan @io::@wo1[%c0] : memref<i32> |
| // CHECK: linalg.copy(%[[CAST1]], %[[OUTPUT]]) |
| // CHECK: linalg.generic |
| // CHECK-SAME: ins(%[[INPUT]], %[[CAST5]] : memref<5xf32>, memref<5xi32>) |
| // CHECK-SAME: outs(%[[OUTPUT]] : memref<i32>) |
| |
| // ----- |
| |
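| // A tensor.cast between the root batch_matmul and the store should fold away so the matmul writes straight into a subview of the output buffer. |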
| func @cast_followed_by_store() { |
| %c0 = constant 0 : index |
| %cst = constant 0.000000e+00 : f32 |
| %c4 = constant 4 : index |
| %c64 = constant 64 : index |
| %c1 = constant 1 : index |
| %c32 = constant 32 : index |
| %0 = hal.interface.binding.subspan @io::@s0b0_ro_external[%c0] : !flow.dispatch.tensor<readonly:4x32x1024xf32> |
| %1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:4x1024x64xf32> |
| %2 = hal.interface.binding.subspan @io::@s0b2_xw_external[%c0] : !flow.dispatch.tensor<writeonly:4x32x64xf32> |
| %workgroup_id_x = hal.interface.workgroup.id[0] : index |
| %workgroup_count_x = hal.interface.workgroup.count[0] : index |
| %workgroup_id_y = hal.interface.workgroup.id[1] : index |
| %workgroup_count_y = hal.interface.workgroup.count[1] : index |
| %workgroup_id_z = hal.interface.workgroup.id[2] : index |
| %workgroup_count_z = hal.interface.workgroup.count[2] : index |
| scf.for %arg0 = %workgroup_id_z to %c4 step %workgroup_count_z { |
| %3 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y] |
| %4 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_count_y] |
| scf.for %arg1 = %3 to %c32 step %4 { |
| %5 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_x] |
| %6 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_count_x] |
| scf.for %arg2 = %5 to %c64 step %6 { |
| %7 = flow.dispatch.tensor.load %0, offsets = [%arg0, %arg1, 0], sizes = [%c1, %c32, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:4x32x1024xf32> -> tensor<?x?x1024xf32> |
| %8 = flow.dispatch.tensor.load %1, offsets = [%arg0, 0, %arg2], sizes = [%c1, 1024, %c32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:4x1024x64xf32> -> tensor<?x1024x?xf32> |
| %9 = linalg.init_tensor [1, 32, 32] : tensor<1x32x32xf32> |
| %10 = linalg.fill(%9, %cst) {__internal_linalg_transform__ = "workgroup"} : tensor<1x32x32xf32>, f32 -> tensor<1x32x32xf32> |
| %11 = linalg.batch_matmul {__internal_linalg_transform__ = "workgroup", is_root_op} ins(%7, %8 : tensor<?x?x1024xf32>, tensor<?x1024x?xf32>) outs(%10 : tensor<1x32x32xf32>) -> tensor<1x32x32xf32> |
| %12 = tensor.cast %11 : tensor<1x32x32xf32> to tensor<?x?x?xf32> |
| flow.dispatch.tensor.store %12, %2, offsets = [%arg0, %arg1, %arg2], sizes = [%c1, %c32, %c32], strides = [1, 1, 1] : tensor<?x?x?xf32> -> !flow.dispatch.tensor<writeonly:4x32x64xf32> |
| } |
| } |
| } |
| return |
| } |
| |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @s0b1_ro_external, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @s0b2_xw_external, set=0, binding=2, type="StorageBuffer", access="Write|Discard" |
| } |
| |
| // CHECK-LABEL: func @cast_followed_by_store() |
| // CHECK-DAG: %[[ZERO:.+]] = constant 0.000000e+00 : f32 |
| // CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan @io::@s0b0_ro_external[%c0] : memref<4x32x1024xf32> |
| // CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : memref<4x1024x64xf32> |
| // CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan @io::@s0b2_xw_external[%c0] : memref<4x32x64xf32> |
| // CHECK: %[[LHSV:.+]] = memref.subview %[[LHS]] |
| // CHECK: %[[RHSV:.+]] = memref.subview %[[RHS]] |
| // CHECK: %[[RESULTV:.+]] = memref.subview %[[RESULT]] |
| // CHECK: linalg.fill(%[[RESULTV]], %[[ZERO]]) |
| // CHECK: linalg.batch_matmul {{.*}} ins(%[[LHSV]], %[[RHSV]] : {{.*}}) outs(%[[RESULTV]] |
| |
| // ----- |
| |
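| // Inserting a lower-rank tensor should bufferize to a copy into a rank-reduced subview of the destination; storing back to the same readwrite binding adds no extra copy. |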
| func @rank_reduced_subtensor_insert() { |
| %c0 = constant 0 : index |
| %c1 = constant 1 : index |
| %c2 = constant 2 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:?x?xf32> |
| %1 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<readwrite:?x?x?xf32> |
| %2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:?x?xf32> -> tensor<?x?xf32> |
| %3 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readwrite:?x?x?xf32> -> tensor<?x?x?xf32> |
| %4 = memref.dim %3, %c1 : tensor<?x?x?xf32> |
| %5 = memref.dim %3, %c2 : tensor<?x?x?xf32> |
| %6 = subtensor_insert %2 into %3[0, 0, 0] [1, %4, %5] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32> |
| flow.dispatch.tensor.store %6, %1, offsets = [], sizes = [], strides = [] : tensor<?x?x?xf32> -> !flow.dispatch.tensor<readwrite:?x?x?xf32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-LABEL: func @rank_reduced_subtensor_insert() |
| // CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[RET]] |
| // CHECK: linalg.copy(%[[ARG]], %[[SUBVIEW]]) |
| |
| // ----- |
| |
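| // vector.transfer ops on tensors should be rewritten against subviews of the bufferized operands; because the written tensor originates from a different binding (@arg2) than the store target (@ret0), its contents are copied over first. |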
| #map0 = affine_map<(d0, d1, d2) -> (d0, d2)> |
| #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> |
| #map2 = affine_map<(d0, d1, d2) -> (d0, d1)> |
| func @bufferize_transfer_op() { |
| %c3 = constant 3 : index |
| %cst = constant 0.000000e+00 : f32 |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:2x3xf32> |
| %1 = hal.interface.binding.subspan @io::@arg1[%c0] : !flow.dispatch.tensor<readonly:3x4xf32> |
| %2 = hal.interface.binding.subspan @io::@arg2[%c0] : !flow.dispatch.tensor<readonly:2x4xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<writeonly:2x4xf32> |
| %4 = flow.dispatch.tensor.load %0, offsets = [%c0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:2x3xf32> -> tensor<2x3xf32> |
| %5 = flow.dispatch.tensor.load %1, offsets = [%c0, %c0], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:3x4xf32> -> tensor<3x1xf32> |
| %6 = flow.dispatch.tensor.load %2, offsets = [%c0, %c0], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:2x4xf32> -> tensor<2x1xf32> |
| %7 = vector.transfer_read %4[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %8 = vector.transfer_read %4[%c0, %c1], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %9 = vector.transfer_read %4[%c0, %c2], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %10 = vector.transfer_read %4[%c1, %c0], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %11 = vector.transfer_read %4[%c1, %c1], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %12 = vector.transfer_read %4[%c1, %c2], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %13 = vector.transfer_read %5[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<3x1xf32>, vector<1x1xf32> |
| %14 = vector.transfer_read %5[%c1, %c0], %cst {in_bounds = [true, true]} : tensor<3x1xf32>, vector<1x1xf32> |
| %15 = vector.transfer_read %5[%c2, %c0], %cst {in_bounds = [true, true]} : tensor<3x1xf32>, vector<1x1xf32> |
| %16 = vector.transfer_read %6[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<2x1xf32>, vector<1x1xf32> |
| %17 = vector.transfer_read %6[%c1, %c0], %cst {in_bounds = [true, true]} : tensor<2x1xf32>, vector<1x1xf32> |
| %18 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %7, %13, %16 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %19 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %8, %14, %18 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %20 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %9, %15, %19 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %21 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %10, %13, %17 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %22 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %11, %14, %21 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %23 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %12, %15, %22 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %24 = vector.transfer_write %20, %6[%c0, %c0] {in_bounds = [true, true]} : vector<1x1xf32>, tensor<2x1xf32> |
| %25 = vector.transfer_write %23, %24[%c1, %c0] {in_bounds = [true, true]} : vector<1x1xf32>, tensor<2x1xf32> |
| flow.dispatch.tensor.store %25, %3, offsets = [%c0, %c0], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:2x4xf32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg2, set=0, binding=2, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" |
| } |
| // CHECK-LABEL: func @bufferize_transfer_op() |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan @io::@arg1 |
| // CHECK-DAG: %[[ARG2:.+]] = hal.interface.binding.subspan @io::@arg2 |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK-DAG: %[[ARG0V:.+]] = memref.subview %[[ARG0]] |
| // CHECK-DAG: %[[ARG1V:.+]] = memref.subview %[[ARG1]] |
| // CHECK-DAG: %[[ARG2V:.+]] = memref.subview %[[ARG2]] |
| // CHECK-COUNT-6: vector.transfer_read %[[ARG0V]] |
| // CHECK-COUNT-3: vector.transfer_read %[[ARG1V]] |
| // CHECK-COUNT-2: vector.transfer_read %[[ARG2V]] |
| // CHECK: %[[RET0V:.+]] = memref.subview %[[RET0]] |
| // CHECK: linalg.copy(%[[ARG2V]], %[[RET0V]]) |
| // CHECK: vector.transfer_write %{{.+}}, %[[RET0V]] |
| // CHECK: vector.transfer_write %{{.+}}, %[[RET0V]] |
| |
| // ----- |
| |
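| // Same as above, but the tensor being updated is loaded from the store target itself (a readwrite binding), so the transfer_writes happen in place with no copy. |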
| #map0 = affine_map<(d0, d1, d2) -> (d0, d2)> |
| #map1 = affine_map<(d0, d1, d2) -> (d2, d1)> |
| #map2 = affine_map<(d0, d1, d2) -> (d0, d1)> |
| func @bufferize_transfer_op_inplace() { |
| %c3 = constant 3 : index |
| %cst = constant 0.000000e+00 : f32 |
| %c0 = constant 0 : index |
| %c2 = constant 2 : index |
| %c1 = constant 1 : index |
| %0 = hal.interface.binding.subspan @io::@arg0[%c0] : !flow.dispatch.tensor<readonly:2x3xf32> |
| %1 = hal.interface.binding.subspan @io::@arg1[%c0] : !flow.dispatch.tensor<readonly:3x4xf32> |
| %3 = hal.interface.binding.subspan @io::@ret0[%c0] : !flow.dispatch.tensor<readwrite:2x4xf32> |
| %4 = flow.dispatch.tensor.load %0, offsets = [%c0, %c0], sizes = [%c1, %c3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:2x3xf32> -> tensor<2x3xf32> |
| %5 = flow.dispatch.tensor.load %1, offsets = [%c0, %c0], sizes = [%c3, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:3x4xf32> -> tensor<3x1xf32> |
| %6 = flow.dispatch.tensor.load %3, offsets = [%c0, %c0], sizes = [%c1, %c1], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:2x4xf32> -> tensor<2x1xf32> |
| %7 = vector.transfer_read %4[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %8 = vector.transfer_read %4[%c0, %c1], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %9 = vector.transfer_read %4[%c0, %c2], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %10 = vector.transfer_read %4[%c1, %c0], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %11 = vector.transfer_read %4[%c1, %c1], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %12 = vector.transfer_read %4[%c1, %c2], %cst {in_bounds = [true, true]} : tensor<2x3xf32>, vector<1x1xf32> |
| %13 = vector.transfer_read %5[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<3x1xf32>, vector<1x1xf32> |
| %14 = vector.transfer_read %5[%c1, %c0], %cst {in_bounds = [true, true]} : tensor<3x1xf32>, vector<1x1xf32> |
| %15 = vector.transfer_read %5[%c2, %c0], %cst {in_bounds = [true, true]} : tensor<3x1xf32>, vector<1x1xf32> |
| %16 = vector.transfer_read %6[%c0, %c0], %cst {in_bounds = [true, true]} : tensor<2x1xf32>, vector<1x1xf32> |
| %17 = vector.transfer_read %6[%c1, %c0], %cst {in_bounds = [true, true]} : tensor<2x1xf32>, vector<1x1xf32> |
| %18 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %7, %13, %16 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %19 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %8, %14, %18 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %20 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %9, %15, %19 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %21 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %10, %13, %17 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %22 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %11, %14, %21 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %23 = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} %12, %15, %22 : vector<1x1xf32>, vector<1x1xf32> into vector<1x1xf32> |
| %24 = vector.transfer_write %20, %6[%c0, %c0] {in_bounds = [true, true]} : vector<1x1xf32>, tensor<2x1xf32> |
| %25 = vector.transfer_write %23, %24[%c1, %c0] {in_bounds = [true, true]} : vector<1x1xf32>, tensor<2x1xf32> |
| flow.dispatch.tensor.store %25, %3, offsets = [%c0, %c0], sizes = [%c1, %c1], strides = [%c1, %c1] : tensor<2x1xf32> -> !flow.dispatch.tensor<readwrite:2x4xf32> |
| return |
| } |
| hal.interface @io attributes {sym_visibility = "private"} { |
| hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" |
| hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" |
| hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Read|Write" |
| } |
| // CHECK-LABEL: func @bufferize_transfer_op_inplace() |
| // CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan @io::@arg0 |
| // CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan @io::@arg1 |
| // CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan @io::@ret0 |
| // CHECK-DAG: %[[ARG0V:.+]] = memref.subview %[[ARG0]] |
| // CHECK-DAG: %[[ARG1V:.+]] = memref.subview %[[ARG1]] |
| // CHECK-DAG: %[[RET0V:.+]] = memref.subview %[[RET0]] |
| // CHECK-COUNT-6: vector.transfer_read %[[ARG0V]] |
| // CHECK-COUNT-3: vector.transfer_read %[[ARG1V]] |
| // CHECK-COUNT-2: vector.transfer_read %[[RET0V]] |
| // CHECK-NOT: linalg.copy |
| // CHECK: vector.transfer_write %{{.+}}, %[[RET0V]] |
| // CHECK: vector.transfer_write %{{.+}}, %[[RET0V]] |