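// Static 3x5 * 5x3 matmul accumulating into a 3x3 init tensor. This is the
// payload IR that the transform-dialect dispatch and codegen specs referenced
// by the RUN lines below are applied to.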
!A_size = tensor<3x5xf32>
!B_size = tensor<5x3xf32>
!C_size = tensor<3x3xf32>
func.func @matmul_static(
%A : !A_size, %B : !B_size, %C : !C_size) -> !C_size {
%0 = linalg.matmul ins(%A, %B : !A_size, !B_size)
outs(%C : !C_size) -> !C_size
return %0 : !C_size
}
// RUN: iree-opt %s --iree-hal-target-backends=llvm-cpu \
// RUN: --iree-abi-transformation-pipeline \
// RUN: --iree-flow-transformation-pipeline \
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/matmul_dispatch_spec.mlir | \
// RUN: FileCheck %s --check-prefixes=DISPATCH
// TODO: make this test drop transform dialect usage at the flow level and use:
// --iree-flow-transformation-pipeline --iree-flow-convert-region-to-workgroups
// At the moment, the third flow.dispatch.tensor.load shows as readonly instead of readwrite.
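// The DISPATCH checks below verify that dispatch region formation driven by
// matmul_dispatch_spec.mlir produces a dispatch containing the whole matmul.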
// DISPATCH: flow.executable private @matmul_static_dispatch_0 {
// DISPATCH: flow.executable.export public @matmul_static_dispatch_0_matmul_3x3x5
// DISPATCH: builtin.module {
// DISPATCH: func.func @matmul_static_dispatch_0_matmul_3x3x5
// DISPATCH: flow.dispatch.tensor.load {{.*}}, offsets = [0, 0], sizes = [3, 5], strides = [1, 1] : !flow.dispatch.tensor<readonly:3x5xf32> -> tensor<3x5xf32>
// DISPATCH: flow.dispatch.tensor.load {{.*}}, offsets = [0, 0], sizes = [5, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:5x3xf32> -> tensor<5x3xf32>
// DISPATCH: flow.dispatch.tensor.load {{.*}}, offsets = [0, 0], sizes = [3, 3], strides = [1, 1] : !flow.dispatch.tensor<readwrite:3x3xf32> -> tensor<3x3xf32>
// DISPATCH: linalg.matmul ins({{.*}} : tensor<3x5xf32>, tensor<5x3xf32>) outs({{.*}} : tensor<3x3xf32>) -> tensor<3x3xf32>
// DISPATCH: flow.dispatch.tensor.store {{.*}} offsets = [0, 0], sizes = [3, 3], strides = [1, 1] : tensor<3x3xf32> -> !flow.dispatch.tensor<readwrite:3x3xf32>
// DISPATCH: return
// RUN: iree-opt %s --iree-hal-target-backends=llvm-cpu \
// RUN: --iree-abi-transformation-pipeline \
// RUN: --iree-flow-transformation-pipeline \
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/matmul_dispatch_spec.mlir \
// RUN: --iree-stream-transformation-pipeline \
// RUN: --iree-hal-configuration-pipeline | \
// RUN: iree-opt --pass-pipeline='hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target))' \
// RUN: --iree-codegen-llvmcpu-use-transform-dialect=%p/matmul_codegen_spec.mlir | \
// RUN: FileCheck %s --check-prefixes=CODEGEN
// CODEGEN: hal.executable private @matmul_static_dispatch_0 {
// CODEGEN: hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
//
// The signature of the hal.executable.export region is subject to conventions
// at the flow level. These conventions are materialized in IR, e.g. into
// stream.cmd.dispatch, before codegen gets invoked.
// As a consequence, the tile_size/num_threads/workgroup_count passed to
// transform.tile_to_foreach_thread must be consistent with these conventions.
// For now we follow our own convention: sizes are static and the only bbArg
// present is !hal.device.
//
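// A minimal, hypothetical sketch of what a codegen spec along the lines of
// matmul_codegen_spec.mlir could contain under this convention (the actual
// spec lives next to this test; exact op spellings may differ):
//
//   transform.sequence failures(propagate) {
//   ^bb1(%variant_op: !pdl.operation):
//     %matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op
//     // Tile to 2 threads along the parallel dimension; this surfaces as the
//     // `arith.constant 2 : index` workgroup count checked below.
//     %foreach_thread, %tiled =
//       transform.structured.tile_to_foreach_thread_op %matmul num_threads [2]
//   }
//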
// CODEGEN: hal.executable.export public @matmul_static_dispatch_0_matmul_3x3x5 ordinal(0) layout(#{{.*}}) attributes {translation_info = #translation} {
// CODEGEN: ^bb0(%{{.*}}: !hal.device):
// CODEGEN: arith.constant 2 : index
// CODEGEN: arith.constant 1 : index
// CODEGEN: hal.return %{{.*}}, %{{.*}}, %{{.*}} : index, index, index
// CODEGEN: }
// CODEGEN: builtin.module {
// CODEGEN: func.func @matmul_static_dispatch_0_matmul_3x3x5() {
// CODEGEN: arith.constant 0 : index
// CODEGEN: hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset({{.*}}) alignment(64) : memref<3x5xf32>
// CODEGEN: memref.assume_alignment %{{.*}}, 64 : memref<3x5xf32>
// CODEGEN: hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset({{.*}}) alignment(64) : memref<5x3xf32>
// CODEGEN: memref.assume_alignment %{{.*}}, 64 : memref<5x3xf32>
// CODEGEN: hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset({{.*}}) alignment(64) : memref<3x3xf32>
// CODEGEN: memref.assume_alignment %{{.*}}, 64 : memref<3x3xf32>
// CODEGEN: %[[workgroup_id_x:.*]] = hal.interface.workgroup.id[0] : index
// CODEGEN: affine.apply {{.*}}()[%workgroup_id_x]
// CODEGEN: memref.subview %{{.*}}[%{{.*}}, 0] [%{{.*}}, 5] [1, 1] : memref<3x5xf32> to memref<?x5xf32, #{{.*}}>
// CODEGEN: memref.subview %{{.*}}[%{{.*}}, 0] [%{{.*}}, 3] [1, 1] : memref<3x3xf32> to memref<?x3xf32, #{{.*}}>
// CODEGEN: linalg.matmul ins(%{{.*}}, %{{.*}} : memref<?x5xf32, #map3>, memref<5x3xf32>) outs(%{{.*}} : memref<?x3xf32, #{{.*}}>)
// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/matmul_dispatch_spec.mlir \
// RUN: --iree-codegen-llvmcpu-use-transform-dialect=%p/matmul_codegen_spec.mlir | \
// RUN: iree-run-module --entry_function=matmul_static \
// RUN: --function_input="3x5xf32=1 1 1 1 1 1 1 1 1 1 1 1 1 1 1" \
// RUN: --function_input="5x3xf32=1 1 1 1 1 1 1 1 1 1 1 1 1 1 1" \
// RUN: --function_input="3x3xf32=0 0 0 0 0 0 0 0 0"| \
// RUN: FileCheck %s --check-prefixes=EXEC
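// Both input operands are filled with 1s and the accumulator with 0s, so each
// output element is a dot product of five 1s, i.e. 5.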
// EXEC: 3x3xf32=[5 5 5][5 5 5][5 5 5]
// RUN: iree-compile --iree-hal-target-backends=llvm-cpu \
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/matmul_tiled_dispatch_spec.mlir \
// RUN: --iree-flow-export-benchmark-funcs %s | \
// RUN: iree-benchmark-module --device=local-task | \
// RUN: FileCheck %s --check-prefixes=BENCHMARK-MODULE
// When running iree-benchmark-module, we only check that the exported function exists.
// BENCHMARK-MODULE: matmul_static