Add CPU matmul benchmark test (#10174)

This test illustrates how a simple matmul example can be compiled with
the transform dialect and then benchmarked. Parameter search will use
the commands that are used in this test.
diff --git a/tests/transform_dialect/cpu/BUILD b/tests/transform_dialect/cpu/BUILD
new file mode 100644
index 0000000..7ea3ee5
--- /dev/null
+++ b/tests/transform_dialect/cpu/BUILD
@@ -0,0 +1,36 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# End-to-end CPU tests that compile with transform dialect specs and benchmark.
+
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+
+package(
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+iree_lit_test_suite(
+    name = "lit",
+    srcs = ["matmul.mlir"],
+    cfg = "//tests:lit.cfg.py",
+    # Transform dialect spec files are MLIR files that describe a transformation.
+    # matmul.mlir references the spec through %p, so it must be listed as data.
+    data = [
+        "matmul_dispatch_spec.mlir",
+    ],
+    tags = [  # NOTE(review): presumably opts out of sanitizer configs; reason not stated - confirm.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+    ],
+    tools = [  # Binaries invoked by the lit command pipeline in matmul.mlir.
+        "//tools:iree-benchmark-module",
+        "//tools:iree-compile",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/tests/transform_dialect/cpu/CMakeLists.txt b/tests/transform_dialect/cpu/CMakeLists.txt
new file mode 100644
index 0000000..93207ff
--- /dev/null
+++ b/tests/transform_dialect/cpu/CMakeLists.txt
@@ -0,0 +1,31 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# tests/transform_dialect/cpu/BUILD                                            #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "matmul.mlir"
+  TOOLS
+    FileCheck
+    iree-benchmark-module
+    iree-compile
+  DATA
+    matmul_dispatch_spec.mlir
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/tests/transform_dialect/cpu/matmul.mlir b/tests/transform_dialect/cpu/matmul.mlir
new file mode 100644
index 0000000..cefaa91
--- /dev/null
+++ b/tests/transform_dialect/cpu/matmul.mlir
@@ -0,0 +1,17 @@
+// RUN: iree-compile --iree-hal-target-backends=llvm-cpu \
+// RUN:     --iree-flow-dispatch-use-transform-dialect=%p/matmul_dispatch_spec.mlir \
+// RUN:     --iree-flow-export-benchmark-funcs %s | \
+// RUN: iree-benchmark-module --device=local-task | \
+// RUN: FileCheck %s
+
+!A_size = tensor<50x100xf32>  // type of the first matmul input (%A)
+!B_size = tensor<100x50xf32>  // type of the second matmul input (%B)
+!C_size = tensor<50x50xf32>  // type of the matmul output operand (%C) and of the result
+
+// CHECK: tile_matmul_with_constant
+func.func @tile_matmul_with_constant(  // NOTE(review): despite the name, every operand is a function argument; no constant is used.
+    %A : !A_size, %B : !B_size, %C : !C_size) -> !C_size {
+  %0 = linalg.matmul ins(%A, %B : !A_size, !B_size)  // the op the dispatch spec matches on
+                     outs(%C : !C_size) -> !C_size  // %C is the output operand; the result has the same type
+  return %0 : !C_size
+}
diff --git a/tests/transform_dialect/cpu/matmul_dispatch_spec.mlir b/tests/transform_dialect/cpu/matmul_dispatch_spec.mlir
new file mode 100644
index 0000000..0c33097
--- /dev/null
+++ b/tests/transform_dialect/cpu/matmul_dispatch_spec.mlir
@@ -0,0 +1,9 @@
+transform.with_pdl_patterns {  // spec passed to iree-compile by matmul.mlir
+^bb0(%arg0: !pdl.operation):
+  transform.structured.canonicalized_sequence %arg0 {  // steps below: match, tile, convert
+  ^bb1(%arg1: !pdl.operation):
+    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1  // select linalg.matmul ops within %arg1
+    %foreach_op, %tiled_op = transform.structured.tile_to_foreach_thread_op %0 num_threads [10, 20]  // tile over a 10 x 20 thread count
+    %dispatch_op = transform.iree.foreach_thread_to_flow %foreach_op  // presumably rewrites the foreach_thread op into a flow dispatch - confirm
+  }
+}