Add e2e tests for linalg_ext.scan op (#8073)

This patch adds end-to-end tests for the iree_linalg_ext.scan op.
The tests cover inclusive and exclusive scans, add and mul combiners,
and 1-D and 2-D inputs scanned along different dimensions.
diff --git a/iree/test/e2e/linalg_ext_ops/BUILD b/iree/test/e2e/linalg_ext_ops/BUILD
index fc0b0e6..b631d71 100644
--- a/iree/test/e2e/linalg_ext_ops/BUILD
+++ b/iree/test/e2e/linalg_ext_ops/BUILD
@@ -19,6 +19,7 @@
         # keep sorted
         [
             "reverse.mlir",
+            "scan.mlir",
         ],
         include = ["*.mlir"],
         exclude = [
@@ -42,6 +43,7 @@
         # keep sorted
         [
             "reverse.mlir",
+            "scan.mlir",
         ],
         include = ["*.mlir"],
         exclude = [
@@ -57,6 +59,7 @@
         # keep sorted
         [
             "reverse.mlir",
+            "scan.mlir",
         ],
         include = ["*.mlir"],
         exclude = [
@@ -72,6 +75,7 @@
         # keep sorted
         [
             "reverse.mlir",
+            "scan.mlir",
         ],
         include = ["*.mlir"],
         exclude = [
diff --git a/iree/test/e2e/linalg_ext_ops/CMakeLists.txt b/iree/test/e2e/linalg_ext_ops/CMakeLists.txt
index 7d46dba..5473006 100644
--- a/iree/test/e2e/linalg_ext_ops/CMakeLists.txt
+++ b/iree/test/e2e/linalg_ext_ops/CMakeLists.txt
@@ -15,6 +15,7 @@
     check_cuda
   SRCS
     "reverse.mlir"
+    "scan.mlir"
   TARGET_BACKEND
     "cuda"
   DRIVER
@@ -32,6 +33,7 @@
     check_dylib-llvm-aot_dylib
   SRCS
     "reverse.mlir"
+    "scan.mlir"
   TARGET_BACKEND
     "dylib-llvm-aot"
   DRIVER
@@ -43,6 +45,7 @@
     check_vmvx_vmvx
   SRCS
     "reverse.mlir"
+    "scan.mlir"
   TARGET_BACKEND
     "vmvx"
   DRIVER
@@ -54,6 +57,7 @@
     check_vulkan-spirv_vulkan
   SRCS
     "reverse.mlir"
+    "scan.mlir"
   TARGET_BACKEND
     "vulkan-spirv"
   DRIVER
diff --git a/iree/test/e2e/linalg_ext_ops/scan.mlir b/iree/test/e2e/linalg_ext_ops/scan.mlir
new file mode 100644
index 0000000..a63b577
--- /dev/null
+++ b/iree/test/e2e/linalg_ext_ops/scan.mlir
@@ -0,0 +1,111 @@
+func @scan_1d_dim0_inclusive_sum() {
+  // Inclusive prefix sum over a rank-1 f32 tensor; the scalar operand is 0.0.
+  %src = util.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]> : tensor<6xf32>
+  %zero = arith.constant 0.0 : f32
+  %dst = linalg.init_tensor [6] : tensor<6xf32>
+  %result = iree_linalg_ext.scan
+         dimension(0) inclusive(true)
+         ins(%src, %zero : tensor<6xf32>, f32)
+         outs(%dst : tensor<6xf32>) {
+           ^bb0(%lhs : f32, %rhs : f32):
+             %acc = arith.addf %lhs, %rhs : f32
+             iree_linalg_ext.yield %acc : f32
+         } -> tensor<6xf32>
+
+  // Each expected element is the sum of all inputs up to and including it.
+  check.expect_almost_eq_const(
+      %result,
+      dense<[1.0, 3.0, 6.0, 10.0, 15.0, 21.0]> : tensor<6xf32>
+  ) : tensor<6xf32>
+  return
+}
+
+func @scan_1d_dim0_exclusive_sum() {
+  // Exclusive prefix sum over a rank-1 f32 tensor; the scalar operand is 0.0.
+  %src = util.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]> : tensor<6xf32>
+  %zero = arith.constant 0.0 : f32
+  %dst = linalg.init_tensor [6] : tensor<6xf32>
+  %result = iree_linalg_ext.scan
+         dimension(0) inclusive(false)
+         ins(%src, %zero : tensor<6xf32>, f32)
+         outs(%dst : tensor<6xf32>) {
+           ^bb0(%lhs : f32, %rhs : f32):
+             %acc = arith.addf %lhs, %rhs : f32
+             iree_linalg_ext.yield %acc : f32
+         } -> tensor<6xf32>
+
+  // Each expected element is the sum of the inputs strictly before it; the first is 0.0.
+  check.expect_almost_eq_const(
+      %result,
+      dense<[0.0, 1.0, 3.0, 6.0, 10.0, 15.0]> : tensor<6xf32>
+  ) : tensor<6xf32>
+  return
+}
+
+func @scan_1d_dim0_inclusive_mul() {
+  // Inclusive running product over a rank-1 i32 tensor; the scalar operand is 1.
+  %src = util.unfoldable_constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32>
+  %one = arith.constant 1 : i32
+  %dst = linalg.init_tensor [6] : tensor<6xi32>
+  %result = iree_linalg_ext.scan
+         dimension(0) inclusive(true)
+         ins(%src, %one : tensor<6xi32>, i32)
+         outs(%dst : tensor<6xi32>) {
+           ^bb0(%lhs : i32, %rhs : i32):
+             %prod = arith.muli %lhs, %rhs : i32
+             iree_linalg_ext.yield %prod : i32
+         } -> tensor<6xi32>
+
+  // Each expected element is the product of all inputs up to and including it.
+  check.expect_eq_const(
+      %result,
+      dense<[1, 2, 6, 24, 120, 720]> : tensor<6xi32>
+  ) : tensor<6xi32>
+  return
+}
+
+func @scan_2d_dim0_inclusive_sum() {
+  // Inclusive sum along dimension 0 of a 2x3 i32 tensor; the scalar operand is 0.
+  %src = util.unfoldable_constant dense<[[1, 2, 3],
+                                         [4, 5, 6]]> : tensor<2x3xi32>
+  %zero = arith.constant 0 : i32
+  %dst = linalg.init_tensor [2, 3] : tensor<2x3xi32>
+  %result = iree_linalg_ext.scan
+         dimension(0) inclusive(true)
+         ins(%src, %zero : tensor<2x3xi32>, i32)
+         outs(%dst : tensor<2x3xi32>) {
+           ^bb0(%lhs : i32, %rhs : i32):
+             %acc = arith.addi %lhs, %rhs : i32
+             iree_linalg_ext.yield %acc : i32
+         } -> tensor<2x3xi32>
+
+  // Values accumulate down each column: expected row 1 is input row 0 + input row 1.
+  check.expect_eq_const(
+      %result,
+      dense<[[1, 2, 3], [5, 7, 9]]> : tensor<2x3xi32>
+  ) : tensor<2x3xi32>
+  return
+}
+
+func @scan_2d_dim1_inclusive_sum() {
+  // Inclusive sum along dimension 1 of a 2x3 i32 tensor; the scalar operand is 0.
+  %src = util.unfoldable_constant dense<[[1, 2, 3],
+                                         [4, 5, 6]]> : tensor<2x3xi32>
+  %zero = arith.constant 0 : i32
+  %dst = linalg.init_tensor [2, 3] : tensor<2x3xi32>
+  %result = iree_linalg_ext.scan
+         dimension(1) inclusive(true)
+         ins(%src, %zero : tensor<2x3xi32>, i32)
+         outs(%dst : tensor<2x3xi32>) {
+           ^bb0(%lhs : i32, %rhs : i32):
+             %acc = arith.addi %lhs, %rhs : i32
+             iree_linalg_ext.yield %acc : i32
+         } -> tensor<2x3xi32>
+
+  // Values accumulate across each row: every expected row is the prefix sum of that input row.
+  check.expect_eq_const(
+      %result,
+      dense<[[1, 3, 6], [4, 9, 15]]> : tensor<2x3xi32>
+  ) : tensor<2x3xi32>
+  return
+}