[Transform] Drop transform stub test file (#11669)

This has been moved to the iree-samples repository where it is a better
fit.
diff --git a/tests/transform_dialect/cuda/benchmark_linalg_reductions.stub.mlir b/tests/transform_dialect/cuda/benchmark_linalg_reductions.stub.mlir
deleted file mode 100644
index e4d0df6..0000000
--- a/tests/transform_dialect/cuda/benchmark_linalg_reductions.stub.mlir
+++ /dev/null
@@ -1,131 +0,0 @@
-// Example usages:
-//
-// With the IREE pipeline:
-//
-// cat tests/transform_dialect/cuda/benchmark_linalg_reductions.stub.mlir | \
-// sed "s/\${SZ1}/1024/g" | \
-// sed "s/\${SZ2}/1024/g" | \
-// iree-compile - --iree-hal-target-backends=cuda --iree-hal-benchmark-dispatch-repeat-count=5 | \
-// nvprof --print-gpu-trace iree-run-module --entry_function=reduction_2d_static --device=cuda --function_input="1024x1024xf32=1" 2>&1 | \
-// grep reduction
-//
-// With the transform dialect:
-//
-// cat tests/transform_dialect/cuda/benchmark_linalg_reductions.stub.mlir | \
-// sed "s/\${SZ1}/1024/g" | \
-// sed "s/\${SZ2}/1024/g" | \
-// iree-compile - --iree-hal-target-backends=cuda --iree-codegen-llvmgpu-enable-transform-dialect-jit --iree-hal-benchmark-dispatch-repeat-count=5 | \
-// nvprof --print-gpu-trace iree-run-module --entry_function=reduction_2d_static --device=cuda --function_input="1024x1024xf32=1" 2>&1 | \
-// grep reduction
-
-!in_tensor_reduction_2d_static_t = tensor<${SZ1}x${SZ2}xf32>
-!out_tensor_reduction_2d_static_t = tensor<${SZ1}xf32>
-
-func.func @reduction_2d_static(%arg : !in_tensor_reduction_2d_static_t) -> (!out_tensor_reduction_2d_static_t) {
-  %c0 = arith.constant 0 : index
-  %cst = arith.constant -0.000000e+00 : f32
-  
-  %d0 = tensor.dim %arg, %c0 : !in_tensor_reduction_2d_static_t
-  %0 = tensor.empty() : !out_tensor_reduction_2d_static_t
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_reduction_2d_static_t) ->   !out_tensor_reduction_2d_static_t
-  %2 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%arg : !in_tensor_reduction_2d_static_t) outs(%1 : !out_tensor_reduction_2d_static_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %3 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %3 : f32
-      } -> !out_tensor_reduction_2d_static_t
-  return %2 : !out_tensor_reduction_2d_static_t
-}
-
-func.func @reduction_2d_elementwise_static(%arg : !in_tensor_reduction_2d_static_t) -> (!in_tensor_reduction_2d_static_t) {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %cst = arith.constant -0.000000e+00 : f32
-  
-  %d0 = tensor.dim %arg, %c0 : !in_tensor_reduction_2d_static_t
-  %d1 = tensor.dim %arg, %c1 : !in_tensor_reduction_2d_static_t
-  %0 = tensor.empty() : !out_tensor_reduction_2d_static_t
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_reduction_2d_static_t) ->   !out_tensor_reduction_2d_static_t
-  %2 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%arg : !in_tensor_reduction_2d_static_t) outs(%1 : !out_tensor_reduction_2d_static_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %3 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %3 : f32
-      } -> !out_tensor_reduction_2d_static_t
-
-  %cst_0 = arith.constant 3.840000e+02 : f32
-  %i = tensor.empty() : !in_tensor_reduction_2d_static_t
-  %3 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]}
-    ins(%2 : !out_tensor_reduction_2d_static_t) outs(%i : !in_tensor_reduction_2d_static_t) {
-      ^bb0(%arg0: f32, %arg1: f32):
-        %12 = arith.divf %arg0, %cst_0 : f32
-        linalg.yield %12 : f32
-      } -> !in_tensor_reduction_2d_static_t
-
-  return %3 : !in_tensor_reduction_2d_static_t
-}
-
-!in_tensor_reduction_2d_dynamic_t = tensor<?x?xf32>
-!out_tensor_reduction_2d_dynamic_t = tensor<?xf32>
-
-func.func @reduction_2d_dynamic(%arg : !in_tensor_reduction_2d_dynamic_t) -> (!out_tensor_reduction_2d_dynamic_t) {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 0 : index
-  %cst = arith.constant -0.000000e+00 : f32
-  
-  %d0 = tensor.dim %arg, %c0 : !in_tensor_reduction_2d_dynamic_t
-  %d1 = tensor.dim %arg, %c1 : !in_tensor_reduction_2d_dynamic_t
-  %0 = tensor.empty(%d0) : !out_tensor_reduction_2d_dynamic_t
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_reduction_2d_dynamic_t) ->   !out_tensor_reduction_2d_dynamic_t
-  %2 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%arg : !in_tensor_reduction_2d_dynamic_t) outs(%1 : !out_tensor_reduction_2d_dynamic_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %3 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %3 : f32
-      } -> !out_tensor_reduction_2d_dynamic_t
-  return %2 : !out_tensor_reduction_2d_dynamic_t
-}
-
-func.func @reduction_2d_elementwise_dynamic(%arg : !in_tensor_reduction_2d_dynamic_t) -> (!in_tensor_reduction_2d_dynamic_t) {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %cst = arith.constant -0.000000e+00 : f32
-  
-  %d0 = tensor.dim %arg, %c0 : !in_tensor_reduction_2d_dynamic_t
-  %d1 = tensor.dim %arg, %c1 : !in_tensor_reduction_2d_dynamic_t
-  %0 = tensor.empty(%d0) : !out_tensor_reduction_2d_dynamic_t
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_reduction_2d_dynamic_t) ->   !out_tensor_reduction_2d_dynamic_t
-  %2 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%arg : !in_tensor_reduction_2d_dynamic_t) outs(%1 : !out_tensor_reduction_2d_dynamic_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %3 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %3 : f32
-      } -> !out_tensor_reduction_2d_dynamic_t
-
-  %cst_0 = arith.constant 3.840000e+02 : f32
-  %i = tensor.empty(%d0, %d1) : !in_tensor_reduction_2d_dynamic_t
-  %3 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]}
-    ins(%2 : !out_tensor_reduction_2d_dynamic_t) outs(%i : !in_tensor_reduction_2d_dynamic_t) {
-      ^bb0(%arg0: f32, %arg1: f32):
-        %12 = arith.divf %arg0, %cst_0 : f32
-        linalg.yield %12 : f32
-      } -> !in_tensor_reduction_2d_dynamic_t
-
-  return %3 : !in_tensor_reduction_2d_dynamic_t
-}