[GPU] Enable tensor.pack e2e tests for rocm backend. (#17587)
The SplitFullPartialTransferPass and WorkgroupSpecializationPass are no longer needed because we have
much more mature vector lowering. They were added a long time ago.
Progress on https://github.com/iree-org/iree/issues/17186
---------
Signed-off-by: hanhanW <hanhan0912@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
index 8a7132a..944d2aa 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
@@ -811,23 +811,22 @@
void addGPUPackUnPackPasses(OpPassManager &funcPassManager) {
tileAndDistributeToWorkgroup(funcPassManager);
-
- funcPassManager.addPass(createCanonicalizerPass());
- funcPassManager.addPass(createWorkgroupSpecializationPass());
funcPassManager.addPass(createCanonicalizerPass());
funcPassManager.addPass(createCSEPass());
funcPassManager.addPass(createGPUTensorTilePass());
+ funcPassManager.addPass(createCanonicalizerPass());
+ funcPassManager.addPass(createCSEPass());
+
funcPassManager.addPass(
createDecomposePackUnPackOpsPass(/*tileOuterToOne=*/true));
+ funcPassManager.addPass(createCanonicalizerPass());
+ funcPassManager.addPass(createCSEPass());
addGPUVectorizationPasses(funcPassManager);
addBufferizePasses(funcPassManager);
- // distribute foreach threads
funcPassManager.addPass(createGPUDistributePass());
-
- funcPassManager.addPass(createSplitFullPartialTransferPass("linalg-copy"));
}
void addGPUSimpleDistributePassPipeline(OpPassManager &funcPassManager) {
diff --git a/tests/e2e/tensor_ops/CMakeLists.txt b/tests/e2e/tensor_ops/CMakeLists.txt
index 3a98ced..52b7932 100644
--- a/tests/e2e/tensor_ops/CMakeLists.txt
+++ b/tests/e2e/tensor_ops/CMakeLists.txt
@@ -93,3 +93,34 @@
)
### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+
+if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx")
+
+unset(IREE_HIP_TEST_COMPILER_FLAGS)
+list(APPEND IREE_HIP_TEST_COMPILER_FLAGS
+ "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}"
+)
+
+iree_check_single_backend_test_suite(
+ NAME
+ check_hip_stream
+ SRCS
+ "pack.mlir"
+ "pack_i8.mlir"
+ TARGET_BACKEND
+ "rocm"
+ DRIVER
+ "hip"
+ COMPILER_FLAGS
+ ${IREE_HIP_TEST_COMPILER_FLAGS}
+ RUNNER_ARGS
+ "--hip_use_streams=true"
+ LABELS
+ "noasan"
+ "nomsan"
+ "notsan"
+ "noubsan"
+ "requires-gpu-amd"
+)
+
+endif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx")