[Test] Add onnx_ops test suites with O2/O3 optimization level. (#21838)
This helps catch failures earlier, because users typically compile with O2/O3
for model performance. The main difference is that aggressive fusion is
enabled starting from O2.
---------
Signed-off-by: hanhanW <hanhan0912@gmail.com>
diff --git a/.github/workflows/pkgci_test_onnx.yml b/.github/workflows/pkgci_test_onnx.yml
index 6df31d1..25493d6 100644
--- a/.github/workflows/pkgci_test_onnx.yml
+++ b/.github/workflows/pkgci_test_onnx.yml
@@ -26,33 +26,41 @@
matrix:
include:
# CPU
- - name: cpu_llvm_sync
- config-file: onnx_ops_cpu_llvm_sync.json
+ - name: cpu_llvm_sync_O0
+ config-file: onnx_ops_cpu_llvm_sync_O0.json
+ numprocesses: auto
+ runs-on: ubuntu-24.04
+ - name: cpu_llvm_sync_O2
+ config-file: onnx_ops_cpu_llvm_sync_O2.json
numprocesses: auto
runs-on: ubuntu-24.04
# AMD GPU
- - name: amdgpu_hip_rdna3
+ - name: amdgpu_hip_rdna3_O0
numprocesses: 1
- config-file: onnx_ops_gpu_hip_rdna3.json
+ config-file: onnx_ops_gpu_hip_rdna3_O0.json
runs-on: nodai-amdgpu-w7900-x86-64
- - name: amdgpu_vulkan
+ - name: amdgpu_hip_rdna3_O3
+ numprocesses: 1
+ config-file: onnx_ops_gpu_hip_rdna3_O3.json
+ runs-on: nodai-amdgpu-w7900-x86-64
+ - name: amdgpu_vulkan_O0
numprocesses: 4
- config-file: onnx_ops_gpu_vulkan.json
+ config-file: onnx_ops_gpu_vulkan_O0.json
runs-on: nodai-amdgpu-w7900-x86-64
# NVIDIA GPU
# TODO(#18238): migrate to new runner cluster
- # - name: nvidiagpu_cuda
- # config-file: onnx_ops_gpu_cuda.json
+ # - name: nvidiagpu_cuda_O0
+ # config-file: onnx_ops_gpu_cuda_O0.json
# numprocesses: 4
# runs-on:
# - self-hosted # must come first
# - environment=prod
# - gpu # TODO(scotttodd): qualify further with vendor/model
# - os-family=Linux
- # - name: nvidiagpu_vulkan
- # config-file: onnx_ops_gpu_vulkan.json
+ # - name: nvidiagpu_vulkan_O0
+ # config-file: onnx_ops_gpu_vulkan_O0.json
# numprocesses: 4
# runs-on:
# - self-hosted # must come first
diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O0.json
similarity index 99%
rename from tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json
rename to tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O0.json
index 58a32c3..e10fbc7 100644
--- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json
+++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O0.json
@@ -3,7 +3,8 @@
"iree_compile_flags": [
"--iree-hal-target-device=local",
"--iree-hal-local-target-device-backends=llvm-cpu",
- "--iree-input-demote-f64-to-f32=false"
+ "--iree-input-demote-f64-to-f32=false",
+ "--iree-opt-level=O0"
],
"iree_run_module_flags": [
"--device=local-sync"
diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O2.json
similarity index 91%
copy from tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json
copy to tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O2.json
index 58a32c3..d5d18d8 100644
--- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync.json
+++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_cpu_llvm_sync_O2.json
@@ -3,7 +3,8 @@
"iree_compile_flags": [
"--iree-hal-target-device=local",
"--iree-hal-local-target-device-backends=llvm-cpu",
- "--iree-input-demote-f64-to-f32=false"
+ "--iree-input-demote-f64-to-f32=false",
+ "--iree-opt-level=O2"
],
"iree_run_module_flags": [
"--device=local-sync"
@@ -81,6 +82,25 @@
"onnx/node/generated/test_image_decoder_decode_pnm_rgb",
"onnx/node/generated/test_image_decoder_decode_tiff_rgb",
"onnx/node/generated/test_image_decoder_decode_webp_rgb",
+ "onnx/node/generated/test_layer_normalization_2d_axis1",
+ "onnx/node/generated/test_layer_normalization_2d_axis_negative_1",
+ "onnx/node/generated/test_layer_normalization_3d_axis1_epsilon",
+ "onnx/node/generated/test_layer_normalization_3d_axis2_epsilon",
+ "onnx/node/generated/test_layer_normalization_3d_axis_negative_1_epsilon",
+ "onnx/node/generated/test_layer_normalization_3d_axis_negative_2_epsilon",
+ "onnx/node/generated/test_layer_normalization_4d_axis1",
+ "onnx/node/generated/test_layer_normalization_4d_axis2",
+ "onnx/node/generated/test_layer_normalization_4d_axis3",
+ "onnx/node/generated/test_layer_normalization_4d_axis_negative_1",
+ "onnx/node/generated/test_layer_normalization_4d_axis_negative_2",
+ "onnx/node/generated/test_layer_normalization_4d_axis_negative_3",
+ "onnx/node/generated/test_layer_normalization_default_axis",
+ "onnx/node/generated/test_logsoftmax_axis_0",
+ "onnx/node/generated/test_logsoftmax_axis_0_expanded",
+ "onnx/node/generated/test_logsoftmax_axis_0_expanded_ver18",
+ "onnx/node/generated/test_logsoftmax_axis_1",
+ "onnx/node/generated/test_logsoftmax_axis_1_expanded",
+ "onnx/node/generated/test_logsoftmax_axis_1_expanded_ver18",
"onnx/node/generated/test_loop11",
"onnx/node/generated/test_lppool_2d_dilations",
"onnx/node/generated/test_lppool_2d_same_lower",
@@ -106,6 +126,7 @@
"onnx/node/generated/test_nllloss_NCd1d2_with_weight_reduction_sum_ii",
"onnx/node/generated/test_nllloss_NCd1d2d3_none_no_weight_negative_ii",
"onnx/node/generated/test_nllloss_NCd1d2d3d4d5_mean_weight",
+ "onnx/node/generated/test_nllloss_NCd1d2d3d4d5_mean_weight_expanded",
"onnx/node/generated/test_nllloss_NCd1d2d3d4d5_none_no_weight",
"onnx/node/generated/test_nonmaxsuppression_two_classes",
"onnx/node/generated/test_nonzero_example",
@@ -292,6 +313,12 @@
"onnx/node/generated/test_slice_neg_steps",
"onnx/node/generated/test_slice_negative_axes",
"onnx/node/generated/test_slice_start_out_of_bounds",
+ "onnx/node/generated/test_softmax_axis_0",
+ "onnx/node/generated/test_softmax_axis_0_expanded",
+ "onnx/node/generated/test_softmax_axis_0_expanded_ver18",
+ "onnx/node/generated/test_softmax_axis_1",
+ "onnx/node/generated/test_softmax_axis_1_expanded",
+ "onnx/node/generated/test_softmax_axis_1_expanded_ver18",
"onnx/node/generated/test_stft",
"onnx/node/generated/test_training_dropout",
"onnx/node/generated/test_training_dropout_default",
@@ -327,8 +354,12 @@
"onnx/node/generated/test_gridsample_nearest_align_corners_0_additional_1",
"onnx/node/generated/test_gridsample_nearest_align_corners_1_additional_1",
"onnx/node/generated/test_lstm_with_peepholes",
+ "onnx/node/generated/test_nonmaxsuppression_center_point_box_format",
"onnx/node/generated/test_nonmaxsuppression_flipped_coordinates",
+ "onnx/node/generated/test_nonmaxsuppression_identical_boxes",
+ "onnx/node/generated/test_nonmaxsuppression_limit_output_size",
"onnx/node/generated/test_nonmaxsuppression_single_box",
+ "onnx/node/generated/test_nonmaxsuppression_suppress_by_IOU",
"onnx/node/generated/test_nonmaxsuppression_suppress_by_IOU_and_scores",
"onnx/node/generated/test_pow",
"onnx/node/generated/test_pow_example",
diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda_O0.json
similarity index 99%
rename from tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda.json
rename to tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda_O0.json
index dba0c5b..758590f 100644
--- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda.json
+++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_cuda_O0.json
@@ -2,7 +2,8 @@
"config_name": "gpu_cuda_t4",
"iree_compile_flags": [
"--iree-hal-target-device=cuda",
- "--iree-input-demote-f64-to-f32=false"
+ "--iree-input-demote-f64-to-f32=false",
+ "--iree-opt-level=O0"
],
"iree_run_module_flags": [
"--device=cuda"
diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3_O0.json
similarity index 99%
rename from tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3.json
rename to tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3_O0.json
index 1ee7bfd..7780cc4 100644
--- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3.json
+++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3_O0.json
@@ -3,7 +3,8 @@
"iree_compile_flags": [
"--iree-hal-target-device=hip",
"--iree-hip-target=gfx1100",
- "--iree-input-demote-f64-to-f32=false"
+ "--iree-input-demote-f64-to-f32=false",
+ "--iree-opt-level=O0"
],
"iree_run_module_flags": [
"--device=hip"
diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3_O3.json
similarity index 98%
copy from tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3.json
copy to tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3_O3.json
index 1ee7bfd..ec771f4 100644
--- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3.json
+++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_hip_rdna3_O3.json
@@ -3,7 +3,8 @@
"iree_compile_flags": [
"--iree-hal-target-device=hip",
"--iree-hip-target=gfx1100",
- "--iree-input-demote-f64-to-f32=false"
+ "--iree-input-demote-f64-to-f32=false",
+ "--iree-opt-level=O3"
],
"iree_run_module_flags": [
"--device=hip"
@@ -318,8 +319,12 @@
"onnx/node/generated/test_gridsample_nearest_align_corners_0_additional_1",
"onnx/node/generated/test_gridsample_nearest_align_corners_1_additional_1",
"onnx/node/generated/test_lstm_with_peepholes",
+ "onnx/node/generated/test_nonmaxsuppression_center_point_box_format",
"onnx/node/generated/test_nonmaxsuppression_flipped_coordinates",
+ "onnx/node/generated/test_nonmaxsuppression_identical_boxes",
+ "onnx/node/generated/test_nonmaxsuppression_limit_output_size",
"onnx/node/generated/test_nonmaxsuppression_single_box",
+ "onnx/node/generated/test_nonmaxsuppression_suppress_by_IOU",
"onnx/node/generated/test_nonmaxsuppression_suppress_by_IOU_and_scores",
"onnx/node/generated/test_pow",
"onnx/node/generated/test_qlinearmatmul_2D_int8_float16",
diff --git a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan.json b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan_O0.json
similarity index 99%
rename from tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan.json
rename to tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan_O0.json
index bb2377e..6979cda 100644
--- a/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan.json
+++ b/tests/external/iree-test-suites/onnx_ops/onnx_ops_gpu_vulkan_O0.json
@@ -2,7 +2,8 @@
"config_name": "gpu_vulkan",
"iree_compile_flags": [
"--iree-hal-target-device=vulkan",
- "--iree-input-demote-f64-to-f32"
+ "--iree-input-demote-f64-to-f32",
+ "--iree-opt-level=O0"
],
"iree_run_module_flags": [
"--device=vulkan"