| # E2E attention test on the CPU path (llvm-cpu backend, local-task driver): |
| # f16 query/key/value, "small" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_small |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=small" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
| ) |
| |
| # E2E attention test on the CPU path (llvm-cpu backend, local-task driver): |
| # f16 query/key/value, "medium" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_medium |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=medium" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
| ) |
| |
| # E2E attention test on the CPU path (llvm-cpu backend, local-task driver): |
| # f16 query/key/value, "large" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_large |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=large" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
| ) |
| |
| # Decode tests: m=1 (single token attending to cached KV) with all-ones mask. |
| # CPU (llvm-cpu / local-task), f16 query/key/value, "decode_small" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_decode_small |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=decode_small" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
| ) |
| |
| # CPU (llvm-cpu / local-task), f16 query/key/value, "decode_medium" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_decode_medium |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=decode_medium" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
| ) |
| |
| # CPU (llvm-cpu / local-task), f16 query/key/value, "decode_large" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_decode_large |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=decode_large" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
| ) |
| |
| # Prefill tests: m=k2 (self-attention) with causal mask. |
| # CPU (llvm-cpu / local-task), f16 query/key/value, "prefill_small" shape set, |
| # generated with --mask_type=causal. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_prefill_small |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=prefill_small" |
|     "--mask_type=causal" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
| ) |
| |
| # CPU (llvm-cpu / local-task), f16 query/key/value, "prefill_medium" shape set, |
| # generated with --mask_type=causal. This is the only test in the file that |
| # passes an explicit TIMEOUT (presumably seconds; confirm against the macro). |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_cpu_f16_f16_f16_prefill_medium |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=prefill_medium" |
|     "--mask_type=causal" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "llvm-cpu" |
|   DRIVERS "local-task" |
|   LABELS |
|     "hostonly" |
|     "local" |
|   TIMEOUT 300 |
| ) |
| |
| # Only register the GPU tests below when the test target chip is CDNA (gfx9*); |
| # the prefix match distinguishes it from RDNA3 (gfx11*). |
| if(IREE_ROCM_TEST_TARGET_CHIP MATCHES "^gfx9") |
| |
| # Compiler flags passed to every HIP test in this guarded section: target the |
| # chip named by IREE_ROCM_TEST_TARGET_CHIP. |
| # A plain set() is used instead of unset() + list(APPEND): after unset(), a |
| # same-named cache entry (if one exists) becomes visible and list(APPEND) |
| # would extend it, so the list would not reliably start empty. |
| set(IREE_HIP_TEST_COMPILER_FLAGS |
|   "--iree-rocm-target=${IREE_ROCM_TEST_TARGET_CHIP}" |
| ) |
| |
| # E2E attention test on the GPU path (rocm backend, hip driver), compiled with |
| # IREE_HIP_TEST_COMPILER_FLAGS: f16 query/key/value, "small" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_small |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=small" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| # E2E attention test on the GPU path (rocm backend, hip driver), compiled with |
| # IREE_HIP_TEST_COMPILER_FLAGS: f16 query/key/value, "medium" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_medium |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=medium" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| # E2E attention test on the GPU path (rocm backend, hip driver), compiled with |
| # IREE_HIP_TEST_COMPILER_FLAGS: f16 query/key/value, "large" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_large |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=large" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| # GPU Decode tests: m=1 (single token attending to cached KV) with all-ones mask. |
| # GPU (rocm / hip, built with IREE_HIP_TEST_COMPILER_FLAGS), f16 |
| # query/key/value, "decode_small" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_decode_small |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=decode_small" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| # GPU (rocm / hip, built with IREE_HIP_TEST_COMPILER_FLAGS), f16 |
| # query/key/value, "decode_medium" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_decode_medium |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=decode_medium" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| # GPU (rocm / hip, built with IREE_HIP_TEST_COMPILER_FLAGS), f16 |
| # query/key/value, "decode_large" shape set. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_decode_large |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=decode_large" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| # GPU Prefill tests: m=k2 (self-attention) with causal mask. |
| # GPU (rocm / hip, built with IREE_HIP_TEST_COMPILER_FLAGS), f16 |
| # query/key/value, "prefill_small" shape set, generated with |
| # --mask_type=causal. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_prefill_small |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=prefill_small" |
|     "--mask_type=causal" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| # GPU (rocm / hip, built with IREE_HIP_TEST_COMPILER_FLAGS), f16 |
| # query/key/value, "prefill_medium" shape set, generated with |
| # --mask_type=causal. |
| iree_generated_e2e_runner_test( |
|   NAME e2e_attention_gpu_cdna3_f16_f16_f16_prefill_medium |
|   TEST_TYPE attention |
|   GENERATOR "generate_e2e_attention_tests.py" |
|   GENERATOR_ARGS |
|     "--query_type=f16" |
|     "--key_type=f16" |
|     "--value_type=f16" |
|     "--shapes=prefill_medium" |
|     "--mask_type=causal" |
|   TEST_RUNNER iree_tools_testing_e2e_iree-e2e-attention-test |
|   TARGET_BACKENDS "rocm" |
|   DRIVERS "hip" |
|   COMPILER_FLAGS ${IREE_HIP_TEST_COMPILER_FLAGS} |
|   LABELS |
|     "noasan" |
|     "nomsan" |
|     "notsan" |
|     "noubsan" |
|     "requires-gpu-cdna3" |
| ) |
| |
| endif() |