# blob: ed58cdd51f6d5d9ad090525e70a50236b8eb571c
# CPU attention e2e test: f16 query/key/value over the generator's "small"
# shape set, compiled for llvm-cpu and executed via the local-task driver.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_small
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
)
# CPU attention e2e test: same f16 configuration as the small variant above,
# but over the generator's "medium" shape set.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_medium
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=medium"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
)
# CPU attention e2e test: same f16 configuration as above, over the
# generator's "large" shape set.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_large
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=large"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
)
# Decode tests: m=1 (single token attending to cached KV) with all-ones mask.
# CPU decode-phase test over the generator's "decode_small" shape set; no
# explicit --mask_type is passed, so the generator's default mask is used.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_decode_small
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=decode_small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
)
# CPU decode-phase test over the generator's "decode_medium" shape set.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_decode_medium
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=decode_medium"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
)
# CPU decode-phase test over the generator's "decode_large" shape set.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_decode_large
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=decode_large"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
)
# Prefill tests: m=k2 (self-attention) with causal mask.
# CPU prefill-phase test over the generator's "prefill_small" shape set;
# unlike the decode tests, a causal mask is requested explicitly.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_prefill_small
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=prefill_small"
"--mask_type=causal"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
)
# CPU prefill-phase test over the generator's "prefill_medium" shape set with
# a causal mask. NOTE(review): this is the only CPU test that sets an explicit
# TIMEOUT (300s) — presumably because this configuration runs long; confirm.
iree_generated_e2e_runner_test(
NAME
e2e_attention_cpu_f16_f16_f16_prefill_medium
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=prefill_medium"
"--mask_type=causal"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"llvm-cpu"
DRIVERS
"local-task"
LABELS
"hostonly"
"local"
TIMEOUT
300
)
# Register the ROCm GPU tests below only for CDNA chips (gfx9-series), to
# distinguish them from RDNA3 (gfx11), which is not covered here.
if(IREE_ROCM_TEST_TARGET_CHIP MATCHES "^gfx9")
# Rebuild the HIP compiler flag list from scratch so stale flags from an
# earlier configure pass cannot leak in, then target the detected chip.
# NOTE(review): after unset(), list(APPEND) would re-read a same-named CACHE
# variable if one exists — confirm no cache entry shadows this name.
unset(IREE_HIP_TEST_COMPILER_FLAGS)
list(APPEND IREE_HIP_TEST_COMPILER_FLAGS
"--iree-rocm-target=${IREE_ROCM_TEST_TARGET_CHIP}"
)
# GPU attention e2e test: f16 over the "small" shape set, compiled for the
# rocm backend and run with the hip driver. Labels exclude all sanitizer
# builds and require a CDNA3 GPU on the test machine.
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_small
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
# GPU attention e2e test over the "medium" shape set (rocm/hip, CDNA3 only).
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_medium
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=medium"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
# GPU attention e2e test over the "large" shape set (rocm/hip, CDNA3 only).
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_large
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=large"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
# GPU Decode tests: m=1 (single token attending to cached KV) with all-ones mask.
# GPU decode-phase test over the "decode_small" shape set (rocm/hip, CDNA3).
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_decode_small
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=decode_small"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
# GPU decode-phase test over the "decode_medium" shape set (rocm/hip, CDNA3).
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_decode_medium
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=decode_medium"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
# GPU decode-phase test over the "decode_large" shape set (rocm/hip, CDNA3).
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_decode_large
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=decode_large"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
# GPU Prefill tests: m=k2 (self-attention) with causal mask.
# GPU prefill-phase test over the "prefill_small" shape set with an explicit
# causal mask (rocm/hip, CDNA3 only).
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_prefill_small
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=prefill_small"
"--mask_type=causal"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
# GPU prefill-phase test over the "prefill_medium" shape set with an explicit
# causal mask (rocm/hip, CDNA3 only). NOTE(review): unlike the CPU
# prefill_medium test, no TIMEOUT override is set here — confirm intended.
iree_generated_e2e_runner_test(
NAME
e2e_attention_gpu_cdna3_f16_f16_f16_prefill_medium
TEST_TYPE
attention
GENERATOR
"generate_e2e_attention_tests.py"
GENERATOR_ARGS
"--query_type=f16"
"--key_type=f16"
"--value_type=f16"
"--shapes=prefill_medium"
"--mask_type=causal"
TEST_RUNNER
iree_tools_testing_e2e_iree-e2e-attention-test
TARGET_BACKENDS
"rocm"
DRIVERS
"hip"
COMPILER_FLAGS
${IREE_HIP_TEST_COMPILER_FLAGS}
LABELS
"noasan"
"nomsan"
"notsan"
"noubsan"
"requires-gpu-cdna3"
)
endif()