Support input/expected output in benchmark definitions (#15327)
- Add input and expected output fields to benchmark definitions.
- Pass input and expected output URLs to the Linux benchmark tool.
With this change, the Linux benchmark tool will check the output if
`--verify` is specified.
#15282
diff --git a/build_tools/benchmarks/common/benchmark_driver_test.py b/build_tools/benchmarks/common/benchmark_driver_test.py
index f2dd076..95fc6f7 100644
--- a/build_tools/benchmarks/common/benchmark_driver_test.py
+++ b/build_tools/benchmarks/common/benchmark_driver_test.py
@@ -141,7 +141,7 @@
module_generation_config=gen_config,
module_execution_config=exec_config_a,
target_device_spec=device_spec,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
exec_config_b = iree_definitions.ModuleExecutionConfig.build(
@@ -154,7 +154,7 @@
module_generation_config=gen_config,
module_execution_config=exec_config_b,
target_device_spec=device_spec,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
self.case1 = BenchmarkCase(
@@ -201,7 +201,7 @@
module_generation_config=gen_config_rv64,
module_execution_config=exec_config_b,
target_device_spec=device_spec_rv64,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
self.incompatible_case = BenchmarkCase(
diff --git a/build_tools/benchmarks/common/benchmark_suite.py b/build_tools/benchmarks/common/benchmark_suite.py
index aa6ba88..77db81c 100644
--- a/build_tools/benchmarks/common/benchmark_suite.py
+++ b/build_tools/benchmarks/common/benchmark_suite.py
@@ -215,6 +215,9 @@
driver_info=driver_info,
benchmark_tool_name=run_config.tool.value,
benchmark_case_dir=module_dir_path,
+ input_uri=model.input_url,
+ expected_output_uri=model.expected_output_url,
+ verify_params=model.verify_params,
run_config=run_config,
)
benchmark_cases.append(benchmark_case)
diff --git a/build_tools/benchmarks/common/benchmark_suite_test.py b/build_tools/benchmarks/common/benchmark_suite_test.py
index 208c35e..82e2aec 100644
--- a/build_tools/benchmarks/common/benchmark_suite_test.py
+++ b/build_tools/benchmarks/common/benchmark_suite_test.py
@@ -52,7 +52,7 @@
),
module_execution_config=exec_config,
target_device_spec=device_spec,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
@@ -139,6 +139,8 @@
source_url="",
entry_function="predict",
input_types=["1xf32"],
+ input_url="https://abc/inputs_npy.tgz",
+ expected_output_url="https://abc/outputs_npy.tgz",
)
exec_config_a = iree_definitions.ModuleExecutionConfig.build(
id="exec_a",
@@ -182,7 +184,7 @@
),
module_execution_config=exec_config_a,
target_device_spec=device_spec_a,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
run_config_b = iree_definitions.E2EModelRunConfig.build(
@@ -194,7 +196,7 @@
),
module_execution_config=exec_config_b,
target_device_spec=device_spec_b,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
run_config_c = iree_definitions.E2EModelRunConfig.build(
@@ -206,7 +208,7 @@
),
module_execution_config=exec_config_a,
target_device_spec=device_spec_a,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
run_configs = [run_config_a, run_config_b, run_config_c]
@@ -247,6 +249,8 @@
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
benchmark_tool_name="iree-benchmark-module",
benchmark_case_dir=run_config_c_case_dir,
+ input_uri=model_tf.input_url,
+ expected_output_uri=model_tf.expected_output_url,
run_config=run_config_c,
)
],
diff --git a/build_tools/benchmarks/export_benchmark_config_test.py b/build_tools/benchmarks/export_benchmark_config_test.py
index 1bfb6df..88fff2f 100644
--- a/build_tools/benchmarks/export_benchmark_config_test.py
+++ b/build_tools/benchmarks/export_benchmark_config_test.py
@@ -68,7 +68,7 @@
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_a,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
presets=["preset_x"],
)
@@ -76,7 +76,7 @@
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_b,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
presets=["preset_y"],
)
@@ -84,7 +84,7 @@
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_c,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
presets=["preset_y", "preset_z"],
)
@@ -133,21 +133,21 @@
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_a,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
run_config_b = iree_definitions.E2EModelRunConfig.build(
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_b,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
run_config_c = iree_definitions.E2EModelRunConfig.build(
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_c,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
@@ -184,14 +184,14 @@
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_a,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
run_config_b = iree_definitions.E2EModelRunConfig.build(
module_generation_config=COMMON_GEN_CONFIG,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_b,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)
@@ -261,7 +261,7 @@
module_generation_config=small_gen_config,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_a,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
presets=["preset_x"],
)
@@ -269,7 +269,7 @@
module_generation_config=big_gen_config,
module_execution_config=COMMON_EXEC_CONFIG,
target_device_spec=device_spec_b,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
presets=["preset_y"],
)
diff --git a/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py b/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py
index 37e4c7c..8c2d95f 100644
--- a/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py
+++ b/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py
@@ -143,7 +143,7 @@
),
module_execution_config=EXEC_CONFIG,
target_device_spec=DEVICE_SPEC,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
run_flags=[],
)
@@ -158,7 +158,7 @@
),
module_execution_config=EXEC_CONFIG,
target_device_spec=DEVICE_SPEC,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
run_flags=[],
)
@@ -177,7 +177,7 @@
),
module_execution_config=EXEC_CONFIG,
target_device_spec=DEVICE_SPEC,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
run_flags=[],
)
@@ -192,7 +192,7 @@
),
module_execution_config=EXEC_CONFIG,
target_device_spec=DEVICE_SPEC,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
run_flags=[],
)
@@ -214,7 +214,7 @@
),
module_execution_config=EXEC_CONFIG,
target_device_spec=DEVICE_SPEC,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
run_flags=[],
)
@@ -229,7 +229,7 @@
),
module_execution_config=EXEC_CONFIG,
target_device_spec=DEVICE_SPEC,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
run_flags=[],
)
@@ -251,7 +251,7 @@
),
module_execution_config=EXEC_CONFIG,
target_device_spec=DEVICE_SPEC,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
run_flags=[],
)
diff --git a/build_tools/python/benchmark_suites/iree/utils.py b/build_tools/python/benchmark_suites/iree/utils.py
index ae71475..17a47a4 100644
--- a/build_tools/python/benchmark_suites/iree/utils.py
+++ b/build_tools/python/benchmark_suites/iree/utils.py
@@ -24,7 +24,7 @@
module_generation_config=module_generation_config,
module_execution_config=module_execution_config,
target_device_spec=device_spec,
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=tool,
tags=tags,
presets=presets,
diff --git a/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py b/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py
index e29b3e8..3353eba 100644
--- a/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py
+++ b/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py
@@ -194,7 +194,7 @@
host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
architecture=common_definitions.DeviceArchitecture.RV64_GENERIC,
),
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
presets=["test"],
)
diff --git a/build_tools/python/e2e_test_framework/definitions/common_definitions.py b/build_tools/python/e2e_test_framework/definitions/common_definitions.py
index e7d5cb2..1a6a4a3 100644
--- a/build_tools/python/e2e_test_framework/definitions/common_definitions.py
+++ b/build_tools/python/e2e_test_framework/definitions/common_definitions.py
@@ -8,7 +8,7 @@
import dataclasses
from dataclasses import dataclass
from enum import Enum
-from typing import List, Sequence
+from typing import List, Optional, Sequence
from e2e_test_framework import serialization, unique_ids
@@ -109,13 +109,6 @@
EXPORTED_TFLITE = "exported_tflite"
-class InputDataFormat(Enum):
- """Model input data format."""
-
- ZEROS = "zeros"
- NUMPY_NPY = "numpy_npy"
-
-
@serialization.serializable(type_key="device_specs")
@dataclass(frozen=True)
class DeviceSpec(object):
@@ -189,6 +182,15 @@
entry_function: str
# Input types. E.g., ["100x100xf32", "200x200x5xf32"].
input_types: List[str]
+ # URL to fetch input data tgz. The archive should contain
+ # "input_{0,1,...}.npy" for each input.
+ input_url: Optional[str] = None
+ # URL to fetch expected output tgz. The archive should contain
+ # "output_0.npy".
+ expected_output_url: Optional[str] = None
+ # Parameters for iree-run-module to control the tolerance.
+ # For example: --expected_f32_threshold=0.0001
+ verify_params: List[str] = dataclasses.field(default_factory=list)
def __str__(self):
return self.name
@@ -201,27 +203,16 @@
id: str
# Associated model.
- model_id: str
- # Friendly name.
name: str
- # Tags that describe the data characteristics.
- tags: List[str]
- data_format: InputDataFormat
- source_url: str
def __str__(self):
return self.name
-# All-zeros dummy input data. Runners will generate the zeros input with proper
-# shapes.
-ZEROS_MODEL_INPUT_DATA = ModelInputData(
+# Get input from model input_url if available; otherwise use all zeros.
+DEFAULT_INPUT_DATA = ModelInputData(
id=unique_ids.MODEL_INPUT_DATA_ZEROS,
- model_id="",
- name="zeros",
- tags=[],
- data_format=InputDataFormat.ZEROS,
- source_url="",
+ name="default",
)
diff --git a/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py b/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py
index 71262e5..e497b34 100644
--- a/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py
+++ b/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py
@@ -143,10 +143,7 @@
gen_config,
exec_config,
device_spec,
- # TODO(#15282): ZEROS_MODEL_INPUT_DATA should be renamed to
- # DEFAULT_INPUT_DATA, which means to use input npys if available;
- # otherwise use all zeros data.
- input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+ input_data=common_definitions.DEFAULT_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
)