Support input/expected output in benchmark definitions (#15327)

- Add input and expected output fields to benchmark definitions.
- Pass input and expected output URLs to the Linux benchmark tool.

With this change, the Linux benchmark tool will check the output if
`--verify` is specified.

#15282
diff --git a/build_tools/benchmarks/common/benchmark_driver_test.py b/build_tools/benchmarks/common/benchmark_driver_test.py
index f2dd076..95fc6f7 100644
--- a/build_tools/benchmarks/common/benchmark_driver_test.py
+++ b/build_tools/benchmarks/common/benchmark_driver_test.py
@@ -141,7 +141,7 @@
             module_generation_config=gen_config,
             module_execution_config=exec_config_a,
             target_device_spec=device_spec,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         exec_config_b = iree_definitions.ModuleExecutionConfig.build(
@@ -154,7 +154,7 @@
             module_generation_config=gen_config,
             module_execution_config=exec_config_b,
             target_device_spec=device_spec,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         self.case1 = BenchmarkCase(
@@ -201,7 +201,7 @@
             module_generation_config=gen_config_rv64,
             module_execution_config=exec_config_b,
             target_device_spec=device_spec_rv64,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         self.incompatible_case = BenchmarkCase(
diff --git a/build_tools/benchmarks/common/benchmark_suite.py b/build_tools/benchmarks/common/benchmark_suite.py
index aa6ba88..77db81c 100644
--- a/build_tools/benchmarks/common/benchmark_suite.py
+++ b/build_tools/benchmarks/common/benchmark_suite.py
@@ -215,6 +215,9 @@
                 driver_info=driver_info,
                 benchmark_tool_name=run_config.tool.value,
                 benchmark_case_dir=module_dir_path,
+                input_uri=model.input_url,
+                expected_output_uri=model.expected_output_url,
+                verify_params=model.verify_params,
                 run_config=run_config,
             )
             benchmark_cases.append(benchmark_case)
diff --git a/build_tools/benchmarks/common/benchmark_suite_test.py b/build_tools/benchmarks/common/benchmark_suite_test.py
index 208c35e..82e2aec 100644
--- a/build_tools/benchmarks/common/benchmark_suite_test.py
+++ b/build_tools/benchmarks/common/benchmark_suite_test.py
@@ -52,7 +52,7 @@
             ),
             module_execution_config=exec_config,
             target_device_spec=device_spec,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
 
@@ -139,6 +139,8 @@
             source_url="",
             entry_function="predict",
             input_types=["1xf32"],
+            input_url="https://abc/inputs_npy.tgz",
+            expected_output_url="https://abc/outputs_npy.tgz",
         )
         exec_config_a = iree_definitions.ModuleExecutionConfig.build(
             id="exec_a",
@@ -182,7 +184,7 @@
             ),
             module_execution_config=exec_config_a,
             target_device_spec=device_spec_a,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         run_config_b = iree_definitions.E2EModelRunConfig.build(
@@ -194,7 +196,7 @@
             ),
             module_execution_config=exec_config_b,
             target_device_spec=device_spec_b,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         run_config_c = iree_definitions.E2EModelRunConfig.build(
@@ -206,7 +208,7 @@
             ),
             module_execution_config=exec_config_a,
             target_device_spec=device_spec_a,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         run_configs = [run_config_a, run_config_b, run_config_c]
@@ -247,6 +249,8 @@
                     driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
                     benchmark_tool_name="iree-benchmark-module",
                     benchmark_case_dir=run_config_c_case_dir,
+                    input_uri=model_tf.input_url,
+                    expected_output_uri=model_tf.expected_output_url,
                     run_config=run_config_c,
                 )
             ],
diff --git a/build_tools/benchmarks/export_benchmark_config_test.py b/build_tools/benchmarks/export_benchmark_config_test.py
index 1bfb6df..88fff2f 100644
--- a/build_tools/benchmarks/export_benchmark_config_test.py
+++ b/build_tools/benchmarks/export_benchmark_config_test.py
@@ -68,7 +68,7 @@
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_a,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             presets=["preset_x"],
         )
@@ -76,7 +76,7 @@
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_b,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             presets=["preset_y"],
         )
@@ -84,7 +84,7 @@
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_c,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             presets=["preset_y", "preset_z"],
         )
@@ -133,21 +133,21 @@
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_a,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         run_config_b = iree_definitions.E2EModelRunConfig.build(
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_b,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         run_config_c = iree_definitions.E2EModelRunConfig.build(
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_c,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
 
@@ -184,14 +184,14 @@
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_a,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
         run_config_b = iree_definitions.E2EModelRunConfig.build(
             module_generation_config=COMMON_GEN_CONFIG,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_b,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )
 
@@ -261,7 +261,7 @@
             module_generation_config=small_gen_config,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_a,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             presets=["preset_x"],
         )
@@ -269,7 +269,7 @@
             module_generation_config=big_gen_config,
             module_execution_config=COMMON_EXEC_CONFIG,
             target_device_spec=device_spec_b,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             presets=["preset_y"],
         )
diff --git a/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py b/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py
index 37e4c7c..8c2d95f 100644
--- a/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py
+++ b/build_tools/python/benchmark_suites/iree/benchmark_collections_test.py
@@ -143,7 +143,7 @@
             ),
             module_execution_config=EXEC_CONFIG,
             target_device_spec=DEVICE_SPEC,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             run_flags=[],
         )
@@ -158,7 +158,7 @@
             ),
             module_execution_config=EXEC_CONFIG,
             target_device_spec=DEVICE_SPEC,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             run_flags=[],
         )
@@ -177,7 +177,7 @@
             ),
             module_execution_config=EXEC_CONFIG,
             target_device_spec=DEVICE_SPEC,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             run_flags=[],
         )
@@ -192,7 +192,7 @@
             ),
             module_execution_config=EXEC_CONFIG,
             target_device_spec=DEVICE_SPEC,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             run_flags=[],
         )
@@ -214,7 +214,7 @@
             ),
             module_execution_config=EXEC_CONFIG,
             target_device_spec=DEVICE_SPEC,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             run_flags=[],
         )
@@ -229,7 +229,7 @@
             ),
             module_execution_config=EXEC_CONFIG,
             target_device_spec=DEVICE_SPEC,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             run_flags=[],
         )
@@ -251,7 +251,7 @@
             ),
             module_execution_config=EXEC_CONFIG,
             target_device_spec=DEVICE_SPEC,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             run_flags=[],
         )
diff --git a/build_tools/python/benchmark_suites/iree/utils.py b/build_tools/python/benchmark_suites/iree/utils.py
index ae71475..17a47a4 100644
--- a/build_tools/python/benchmark_suites/iree/utils.py
+++ b/build_tools/python/benchmark_suites/iree/utils.py
@@ -24,7 +24,7 @@
             module_generation_config=module_generation_config,
             module_execution_config=module_execution_config,
             target_device_spec=device_spec,
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=tool,
             tags=tags,
             presets=presets,
diff --git a/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py b/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py
index e29b3e8..3353eba 100644
--- a/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py
+++ b/build_tools/python/e2e_test_artifacts/cmake_generator/iree_rule_generator_test.py
@@ -194,7 +194,7 @@
                 host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
                 architecture=common_definitions.DeviceArchitecture.RV64_GENERIC,
             ),
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
             presets=["test"],
         )
diff --git a/build_tools/python/e2e_test_framework/definitions/common_definitions.py b/build_tools/python/e2e_test_framework/definitions/common_definitions.py
index e7d5cb2..1a6a4a3 100644
--- a/build_tools/python/e2e_test_framework/definitions/common_definitions.py
+++ b/build_tools/python/e2e_test_framework/definitions/common_definitions.py
@@ -8,7 +8,7 @@
 import dataclasses
 from dataclasses import dataclass
 from enum import Enum
-from typing import List, Sequence
+from typing import List, Optional, Sequence
 
 from e2e_test_framework import serialization, unique_ids
 
@@ -109,13 +109,6 @@
     EXPORTED_TFLITE = "exported_tflite"
 
 
-class InputDataFormat(Enum):
-    """Model input data format."""
-
-    ZEROS = "zeros"
-    NUMPY_NPY = "numpy_npy"
-
-
 @serialization.serializable(type_key="device_specs")
 @dataclass(frozen=True)
 class DeviceSpec(object):
@@ -189,6 +182,15 @@
     entry_function: str
     # Input types. E.g., ["100x100xf32", "200x200x5xf32"].
     input_types: List[str]
+    # URL to fetch input data tgz. The archive should contain
+    # "input_{0,1,...}.npy" for each input.
+    input_url: Optional[str] = None
+    # URL to fetch expected output tgz. The archive should contain
+    # "output_0.npy".
+    expected_output_url: Optional[str] = None
+    # Parameters for iree-run-module to control the tolerance.
+    # For example: --expected_f32_threshold=0.0001
+    verify_params: List[str] = dataclasses.field(default_factory=list)
 
     def __str__(self):
         return self.name
@@ -201,27 +203,16 @@
 
     id: str
     # Associated model.
-    model_id: str
-    # Friendly name.
     name: str
-    # Tags that describe the data characteristics.
-    tags: List[str]
-    data_format: InputDataFormat
-    source_url: str
 
     def __str__(self):
         return self.name
 
 
-# All-zeros dummy input data. Runners will generate the zeros input with proper
-# shapes.
-ZEROS_MODEL_INPUT_DATA = ModelInputData(
+# Get input from model input_url if available; otherwise use all zeros.
+DEFAULT_INPUT_DATA = ModelInputData(
     id=unique_ids.MODEL_INPUT_DATA_ZEROS,
-    model_id="",
-    name="zeros",
-    tags=[],
-    data_format=InputDataFormat.ZEROS,
-    source_url="",
+    name="default",
 )
 
 
diff --git a/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py b/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py
index 71262e5..e497b34 100644
--- a/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py
+++ b/build_tools/python/e2e_test_framework/definitions/iree_definitions_test.py
@@ -143,10 +143,7 @@
             gen_config,
             exec_config,
             device_spec,
-            # TODO(#15282): ZEROS_MODEL_INPUT_DATA should be renamed to
-            # DEFAULT_INPUT_DATA, which means to use input npys if available;
-            # otherwise use all zeros data.
-            input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
+            input_data=common_definitions.DEFAULT_INPUT_DATA,
             tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE,
         )