Adding state functionality to iree-run-trace and improving ergonomics. (#12534)
In order to support executing pipelines where outputs of one call are
passed into another, the trace replay functionality has grown slightly
closer to Turing-complete (and loops are definitely coming :) by
gaining input/output control, NumPy .npy file access, and a blackboard
for temporary values. A test demonstrating the file format and some
`--help` info have been added to `iree-run-trace` to at least have a
reference not generated by Python and to ensure it mostly works.
---
The new `!input.get`/`!input.take`/`!output.set`/`!output.push` macros
can be used in any source sequence such as function call arguments.
These will either get (assign semantics) or take (move semantics) a
value from the input list and set or push a value to the output list.
`iree-run-trace` now supports the same `--input=`/`--output=` flags as
`iree-run-module` and they define the input/output handling for the
whole trace pipeline as if calling a single function.
```yaml
type: call
function: module.fn
# pass the first two `--input=` flag values and a constant
args:
- !input.take 0
- !input.take 1
- !hal.buffer_view 4xf32=0,1,2,3
# store the two results into `--output=` 0 and 1 (pushing)
results:
- !output.set 0
- !output.push
```
---
In addition to the input/output lists there's also a user-defined
blackboard that provides storage for the duration of the trace. Slots
can be set by using `!blackboard.set`/`!blackboard.push` on any target
sequence such as function call results and later retrieved in any source
sequence with `!blackboard.get`/`!blackboard.take`.
```yaml
# save call results to the blackboard
type: call
function: module.return_two_things
results:
- !blackboard.push
- !blackboard.push
---
# load prior results from the blackboard
type: call
function: module.consume_three_things
args:
- !input.take 0
- !blackboard.take 0
- !blackboard.take 1
```
---
The `--input=`/`--output=` flag style works for pipeline-style traces,
while larger traces may need programmatic control over I/O and the
blackboard. The `numpy_load` and `numpy_save` events have been added,
which allow for loading or saving one or more `arrays` from/to the .npy
file at `path`. This can be used to stream outputs during processing by
using the `append: true` node when saving, or by sharding to different
files.
```yaml
# load blackboard slots 3 and 4 from a .npy file
type: numpy_load
path: input.npy
arrays:
- !blackboard.set 3
- !blackboard.set 4
---
# save a few arrays to a .npy file
type: numpy_save
path: output.npy
append: false
arrays:
- !blackboard.get 3
- !input.get 0
- !hal.buffer_view 4xf32=0,1,2,3
```
---
There are some helpers that would be useful to add (enqueue/dequeue, pop,
etc.) that could make it easier to write more complex pipelines. The
blackboard could also be changed to use a hash table so that string
keys could be used instead of just ordinals.
Fixes #12525.
Fixes #12526.
diff --git a/.github/workflows/benchmark_execution.yml b/.github/workflows/benchmark_execution.yml
index 2b7bcaf..d317f5e 100644
--- a/.github/workflows/benchmark_execution.yml
+++ b/.github/workflows/benchmark_execution.yml
@@ -165,19 +165,15 @@
- name: "Running benchmarks"
id: run
env:
- BENCHMARK_CONFIG: ${{ steps.download-assets.outputs.benchmark-config }}
+ IREE_EXECUTION_BENCHMARK_CONFIG: ${{ steps.download-assets.outputs.benchmark-config }}
IREE_DOCKER_WRAPPER: ./build_tools/github_actions/docker_run.sh
IREE_NORMAL_BENCHMARK_TOOLS_DIR: ${{ steps.unpack-tools.outputs.normal-benchmark-tools-dir }}
IREE_TRACED_BENCHMARK_TOOLS_DIR: ${{ steps.unpack-tools.outputs.traced-benchmark-tools-dir }}
- IREE_DEVICE_NAME: ${{ env.DEVICE_NAME }}
+ IREE_TARGET_DEVICE_NAME: ${{ env.DEVICE_NAME }}
IREE_E2E_TEST_ARTIFACTS_DIR: ${{ env.E2E_TEST_ARTIFACTS_DIR }}
- IREE_RUN_CONFIG: run-config.json
IREE_BENCHMARK_RESULTS: ${{ env.BENCHMARK_RESULTS_DIR }}/benchmark-results-${{ matrix.benchmark.device_name }}.json
run: |
mkdir -p ${BENCHMARK_RESULTS_DIR}
- jq --arg DEVICE_NAME "${IREE_DEVICE_NAME}" \
- '.[$DEVICE_NAME] | .run_configs' \
- "${BENCHMARK_CONFIG}" > "${IREE_RUN_CONFIG}"
./build_tools/benchmarks/run_benchmarks.sh
echo "benchmark-results=${IREE_BENCHMARK_RESULTS}" >> "${GITHUB_OUTPUT}"
- name: "Uploading benchmark results"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c34cc8c..902ff37 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -65,7 +65,7 @@
BASE_REF: HEAD^
outputs:
should-run: ${{ steps.configure.outputs.should-run }}
- ci-stage: ${{ steps.configure.outputs.ci-stage }}
+ is-pr: ${{ steps.configure.outputs.is-pr }}
runner-env: ${{ steps.configure.outputs.runner-env }}
runner-group: ${{ steps.configure.outputs.runner-group }}
write-caches: ${{ steps.configure.outputs.write-caches }}
@@ -126,7 +126,7 @@
##############################################################################
build_all:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -191,7 +191,7 @@
build_test_all_windows:
needs: setup
- if: needs.setup.outputs.should-run == 'true' && needs.setup.outputs.ci-stage == 'postsubmit'
+ if: fromJson(needs.setup.outputs.should-run) && ! fromJson(needs.setup.outputs.is-pr)
runs-on: managed-windows-cpu
defaults:
run:
@@ -258,7 +258,7 @@
build_test_all_macos:
needs: setup
- if: needs.setup.outputs.should-run == 'true' && needs.setup.outputs.ci-stage == 'postsubmit'
+ if: fromJson(needs.setup.outputs.should-run) && ! fromJson(needs.setup.outputs.is-pr)
runs-on:
- ${{ github.repository == 'openxla/iree' && 'self-hosted' || 'macos-11' }} # must come first
- runner-group=postsubmit
@@ -291,7 +291,7 @@
build_test_all_bazel:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -314,7 +314,7 @@
test_all:
needs: [setup, build_all]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -344,7 +344,7 @@
test_gpu:
needs: [setup, build_all]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -387,7 +387,7 @@
##############################################################################
build_test_runtime:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on: ubuntu-20.04-64core
env:
BUILD_DIR: build-runtime
@@ -415,7 +415,7 @@
build_test_runtime_windows:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on: managed-windows-cpu
defaults:
run:
@@ -444,7 +444,7 @@
##############################################################################
build_tf_integrations:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -491,7 +491,7 @@
test_tf_integrations:
needs: [setup, build_all, build_tf_integrations]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -530,7 +530,7 @@
test_tf_integrations_gpu:
needs: [setup, build_all, build_tf_integrations]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -578,7 +578,7 @@
# TODO(#11263): Drop this job once the IREE_BUILD_BENCHMARKS is removed.
test_build_benchmark_suites:
needs: [setup, build_all, build_tf_integrations]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -621,7 +621,7 @@
##############################################################################
python_release_packages:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -662,7 +662,7 @@
asan:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -690,7 +690,7 @@
tsan:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -715,7 +715,7 @@
small_runtime:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on: ubuntu-20.04-64core
env:
BUILD_DIR: build-runtime
@@ -742,7 +742,7 @@
gcc:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -774,7 +774,7 @@
tracing:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -803,7 +803,7 @@
debug:
needs: setup
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -836,7 +836,7 @@
build_benchmark_tools:
needs: [setup, build_all]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -919,7 +919,7 @@
build_e2e_test_artifacts:
needs: [setup, build_all, build_tf_integrations]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -987,7 +987,7 @@
compilation_benchmarks:
needs: [setup, build_e2e_test_artifacts]
- if: needs.setup.outputs.should-run == 'true' && needs.setup.outputs.benchmark-presets != ''
+ if: fromJson(needs.setup.outputs.should-run) && needs.setup.outputs.benchmark-presets != ''
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -1036,12 +1036,11 @@
GENERATION_CONFIG: generation-config.json
COMPILE_STATS_RESULTS: benchmark-results/compile-stats-results.json
run: |
- jq '.generation_configs' "${COMPILATION_CONFIG}" > "${GENERATION_CONFIG}"
mkdir -p benchmark-results
./build_tools/benchmarks/collect_compilation_statistics.py alpha \
--e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
--build_log="${E2E_TEST_ARTIFACTS_BUILD_LOG}" \
- --generation_config="${GENERATION_CONFIG}" \
+ --compilation_benchmark_config="${COMPILATION_CONFIG}" \
--output="${COMPILE_STATS_RESULTS}"
echo "compile-stats-results=${COMPILE_STATS_RESULTS}" >> "${GITHUB_OUTPUT}"
- name: "Uploading benchmark results"
@@ -1061,7 +1060,7 @@
execution_benchmarks:
needs: [setup, build_benchmark_tools, build_e2e_test_artifacts]
- if: needs.setup.outputs.should-run == 'true' && needs.setup.outputs.benchmark-presets != ''
+ if: fromJson(needs.setup.outputs.should-run) && needs.setup.outputs.benchmark-presets != ''
uses: ./.github/workflows/benchmark_execution.yml
with:
# env.GCS_DIR is also duplicated in this workflow. See the note there on
@@ -1075,7 +1074,7 @@
process_benchmark_results:
needs: [setup, compilation_benchmarks, execution_benchmarks]
- if: needs.setup.outputs.should-run == 'true' && needs.setup.outputs.benchmark-presets != ''
+ if: fromJson(needs.setup.outputs.should-run) && needs.setup.outputs.benchmark-presets != ''
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -1112,7 +1111,7 @@
"${EXECUTION_BENCHMARK_RESULTS_DIR}"
echo "execution-benchmark-results-pattern=${EXECUTION_BENCHMARK_RESULTS_DIR}/benchmark-results-*.json" >> "${GITHUB_OUTPUT}"
- name: Generating comment
- if: needs.setup.outputs.ci-stage == 'presubmit'
+ if: fromJson(needs.setup.outputs.is-pr)
id: generate-comment
env:
# Wildcard pattern to match all execution benchmark results. Empty if
@@ -1139,7 +1138,7 @@
# Due to security reasons, instead of posting the comment to PR, we only
# upload the comment data in presubmit workflow and trigger the posting
# workflow on the main branch. See post_benchmark_comment.yaml
- if: needs.setup.outputs.ci-stage == 'presubmit'
+ if: fromJson(needs.setup.outputs.is-pr)
env:
BENCHMARK_COMMENT_ARTIFACT: ${{ steps.generate-comment.outputs.benchmark-comment-artifact }}
BENCHMARK_COMMENT_GCS_ARTIFACT: ${{ env.GCS_DIR }}/${{ steps.generate-comment.outputs.benchmark-comment-artifact }}
@@ -1148,7 +1147,7 @@
"${BENCHMARK_COMMENT_ARTIFACT}" \
"${BENCHMARK_COMMENT_GCS_ARTIFACT}"
- name: Uploading results to dashboard
- if: needs.setup.outputs.ci-stage == 'postsubmit'
+ if: github.ref_name == 'main'
env:
EXECUTION_BENCHMARK_RESULTS_PATTERN: ${{ steps.download-execution-results.outputs.execution-benchmark-results-pattern }}
IREE_DASHBOARD_API_TOKEN: ${{ secrets.IREE_DASHBOARD_API_TOKEN }}
@@ -1167,7 +1166,7 @@
cross_compile_and_test:
needs: [setup, build_all]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -1256,7 +1255,7 @@
test_benchmark_suites:
needs: [setup, build_all, build_e2e_test_artifacts]
- if: needs.setup.outputs.should-run == 'true'
+ if: fromJson(needs.setup.outputs.should-run)
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
@@ -1393,7 +1392,7 @@
fi
- name: Posting to Discord
uses: sarisia/actions-status-discord@c193626e5ce172002b8161e116aa897de7ab5383 # v1.10.2
- if: failure() && needs.setup.outputs.ci-stage == 'postsubmit'
+ if: failure() && github.ref_name == 'main'
with:
webhook: ${{ secrets.DISCORD_WEBHOOK }}
description: "The following jobs failed: ${{ steps.failed_jobs.outputs.failed-jobs }}"
diff --git a/.github/workflows/run_shark_tank.yml b/.github/workflows/run_shark_tank.yml
index d1a4dc6..6c8f768 100644
--- a/.github/workflows/run_shark_tank.yml
+++ b/.github/workflows/run_shark_tank.yml
@@ -20,7 +20,6 @@
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
should-run: ${{ steps.configure.outputs.should-run }}
- ci-stage: ${{ steps.configure.outputs.ci-stage }}
runner-env: ${{ steps.configure.outputs.runner-env }}
runner-group: ${{ steps.configure.outputs.runner-group }}
write-caches: ${{ steps.configure.outputs.write-caches }}
diff --git a/build_tools/benchmarks/collect_compilation_statistics.py b/build_tools/benchmarks/collect_compilation_statistics.py
index bc91dd2..3550065 100755
--- a/build_tools/benchmarks/collect_compilation_statistics.py
+++ b/build_tools/benchmarks/collect_compilation_statistics.py
@@ -133,11 +133,13 @@
return module_path
-def get_module_map_from_generation_config(
- serialized_gen_config: TextIO, e2e_test_artifacts_dir: pathlib.PurePath
+def get_module_map_from_compilation_benchmark_config(
+ compilation_benchmark_config_data: TextIO,
+ e2e_test_artifacts_dir: pathlib.PurePath
) -> Dict[CompilationInfo, pathlib.Path]:
+ benchmark_config = json.load(compilation_benchmark_config_data)
gen_configs = serialization.unpack_and_deserialize(
- data=json.load(serialized_gen_config),
+ data=benchmark_config["generation_configs"],
root_type=List[iree_definitions.ModuleGenerationConfig])
module_map = {}
for gen_config in gen_configs:
@@ -203,8 +205,9 @@
def _alpha_get_module_map_and_build_log(args: argparse.Namespace):
- module_map = get_module_map_from_generation_config(
- serialized_gen_config=args.generation_config.open("r"),
+ config_data = args.compilation_benchmark_config.open("r")
+ module_map = get_module_map_from_compilation_benchmark_config(
+ compilation_benchmark_config_data=config_data,
e2e_test_artifacts_dir=args.e2e_test_artifacts_dir)
return module_map, args.build_log
@@ -257,10 +260,10 @@
alpha_parser.set_defaults(
get_module_map_and_build_log=_alpha_get_module_map_and_build_log)
alpha_parser.add_argument(
- "--generation_config",
+ "--compilation_benchmark_config",
type=_check_file_path,
required=True,
- help="Exported module generation config of e2e test artifacts.")
+ help="Exported compilation benchmark config of e2e test artifacts.")
alpha_parser.add_argument("--build_log",
type=_check_file_path,
required=True,
diff --git a/build_tools/benchmarks/collect_compilation_statistics_test.py b/build_tools/benchmarks/collect_compilation_statistics_test.py
index e03e4f2..ac96bdc 100644
--- a/build_tools/benchmarks/collect_compilation_statistics_test.py
+++ b/build_tools/benchmarks/collect_compilation_statistics_test.py
@@ -92,7 +92,7 @@
self.assertEqual(moduel_path, "/abcd-compile-stats.vmfb")
- def test_get_module_map_from_generation_config(self):
+ def test_get_module_map_from_compilation_benchmark_config(self):
model_a = common_definitions.Model(
id="1234",
name="tflite_m",
@@ -126,12 +126,14 @@
imported_model=imported_model_a, compile_config=compile_config_a)
gen_config_b = iree_definitions.ModuleGenerationConfig.with_flag_generation(
imported_model=imported_model_a, compile_config=compile_config_b)
- serialized_gen_config = json.dumps(
- serialization.serialize_and_pack([gen_config_a, gen_config_b]))
+ benchmark_config = dict(generation_configs=serialization.serialize_and_pack(
+ [gen_config_a, gen_config_b]),
+ module_dir_paths=["a", "b"])
root_dir = pathlib.PurePath("artifacts_dir")
- module_map = collect_compilation_statistics.get_module_map_from_generation_config(
- serialized_gen_config=StringIO(serialized_gen_config),
+ module_map = collect_compilation_statistics.get_module_map_from_compilation_benchmark_config(
+ compilation_benchmark_config_data=StringIO(
+ json.dumps(benchmark_config)),
e2e_test_artifacts_dir=root_dir)
compile_info_a = common.benchmark_definition.CompilationInfo(
diff --git a/build_tools/benchmarks/common/benchmark_config.py b/build_tools/benchmarks/common/benchmark_config.py
index 2677857..2d2bc59 100644
--- a/build_tools/benchmarks/common/benchmark_config.py
+++ b/build_tools/benchmarks/common/benchmark_config.py
@@ -113,7 +113,7 @@
else:
# TODO(#11076): Remove legacy path.
build_dir = args.build_dir.resolve()
- if args.run_config is not None:
+ if args.execution_benchmark_config is not None:
root_benchmark_dir = build_dir / E2E_TEST_ARTIFACTS_REL_PATH
else:
root_benchmark_dir = build_dir / BENCHMARK_SUITE_REL_PATH
diff --git a/build_tools/benchmarks/common/benchmark_config_test.py b/build_tools/benchmarks/common/benchmark_config_test.py
index 7bef77a..e432cf5 100644
--- a/build_tools/benchmarks/common/benchmark_config_test.py
+++ b/build_tools/benchmarks/common/benchmark_config_test.py
@@ -93,13 +93,15 @@
def test_build_from_args_with_e2e_test_artifacts_dir(self):
with tempfile.TemporaryDirectory() as e2e_test_artifacts_dir:
- run_config = pathlib.Path(e2e_test_artifacts_dir) / "run_config.json"
- run_config.touch()
+ exec_bench_config = pathlib.Path(
+ e2e_test_artifacts_dir) / "exec_bench_config.json"
+ exec_bench_config.touch()
args = common_arguments.Parser().parse_args([
f"--tmp_dir={self.tmp_dir}",
f"--normal_benchmark_tool_dir={self.normal_tool_dir}",
f"--e2e_test_artifacts_dir={e2e_test_artifacts_dir}",
- f"--run_config={run_config}"
+ f"--execution_benchmark_config={exec_bench_config}",
+ f"--target_device_name=device_a",
])
config = benchmark_config.BenchmarkConfig.build_from_args(
@@ -108,14 +110,16 @@
self.assertEqual(config.root_benchmark_dir,
pathlib.Path(e2e_test_artifacts_dir))
- def test_build_from_args_with_run_config_and_build_dir(self):
+ def test_build_from_args_with_execution_benchmark_config_and_build_dir(self):
with tempfile.TemporaryDirectory() as e2e_test_artifacts_dir:
- run_config = pathlib.Path(e2e_test_artifacts_dir) / "run_config.json"
- run_config.touch()
+ exec_bench_config = pathlib.Path(
+ e2e_test_artifacts_dir) / "exec_bench_config.json"
+ exec_bench_config.touch()
args = common_arguments.Parser().parse_args([
f"--tmp_dir={self.tmp_dir}",
f"--normal_benchmark_tool_dir={self.normal_tool_dir}",
- f"--run_config={run_config}",
+ f"--execution_benchmark_config={exec_bench_config}",
+ f"--target_device_name=device_a",
str(self.build_dir)
])
diff --git a/build_tools/benchmarks/common/common_arguments.py b/build_tools/benchmarks/common/common_arguments.py
index ea14265..b6fc8f0 100644
--- a/build_tools/benchmarks/common/common_arguments.py
+++ b/build_tools/benchmarks/common/common_arguments.py
@@ -59,7 +59,7 @@
help=(
"Path to the IREE e2e test artifacts directory. This will override "
"<build-dir> and eventually replace it. For now must use with "
- "--run_config"))
+ "--execution_benchmark_config"))
self.add_argument(
"--normal_benchmark_tool_dir",
@@ -155,17 +155,33 @@
"for). In that case, no --benchmark_repetitions flag will be passed."
" If not specified, a --benchmark_repetitions will be passed "
"instead.")
- self.add_argument("--run_config",
+ self.add_argument("--execution_benchmark_config",
type=_check_file_path,
default=None,
- help="JSON file of the run config")
+ help="JSON config for the execution benchmarks")
+ self.add_argument("--target_device_name",
+ type=str,
+ default=None,
+ help="Target device in benchmark config to run")
def parse_args(
self, arg_strs: Optional[Sequence[str]] = None) -> argparse.Namespace:
args = super().parse_args(arg_strs)
- if args.e2e_test_artifacts_dir is not None and args.run_config is None:
- raise self.error("--e2e_test_artifacts_dir requires --run_config.")
+ # TODO(#11076): Remove these checks and make --execution_benchmark_config
+ # and --target_device_name required args.
+ use_new_benchmark_suite = (args.execution_benchmark_config is not None or
+ args.target_device_name is not None)
+ if use_new_benchmark_suite:
+ if (args.execution_benchmark_config is None or
+ args.target_device_name is None):
+ self.error(
+ "--execution_benchmark_config and --target_device_name must be set together."
+ )
+ elif args.e2e_test_artifacts_dir is not None:
+ self.error(
+ "--e2e_test_artifacts_dir requires --execution_benchmark_config and --target_device_name."
+ )
return args
diff --git a/build_tools/benchmarks/common/common_arguments_test.py b/build_tools/benchmarks/common/common_arguments_test.py
index 143a850..1d9a3a6 100644
--- a/build_tools/benchmarks/common/common_arguments_test.py
+++ b/build_tools/benchmarks/common/common_arguments_test.py
@@ -42,12 +42,19 @@
with self.assertRaises(SystemExit):
arg_parser.parse_args(["--trace_capture_tool=nonexistent", "."])
- def test_parser_e2e_test_artifacts_dir_requires_run_config(self):
+ def test_parser_e2e_test_artifacts_dir_needs_execution_benchmark_config(self):
arg_parser = common.common_arguments.Parser()
with tempfile.TemporaryDirectory() as tempdir:
with self.assertRaises(SystemExit):
arg_parser.parse_args([f"--e2e_test_artifacts_dir={tempdir}"])
+ def test_parser_only_execution_benchmark_config_or_target_device_name(self):
+ arg_parser = common.common_arguments.Parser()
+ with self.assertRaises(SystemExit):
+ arg_parser.parse_args([f"--execution_benchmark_config"])
+ with self.assertRaises(SystemExit):
+ arg_parser.parse_args([f"--target_device_name"])
+
if __name__ == "__main__":
unittest.main()
diff --git a/build_tools/benchmarks/run_benchmarks.sh b/build_tools/benchmarks/run_benchmarks.sh
index 991973f..7b8694b 100755
--- a/build_tools/benchmarks/run_benchmarks.sh
+++ b/build_tools/benchmarks/run_benchmarks.sh
@@ -20,11 +20,11 @@
set -euo pipefail
DOCKER_WRAPPER="${IREE_DOCKER_WRAPPER:-./build_tools/docker/docker_run.sh}"
-DEVICE_NAME="${IREE_DEVICE_NAME}"
NORMAL_BENCHMARK_TOOLS_DIR="${IREE_NORMAL_BENCHMARK_TOOLS_DIR}"
E2E_TEST_ARTIFACTS_DIR="${1:-${IREE_E2E_TEST_ARTIFACTS_DIR}}"
-RUN_CONFIG="${2:-${IREE_RUN_CONFIG}}"
-BENCHMARK_RESULTS="${3:-${IREE_BENCHMARK_RESULTS}}"
+EXECUTION_BENCHMARK_CONFIG="${2:-${IREE_EXECUTION_BENCHMARK_CONFIG}}"
+TARGET_DEVICE_NAME="${3:-${IREE_TARGET_DEVICE_NAME}}"
+BENCHMARK_RESULTS="${4:-${IREE_BENCHMARK_RESULTS}}"
if [[ "${DEVICE_NAME}" == "a2-highgpu-1g" ]]; then
${DOCKER_WRAPPER} \
@@ -34,7 +34,8 @@
./build_tools/benchmarks/run_benchmarks_on_linux.py \
--normal_benchmark_tool_dir="${NORMAL_BENCHMARK_TOOLS_DIR}" \
--e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
- --run_config="${RUN_CONFIG}" \
+ --execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
+ --target_device_name="${TARGET_DEVICE_NAME}" \
--output="${BENCHMARK_RESULTS}" \
--verbose
elif [[ "${DEVICE_NAME}" == "c2-standard-16" ]]; then
@@ -43,7 +44,8 @@
./build_tools/benchmarks/run_benchmarks_on_linux.py \
--normal_benchmark_tool_dir="${NORMAL_BENCHMARK_TOOLS_DIR}" \
--e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
- --run_config="${RUN_CONFIG}" \
+ --execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
+ --target_device_name="${TARGET_DEVICE_NAME}" \
--output="${BENCHMARK_RESULTS}" \
--device_model=GCP-c2-standard-16 \
--cpu_uarch=CascadeLake \
diff --git a/build_tools/benchmarks/run_benchmarks_on_android.py b/build_tools/benchmarks/run_benchmarks_on_android.py
index 0ff954d..3891da4 100755
--- a/build_tools/benchmarks/run_benchmarks_on_android.py
+++ b/build_tools/benchmarks/run_benchmarks_on_android.py
@@ -339,7 +339,7 @@
if args.verbose:
print(device_info)
- if args.run_config is not None:
+ if args.execution_benchmark_config is not None:
raise ValueError("Run config option isn't supported yet.")
commit = get_git_commit_hash("HEAD")
diff --git a/build_tools/benchmarks/run_benchmarks_on_linux.py b/build_tools/benchmarks/run_benchmarks_on_linux.py
index e8a022c..d8b43ce 100755
--- a/build_tools/benchmarks/run_benchmarks_on_linux.py
+++ b/build_tools/benchmarks/run_benchmarks_on_linux.py
@@ -147,14 +147,17 @@
commit = get_git_commit_hash("HEAD")
benchmark_config = BenchmarkConfig.build_from_args(args, commit)
- if args.run_config is None:
+ if args.execution_benchmark_config is None:
# TODO(#11076): Remove legacy path.
benchmark_suite = BenchmarkSuite.load_from_benchmark_suite_dir(
benchmark_config.root_benchmark_dir)
else:
- run_config_data = json.loads(args.run_config.read_text())
+ benchmark_groups = json.loads(args.execution_benchmark_config.read_text())
+ benchmark_group = benchmark_groups.get(args.target_device_name)
+ if benchmark_group is None:
+ raise ValueError("Target device not found in the benchmark config.")
run_configs = serialization.unpack_and_deserialize(
- data=run_config_data,
+ data=benchmark_group["run_configs"],
root_type=typing.List[iree_definitions.E2EModelRunConfig])
benchmark_suite = BenchmarkSuite.load_from_run_configs(
run_configs=run_configs)
diff --git a/build_tools/cmake/iree_python.cmake b/build_tools/cmake/iree_python.cmake
index 5aaab3f..b5dcdd3 100644
--- a/build_tools/cmake/iree_python.cmake
+++ b/build_tools/cmake/iree_python.cmake
@@ -279,6 +279,9 @@
"ARGS;LABELS;TIMEOUT"
${ARGN}
)
+ if(NOT IREE_BUILD_PYTHON_BINDINGS)
+ return()
+ endif()
iree_local_py_test(
NAME
diff --git a/build_tools/github_actions/configure_ci.py b/build_tools/github_actions/configure_ci.py
index 49ac80d..04309a2 100755
--- a/build_tools/github_actions/configure_ci.py
+++ b/build_tools/github_actions/configure_ci.py
@@ -29,15 +29,12 @@
import difflib
import fnmatch
+import json
import os
import subprocess
import textwrap
from typing import Iterable, Mapping, MutableMapping
-PULL_REQUEST_EVENT_NAME = "pull_request"
-PUSH_EVENT_NAME = "push"
-SCHEDULE_EVENT_NAME = "schedule"
-WORKFLOW_DISPATCH_EVENT_NAME = "workflow_dispatch"
SKIP_CI_KEY = "skip-ci"
RUNNER_ENV_KEY = "runner-env"
BENCHMARK_PRESET_KEY = "benchmarks"
@@ -86,7 +83,7 @@
print(f"Setting outputs: {d}")
step_output_file = os.environ["GITHUB_OUTPUT"]
with open(step_output_file, "a") as f:
- f.writelines(f"{k}={v}" "\n" for k, v in d.items())
+ f.writelines(f"{k}={v}" + "\n" for k, v in d.items())
def write_job_summary(summary: str):
@@ -121,7 +118,9 @@
</details>""").format("".join(diffs)))
-def get_trailers() -> Mapping[str, str]:
+def get_trailers(is_pr: bool) -> Mapping[str, str]:
+ if not is_pr:
+ return {}
title = os.environ["PR_TITLE"]
body = os.environ.get("PR_BODY", "")
original_title = os.environ.get("ORIGINAL_PR_TITLE")
@@ -167,10 +166,10 @@
return any(not skip_path(p) for p in get_modified_paths(base_ref))
-def should_run_ci(event_name, trailers) -> bool:
- if event_name != PULL_REQUEST_EVENT_NAME:
- print(f"Running CI independent of diff because run was not triggered by a"
- f" pull request event (event name is '{event_name}')")
+def should_run_ci(is_pr: bool, trailers: Mapping[str, str]) -> bool:
+ if not is_pr:
+ print("Running CI independent of diff because run was not triggered by a"
+ " pull request event.")
return True
if SKIP_CI_KEY in trailers:
@@ -204,19 +203,7 @@
return runner_env
-def get_ci_stage(event_name):
- if event_name == PULL_REQUEST_EVENT_NAME:
- return "presubmit"
- elif event_name == PUSH_EVENT_NAME:
- return "postsubmit"
- elif event_name == SCHEDULE_EVENT_NAME:
- return "postsubmit"
- elif event_name == WORKFLOW_DISPATCH_EVENT_NAME:
- return "unknown"
- raise ValueError(f"Unrecognized event name '{event_name}'")
-
-
-def get_benchmark_presets(ci_stage: str, trailers: Mapping[str, str]) -> str:
+def get_benchmark_presets(is_pr: bool, trailers: Mapping[str, str]) -> str:
"""Parses and validates the benchmark presets from trailers.
Args:
@@ -226,7 +213,7 @@
A comma separated preset string, which later will be parsed by
build_tools/benchmarks/export_benchmark_config.py.
"""
- if ci_stage == "postsubmit":
+ if not is_pr:
preset_options = ["all"]
else:
trailer = trailers.get(BENCHMARK_PRESET_KEY)
@@ -248,22 +235,16 @@
def main():
- output: MutableMapping[str, str] = {}
- event_name = os.environ["GITHUB_EVENT_NAME"]
- trailers = get_trailers() if event_name == PULL_REQUEST_EVENT_NAME else {}
- if should_run_ci(event_name, trailers):
- output["should-run"] = "true"
- else:
- output["should-run"] = "false"
- output[RUNNER_ENV_KEY] = get_runner_env(trailers)
- ci_stage = get_ci_stage(event_name)
- output["ci-stage"] = ci_stage
- output["runner-group"] = ci_stage
- write_caches = "0"
- if ci_stage == "postsubmit":
- write_caches = "1"
- output["write-caches"] = write_caches
- output["benchmark-presets"] = get_benchmark_presets(ci_stage, trailers)
+ is_pr = os.environ["GITHUB_EVENT_NAME"] == "pull_request"
+ trailers = get_trailers(is_pr)
+ output = {
+ "should-run": json.dumps(should_run_ci(is_pr, trailers)),
+ "is-pr": json.dumps(is_pr),
+ "runner-env": get_runner_env(trailers),
+ "runner-group": "presubmit" if is_pr else "postsubmit",
+ "write-caches": "0" if is_pr else "1",
+ "benchmark-presets": get_benchmark_presets(is_pr, trailers),
+ }
set_output(output)
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
index 44ec121..30186c6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
@@ -215,6 +215,9 @@
nestedModulePM.addNestedPass<func::FuncOp>(
createOptimizeVectorTransferPass());
+ // Hoist loop invariant code to avoid pipelining it.
+ nestedModulePM.addNestedPass<func::FuncOp>(
+ createLoopInvariantCodeMotionPass());
// Pipeline memory operations.
nestedModulePM.addNestedPass<func::FuncOp>(createGPUPipeliningPass());
}
@@ -270,6 +273,9 @@
nestedModulePM.addPass(createCanonicalizerPass());
nestedModulePM.addPass(createCSEPass());
+ // Hoist loop invariant code to avoid pipelining it.
+ nestedModulePM.addNestedPass<func::FuncOp>(
+ createLoopInvariantCodeMotionPass());
PipeliningSchedulingStrategy schedule =
llvmgpuUseMMASync ? PipeliningSchedulingStrategy::nvidiaTensorCore
: PipeliningSchedulingStrategy::loadGlobalStage0;
diff --git a/runtime/src/iree/base/internal/cpu.c b/runtime/src/iree/base/internal/cpu.c
index 3bc25f1..66f784c 100644
--- a/runtime/src/iree/base/internal/cpu.c
+++ b/runtime/src/iree/base/internal/cpu.c
@@ -221,39 +221,6 @@
}
//===----------------------------------------------------------------------===//
-// Architecture-specific string lookup
-//===----------------------------------------------------------------------===//
-
-#define IREE_TEST_FIELD_BIT(field_key, field_value, bit_value) \
- if (iree_string_view_equal(key, IREE_SV(field_key))) { \
- *out_value = iree_all_bits_set((field_value), (bit_value)) ? 1 : 0; \
- return true; \
- }
-
-#if defined(IREE_ARCH_ARM_64)
-
-static bool iree_cpu_lookup_data_by_key_for_arch(
- const uint64_t* fields, iree_string_view_t key,
- int64_t* IREE_RESTRICT out_value) {
- IREE_TEST_FIELD_BIT("dotprod", fields[0], IREE_CPU_DATA0_ARM_64_DOTPROD);
- IREE_TEST_FIELD_BIT("i8mm", fields[0], IREE_CPU_DATA0_ARM_64_I8MM);
- return false;
-}
-
-#else
-
-static bool iree_cpu_lookup_data_by_key_for_arch(
- const uint64_t* fields, iree_string_view_t key,
- int64_t* IREE_RESTRICT out_value) {
- // Not yet implemented for this architecture.
- return false;
-}
-
-#endif // IREE_ARCH_*
-
-#undef IREE_TEST_FIELD_BIT
-
-//===----------------------------------------------------------------------===//
// Processor data query
//===----------------------------------------------------------------------===//
@@ -289,15 +256,30 @@
sizeof(*out_fields));
}
+//===----------------------------------------------------------------------===//
+// Processor data lookup by key
+//===----------------------------------------------------------------------===//
+
iree_status_t iree_cpu_lookup_data_by_key(iree_string_view_t key,
int64_t* IREE_RESTRICT out_value) {
- if (!iree_cpu_lookup_data_by_key_for_arch(iree_cpu_data_cache_, key,
- out_value)) {
- return iree_make_status(IREE_STATUS_NOT_FOUND,
- "CPU data key '%.*s' not found", (int)key.size,
- key.data);
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
+ if (IREE_ARCH_ENUM == IREE_ARCH_ENUM_##arch) { \
+ if (iree_string_view_equal(key, IREE_SV(llvm_name))) { \
+ *out_value = iree_all_bits_set( \
+ (iree_cpu_data_cache_[field_index]), \
+ IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name)) \
+ ? 1 \
+ : 0; \
+ return iree_ok_status(); \
+ } \
}
- return iree_ok_status();
+#include "iree/schemas/cpu_feature_bits.inl"
+#undef IREE_CPU_FEATURE_BIT
+
+ return iree_make_status(
+ IREE_STATUS_NOT_FOUND,
+ "CPU feature '%.*s' unknown on this architecture (%s)", (int)key.size,
+ key.data, IREE_ARCH);
}
//===----------------------------------------------------------------------===//
diff --git a/runtime/src/iree/base/target_platform.h b/runtime/src/iree/base/target_platform.h
index 1abd987..376fb27 100644
--- a/runtime/src/iree/base/target_platform.h
+++ b/runtime/src/iree/base/target_platform.h
@@ -11,6 +11,7 @@
// one platform+architecture pair for that platform.
//
// IREE_ARCH ("arm_32", "arm_64", etc)
+// IREE_ARCH_ENUM (IREE_ARCH_ENUM_ARM_32, etc)
// IREE_ARCH_ARM_32
// IREE_ARCH_ARM_64
// IREE_ARCH_RISCV_32
@@ -53,41 +54,60 @@
// IREE_ARCH_*
//==============================================================================
+enum iree_arch_enum_e {
+ IREE_ARCH_ENUM_ARM_32,
+ IREE_ARCH_ENUM_ARM_64,
+ IREE_ARCH_ENUM_RISCV_32,
+ IREE_ARCH_ENUM_RISCV_64,
+ IREE_ARCH_ENUM_WASM_32,
+ IREE_ARCH_ENUM_WASM_64,
+ IREE_ARCH_ENUM_X86_32,
+ IREE_ARCH_ENUM_X86_64,
+};
+
#if defined(__arm__) || defined(__arm64) || defined(__aarch64__) || \
defined(__thumb__) || defined(__TARGET_ARCH_ARM) || \
defined(__TARGET_ARCH_THUMB) || defined(_M_ARM)
#if defined(__arm64) || defined(__aarch64__)
#define IREE_ARCH "arm_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_ARM_64
#define IREE_ARCH_ARM_64 1
#else
#define IREE_ARCH "arm_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_ARM_32
#define IREE_ARCH_ARM_32 1
#endif // __arm64
#endif // ARM
#if defined(__riscv) && (__riscv_xlen == 32)
#define IREE_ARCH "riscv_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_RISCV_32
#define IREE_ARCH_RISCV_32 1
#elif defined(__riscv) && (__riscv_xlen == 64)
#define IREE_ARCH "riscv_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_RISCV_64
#define IREE_ARCH_RISCV_64 1
#endif // RISCV
#if defined(__wasm32__)
#define IREE_ARCH "wasm_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_WASM_32
#define IREE_ARCH_WASM_32 1
#elif defined(__wasm64__)
#define IREE_ARCH "wasm_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_WASM_64
#define IREE_ARCH_WASM_64 1
#endif // WASM
#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \
defined(__i686__) || defined(__i386) || defined(_M_IX86) || defined(_X86_)
#define IREE_ARCH "x86_32"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_X86_32
#define IREE_ARCH_X86_32 1
#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || \
defined(__amd64) || defined(_M_X64)
#define IREE_ARCH "x86_64"
+#define IREE_ARCH_ENUM IREE_ARCH_ENUM_X86_64
#define IREE_ARCH_X86_64 1
#endif // X86
diff --git a/runtime/src/iree/schemas/BUILD b/runtime/src/iree/schemas/BUILD
index 6f4464a..33af0e2 100644
--- a/runtime/src/iree/schemas/BUILD
+++ b/runtime/src/iree/schemas/BUILD
@@ -70,5 +70,6 @@
name = "cpu_data",
hdrs = [
"cpu_data.h",
+ "cpu_feature_bits.inl",
],
)
diff --git a/runtime/src/iree/schemas/CMakeLists.txt b/runtime/src/iree/schemas/CMakeLists.txt
index 78a8e9a..5e96962 100644
--- a/runtime/src/iree/schemas/CMakeLists.txt
+++ b/runtime/src/iree/schemas/CMakeLists.txt
@@ -93,6 +93,7 @@
cpu_data
HDRS
"cpu_data.h"
+ "cpu_feature_bits.inl"
DEPS
PUBLIC
diff --git a/runtime/src/iree/schemas/cpu_data.h b/runtime/src/iree/schemas/cpu_data.h
index 50a4ab3..46df305 100644
--- a/runtime/src/iree/schemas/cpu_data.h
+++ b/runtime/src/iree/schemas/cpu_data.h
@@ -60,58 +60,16 @@
// in the future.
#define IREE_CPU_DATA_FIELD_COUNT 8
+#define IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name) \
+ IREE_CPU_DATA##field_index##_##arch##_##bit_name
+
// Bitmasks and values for processor data field 0.
enum iree_cpu_data_field_0_e {
- //===--------------------------------------------------------------------===//
- // IREE_ARCH_ARM_64 / aarch64
- //===--------------------------------------------------------------------===//
-
- // TODO: add several common ARM ISA extensions and allocate some ranges of
- // bits for some families/eras. If we just start out with bits 0 and 1
- // allocated for dotprod and i8mm, we are quickly going to have a hard-to-read
- // enumeration here.
- IREE_CPU_DATA0_ARM_64_DOTPROD = 1ull << 0,
- IREE_CPU_DATA0_ARM_64_I8MM = 1ull << 1,
-
- //===--------------------------------------------------------------------===//
- // IREE_ARCH_X86_64 / x86-64
- //===--------------------------------------------------------------------===//
-
- // SSE features. Note: SSE and SSE2 are mandatory parts of X86-64.
- IREE_CPU_DATA0_X86_64_SSE3 = 1ull << 0,
- IREE_CPU_DATA0_X86_64_SSSE3 = 1ull << 1,
- IREE_CPU_DATA0_X86_64_SSE41 = 1ull << 2,
- IREE_CPU_DATA0_X86_64_SSE42 = 1ull << 3,
- IREE_CPU_DATA0_X86_64_SSE4A = 1ull << 4,
-
- // AVX features.
- IREE_CPU_DATA0_X86_64_AVX = 1ull << 10,
- IREE_CPU_DATA0_X86_64_FMA3 = 1ull << 11,
- IREE_CPU_DATA0_X86_64_FMA4 = 1ull << 12,
- IREE_CPU_DATA0_X86_64_XOP = 1ull << 13,
- IREE_CPU_DATA0_X86_64_F16C = 1ull << 14,
- IREE_CPU_DATA0_X86_64_AVX2 = 1ull << 15,
-
- // AVX-512 features.
- IREE_CPU_DATA0_X86_64_AVX512F = 1ull << 20,
- IREE_CPU_DATA0_X86_64_AVX512CD = 1ull << 21,
- IREE_CPU_DATA0_X86_64_AVX512VL = 1ull << 22,
- IREE_CPU_DATA0_X86_64_AVX512DQ = 1ull << 23,
- IREE_CPU_DATA0_X86_64_AVX512BW = 1ull << 24,
- IREE_CPU_DATA0_X86_64_AVX512IFMA = 1ull << 25,
- IREE_CPU_DATA0_X86_64_AVX512VBMI = 1ull << 26,
- IREE_CPU_DATA0_X86_64_AVX512VPOPCNTDQ = 1ull << 27,
- IREE_CPU_DATA0_X86_64_AVX512VNNI = 1ull << 28,
- IREE_CPU_DATA0_X86_64_AVX512VBMI2 = 1ull << 29,
- IREE_CPU_DATA0_X86_64_AVX512BITALG = 1ull << 30,
- IREE_CPU_DATA0_X86_64_AVX512BF16 = 1ull << 31,
- IREE_CPU_DATA0_X86_64_AVX512FP16 = 1ull << 32,
-
- // AMX features.
- IREE_CPU_DATA0_X86_64_AMXTILE = 1ull << 50,
- IREE_CPU_DATA0_X86_64_AMXINT8 = 1ull << 51,
- IREE_CPU_DATA0_X86_64_AMXBF16 = 1ull << 52,
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
+ IREE_CPU_FEATURE_BIT_NAME(arch, field_index, bit_name) = 1ull << bit_pos,
+#include "iree/schemas/cpu_feature_bits.inl"
+#undef IREE_CPU_FEATURE_BIT
};
diff --git a/runtime/src/iree/schemas/cpu_feature_bits.inl b/runtime/src/iree/schemas/cpu_feature_bits.inl
new file mode 100644
index 0000000..93ef0a9
--- /dev/null
+++ b/runtime/src/iree/schemas/cpu_feature_bits.inl
@@ -0,0 +1,87 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===----------------------------------------------------------------------===//
+// CPU features: IREE cpu_data bits and mapping to LLVM target attribute keys.
+//===----------------------------------------------------------------------===//
+//
+// Refer to the file comment in cpu_data.h. Summary:
+// - This is included in both compiler and runtime.
+// - Unconditionally define CPU features for all target architectures, not just
+// host, because this is needed by the compiler when targeting non-host.
+// - The bit values will soon be set in stone, because they will be encoded in
+// generated modules.
+// - Try to pack related features in the same cpu_data field and in nearby bits
+// if possible, on a best-effort basis.
+
+#ifndef IREE_CPU_FEATURE_BIT
+#error Define IREE_CPU_FEATURE_BIT before including this file.
+#endif
+
+// Format:
+// IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, "llvm_name")
+//
+// Where:
+// - `arch` is the CPU architecture that this CPU feature applies to, in
+// IREE's uppercase convention (e.g. ARM_64, X86_64; see IREE_ARCH_*).
+// - `field_index` is the index into the array returned by `iree_cpu_data_fields()`.
+// Allowed values range from 0 to (IREE_CPU_DATA_FIELD_COUNT-1).
+// - `bit_pos` is the position of the feature bit within that cpu data field.
+// As these fields are uint64_t, the range of `bit_pos` is 0..63.
+// - `bit_name` is the suffix to use to form the IREE C identifier for this
+// feature's bit value.
+// - `llvm_name` is the string name of the corresponding LLVM target attribute
+// (without a leading +).
+
+//===----------------------------------------------------------------------===//
+// IREE_ARCH_ARM_64 / aarch64
+//===----------------------------------------------------------------------===//
+
+// TODO: add several common ARM ISA extensions and allocate some ranges of
+// bits for some families/eras. If we just start out with bits 0 and 1
+// allocated for dotprod and i8mm, we are quickly going to have a hard-to-read
+// enumeration here.
+IREE_CPU_FEATURE_BIT(ARM_64, 0, 0, DOTPROD, "dotprod")
+IREE_CPU_FEATURE_BIT(ARM_64, 0, 1, I8MM, "i8mm")
+
+//===----------------------------------------------------------------------===//
+// IREE_ARCH_X86_64 / x86-64
+//===----------------------------------------------------------------------===//
+
+// SSE features. Note: SSE and SSE2 are mandatory parts of X86-64.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 0, SSE3, "sse3")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 1, SSSE3, "ssse3")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 2, SSE41, "sse4.1")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 3, SSE42, "sse4.2")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 4, SSE4A, "sse4a")
+
+// AVX features.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 10, AVX, "avx")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 11, FMA3, "fma")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 12, FMA4, "fma4")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 13, XOP, "xop")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 14, F16C, "f16c")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 15, AVX2, "avx2")
+
+// AVX-512 features.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 20, AVX512F, "avx512f")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 21, AVX512CD, "avx512cd")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 22, AVX512VL, "avx512vl")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 23, AVX512DQ, "avx512dq")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 24, AVX512BW, "avx512bw")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 25, AVX512IFMA, "avx512ifma")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 26, AVX512VBMI, "avx512vbmi")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 27, AVX512VPOPCNTDQ, "avx512vpopcntdq")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 28, AVX512VNNI, "avx512vnni")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 29, AVX512VBMI2, "avx512vbmi2")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 30, AVX512BITALG, "avx512bitalg")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 31, AVX512BF16, "avx512bf16")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 32, AVX512FP16, "avx512fp16")
+
+// AMX features.
+IREE_CPU_FEATURE_BIT(X86_64, 0, 50, AMXTILE, "amx-tile")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 51, AMXINT8, "amx-int8")
+IREE_CPU_FEATURE_BIT(X86_64, 0, 52, AMXBF16, "amx-bf16")
diff --git a/tools/BUILD b/tools/BUILD
index fff940c..3a37662 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -212,6 +212,16 @@
)
cc_binary(
+ name = "iree-cpuinfo",
+ srcs = ["iree-cpuinfo.c"],
+ deps = [
+ "//runtime/src/iree/base",
+ "//runtime/src/iree/base/internal:cpu",
+ "//runtime/src/iree/schemas:cpu_data",
+ ],
+)
+
+cc_binary(
name = "iree-tblgen",
srcs = [
"//compiler/src/iree/compiler/Dialect/VM/Tools:GenSrcs",
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index ea8d47c..50e56d1 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -187,6 +187,17 @@
yaml
)
+iree_cc_binary(
+ NAME
+ iree-cpuinfo
+ SRCS
+ "iree-cpuinfo.c"
+ DEPS
+ iree::base
+ iree::base::internal::cpu
+ iree::schemas::cpu_data
+)
+
if(IREE_BUILD_COMPILER)
# If a target backend that requires LLD to link codegen executables is
# enabled, install the target.
diff --git a/tools/iree-cpuinfo.c b/tools/iree-cpuinfo.c
new file mode 100644
index 0000000..d740465
--- /dev/null
+++ b/tools/iree-cpuinfo.c
@@ -0,0 +1,33 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/cpu.h"
+
+// Prints one line per CPU feature known for the architecture this tool was
+// built for: the LLVM target attribute name followed by 1 if the feature bit
+// is set in the queried CPU data and 0 otherwise.
+int main(int argc, char *argv[]) {
+  iree_cpu_initialize(iree_allocator_system());
+
+  // cpu_feature_bits.inl lists features for every architecture; only entries
+  // whose arch matches IREE_ARCH_ENUM are looked up and printed.
+  // int64_t is not `long` on every platform (e.g. LLP64 Windows), so the
+  // value is printed with PRId64 instead of a hard-coded %ld.
+#define IREE_CPU_FEATURE_BIT(arch, field_index, bit_pos, bit_name, llvm_name) \
+  if (IREE_ARCH_ENUM == IREE_ARCH_ENUM_##arch) {                              \
+    int64_t result = 0;                                                       \
+    IREE_CHECK_OK(iree_cpu_lookup_data_by_key(IREE_SV(llvm_name), &result));  \
+    printf("%-20s %" PRId64 "\n", llvm_name, result);                         \
+  }
+#include "iree/schemas/cpu_feature_bits.inl"
+#undef IREE_CPU_FEATURE_BIT
+
+  return 0;
+}