Merge google -> main (#7935)
* 697b59f2a Merge pull request #7932 from GMNGeoffrey:main-to-google
* 62aa54afc Integrate LLVM at llvm/llvm-project@a3ea9052d6a1
* 629fb8869 Synchronize submodules with LLVM at llvm/llvm-project@55c71c9eac9b
* 3585ae827 Integrate LLVM at llvm/llvm-project@55c71c9eac9b
* 78bbf9a69 Integrate LLVM at llvm/llvm-project@6f1a501fddae
* 53328622e Integrate LLVM at llvm/llvm-project@7f9e9c7fc341
* 795ba9c7f Integrate LLVM at llvm/llvm-project@ec0e4545caa1
* 5aacc205d Integrate LLVM at llvm/llvm-project@65d7fd0239bf
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 5ac8fd1..55f5d63 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -60,3 +60,32 @@
### Other project metrics
TODO(#6161): Collect metrics for miscellaneous IREE system states
+
+## Developer notes
+
+These are ad-hoc notes added for developers to help triage errors.
+
+### Repro of TFLite model errors
+
+These steps help reproduce the failures in TFLite models.
+
+1. Install `iree-import-tflite`.
+ ```
+ $ python -m pip install iree-tools-tflite-snapshot -f https://github.com/google/iree/releases
+ ```
+
+2. Confirm the binary `iree-import-tflite` is in your path by running:
+ ```
+ $ iree-import-tflite --help
+ ```
+
+3. Download the TFLite flatbuffer for the failing benchmarks. Its
+location can be found in [this CMakeLists.txt file](./TFLite/CMakeLists.txt).
+
+4. The input TOSA model can be generated by running:
+ ```
+ $ iree-import-tflite <tflite file> -o <tosa output file>
+ ```
+
+5. The exact flags used to compile and run the benchmarks can be
+found in [this CMakeLists.txt file](./TFLite/CMakeLists.txt).
diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt
index bd443a0..9702492 100644
--- a/benchmarks/TFLite/CMakeLists.txt
+++ b/benchmarks/TFLite/CMakeLists.txt
@@ -192,53 +192,55 @@
"--task_topology_group_count=1"
)
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# TODO(#7792): Re-enable these when we are able to run different benchmarks
+# depending on use-case (presubmit, postsubmit, nightly, etc.)
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "2-thread,big-core,full-inference,default-flags"
- "2-thread,little-core,full-inference,default-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=2"
-)
+# BENCHMARK_MODES
+# "2-thread,big-core,full-inference,default-flags"
+# "2-thread,little-core,full-inference,default-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=2"
+# )
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "3-thread,big-core,full-inference,default-flags"
- "3-thread,little-core,full-inference,default-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=3"
-)
+# BENCHMARK_MODES
+# "3-thread,big-core,full-inference,default-flags"
+# "3-thread,little-core,full-inference,default-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=3"
+# )
iree_benchmark_suite(
MODULES
@@ -369,6 +371,9 @@
"dylib-sync"
)
+# TODO(#7792): Consider re-enabling little-core experimental-flags once we start
+# optimizing for little cores, or once we are able to run them only occasionally.
+
# CPU, Dylib, 1 through 4 threads, big/little-core, full-inference.
iree_benchmark_suite(
MODULES
@@ -381,7 +386,7 @@
BENCHMARK_MODES
"1-thread,big-core,full-inference,experimental-flags"
- "1-thread,little-core,full-inference,experimental-flags"
+ # "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -396,57 +401,59 @@
"--task_topology_group_count=1"
)
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# TODO(#7792): Re-enable these when we are able to run different benchmarks
+# depending on use-case (presubmit, postsubmit, nightly, etc.)
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "2-thread,big-core,full-inference,experimental-flags"
- "2-thread,little-core,full-inference,experimental-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- "--iree-flow-inline-constants-max-byte-length=2048"
- "--iree-llvm-loop-unrolling=true"
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=2"
-)
+# BENCHMARK_MODES
+# "2-thread,big-core,full-inference,experimental-flags"
+# "2-thread,little-core,full-inference,experimental-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# "--iree-flow-inline-constants-max-byte-length=2048"
+# "--iree-llvm-loop-unrolling=true"
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=2"
+# )
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "3-thread,big-core,full-inference,experimental-flags"
- "3-thread,little-core,full-inference,experimental-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- "--iree-flow-inline-constants-max-byte-length=2048"
- "--iree-llvm-loop-unrolling=true"
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=3"
-)
+# BENCHMARK_MODES
+# "3-thread,big-core,full-inference,experimental-flags"
+# "3-thread,little-core,full-inference,experimental-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# "--iree-flow-inline-constants-max-byte-length=2048"
+# "--iree-llvm-loop-unrolling=true"
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=3"
+# )
iree_benchmark_suite(
MODULES
@@ -459,7 +466,7 @@
BENCHMARK_MODES
"4-thread,big-core,full-inference,experimental-flags"
- "4-thread,little-core,full-inference,experimental-flags"
+ # "4-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE