Merge pull request #7934 from google/benvanik-buffer-hooks
Adding iree_hal_allocator_t::deallocate_buffer.
diff --git a/SUBMODULE_VERSIONS.txt b/SUBMODULE_VERSIONS.txt
index 1daeded..696af44 100644
--- a/SUBMODULE_VERSIONS.txt
+++ b/SUBMODULE_VERSIONS.txt
@@ -4,14 +4,14 @@
aa533abfd4232b01f9e57041d70114d5a77e6de0 third_party/googletest
88b845dee001723c4a0db1fe5477de735b6d3bb0 third_party/liburing
f8f760f7387d2cc56a2fc7b1be313a3bf3f7f58c third_party/libyaml
-4f60a42878b0d46bc2cc84d8f0d316cac2c60c9d third_party/llvm-project
-cf097ee16b718cce7498747416772e1b3a7e9dc6 third_party/mlir-hlo
+a3ea9052d6a16b13607046df6a324403fb51888d third_party/llvm-project
+4d4adc2e0dd7368b1a1cad6d8ebd26f9476ecbf0 third_party/mlir-hlo
3f701faace7addc75d16dea8a6cd769fa5b3f260 third_party/musl
59aa99860c60bd171b9565e9920f125fdb749267 third_party/pybind11
e9cc6403341baf0edd430a4027b074d0a06b782f third_party/spirv_cross
d53b49635b7484e86959608a65a64d8121e6a385 third_party/spirv_headers
af1a5bc352164740c1cc1354942b1c6b72eacb8a third_party/stblib
-ef0b7c51b6cd0caac025bfe671e0b767e3413468 third_party/tensorflow
+f435ae9dee673e83504618b77e1be8cddda73e74 third_party/tensorflow
058e89011fceca912d43638ebb6b85992147fcfe third_party/tracy
9e62d027636cd7210f60d934f56107ed6e1579b8 third_party/vulkan_headers
5c8b3ba955f0dbb30d18afc420f3a38adc779231 third_party/vulkan_memory_allocator
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 5ac8fd1..55f5d63 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -60,3 +60,32 @@
### Other project metrics
TODO(#6161): Collect metrics for miscellaneous IREE system states
+
+## Developer notes
+
+These are ad-hoc notes added for developers to help triage errors.
+
+### Repro of TFLite model errors
+
+These steps help reproduce the failures in TFLite models.
+
+1. Install `iree-import-tflite`.
+ ```
+ $ python -m pip install iree-tools-tflite-snapshot -f https://github.com/google/iree/releases
+ ```
+
+2. Confirm the binary `iree-import-tflite` is in your path by running:
+ ```
+ $ iree-import-tflite --help
+ ```
+
+3. Download the TFLite flatbuffer for the failing benchmarks. The
+location can be found in [this CMakeLists.txt file](./TFLite/CMakeLists.txt).
+
+4. The input TOSA model can be generated by running:
+ ```
+ $ iree-import-tflite <tflite file> -o <tosa output file>
+ ```
+
+5. The exact flags used to compile and run the benchmarks can be
+found in [this CMakeLists.txt file](./TFLite/CMakeLists.txt).
diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt
index bd443a0..9702492 100644
--- a/benchmarks/TFLite/CMakeLists.txt
+++ b/benchmarks/TFLite/CMakeLists.txt
@@ -192,53 +192,55 @@
"--task_topology_group_count=1"
)
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# TODO(#7792): Re-enable these when we are able to run different benchmarks
+# depending on use-case (presubmit, postsubmit, nightly, etc.)
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "2-thread,big-core,full-inference,default-flags"
- "2-thread,little-core,full-inference,default-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=2"
-)
+# BENCHMARK_MODES
+# "2-thread,big-core,full-inference,default-flags"
+# "2-thread,little-core,full-inference,default-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=2"
+# )
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "3-thread,big-core,full-inference,default-flags"
- "3-thread,little-core,full-inference,default-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=3"
-)
+# BENCHMARK_MODES
+# "3-thread,big-core,full-inference,default-flags"
+# "3-thread,little-core,full-inference,default-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=3"
+# )
iree_benchmark_suite(
MODULES
@@ -369,6 +371,9 @@
"dylib-sync"
)
+# TODO(#7792): Consider re-enabling little-core experimental-flags if we start
+# optimizing for little cores or we can just run them occasionally
+
# CPU, Dylib, 1 through 4 threads, big/little-core, full-inference.
iree_benchmark_suite(
MODULES
@@ -381,7 +386,7 @@
BENCHMARK_MODES
"1-thread,big-core,full-inference,experimental-flags"
- "1-thread,little-core,full-inference,experimental-flags"
+ # "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -396,57 +401,59 @@
"--task_topology_group_count=1"
)
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# TODO(#7792): Re-enable these when we are able to run different benchmarks
+# depending on use-case (presubmit, postsubmit, nightly, etc.)
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "2-thread,big-core,full-inference,experimental-flags"
- "2-thread,little-core,full-inference,experimental-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- "--iree-flow-inline-constants-max-byte-length=2048"
- "--iree-llvm-loop-unrolling=true"
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=2"
-)
+# BENCHMARK_MODES
+# "2-thread,big-core,full-inference,experimental-flags"
+# "2-thread,little-core,full-inference,experimental-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# "--iree-flow-inline-constants-max-byte-length=2048"
+# "--iree-llvm-loop-unrolling=true"
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=2"
+# )
-iree_benchmark_suite(
- MODULES
- "${DEEPLABV3_FP32_MODULE}"
- "${MOBILESSD_FP32_MODULE}"
- "${POSENET_FP32_MODULE}"
- "${MOBILEBERT_FP32_MODULE}"
- "${MOBILENET_V2_MODULE}"
- "${MOBILENET_V3SMALL_MODULE}"
+# iree_benchmark_suite(
+# MODULES
+# "${DEEPLABV3_FP32_MODULE}"
+# "${MOBILESSD_FP32_MODULE}"
+# "${POSENET_FP32_MODULE}"
+# "${MOBILEBERT_FP32_MODULE}"
+# "${MOBILENET_V2_MODULE}"
+# "${MOBILENET_V3SMALL_MODULE}"
- BENCHMARK_MODES
- "3-thread,big-core,full-inference,experimental-flags"
- "3-thread,little-core,full-inference,experimental-flags"
- TARGET_BACKEND
- "dylib-llvm-aot"
- TARGET_ARCHITECTURE
- "CPU-ARM64-v8A"
- TRANSLATION_FLAGS
- ${ANDROID_CPU_TRANSLATION_FLAGS}
- "--iree-flow-inline-constants-max-byte-length=2048"
- "--iree-llvm-loop-unrolling=true"
- DRIVER
- "dylib"
- RUNTIME_FLAGS
- "--task_topology_group_count=3"
-)
+# BENCHMARK_MODES
+# "3-thread,big-core,full-inference,experimental-flags"
+# "3-thread,little-core,full-inference,experimental-flags"
+# TARGET_BACKEND
+# "dylib-llvm-aot"
+# TARGET_ARCHITECTURE
+# "CPU-ARM64-v8A"
+# TRANSLATION_FLAGS
+# ${ANDROID_CPU_TRANSLATION_FLAGS}
+# "--iree-flow-inline-constants-max-byte-length=2048"
+# "--iree-llvm-loop-unrolling=true"
+# DRIVER
+# "dylib"
+# RUNTIME_FLAGS
+# "--task_topology_group_count=3"
+# )
iree_benchmark_suite(
MODULES
@@ -459,7 +466,7 @@
BENCHMARK_MODES
"4-thread,big-core,full-inference,experimental-flags"
- "4-thread,little-core,full-inference,experimental-flags"
+ # "4-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
index c40e2ed..f1a7f30 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
@@ -123,6 +123,7 @@
// Input should now be legal.
.addPass(createVerifyInputLegalityPass);
+ passManager.addPass(mlir::createLinalgNamedOpConversionPass());
buildGlobalOptimizationPassPipeline(passManager, transformOptions);
// Perform cleanup after variable simplification as more canonicalizers may be
@@ -143,7 +144,6 @@
.addPass(mlir::createCSEPass)
.addPredicatedPass(clEnableLinalgDetensorize,
mlir::createLinalgDetensorizePass)
-
// Dispatch region formation.
.addPass(createConvertToFlowBeforeDispatchFormation)
.addPass(mlir::createCanonicalizerPass)
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 4f60a42..a3ea905 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 4f60a42878b0d46bc2cc84d8f0d316cac2c60c9d
+Subproject commit a3ea9052d6a16b13607046df6a324403fb51888d
diff --git a/third_party/mlir-hlo b/third_party/mlir-hlo
index cf097ee..4d4adc2 160000
--- a/third_party/mlir-hlo
+++ b/third_party/mlir-hlo
@@ -1 +1 @@
-Subproject commit cf097ee16b718cce7498747416772e1b3a7e9dc6
+Subproject commit 4d4adc2e0dd7368b1a1cad6d8ebd26f9476ecbf0
diff --git a/third_party/tensorflow b/third_party/tensorflow
index ef0b7c5..f435ae9 160000
--- a/third_party/tensorflow
+++ b/third_party/tensorflow
@@ -1 +1 @@
-Subproject commit ef0b7c51b6cd0caac025bfe671e0b767e3413468
+Subproject commit f435ae9dee673e83504618b77e1be8cddda73e74