Update benchmarks to use "llvm-cpu" and "local-task" names consistently. (#9942)

diff --git a/benchmarks/TF/linux-x86_64.cmake b/benchmarks/TF/linux-x86_64.cmake
index c656dba..053f590 100644
--- a/benchmarks/TF/linux-x86_64.cmake
+++ b/benchmarks/TF/linux-x86_64.cmake
@@ -22,7 +22,7 @@
   "--iree-llvm-target-triple=x86_64-unknown-linux-gnu"
 )
 
-# CPU, Dylib-Sync, x86_64, full-inference
+# CPU, LLVM, local-sync, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -33,7 +33,7 @@
   BENCHMARK_MODES
     "full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -41,12 +41,12 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib-sync"
+    "iree-llvm-cpu-sync"
   DRIVER
     "local-sync"
 )
 
-# CPU, Dylib, 1 thread, x86_64, full-inference
+# CPU, LLVM, local-task, 1 thread, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -57,7 +57,7 @@
   BENCHMARK_MODES
     "1-thread,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -65,14 +65,14 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
     "--task_topology_group_count=1"
 )
 
-# CPU, Dylib, 4 threads, x86_64, full-inference
+# CPU, LLVM, local-task, 4 threads, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -83,7 +83,7 @@
   BENCHMARK_MODES
     "4-thread,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -91,14 +91,14 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
     "--task_topology_group_count=4"
 )
 
-# CPU, Dylib, 8 threads, x86_64, full-inference
+# CPU, LLVM, local-task, 8 threads, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -109,7 +109,7 @@
   BENCHMARK_MODES
     "8-thread,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -117,7 +117,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
diff --git a/benchmarks/TFLite/android-arm64-v8a.cmake b/benchmarks/TFLite/android-arm64-v8a.cmake
index 82b972d..bae325b 100644
--- a/benchmarks/TFLite/android-arm64-v8a.cmake
+++ b/benchmarks/TFLite/android-arm64-v8a.cmake
@@ -20,7 +20,7 @@
   "--iree-input-type=tosa"
   "--iree-llvm-target-triple=aarch64-none-linux-android29")
 
-# CPU, Dylib-Sync, big/little-core, full-inference
+# CPU, LLVM, local-sync, big/little-core, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "android-arm64-v8a"
@@ -38,7 +38,7 @@
     "big-core,full-inference,default-flags"
     "little-core,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -46,12 +46,12 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib-sync"
+    "iree-llvm-cpu-sync"
   DRIVER
     "local-sync"
 )
 
-# CPU, Dylib, 1 through 4 threads, big/little-core, full-inference.
+# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference.
 iree_benchmark_suite(
   GROUP_NAME
     "android-arm64-v8a"
@@ -69,7 +69,7 @@
     "1-thread,big-core,full-inference,default-flags"
     "1-thread,little-core,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -77,7 +77,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
@@ -102,7 +102,7 @@
 #     "2-thread,big-core,full-inference,default-flags"
 #     "2-thread,little-core,full-inference,default-flags"
 #   TARGET_BACKEND
-#     "dylib-llvm-aot"
+#     "llvm-cpu"
 #   TARGET_ARCHITECTURE
 #     "CPU-ARM64-v8A"
 #   COMPILATION_FLAGS
@@ -110,7 +110,7 @@
 #   BENCHMARK_TOOL
 #     iree-benchmark-module
 #   CONFIG
-#    "iree-dylib"
+#    "iree-llvm-cpu"
 #   DRIVER
 #     "local-task"
 #   RUNTIME_FLAGS
@@ -133,7 +133,7 @@
 #     "3-thread,big-core,full-inference,default-flags"
 #     "3-thread,little-core,full-inference,default-flags"
 #   TARGET_BACKEND
-#     "dylib-llvm-aot"
+#     "llvm-cpu"
 #   TARGET_ARCHITECTURE
 #     "CPU-ARM64-v8A"
 #   COMPILATION_FLAGS
@@ -141,7 +141,7 @@
 #   BENCHMARK_TOOL
 #     iree-benchmark-module
 #   CONFIG
-#    "iree-dylib"
+#    "iree-llvm-cpu"
 #   DRIVER
 #     "local-task"
 #   RUNTIME_FLAGS
@@ -165,7 +165,7 @@
     "4-thread,big-core,full-inference,default-flags"
     "4-thread,little-core,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -173,7 +173,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
@@ -196,7 +196,7 @@
 #                                                                              #
 ################################################################################
 
-# CPU, Dylib-Sync, big/little-core, full-inference
+# CPU, LLVM, local-sync, big/little-core, full-inference
 # NOTE: this is not enabling any SIMD extension beyond baseline Aarch64.
 # At the moment we use that for fp32 models. We would change that when new
 # devices support relevant fp32 SIMD extensions beyond that (e.g. +f32mm).
@@ -216,7 +216,7 @@
     "big-core,full-inference,experimental-flags"
     "little-core,full-inference,experimental-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -225,12 +225,12 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib-sync"
+    "iree-llvm-cpu-sync"
   DRIVER
     "local-sync"
 )
 
-# CPU, Dylib-Sync, big/little-core, full-inference, +dotprod
+# CPU, LLVM, local-sync, big/little-core, full-inference, +dotprod
 # NOTE: +dotprod is only relevant to int8, not fp32.
 # TODO: add a +i8mm variant, supported by new devices already. No rush: our i8mm
 # kernel is currently naive, not ready for benchmarking.
@@ -245,7 +245,7 @@
     "big-core,full-inference,experimental-flags"
     "little-core,full-inference,experimental-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -255,7 +255,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib-sync"
+    "iree-llvm-cpu-sync"
   DRIVER
     "local-sync"
 )
@@ -263,7 +263,7 @@
 # TODO(#7792): Consider re-enabling little-core experimental-flags if we start
 # optimizing for little cores or we can just run them occasionally
 
-# CPU, Dylib, 1 through 4 threads, big/little-core, full-inference.
+# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference.
 # NOTE: this is not enabling any SIMD extension beyond baseline Aarch64.
 # At the moment we use that for fp32 models. We would change that when new
 # devices support relevant fp32 SIMD extensions beyond that (e.g. f32mm).
@@ -283,7 +283,7 @@
     "1-thread,big-core,full-inference,experimental-flags"
     # "1-thread,little-core,full-inference,experimental-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -292,14 +292,14 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
     "--task_topology_group_count=1"
 )
 
-# CPU, Dylib, 1 through 4 threads, big/little-core, full-inference, +dotprod
+# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference, +dotprod
 # NOTE: +dotprod is only relevant to int8, not fp32.
 # TODO: add a +i8mm variant, supported by new devices already. No rush: our i8mm
 # kernel is currently naive, not ready for benchmarking.
@@ -314,7 +314,7 @@
     "1-thread,big-core,full-inference,experimental-flags"
     # "1-thread,little-core,full-inference,experimental-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -324,7 +324,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
@@ -349,7 +349,7 @@
 #     "2-thread,big-core,full-inference,experimental-flags"
 #     "2-thread,little-core,full-inference,experimental-flags"
 #   TARGET_BACKEND
-#     "dylib-llvm-aot"
+#     "llvm-cpu"
 #   TARGET_ARCHITECTURE
 #     "CPU-ARM64-v8A"
 #   COMPILATION_FLAGS
@@ -358,7 +358,7 @@
 #   BENCHMARK_TOOL
 #     iree-benchmark-module
 #   CONFIG
-#    "iree-dylib"
+#    "iree-llvm-cpu"
 #   DRIVER
 #     "local-task"
 #   RUNTIME_FLAGS
@@ -381,7 +381,7 @@
 #     "3-thread,big-core,full-inference,experimental-flags"
 #     "3-thread,little-core,full-inference,experimental-flags"
 #   TARGET_BACKEND
-#     "dylib-llvm-aot"
+#     "llvm-cpu"
 #   TARGET_ARCHITECTURE
 #     "CPU-ARM64-v8A"
 #   COMPILATION_FLAGS
@@ -390,14 +390,14 @@
 #   BENCHMARK_TOOL
 #     iree-benchmark-module
 #   CONFIG
-#    "iree-dylib"
+#    "iree-llvm-cpu"
 #   DRIVER
 #     "local-task"
 #   RUNTIME_FLAGS
 #     "--task_topology_group_count=3"
 # )
 
-# CPU, Dylib, 1 through 4 threads, big/little-core, full-inference.
+# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference.
 # NOTE: this is not enabling any SIMD extension beyond baseline Aarch64.
 # At the moment we use that for fp32 models. We would change that when new
 # devices support relevant fp32 SIMD extensions beyond that (e.g. f32mm).
@@ -417,7 +417,7 @@
     "4-thread,big-core,full-inference,experimental-flags"
     # "4-thread,little-core,full-inference,experimental-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -427,14 +427,14 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
     "--task_topology_group_count=4"
 )
 
-# CPU, Dylib-Sync, big/little-core, full-inference, +dotprod
+# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference, +dotprod
 # NOTE: +dotprod is only relevant to int8, not fp32.
 # TODO: add a +i8mm variant, supported by new devices already. No rush: our i8mm
 # kernel is currently naive, not ready for benchmarking.
@@ -449,7 +449,7 @@
     "4-thread,big-core,full-inference,experimental-flags"
     # "4-thread,little-core,full-inference,experimental-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
@@ -460,7 +460,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
diff --git a/benchmarks/TFLite/linux-riscv.cmake b/benchmarks/TFLite/linux-riscv.cmake
index 9aacd4c..0d84005 100644
--- a/benchmarks/TFLite/linux-riscv.cmake
+++ b/benchmarks/TFLite/linux-riscv.cmake
@@ -36,7 +36,7 @@
   "--riscv-v-fixed-length-vector-lmul-max=8"
 )
 
-# CPU, Dylib-Sync, RV64-Generic, full-inference
+# CPU, LLVM, local-sync, RV64-Generic, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-riscv"
@@ -52,7 +52,7 @@
   BENCHMARK_MODES
     "full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-RV64-Generic"
   COMPILATION_FLAGS
@@ -60,12 +60,12 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib-sync"
+    "iree-llvm-cpu-sync"
   DRIVER
     "local-sync"
 )
 
-# CPU, Dylib-Sync, RV32-Generic, full-inference
+# CPU, LLVM, local-sync, RV32-Generic, full-inference
 # Note this target is for codegen only. Inference is only possible with
 # the cross-compiled runtime and an emulator.
 iree_benchmark_suite(
@@ -80,7 +80,7 @@
   BENCHMARK_MODES
     "full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-RV32-Generic"
   COMPILATION_FLAGS
@@ -88,7 +88,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib-sync"
+    "iree-llvm-cpu-sync"
   DRIVER
-    "dylib-sync"
+    "local-sync"
 )
diff --git a/benchmarks/TFLite/linux-x86_64.cmake b/benchmarks/TFLite/linux-x86_64.cmake
index fa8d64c..79dcb23 100644
--- a/benchmarks/TFLite/linux-x86_64.cmake
+++ b/benchmarks/TFLite/linux-x86_64.cmake
@@ -22,7 +22,7 @@
   "--iree-llvm-target-triple=x86_64-unknown-linux-gnu"
 )
 
-# CPU, Dylib-Sync, x86_64, full-inference
+# CPU, LLVM, local-sync, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -41,7 +41,7 @@
   BENCHMARK_MODES
     "full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -49,12 +49,12 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib-sync"
+    "iree-llvm-cpu-sync"
   DRIVER
     "local-sync"
 )
 
-# CPU, Dylib, 1 thread, x86_64, full-inference
+# CPU, LLVM, local-task, 1 thread, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -73,7 +73,7 @@
   BENCHMARK_MODES
     "1-thread,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -81,14 +81,14 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
     "--task_topology_group_count=1"
 )
 
-# CPU, Dylib, 4 threads, x86_64, full-inference
+# CPU, LLVM, local-task, 4 threads, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -107,7 +107,7 @@
   BENCHMARK_MODES
     "4-thread,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -115,14 +115,14 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
     "--task_topology_group_count=4"
 )
 
-# CPU, Dylib, 8 threads, x86_64, full-inference
+# CPU, LLVM, local-task, 8 threads, x86_64, full-inference
 iree_benchmark_suite(
   GROUP_NAME
     "linux-x86_64"
@@ -141,7 +141,7 @@
   BENCHMARK_MODES
     "8-thread,full-inference,default-flags"
   TARGET_BACKEND
-    "dylib-llvm-aot"
+    "llvm-cpu"
   TARGET_ARCHITECTURE
     "CPU-x86_64-CascadeLake"
   COMPILATION_FLAGS
@@ -149,7 +149,7 @@
   BENCHMARK_TOOL
     iree-benchmark-module
   CONFIG
-    "iree-dylib"
+    "iree-llvm-cpu"
   DRIVER
     "local-task"
   RUNTIME_FLAGS
diff --git a/build_tools/benchmarks/README.md b/build_tools/benchmarks/README.md
index ca01f9d..6560778 100644
--- a/build_tools/benchmarks/README.md
+++ b/build_tools/benchmarks/README.md
@@ -44,7 +44,7 @@
 ./run_benchmarks_on_linux.py \
   --normal_benchmark_tool_dir=$IREE_NORMAL_TOOL_DIR \
   --model_name_regex="MobileBertSquad" \
-  --driver_filter_regex="dylib" \
+  --driver_filter_regex="local-task" \
   --mode_regex="4-threads" \
   --output=results.json $IREE_BUILD_DIR
 ```
diff --git a/build_tools/benchmarks/common/benchmark_definition.py b/build_tools/benchmarks/common/benchmark_definition.py
index 922ec58..e0cb81d 100644
--- a/build_tools/benchmarks/common/benchmark_definition.py
+++ b/build_tools/benchmarks/common/benchmark_definition.py
@@ -47,7 +47,7 @@
   """An object describing a IREE HAL driver.
 
   It includes the following characteristics:
-  - pretty_name: the pretty name, e.g., 'IREE-DyLib'
+  - pretty_name: the pretty name, e.g., 'IREE-LLVM-CPU'
   - device_type: the targeted device type, e.g., 'CPU'
   - driver_name: runtime driver flag, e.g., 'local-task'
   - loader_name: executable loader name, if used
@@ -63,10 +63,10 @@
 # mapping to more friendly ones and detach to keep driver names used in
 # benchmark presentation stable.
 IREE_DRIVERS_INFOS = {
-    "iree-dylib":
-        DriverInfo("IREE-Dylib", "CPU", "local-task", "embedded-elf"),
-    "iree-dylib-sync":
-        DriverInfo("IREE-Dylib-Sync", "CPU", "local-sync", "embedded-elf"),
+    "iree-llvm-cpu":
+        DriverInfo("IREE-LLVM-CPU", "CPU", "local-task", "embedded-elf"),
+    "iree-llvm-cpu-sync":
+        DriverInfo("IREE-LLVM-CPU-Sync", "CPU", "local-sync", "embedded-elf"),
     "iree-vmvx":
         DriverInfo("IREE-VMVX", "CPU", "local-task", "vmvx-module"),
     "iree-vmvx-sync":
diff --git a/build_tools/benchmarks/common/benchmark_driver_test.py b/build_tools/benchmarks/common/benchmark_driver_test.py
index b46939b..314eee7 100644
--- a/build_tools/benchmarks/common/benchmark_driver_test.py
+++ b/build_tools/benchmarks/common/benchmark_driver_test.py
@@ -77,14 +77,14 @@
                           model_tags=[],
                           bench_mode=["1-thread", "full-inference"],
                           target_arch="CPU-ARM64-v8A",
-                          driver_info=IREE_DRIVERS_INFOS["iree-dylib"],
+                          driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu"],
                           benchmark_case_dir="case1",
                           benchmark_tool_name="tool")
     case2 = BenchmarkCase(model_name="DeepNetv2",
                           model_tags=["f32"],
                           bench_mode=["full-inference"],
                           target_arch="CPU-ARM64-v8A",
-                          driver_info=IREE_DRIVERS_INFOS["iree-dylib-sync"],
+                          driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
                           benchmark_case_dir="case2",
                           benchmark_tool_name="tool")
     self.benchmark_suite = BenchmarkSuite({
@@ -102,11 +102,11 @@
     os.makedirs(os.path.join(self.tmp_dir.name, CAPTURES_REL_PATH))
     benchmark_filename = os.path.join(
         self.tmp_dir.name, BENCHMARK_RESULTS_REL_PATH,
-        "MobileNetv2 [fp32,imagenet] (TFLite) big-core,full-inference with IREE-Dylib @ Pixel-4 (CPU-ARMv8.2-A).json"
+        "MobileNetv2 [fp32,imagenet] (TFLite) big-core,full-inference with IREE-LLVM-CPU @ Pixel-4 (CPU-ARMv8.2-A).json"
     )
     capture_filename = os.path.join(
         self.tmp_dir.name, CAPTURES_REL_PATH,
-        "MobileNetv2 [fp32,imagenet] (TFLite) big-core,full-inference with IREE-Dylib @ Pixel-4 (CPU-ARMv8.2-A).tracy"
+        "MobileNetv2 [fp32,imagenet] (TFLite) big-core,full-inference with IREE-LLVM-CPU @ Pixel-4 (CPU-ARMv8.2-A).tracy"
     )
     with open(os.path.join(benchmark_filename), "w") as f:
       f.write("")
@@ -132,21 +132,21 @@
     self.assertEqual(driver.get_benchmark_result_filenames(), [
         os.path.join(
             self.tmp_dir.name, BENCHMARK_RESULTS_REL_PATH,
-            "DeepNet (TFLite) 1-thread,full-inference with IREE-Dylib @ Unknown (CPU-ARMv8-A).json"
+            "DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).json"
         ),
         os.path.join(
             self.tmp_dir.name, BENCHMARK_RESULTS_REL_PATH,
-            "DeepNetv2 [f32] (TFLite) full-inference with IREE-Dylib-Sync @ Unknown (CPU-ARMv8-A).json"
+            "DeepNetv2 [f32] (TFLite) full-inference with IREE-LLVM-CPU-Sync @ Unknown (CPU-ARMv8-A).json"
         )
     ])
     self.assertEqual(driver.get_capture_filenames(), [
         os.path.join(
             self.tmp_dir.name, CAPTURES_REL_PATH,
-            "DeepNet (TFLite) 1-thread,full-inference with IREE-Dylib @ Unknown (CPU-ARMv8-A).tracy"
+            "DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).tracy"
         ),
         os.path.join(
             self.tmp_dir.name, CAPTURES_REL_PATH,
-            "DeepNetv2 [f32] (TFLite) full-inference with IREE-Dylib-Sync @ Unknown (CPU-ARMv8-A).tracy"
+            "DeepNetv2 [f32] (TFLite) full-inference with IREE-LLVM-CPU-Sync @ Unknown (CPU-ARMv8-A).tracy"
         )
     ])
     self.assertEqual(driver.get_benchmark_errors(), [])
diff --git a/build_tools/benchmarks/common/benchmark_suite_test.py b/build_tools/benchmarks/common/benchmark_suite_test.py
index f49986f..2e86c6e 100644
--- a/build_tools/benchmarks/common/benchmark_suite_test.py
+++ b/build_tools/benchmarks/common/benchmark_suite_test.py
@@ -29,7 +29,7 @@
                           model_tags=[],
                           bench_mode=["1-thread", "full-inference"],
                           target_arch="CPU-ARMv8",
-                          driver_info=IREE_DRIVERS_INFOS["iree-dylib"],
+                          driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu"],
                           benchmark_case_dir="case1",
                           benchmark_tool_name="tool")
     case2 = BenchmarkCase(model_name="deepnetv2",
@@ -43,7 +43,7 @@
                           model_tags=["f32"],
                           bench_mode=["full-inference"],
                           target_arch="CPU-x86_64",
-                          driver_info=IREE_DRIVERS_INFOS["iree-dylib-sync"],
+                          driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
                           benchmark_case_dir="case3",
                           benchmark_tool_name="tool")
     suite = BenchmarkSuite({
@@ -104,7 +104,7 @@
                                         model_tags=["f32"],
                                         bench_mode=["4-thread", "full"],
                                         target_arch="CPU-ARMv8",
-                                        config="iree-dylib",
+                                        config="iree-llvm-cpu",
                                         tool="run-cpu-bench")
       case2 = BenchmarkSuiteTest.__create_bench(pytorch_dir,
                                                 model_name="DeepNetv2",
diff --git a/build_tools/benchmarks/common/benchmark_thresholds.py b/build_tools/benchmarks/common/benchmark_thresholds.py
index 16282f1..1346f4a 100644
--- a/build_tools/benchmarks/common/benchmark_thresholds.py
+++ b/build_tools/benchmarks/common/benchmark_thresholds.py
@@ -41,26 +41,27 @@
 # match is used.
 BENCHMARK_THRESHOLDS = [
     # Fluctuating benchmarks on CPUs.
-    BenchmarkThreshold(re.compile(r"^DeepLabV3.*big-core.*Dylib.* @ Pixel"), 20,
-                       ThresholdUnit.PERCENTAGE),
-    BenchmarkThreshold(
-        re.compile(r"^MobileBertSquad.*big-core.*Dylib-Sync @ Pixel-4"), 50,
-        ThresholdUnit.PERCENTAGE),
-    BenchmarkThreshold(re.compile(r"^MobileNetV2.*Dylib @ Pixel"), 15,
-                       ThresholdUnit.PERCENTAGE),
-    BenchmarkThreshold(re.compile(r"^MobileNetV3Small.*Dylib-Sync @ Pixel-6"),
+    BenchmarkThreshold(re.compile(r"^DeepLabV3.*big-core.*LLVM-CPU.* @ Pixel"),
                        20, ThresholdUnit.PERCENTAGE),
     BenchmarkThreshold(
-        re.compile(r"^MobileNetV3Small.*big-core.*Dylib @ Pixel-6"), 20,
+        re.compile(r"^MobileBertSquad.*big-core.*LLVM-CPU-Sync @ Pixel-4"), 50,
         ThresholdUnit.PERCENTAGE),
-    BenchmarkThreshold(
-        re.compile(r"^MobileNetV3Small.*little-core.*Dylib @ Pixel"), 20,
-        ThresholdUnit.PERCENTAGE),
-    BenchmarkThreshold(
-        re.compile(r"^MobileSSD.*little-core.*Dylib.* @ Pixel-6"), 20,
-        ThresholdUnit.PERCENTAGE),
-    BenchmarkThreshold(re.compile(r"^PoseNet.*big-core.*Dylib.* @ Pixel-6"), 20,
+    BenchmarkThreshold(re.compile(r"^MobileNetV2.*LLVM-CPU @ Pixel"), 15,
                        ThresholdUnit.PERCENTAGE),
+    BenchmarkThreshold(
+        re.compile(r"^MobileNetV3Small.*LLVM-CPU-Sync @ Pixel-6"), 20,
+        ThresholdUnit.PERCENTAGE),
+    BenchmarkThreshold(
+        re.compile(r"^MobileNetV3Small.*big-core.*LLVM-CPU @ Pixel-6"), 20,
+        ThresholdUnit.PERCENTAGE),
+    BenchmarkThreshold(
+        re.compile(r"^MobileNetV3Small.*little-core.*LLVM-CPU @ Pixel"), 20,
+        ThresholdUnit.PERCENTAGE),
+    BenchmarkThreshold(
+        re.compile(r"^MobileSSD.*little-core.*LLVM-CPU.* @ Pixel-6"), 20,
+        ThresholdUnit.PERCENTAGE),
+    BenchmarkThreshold(re.compile(r"^PoseNet.*big-core.*LLVM-CPU.* @ Pixel-6"),
+                       20, ThresholdUnit.PERCENTAGE),
 
     # Fluctuating benchmarks on GPUs.
     BenchmarkThreshold(
diff --git a/build_tools/benchmarks/comparisons/README.md b/build_tools/benchmarks/comparisons/README.md
index 0124b54..8913dff 100644
--- a/build_tools/benchmarks/comparisons/README.md
+++ b/build_tools/benchmarks/comparisons/README.md
@@ -18,7 +18,7 @@
   └── models/
         ├── tflite/*.tflite
         └── iree/
-              └── <driver>/*.vmfb e.g. dylib, vulkan, cuda.
+              └── <target>/*.vmfb e.g. llvm-cpu, vulkan, cuda.
 ```
 
 # Prerequisites
diff --git a/build_tools/benchmarks/comparisons/run_benchmarks.py b/build_tools/benchmarks/comparisons/run_benchmarks.py
index 1c42a00..8cdfd2b 100644
--- a/build_tools/benchmarks/comparisons/run_benchmarks.py
+++ b/build_tools/benchmarks/comparisons/run_benchmarks.py
@@ -17,7 +17,7 @@
   └── models/
         ├── tflite/*.tflite
         └── iree/
-              └── <driver>/*.vmfb e.g. dylib, vulkan, cuda.
+              └── <target>/*.vmfb e.g. llvm-cpu, vulkan, cuda.
 
 """
 
diff --git a/build_tools/benchmarks/comparisons/setup_desktop.sh b/build_tools/benchmarks/comparisons/setup_desktop.sh
index 1ad2d8f..1de08d7 100644
--- a/build_tools/benchmarks/comparisons/setup_desktop.sh
+++ b/build_tools/benchmarks/comparisons/setup_desktop.sh
@@ -50,7 +50,7 @@
 TFLITE_MODEL_DIR="${ROOT_DIR}/models/tflite"
 IREE_MODEL_DIR="${ROOT_DIR}/models/iree"
 mkdir -p "${IREE_MODEL_DIR}/cuda"
-mkdir -p "${IREE_MODEL_DIR}/dylib"
+mkdir -p "${IREE_MODEL_DIR}/llvm-cpu"
 
 MODEL_NAME="mobilebert_float_384_gpu"
 bazel-bin/iree_tf_compiler/iree-import-tflite "${TFLITE_MODEL_DIR}/${MODEL_NAME}.tflite" -o "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir"
@@ -66,16 +66,16 @@
   "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \
   --o "${IREE_MODEL_DIR}/cuda/${MODEL_NAME}.vmfb"
 # Build for x86.
-echo "Compiling ${MODEL_NAME}.vmfb for dylib..."
+echo "Compiling ${MODEL_NAME}.vmfb for llvm-cpu..."
 "${IREE_COMPILE_PATH}" \
   --iree-input-type=tosa \
   --iree-llvm-target-cpu-features=host \
-  --iree-hal-target-backends=dylib-llvm-aot \
+  --iree-hal-target-backends=llvm-cpu \
   --iree-llvm-debug-symbols=false \
   --iree-vm-bytecode-module-strip-source-map=true \
   --iree-vm-emit-polyglot-zip=false \
   "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \
-  --o "${IREE_MODEL_DIR}/dylib/${MODEL_NAME}.vmfb"
+  --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}.vmfb"
 
 cp "${SOURCE_DIR}/iree-build/tools/iree-benchmark-module" "${ROOT_DIR}/"
 
diff --git a/build_tools/benchmarks/comparisons/setup_mobile.sh b/build_tools/benchmarks/comparisons/setup_mobile.sh
index 5f81003..45f8422 100644
--- a/build_tools/benchmarks/comparisons/setup_mobile.sh
+++ b/build_tools/benchmarks/comparisons/setup_mobile.sh
@@ -67,7 +67,7 @@
 TFLITE_MODEL_DIR="${ROOT_DIR}/models/tflite"
 IREE_MODEL_DIR="${ROOT_DIR}/models/iree"
 mkdir -p "${IREE_MODEL_DIR}/vulkan"
-mkdir -p "${IREE_MODEL_DIR}/local-task"
+mkdir -p "${IREE_MODEL_DIR}/llvm-cpu"
 
 for i in $(ls ${ROOT_DIR}/models/tflite/); do
   MODEL_NAME=$(basename $i .tflite)
@@ -77,18 +77,18 @@
   echo -e "\tCompiling ${MODEL_NAME}.vmfb for aarch64..."
   "${IREE_COMPILE_PATH}" \
     --iree-input-type=tosa \
-    --iree-hal-target-backends=dylib-llvm-aot \
+    --iree-hal-target-backends=llvm-cpu \
     --iree-llvm-target-triple=aarch64-none-linux-android29 \
     --iree-llvm-debug-symbols=false \
     --iree-vm-bytecode-module-strip-source-map=true \
     --iree-vm-emit-polyglot-zip=false \
     "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \
-    --o "${IREE_MODEL_DIR}/local-task/${MODEL_NAME}.vmfb"
+    --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}.vmfb"
 
   echo -e "\tCompiling ${MODEL_NAME}_mmt4d.vmfb for aarch64..."
   "${IREE_COMPILE_PATH}" \
     --iree-input-type=tosa \
-    --iree-hal-target-backends=dylib-llvm-aot \
+    --iree-hal-target-backends=llvm-cpu \
     --iree-llvm-target-triple=aarch64-none-linux-android29 \
     "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod" \
     --iree-llvm-target-cpu-features=+dotprod \
@@ -96,7 +96,7 @@
     --iree-vm-bytecode-module-strip-source-map=true \
     --iree-vm-emit-polyglot-zip=false \
     "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \
-    --o "${IREE_MODEL_DIR}/local-task/${MODEL_NAME}_mmt4d.vmfb"
+    --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}_mmt4d.vmfb"
 
   if [[ "${GPU_TYPE}" = "mali" ]]; then
     echo -e "\tCompiling ${MODEL_NAME}.vmfb for vulkan mali..."