Update samples to use "llvm-cpu" and "local-task" names consistently. (#9932)

Progress on https://github.com/iree-org/iree/issues/9930

Also revised some of the markdown files for formatting and clarity while I was in there.
diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/riscv64/tests/simple_embedded_tests.run b/build_tools/kokoro/gcp_ubuntu/cmake/linux/riscv64/tests/simple_embedded_tests.run
index ff4d07a..eba6704 100644
--- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/riscv64/tests/simple_embedded_tests.run
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/riscv64/tests/simple_embedded_tests.run
@@ -1,3 +1,3 @@
-// RUN: ${TEST_CMD} ${BUILD_RISCV_DIR}/samples/simple_embedding/simple_embedding_dylib
+// RUN: ${TEST_CMD} ${BUILD_RISCV_DIR}/samples/simple_embedding/simple_embedding_embedded
 // RUN: ${TEST_CMD} ${BUILD_RISCV_DIR}/samples/simple_embedding/simple_embedding_embedded_sync
 // RUN: ${TEST_CMD} ${BUILD_RISCV_DIR}/samples/simple_embedding/simple_embedding_vmvx_sync
diff --git a/experimental/web/generate_web_metrics.sh b/experimental/web/generate_web_metrics.sh
index 6c73a01..5c429c8 100644
--- a/experimental/web/generate_web_metrics.sh
+++ b/experimental/web/generate_web_metrics.sh
@@ -130,7 +130,7 @@
   # compile-time statistics that we can.
   "${IREE_COMPILE_PATH?}" "${INPUT_FILE}" \
     --iree-input-type=tosa \
-    --iree-hal-target-backends=llvm \
+    --iree-hal-target-backends=llvm-cpu \
     --iree-llvm-target-triple=wasm32-unknown-emscripten \
     --iree-hal-dump-executable-sources-to="${ARTIFACTS_DIR}" \
     --iree-hal-dump-executable-binaries-to="${ARTIFACTS_DIR}" \
@@ -164,7 +164,7 @@
   # compile-time statistics that we can.
   "${IREE_COMPILE_PATH?}" "${INPUT_FILE}" \
     --iree-input-type=tosa \
-    --iree-hal-target-backends=llvm \
+    --iree-hal-target-backends=llvm-cpu \
     --iree-hal-dump-executable-sources-to="${ARTIFACTS_DIR}" \
     --iree-hal-dump-executable-binaries-to="${ARTIFACTS_DIR}" \
     --iree-scheduling-dump-statistics-format=csv \
diff --git a/experimental/web/sample_dynamic/build_sample.sh b/experimental/web/sample_dynamic/build_sample.sh
index 1eb61ef..6e8b887 100755
--- a/experimental/web/sample_dynamic/build_sample.sh
+++ b/experimental/web/sample_dynamic/build_sample.sh
@@ -49,7 +49,7 @@
   echo "  Compiling '$1' sample..."
   ${COMPILE_TOOL?} $2 \
     --iree-input-type=mhlo \
-    --iree-hal-target-backends=llvm \
+    --iree-hal-target-backends=llvm-cpu \
     --iree-llvm-target-triple=wasm32-unknown-emscripten \
     --iree-llvm-target-cpu-features=+atomics,+bulk-memory,+simd128 \
     --o ${BINARY_DIR}/$1.vmfb
diff --git a/experimental/web/sample_dynamic/index.html b/experimental/web/sample_dynamic/index.html
index 0c65798..871d5c7 100644
--- a/experimental/web/sample_dynamic/index.html
+++ b/experimental/web/sample_dynamic/index.html
@@ -158,7 +158,7 @@
 
     <textarea type="text" readonly spellcheck="false"
     class="form-control" style="width:610px; height:90px; resize:none; font-family: monospace;">
---iree-hal-target-backends=llvm \
+--iree-hal-target-backends=llvm-cpu \
 --iree-llvm-target-triple=wasm32-unknown-emscripten \
 --iree-llvm-target-cpu-features=+atomics,+bulk-memory,+simd128 \</textarea>
 
diff --git a/experimental/web/sample_static/build_sample.sh b/experimental/web/sample_static/build_sample.sh
index f780fca..b9a979d 100755
--- a/experimental/web/sample_static/build_sample.sh
+++ b/experimental/web/sample_static/build_sample.sh
@@ -51,7 +51,7 @@
 echo "=== Compiling MLIR to static library output (.vmfb, .h, .o) ==="
 ${COMPILE_TOOL?} ${INPUT_PATH} \
   --iree-input-type=mhlo \
-  --iree-hal-target-backends=llvm \
+  --iree-hal-target-backends=llvm-cpu \
   --iree-llvm-target-triple=wasm32-unknown-unknown \
   --iree-llvm-target-cpu-features=+simd128 \
   --iree-llvm-link-static \
diff --git a/samples/colab/mnist_training.ipynb b/samples/colab/mnist_training.ipynb
index 3251d34..cba73c2 100644
--- a/samples/colab/mnist_training.ipynb
+++ b/samples/colab/mnist_training.ipynb
@@ -331,7 +331,7 @@
         "cellView": "form"
       },
       "source": [
-        "backend_choice = \"dylib-llvm-aot (CPU)\" #@param [ \"vmvx (CPU)\", \"dylib-llvm-aot (CPU)\", \"vulkan-spirv (GPU/SwiftShader – requires additional drivers) \" ]\n",
+        "backend_choice = \"llvm-cpu (CPU)\" #@param [ \"vmvx (CPU)\", \"llvm-cpu (CPU)\", \"vulkan-spirv (GPU/SwiftShader – requires additional drivers) \" ]\n",
         "backend_choice = backend_choice.split(' ')[0]"
       ],
       "execution_count": 9,
diff --git a/samples/dynamic_shapes/README.md b/samples/dynamic_shapes/README.md
index 107db9b..251d5b5 100644
--- a/samples/dynamic_shapes/README.md
+++ b/samples/dynamic_shapes/README.md
@@ -82,7 +82,7 @@
 
     ```
     ../iree-build/tools/iree-compile \
-        --iree-hal-target-backends=cpu \
+        --iree-hal-target-backends=llvm-cpu \
         --iree-input-type=mhlo \
         dynamic_shapes.mlir -o dynamic_shapes_cpu.vmfb
     ```
diff --git a/samples/dynamic_shapes/test.sh b/samples/dynamic_shapes/test.sh
index 6517f78..54a416d 100755
--- a/samples/dynamic_shapes/test.sh
+++ b/samples/dynamic_shapes/test.sh
@@ -27,7 +27,7 @@
 
 # 3. Compile `dynamic_shapes.mlir` using `iree-compile`.
 ${BUILD_DIR}/tools/iree-compile \
-  --iree-hal-target-backends=cpu \
+  --iree-hal-target-backends=llvm-cpu \
   --iree-input-type=mhlo \
   ${ARTIFACTS_DIR}/dynamic_shapes.mlir -o ${ARTIFACTS_DIR}/dynamic_shapes_cpu.vmfb
 
diff --git a/samples/simple_embedding/BUILD b/samples/simple_embedding/BUILD
index aa8dacb..9856586 100644
--- a/samples/simple_embedding/BUILD
+++ b/samples/simple_embedding/BUILD
@@ -77,11 +77,11 @@
         "simple_embedding.c",
     ],
     deps = [
-        ":simple_embedding_test_bytecode_module_dylib_arm_32_c",
-        ":simple_embedding_test_bytecode_module_dylib_arm_64_c",
-        ":simple_embedding_test_bytecode_module_dylib_riscv_32_c",
-        ":simple_embedding_test_bytecode_module_dylib_riscv_64_c",
-        ":simple_embedding_test_bytecode_module_dylib_x86_64_c",
+        ":simple_embedding_test_bytecode_module_cpu_arm_32_c",
+        ":simple_embedding_test_bytecode_module_cpu_arm_64_c",
+        ":simple_embedding_test_bytecode_module_cpu_riscv_32_c",
+        ":simple_embedding_test_bytecode_module_cpu_riscv_64_c",
+        ":simple_embedding_test_bytecode_module_cpu_x86_64_c",
         "//runtime/src/iree/base",
         "//runtime/src/iree/hal",
         "//runtime/src/iree/hal/drivers/local_sync:sync_driver",
@@ -94,11 +94,11 @@
 )
 
 iree_bytecode_module(
-    name = "simple_embedding_test_bytecode_module_dylib_x86_64",
+    name = "simple_embedding_test_bytecode_module_cpu_x86_64",
     src = "simple_embedding_test.mlir",
-    c_identifier = "iree_samples_simple_embedding_test_module_dylib_x86_64",
+    c_identifier = "iree_samples_simple_embedding_test_module_cpu_x86_64",
     flags = [
-        "--iree-hal-target-backends=dylib-llvm-aot",
+        "--iree-hal-target-backends=llvm-cpu",
         "--iree-llvm-target-triple=x86_64-pc-linux-elf",
         "--iree-llvm-debug-symbols=false",
         "--iree-vm-bytecode-module-strip-source-map=true",
@@ -107,11 +107,11 @@
 )
 
 iree_bytecode_module(
-    name = "simple_embedding_test_bytecode_module_dylib_riscv_32",
+    name = "simple_embedding_test_bytecode_module_cpu_riscv_32",
     src = "simple_embedding_test.mlir",
-    c_identifier = "iree_samples_simple_embedding_test_module_dylib_riscv_32",
+    c_identifier = "iree_samples_simple_embedding_test_module_cpu_riscv_32",
     flags = [
-        "--iree-hal-target-backends=dylib-llvm-aot",
+        "--iree-hal-target-backends=llvm-cpu",
         "--iree-llvm-target-triple=riscv32-pc-linux-elf",
         "--iree-llvm-target-cpu=generic-rv32",
         "--iree-llvm-target-cpu-features=+m,+f",
@@ -123,11 +123,11 @@
 )
 
 iree_bytecode_module(
-    name = "simple_embedding_test_bytecode_module_dylib_riscv_64",
+    name = "simple_embedding_test_bytecode_module_cpu_riscv_64",
     src = "simple_embedding_test.mlir",
-    c_identifier = "iree_samples_simple_embedding_test_module_dylib_riscv_64",
+    c_identifier = "iree_samples_simple_embedding_test_module_cpu_riscv_64",
     flags = [
-        "--iree-hal-target-backends=dylib-llvm-aot",
+        "--iree-hal-target-backends=llvm-cpu",
         "--iree-llvm-target-triple=riscv64-pc-linux-elf",
         "--iree-llvm-target-cpu=generic-rv64",
         "--iree-llvm-target-cpu-features=+m,+a,+f,+d,+c",
@@ -139,11 +139,11 @@
 )
 
 iree_bytecode_module(
-    name = "simple_embedding_test_bytecode_module_dylib_arm_32",
+    name = "simple_embedding_test_bytecode_module_cpu_arm_32",
     src = "simple_embedding_test.mlir",
-    c_identifier = "iree_samples_simple_embedding_test_module_dylib_arm_32",
+    c_identifier = "iree_samples_simple_embedding_test_module_cpu_arm_32",
     flags = [
-        "--iree-hal-target-backends=dylib-llvm-aot",
+        "--iree-hal-target-backends=llvm-cpu",
         "--iree-llvm-target-triple=armv7a-pc-linux-elf",
         "--iree-llvm-target-float-abi=hard",
         "--iree-llvm-debug-symbols=false",
@@ -153,11 +153,11 @@
 )
 
 iree_bytecode_module(
-    name = "simple_embedding_test_bytecode_module_dylib_arm_64",
+    name = "simple_embedding_test_bytecode_module_cpu_arm_64",
     src = "simple_embedding_test.mlir",
-    c_identifier = "iree_samples_simple_embedding_test_module_dylib_arm_64",
+    c_identifier = "iree_samples_simple_embedding_test_module_cpu_arm_64",
     flags = [
-        "--iree-hal-target-backends=dylib-llvm-aot",
+        "--iree-hal-target-backends=llvm-cpu",
         "--iree-llvm-target-triple=aarch64-pc-linux-elf",
         "--iree-llvm-debug-symbols=false",
         "--iree-vm-bytecode-module-strip-source-map=true",
@@ -178,15 +178,15 @@
 )
 
 cc_binary(
-    name = "simple_embedding_dylib",
+    name = "simple_embedding_embedded",
     srcs = [
-        "device_dylib.c",
+        "device_embedded.c",
         "simple_embedding.c",
     ],
     deps = [
-        ":simple_embedding_test_bytecode_module_dylib_arm_64_c",
-        ":simple_embedding_test_bytecode_module_dylib_riscv_64_c",
-        ":simple_embedding_test_bytecode_module_dylib_x86_64_c",
+        ":simple_embedding_test_bytecode_module_cpu_arm_64_c",
+        ":simple_embedding_test_bytecode_module_cpu_riscv_64_c",
+        ":simple_embedding_test_bytecode_module_cpu_x86_64_c",
         "//runtime/src/iree/base",
         "//runtime/src/iree/hal",
         "//runtime/src/iree/hal/drivers/local_task:task_driver",
@@ -200,8 +200,8 @@
 )
 
 native_test(
-    name = "simple_embedding_dylib_test",
-    src = ":simple_embedding_dylib",
+    name = "simple_embedding_embedded_test",
+    src = ":simple_embedding_embedded",
 )
 
 iree_cmake_extra_content(
diff --git a/samples/simple_embedding/CMakeLists.txt b/samples/simple_embedding/CMakeLists.txt
index 5b20d4e..6ed7711 100644
--- a/samples/simple_embedding/CMakeLists.txt
+++ b/samples/simple_embedding/CMakeLists.txt
@@ -62,11 +62,11 @@
     "device_embedded_sync.c"
     "simple_embedding.c"
   DEPS
-    ::simple_embedding_test_bytecode_module_dylib_arm_32_c
-    ::simple_embedding_test_bytecode_module_dylib_arm_64_c
-    ::simple_embedding_test_bytecode_module_dylib_riscv_32_c
-    ::simple_embedding_test_bytecode_module_dylib_riscv_64_c
-    ::simple_embedding_test_bytecode_module_dylib_x86_64_c
+    ::simple_embedding_test_bytecode_module_cpu_arm_32_c
+    ::simple_embedding_test_bytecode_module_cpu_arm_64_c
+    ::simple_embedding_test_bytecode_module_cpu_riscv_32_c
+    ::simple_embedding_test_bytecode_module_cpu_riscv_64_c
+    ::simple_embedding_test_bytecode_module_cpu_x86_64_c
     iree::base
     iree::hal
     iree::hal::drivers::local_sync::sync_driver
@@ -79,13 +79,13 @@
 
 iree_bytecode_module(
   NAME
-    simple_embedding_test_bytecode_module_dylib_x86_64
+    simple_embedding_test_bytecode_module_cpu_x86_64
   SRC
     "simple_embedding_test.mlir"
   C_IDENTIFIER
-    "iree_samples_simple_embedding_test_module_dylib_x86_64"
+    "iree_samples_simple_embedding_test_module_cpu_x86_64"
   FLAGS
-    "--iree-hal-target-backends=dylib-llvm-aot"
+    "--iree-hal-target-backends=llvm-cpu"
     "--iree-llvm-target-triple=x86_64-pc-linux-elf"
     "--iree-llvm-debug-symbols=false"
     "--iree-vm-bytecode-module-strip-source-map=true"
@@ -95,13 +95,13 @@
 
 iree_bytecode_module(
   NAME
-    simple_embedding_test_bytecode_module_dylib_riscv_32
+    simple_embedding_test_bytecode_module_cpu_riscv_32
   SRC
     "simple_embedding_test.mlir"
   C_IDENTIFIER
-    "iree_samples_simple_embedding_test_module_dylib_riscv_32"
+    "iree_samples_simple_embedding_test_module_cpu_riscv_32"
   FLAGS
-    "--iree-hal-target-backends=dylib-llvm-aot"
+    "--iree-hal-target-backends=llvm-cpu"
     "--iree-llvm-target-triple=riscv32-pc-linux-elf"
     "--iree-llvm-target-cpu=generic-rv32"
     "--iree-llvm-target-cpu-features=+m,+f"
@@ -114,13 +114,13 @@
 
 iree_bytecode_module(
   NAME
-    simple_embedding_test_bytecode_module_dylib_riscv_64
+    simple_embedding_test_bytecode_module_cpu_riscv_64
   SRC
     "simple_embedding_test.mlir"
   C_IDENTIFIER
-    "iree_samples_simple_embedding_test_module_dylib_riscv_64"
+    "iree_samples_simple_embedding_test_module_cpu_riscv_64"
   FLAGS
-    "--iree-hal-target-backends=dylib-llvm-aot"
+    "--iree-hal-target-backends=llvm-cpu"
     "--iree-llvm-target-triple=riscv64-pc-linux-elf"
     "--iree-llvm-target-cpu=generic-rv64"
     "--iree-llvm-target-cpu-features=+m,+a,+f,+d,+c"
@@ -133,13 +133,13 @@
 
 iree_bytecode_module(
   NAME
-    simple_embedding_test_bytecode_module_dylib_arm_32
+    simple_embedding_test_bytecode_module_cpu_arm_32
   SRC
     "simple_embedding_test.mlir"
   C_IDENTIFIER
-    "iree_samples_simple_embedding_test_module_dylib_arm_32"
+    "iree_samples_simple_embedding_test_module_cpu_arm_32"
   FLAGS
-    "--iree-hal-target-backends=dylib-llvm-aot"
+    "--iree-hal-target-backends=llvm-cpu"
     "--iree-llvm-target-triple=armv7a-pc-linux-elf"
     "--iree-llvm-target-float-abi=hard"
     "--iree-llvm-debug-symbols=false"
@@ -150,13 +150,13 @@
 
 iree_bytecode_module(
   NAME
-    simple_embedding_test_bytecode_module_dylib_arm_64
+    simple_embedding_test_bytecode_module_cpu_arm_64
   SRC
     "simple_embedding_test.mlir"
   C_IDENTIFIER
-    "iree_samples_simple_embedding_test_module_dylib_arm_64"
+    "iree_samples_simple_embedding_test_module_cpu_arm_64"
   FLAGS
-    "--iree-hal-target-backends=dylib-llvm-aot"
+    "--iree-hal-target-backends=llvm-cpu"
     "--iree-llvm-target-triple=aarch64-pc-linux-elf"
     "--iree-llvm-debug-symbols=false"
     "--iree-vm-bytecode-module-strip-source-map=true"
@@ -175,14 +175,14 @@
 
 iree_cc_binary(
   NAME
-    simple_embedding_dylib
+    simple_embedding_embedded
   SRCS
-    "device_dylib.c"
+    "device_embedded.c"
     "simple_embedding.c"
   DEPS
-    ::simple_embedding_test_bytecode_module_dylib_arm_64_c
-    ::simple_embedding_test_bytecode_module_dylib_riscv_64_c
-    ::simple_embedding_test_bytecode_module_dylib_x86_64_c
+    ::simple_embedding_test_bytecode_module_cpu_arm_64_c
+    ::simple_embedding_test_bytecode_module_cpu_riscv_64_c
+    ::simple_embedding_test_bytecode_module_cpu_x86_64_c
     iree::base
     iree::hal
     iree::hal::drivers::local_task::task_driver
@@ -196,9 +196,9 @@
 
 iree_native_test(
   NAME
-    "simple_embedding_dylib_test"
+    "simple_embedding_embedded_test"
   SRC
-    ::simple_embedding_dylib
+    ::simple_embedding_embedded
 )
 
 endif()
diff --git a/samples/simple_embedding/README.md b/samples/simple_embedding/README.md
index 4dcb318..a34a5f8 100644
--- a/samples/simple_embedding/README.md
+++ b/samples/simple_embedding/README.md
@@ -3,63 +3,67 @@
 This sample shows how to run a simple pointwise array multiplication bytecode
 module on various HAL device targets with the minimum runtime overhead. Some of
 these devices are compatible with bare-metal system without threading or file IO
-supports.
+support.
 
-# Background
+## Background
 
 The main bytecode testing tool
-[iree-run-module](https://github.com/iree-org/iree/tree/main/tools/iree-run-module-main.cc)
+[iree-run-module](../../tools/iree-run-module-main.cc)
 requires a proper operating system support to set up the runtime environment to
 execute an IREE bytecode module. For embedded systems, the support such as file
 system or multi-thread asynchronous control may not be available. This sample
 demonstrates how to setup the simplest framework to load and run the IREE
 bytecode with various target backends.
 
-# Build instructions
+## Build instructions
 
-## CMake (native and cross compilation)
+### CMake (native and cross compilation)
 
 Set up the CMake configuration with `-DIREE_BUILD_SAMPLES=ON` (default on)
 
 Then run
 ```sh
-cmake --build <build dir> --target iree/samples/simple_embedding/all
+cmake --build <build dir> --target samples/simple_embedding/all
 ```
 
-## Bazel (host only)
+### Bazel (host only)
+
 ```sh
-bazel build iree/samples/simple_embedding:all
+bazel build samples/simple_embedding:all
 ```
 
 The resulting executables are listed as `simple_embedding_<HAL devices>`.
 
-# Code structure
+## Code structure
 
 The sample consists of three parts:
 
-## simple_embedding_test.mlir
+### simple_embedding_test.mlir
+
 The simple pointwise array multiplication op with the entry function called
 `simple_mul`, two <4xf32> inputs, and one <4xf32> output. The ML bytecode
-modules are automatically generated during the build time with the targed HAL
-device configurations from the host compiler `iree-tranlate`.
+modules are automatically generated during the build time with the target HAL
+device configurations from the host compiler `iree-compile`.
 
-## simple_embedding.c
+### simple_embedding.c
 
 The main function of the sample has the following steps:
-1. Create a VM instance.
-2. Create a HAL module based on the target device (see the next section).
-3. Load the bytecode module of the ML workload.
-4. Asssociate the HAL module with the bytecode module in the VM context.
-5. Prepare the function entry point and inputs.
-6. Invoke function.
-7. Retrieve function output.
 
-## device_*.c
+1. Create a VM instance
+2. Create a HAL module based on the target device (see the next section)
+3. Load the bytecode module of the ML workload
+4. Associate the HAL module with the bytecode module in the VM context
+5. Prepare the function entry point and inputs
+6. Invoke function
+7. Retrieve function output
 
-The HAL device for different target backends. The device is a `module_loader` +
-`executor` combination. For example,
-[device_embedded_sync.c](https://github.com/iree-org/iree/blob/main/iree/samples/simple_embedding/device_embedded_sync.c)
-uses the embedded library loader and the synchronous executor:
+### device_*.c
+
+The HAL device for different target backends. Devices are created using a
+specific executable loader and device constructor. For example,
+[device_embedded_sync.c](./device_embedded_sync.c) creates a "sync" device with
+the embedded ELF loader:
+
 ```c
 iree_hal_sync_device_params_t params;
 iree_hal_sync_device_params_initialize(&params);
@@ -68,41 +72,43 @@
       iree_hal_executable_import_provider_null(), iree_allocator_system(),
       &loader));
 
-iree_string_view_t identifier = iree_make_cstring_view("dylib");
+iree_string_view_t identifier = iree_make_cstring_view("local-sync");
 
 iree_status_t status =
     iree_hal_sync_device_create(identifier, &params, /*loader_count=*/1,
                                 &loader, iree_allocator_system(), device);
 ```
 
-Whereas for
-[device_dylib.c](https://github.com/iree-org/iree/blob/main/iree/samples/simple_embedding/device_dylib.c),
-the executor is replaced with the multi-thread ready asynchronous task executor:
+Whereas for [device_embedded.c](./device_embedded.c), the "sync device" is
+replaced with the multithreaded "task device", which uses a "task executor":
+
 ```c
 ...
 iree_task_executor_t* executor = NULL;
 iree_status_t status =
     iree_task_executor_create_from_flags(iree_allocator_system(), &executor);
 
-iree_string_view_t identifier = iree_make_cstring_view("dylib");
+iree_string_view_t identifier = iree_make_cstring_view("local-task");
 if (iree_status_is_ok(status)) {
   // Create the device.
   status = iree_hal_task_device_create(identifier, &params, executor,
-                                        /*loader_count=*/1, &loader,
-                                        iree_allocator_system(), device);
+                                       /*loader_count=*/1, &loader,
+                                       iree_allocator_system(), device);
 ```
 An example that utilizes a higher-level driver registry is in
-[device_vulkan.c](https://github.com/iree-org/iree/blob/main/iree/samples/simple_embedding/device_vulkan.c)
+[device_vulkan.c](./device_vulkan.c).
 
-### Load device-specific bytecode module
+#### Load device-specific bytecode module
+
 To avoid the file IO, the bytecode module is converted into a data stream
 (`module_data`) that's embedded in the executable. The same strategy can be
 applied to build applications for the embedded systems without a proper file IO.
 
-# Generic platform support
+## Generic platform support
+
 Some of the devices in this sample support a generic platform (or the
 machine mode without an operating system). For example, `device_vmvx_sync`
 should support any architecture that IREE supports, and `device_embedded_sync`
-should support any architecture that supports `dylib-llvm-aot` codegen target
+should support any architecture that supports `llvm-cpu` codegen target
 backend (may need to add the bytecode module data if it is not already in
-[device_embedded_sync.c](https://github.com/iree-org/iree/blob/main/iree/samples/simple_embedding/device_embedded_sync.c)).
+[device_embedded_sync.c](./device_embedded_sync.c)).
diff --git a/samples/simple_embedding/device_dylib.c b/samples/simple_embedding/device_embedded.c
similarity index 83%
rename from samples/simple_embedding/device_dylib.c
rename to samples/simple_embedding/device_embedded.c
index 3e13118..5d365e6 100644
--- a/samples/simple_embedding/device_dylib.c
+++ b/samples/simple_embedding/device_embedded.c
@@ -4,7 +4,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-// A example of setting up the the dylib driver.
+// An example of setting up a local-task device with the embedded ELF loader.
 
 #include <stddef.h>
 
@@ -16,13 +16,13 @@
 #include "iree/task/api.h"
 
 // Compiled module embedded here to avoid file IO:
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_arm_64_c.h"
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_riscv_64_c.h"
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_x86_64_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_arm_64_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_riscv_64_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_x86_64_c.h"
 
 iree_status_t create_sample_device(iree_allocator_t host_allocator,
                                    iree_hal_device_t** out_device) {
-  // Set paramters for the device created in the next step.
+  // Set parameters for the device created in the next step.
   iree_hal_task_device_params_t params;
   iree_hal_task_device_params_initialize(&params);
 
@@ -34,8 +34,9 @@
   iree_status_t status =
       iree_task_executor_create_from_flags(host_allocator, &executor);
 
+  iree_string_view_t identifier = iree_make_cstring_view("local-task");
+
   // Use the default host allocator for buffer allocations.
-  iree_string_view_t identifier = iree_make_cstring_view("dylib");
   iree_hal_allocator_t* device_allocator = NULL;
   if (iree_status_is_ok(status)) {
     status = iree_hal_allocator_create_heap(identifier, host_allocator,
@@ -59,13 +60,13 @@
 const iree_const_byte_span_t load_bytecode_module_data() {
 #if IREE_ARCH_X86_64
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_x86_64_create();
+      iree_samples_simple_embedding_test_module_cpu_x86_64_create();
 #elif IREE_ARCH_RISCV_64
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_riscv_64_create();
+      iree_samples_simple_embedding_test_module_cpu_riscv_64_create();
 #elif IREE_ARCH_ARM_64
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_arm_64_create();
+      iree_samples_simple_embedding_test_module_cpu_arm_64_create();
 #else
 #error "Unsupported platform."
 #endif
diff --git a/samples/simple_embedding/device_embedded_sync.c b/samples/simple_embedding/device_embedded_sync.c
index eee1746..9ab079f 100644
--- a/samples/simple_embedding/device_embedded_sync.c
+++ b/samples/simple_embedding/device_embedded_sync.c
@@ -4,7 +4,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-// A example of setting up the embedded-sync driver.
+// An example of setting up a local-sync device with the embedded ELF loader.
 
 #include <stddef.h>
 
@@ -16,15 +16,15 @@
 
 // Compiled module embedded here to avoid file IO:
 #if IREE_ARCH_ARM_32
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_arm_32_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_arm_32_c.h"
 #elif IREE_ARCH_ARM_64
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_arm_64_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_arm_64_c.h"
 #elif IREE_ARCH_RISCV_32
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_riscv_32_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_riscv_32_c.h"
 #elif IREE_ARCH_RISCV_64
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_riscv_64_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_riscv_64_c.h"
 #elif IREE_ARCH_X86_64
-#include "samples/simple_embedding/simple_embedding_test_bytecode_module_dylib_x86_64_c.h"
+#include "samples/simple_embedding/simple_embedding_test_bytecode_module_cpu_x86_64_c.h"
 #endif
 
 iree_status_t create_sample_device(iree_allocator_t host_allocator,
@@ -37,8 +37,9 @@
   IREE_RETURN_IF_ERROR(iree_hal_embedded_elf_loader_create(
       iree_hal_executable_import_provider_null(), host_allocator, &loader));
 
+  iree_string_view_t identifier = iree_make_cstring_view("local-sync");
+
   // Use the default host allocator for buffer allocations.
-  iree_string_view_t identifier = iree_make_cstring_view("dylib");
   iree_hal_allocator_t* device_allocator = NULL;
   iree_status_t status = iree_hal_allocator_create_heap(
       identifier, host_allocator, host_allocator, &device_allocator);
@@ -58,19 +59,19 @@
 const iree_const_byte_span_t load_bytecode_module_data() {
 #if IREE_ARCH_X86_64
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_x86_64_create();
+      iree_samples_simple_embedding_test_module_cpu_x86_64_create();
 #elif IREE_ARCH_RISCV_32
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_riscv_32_create();
+      iree_samples_simple_embedding_test_module_cpu_riscv_32_create();
 #elif IREE_ARCH_RISCV_64
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_riscv_64_create();
+      iree_samples_simple_embedding_test_module_cpu_riscv_64_create();
 #elif IREE_ARCH_ARM_32
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_arm_32_create();
+      iree_samples_simple_embedding_test_module_cpu_arm_32_create();
 #elif IREE_ARCH_ARM_64
   const struct iree_file_toc_t* module_file_toc =
-      iree_samples_simple_embedding_test_module_dylib_arm_64_create();
+      iree_samples_simple_embedding_test_module_cpu_arm_64_create();
 #else
 #error "Unsupported platform."
 #endif
diff --git a/samples/static_library/CMakeLists.txt b/samples/static_library/CMakeLists.txt
index d547869..5a226f3 100644
--- a/samples/static_library/CMakeLists.txt
+++ b/samples/static_library/CMakeLists.txt
@@ -16,7 +16,7 @@
 ## Example with VM bytecode module.
 # Setup args for iree-compile.
 set(_COMPILE_ARGS)
-list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=dylib-llvm-aot")
+list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=llvm-cpu")
 list(APPEND _COMPILE_ARGS "--iree-llvm-link-embedded=false")
 list(APPEND _COMPILE_ARGS "--iree-llvm-link-static")
 list(APPEND _COMPILE_ARGS "--iree-llvm-static-library-output-path=simple_mul.o")
@@ -102,7 +102,7 @@
 # Setup args for iree-compile.
 set(_COMPILE_ARGS)
 list(APPEND _COMPILE_ARGS "--iree-mlir-to-vm-c-module")
-list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=dylib-llvm-aot")
+list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=llvm-cpu")
 list(APPEND _COMPILE_ARGS "--iree-llvm-link-embedded=false")
 list(APPEND _COMPILE_ARGS "--iree-llvm-link-static")
 list(APPEND _COMPILE_ARGS "--iree-llvm-static-library-output-path=simple_mul_c_module.o")
diff --git a/samples/static_library/README.md b/samples/static_library/README.md
index 0775ae9..855913a 100644
--- a/samples/static_library/README.md
+++ b/samples/static_library/README.md
@@ -31,13 +31,14 @@
 _Note: run the following commands from IREE's github repo root._
 
 1. Configure CMake for building the static library then demo. You'll need to set
-the flags building samples, the compiler, and the `dylib-llvm-aot`
-driver/backend. See
-[here](https://iree-org.github.io/iree/building-from-source/getting-started/)
+the flags for building samples, the compiler, the `llvm-cpu`
+compiler target backend, and the `local-sync` runtime HAL driver (see
+[the getting started guide](https://iree-org.github.io/iree/building-from-source/getting-started/)
 for general instructions on building using CMake):
 
   ```shell
   cmake -B ../iree-build/ \
+    -DCMAKE_BUILD_TYPE=RelWithDebInfo . \
     -DIREE_BUILD_SAMPLES=ON \
     -DIREE_TARGET_BACKEND_DEFAULTS=OFF \
     -DIREE_TARGET_BACKEND_LLVM_CPU=ON \
@@ -45,7 +46,6 @@
     -DIREE_HAL_DRIVER_LOCAL_SYNC=ON \
     -DIREE_HAL_EXECUTABLE_LOADER_DEFAULTS=OFF \
     -DIREE_BUILD_COMPILER=ON \
-    -DCMAKE_BUILD_TYPE=RelWithDebInfo .
   ```
 
 2. Build the `static_library_demo` CMake target to create the static demo. This
@@ -54,26 +54,26 @@
 (`simple_mul.vmfb`) which are finally built into the demo binary:
 
   ```shell
-  cmake --build ../iree-build/ --target iree_samples_static_library_demo
+  cmake --build ../iree-build/ --target iree_samples_static_library_static_library_demo
   ```
 
 3. Run the sample binary:
 
   ```shell
-  ../iree-build/iree/samples/static_library/static_library_demo
+  ../iree-build/samples/static_library/static_library_demo
 
-  # Output: static_library_run passed
+  # Output: static_library_run_bytecode passed
   ```
 
 ### Changing compilation options
 
 The steps above build both the compiler for the host (machine doing the
 compiling) and the demo for the target using same options as the host machine.
-If you wish to target a different deployment other than the host, you'll need to
+If you wish to target a platform other than the host, you'll need to
 compile the library and demo with different options.
 
 For example, see
-[documentation](https://iree-org.github.io/iree/building-from-source/android/)
+[this documentation](https://iree-org.github.io/iree/building-from-source/android/)
 on cross compiling on Android.
 
 Note: separating the target from the host will require modifying dependencies in
diff --git a/samples/static_library/static_library_demo.c b/samples/static_library/static_library_demo.c
index 6a15412..719802c 100644
--- a/samples/static_library/static_library_demo.c
+++ b/samples/static_library/static_library_demo.c
@@ -75,7 +75,7 @@
                                           iree_allocator_system(), &instance);
   }
 
-  // Create dylib device with static loader.
+  // Create local device with static loader.
   iree_hal_device_t* device = NULL;
   if (iree_status_is_ok(status)) {
     status = create_device_with_static_loader(iree_allocator_system(), &device);
diff --git a/samples/variables_and_state/README.md b/samples/variables_and_state/README.md
index 191bca1..92b3651 100644
--- a/samples/variables_and_state/README.md
+++ b/samples/variables_and_state/README.md
@@ -92,7 +92,7 @@
 
 ```
 ../iree-build/tools/iree-compile \
-    --iree-hal-target-backends=cpu \
+    --iree-hal-target-backends=llvm-cpu \
     --iree-input-type=mhlo \
     counter.mlir -o counter_cpu.vmfb
 ```
diff --git a/samples/vision_inference/README.md b/samples/vision_inference/README.md
index bf644bb..f8543a8 100644
--- a/samples/vision_inference/README.md
+++ b/samples/vision_inference/README.md
@@ -21,7 +21,7 @@
 iree-compile \
     ../models/mnist.mlir \
     --iree-input-type=mhlo \
-    --iree-hal-target-backends=cpu \
+    --iree-hal-target-backends=llvm-cpu \
     -o /tmp/mnist_cpu.vmfb
 
 # Convert the test image to the 1x28x28x1xf32 buffer format the program expects.