Add more models to TFLite benchmarks (#11205)
diff --git a/build_tools/benchmarks/comparisons/mobilebert_fp32_commands.py b/build_tools/benchmarks/comparisons/mobilebert_fp32_commands.py index 4adb80f..826f0ee 100644 --- a/build_tools/benchmarks/comparisons/mobilebert_fp32_commands.py +++ b/build_tools/benchmarks/comparisons/mobilebert_fp32_commands.py
@@ -74,8 +74,8 @@ class MobilebertFP32CommandFactory(BenchmarkCommandFactory): """ Generates `BenchmarkCommand` objects specific to running MobileBert.""" - def __init__(self, base_dir: str): - self._model_name = "mobilebert_float_384_gpu" + def __init__(self, base_dir: str, model_name: str): + self._model_name = model_name self._base_dir = base_dir self._iree_benchmark_binary_path = os.path.join(base_dir, "iree-benchmark-module")
diff --git a/build_tools/benchmarks/comparisons/run_benchmarks.py b/build_tools/benchmarks/comparisons/run_benchmarks.py index edbceb6..00b0fb4 100644 --- a/build_tools/benchmarks/comparisons/run_benchmarks.py +++ b/build_tools/benchmarks/comparisons/run_benchmarks.py
@@ -109,14 +109,17 @@ def main(args): # Create factories for all models to be benchmarked. command_factory = [] - command_factory.append(MobilebertFP32CommandFactory(args.base_dir)) + command_factory.append( + MobilebertFP32CommandFactory(args.base_dir, "mobilebert_float_384_gpu")) command_factory.append(MobilebertInt8CommandFactory(args.base_dir)) command_factory.append( + MobilebertFP32CommandFactory(args.base_dir, "albert_lite_base_squadv1_1")) + command_factory.append( SimpleCommandFactory(args.base_dir, "mobilenet_v2_1.0_224", "1x224x224x3xf32")) command_factory.append( SimpleCommandFactory(args.base_dir, "mobilenet_v2_224_1.0_uint8", - "1x224x224x3xui8", "input", "1,224,224,3")) + "1x224x224x3xui8")) command_factory.append( SimpleCommandFactory(args.base_dir, "deeplabv3", "1x257x257x3xf32")) command_factory.append( @@ -127,6 +130,24 @@ command_factory.append( SimpleCommandFactory(args.base_dir, "resnet_v2_101_1_default_1", "1x299x299x3xf32")) + command_factory.append( + SimpleCommandFactory(args.base_dir, "ssd_mobilenet_v2_fpnlite_uint8", + "1x320x320x3xui8")) + command_factory.append( + SimpleCommandFactory(args.base_dir, "ssd_mobilenet_v2_fpnlite_fp32", + "1x320x320x3xf32")) + command_factory.append( + SimpleCommandFactory(args.base_dir, "efficientnet_lite0_int8_2", + "1x224x224x3xui8")) + command_factory.append( + SimpleCommandFactory(args.base_dir, "efficientnet_lite0_fp32_2", + "1x224x224x3xf32")) + command_factory.append( + SimpleCommandFactory(args.base_dir, "inception_v4_299_uint8", + "1x299x299x3xui8")) + command_factory.append( + SimpleCommandFactory(args.base_dir, "inception_v4_299_fp32", + "1x299x299x3xf32")) if args.mode == "desktop": results_path = os.path.join(args.output_dir, "results.csv")
diff --git a/build_tools/benchmarks/comparisons/setup_desktop.sh b/build_tools/benchmarks/comparisons/setup_desktop.sh index 582bb68..8404804 100644 --- a/build_tools/benchmarks/comparisons/setup_desktop.sh +++ b/build_tools/benchmarks/comparisons/setup_desktop.sh
@@ -9,10 +9,10 @@ set -euo pipefail # Install Bazel. From https://www.tensorflow.org/install/source -#npm install -g @bazel/bazelisk +npm install -g @bazel/bazelisk # Create root dir. -ROOT_DIR=/tmp/mobilebert_benchmarks +ROOT_DIR=/tmp/benchmarks rm -rf "${ROOT_DIR}" mkdir "${ROOT_DIR}" mkdir "${ROOT_DIR}/models" @@ -31,6 +31,14 @@ wget https://storage.googleapis.com/iree-model-artifacts/person_detect.tflite -P "${ROOT_DIR}/models/tflite/" wget https://storage.googleapis.com/iree-model-artifacts/ssd_mobilenet_v2_static_1.0_int8.tflite -P "${ROOT_DIR}/models/tflite/" wget https://storage.googleapis.com/iree-model-artifacts/resnet_v2_101_1_default_1.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/asr_conformer_int8.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/albert_lite_base_squadv1_1.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/ssd_mobilenet_v2_fpnlite_fp32.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/ssd_mobilenet_v2_fpnlite_uint8.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_fp32.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_uint8.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_fp32_2.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_int8_2.tflite -P "${ROOT_DIR}/models/tflite/" # Build IREE source. SOURCE_DIR=/tmp/github @@ -42,7 +50,7 @@ cd iree git submodule update --init -cmake -GNinja -B ../iree-build/ -S . 
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DIREE_ENABLE_ASSERTIONS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DIREE_ENABLE_LLD=ON -DIREE_HAL_DRIVER_CUDA=ON -DIREE_TARGET_BACKEND_CUDA=ON +cmake -GNinja -B ../iree-build/ -S . -DCMAKE_CXX_FLAGS="-Wno-deprecated-builtins" -DCMAKE_BUILD_TYPE=RelWithDebInfo -DIREE_ENABLE_ASSERTIONS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DIREE_ENABLE_LLD=ON -DIREE_HAL_DRIVER_CUDA=ON -DIREE_TARGET_BACKEND_CUDA=ON cmake --build ../iree-build/ export CC=clang @@ -50,13 +58,16 @@ python3 configure_bazel.py cd integrations/tensorflow +bazel build -c opt --cxxopt="-Wno-deprecated-builtins" iree_tf_compiler:iree-import-tflite ./symlink_binaries.sh -bazel build -c opt iree_tf_compiler:iree-import-tflite +IREE_IMPORT_TFLITE_PATH="$(pwd)/bazel-bin/iree_tf_compiler/iree-import-tflite" IREE_COMPILE_PATH="${SOURCE_DIR}/iree-build/tools/iree-compile" - TFLITE_MODEL_DIR="${ROOT_DIR}/models/tflite" IREE_MODEL_DIR="${ROOT_DIR}/models/iree" + +rm -rf "${IREE_MODEL_DIR}/cuda" +rm -rf "${IREE_MODEL_DIR}/llvm-cpu" mkdir -p "${IREE_MODEL_DIR}/cuda" mkdir -p "${IREE_MODEL_DIR}/llvm-cpu" @@ -66,7 +77,7 @@ MODEL_NAME=$(basename $i .tflite) echo "Processing ${MODEL_NAME} ..." - ${IREE_IMPORT_TFLITE_PATH} "${TFLITE_MODEL_DIR}/${MODEL_NAME}.tflite" -o "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" + ${IREE_IMPORT_TFLITE_PATH} "${TFLITE_MODEL_DIR}/${MODEL_NAME}.tflite" -o "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" || true # Build for CUDA. echo "Compiling ${MODEL_NAME}.vmfb for cuda..." "${IREE_COMPILE_PATH}" \ @@ -76,21 +87,47 @@ --iree-llvm-debug-symbols=false \ --iree-vm-bytecode-module-strip-source-map=true \ --iree-vm-emit-polyglot-zip=false \ - "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ + "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ --o "${IREE_MODEL_DIR}/cuda/${MODEL_NAME}.vmfb" || true + + echo "Compiling ${MODEL_NAME}_fp16.vmfb for cuda..." 
+ "${IREE_COMPILE_PATH}" \ + --iree-input-type=tosa \ + --iree-hal-target-backends=cuda \ + --iree-hal-cuda-llvm-target-arch=sm_80 \ + --iree-flow-demote-f32-to-f16 \ + --iree-llvm-debug-symbols=false \ + --iree-vm-bytecode-module-strip-source-map=true \ + --iree-vm-emit-polyglot-zip=false \ + "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ + --o "${IREE_MODEL_DIR}/cuda/${MODEL_NAME}_fp16.vmfb" || true + # Build for x86. echo "Compiling ${MODEL_NAME}.vmfb for llvm-cpu..." "${IREE_COMPILE_PATH}" \ --iree-input-type=tosa \ - --iree-llvm-target-cpu-features=host \ --iree-hal-target-backends=llvm-cpu \ + --iree-llvm-target-cpu=cascadelake \ + --iree-llvm-target-triple=x86_64-unknown-linux-gnu \ --iree-llvm-debug-symbols=false \ --iree-vm-bytecode-module-strip-source-map=true \ --iree-vm-emit-polyglot-zip=false \ "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}.vmfb" || true - done + echo "Compiling ${MODEL_NAME}_padfuse.vmfb for llvm-cpu..." + "${IREE_COMPILE_PATH}" \ + --iree-input-type=tosa \ + --iree-hal-target-backends=llvm-cpu \ + --iree-llvm-target-cpu=cascadelake \ + --iree-llvm-target-triple=x86_64-unknown-linux-gnu \ + --iree-flow-enable-fuse-padding-into-linalg-consumer-ops \ + --iree-llvm-debug-symbols=false \ + --iree-vm-bytecode-module-strip-source-map=true \ + --iree-vm-emit-polyglot-zip=false \ + "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ + --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}_padfuse.vmfb" || true +done cp "${SOURCE_DIR}/iree-build/tools/iree-benchmark-module" "${ROOT_DIR}/" @@ -119,4 +156,3 @@ --output_dir=${ROOT_DIR}/output --mode=desktop cat "${ROOT_DIR}/output/results.csv" -
diff --git a/build_tools/benchmarks/comparisons/setup_mobile.sh b/build_tools/benchmarks/comparisons/setup_mobile.sh index fb29d6c..e746934 100644 --- a/build_tools/benchmarks/comparisons/setup_mobile.sh +++ b/build_tools/benchmarks/comparisons/setup_mobile.sh
@@ -39,6 +39,14 @@ wget https://storage.googleapis.com/iree-model-artifacts/person_detect.tflite -P "${ROOT_DIR}/models/tflite/" wget https://storage.googleapis.com/iree-model-artifacts/ssd_mobilenet_v2_static_1.0_int8.tflite -P "${ROOT_DIR}/models/tflite/" wget https://storage.googleapis.com/iree-model-artifacts/resnet_v2_101_1_default_1.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/asr_conformer_int8.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/albert_lite_base_squadv1_1.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/ssd_mobilenet_v2_fpnlite_fp32.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/ssd_mobilenet_v2_fpnlite_uint8.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_fp32.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_uint8.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_fp32_2.tflite -P "${ROOT_DIR}/models/tflite/" +wget https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_int8_2.tflite -P "${ROOT_DIR}/models/tflite/" # Build IREE source. SOURCE_DIR=/tmp/github @@ -59,11 +67,11 @@ export CC=clang export CXX=clang++ -python configure_bazel.py +python3 configure_bazel.py cd integrations/tensorflow -./symlink_binaries.sh bazel build -c opt iree_tf_compiler:iree-import-tflite +./symlink_binaries.sh echo "Done building iree-import-tflite" echo @@ -80,9 +88,9 @@ # keep going. for i in $(ls ${ROOT_DIR}/models/tflite/); do MODEL_NAME=$(basename $i .tflite) - echo "Processing ${MODEL_NAME} ..." 
- ${IREE_IMPORT_TFLITE_PATH} "${TFLITE_MODEL_DIR}/${MODEL_NAME}.tflite" -o "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" + + ${IREE_IMPORT_TFLITE_PATH} "${TFLITE_MODEL_DIR}/${MODEL_NAME}.tflite" -o "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" || true echo -e "\tCompiling ${MODEL_NAME}.vmfb for aarch64..." "${IREE_COMPILE_PATH}" \ --iree-input-type=tosa \ @@ -94,6 +102,19 @@ "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}.vmfb" || true + echo -e "\tCompiling ${MODEL_NAME}_padfuse.vmfb for aarch64..." + "${IREE_COMPILE_PATH}" \ + --iree-input-type=tosa \ + --iree-hal-target-backends=llvm-cpu \ + --iree-llvm-target-triple=aarch64-none-linux-android29 \ + --iree-llvm-debug-symbols=false \ + --iree-vm-bytecode-module-strip-source-map=true \ + --iree-vm-emit-polyglot-zip=false \ + "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" \ + "--iree-llvmcpu-enable-pad-consumer-fusion" \ + "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ + --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}_padfuse.vmfb" || true + echo -e "\tCompiling ${MODEL_NAME}_mmt4d.vmfb for aarch64..." 
"${IREE_COMPILE_PATH}" \ --iree-input-type=tosa \ @@ -101,6 +122,8 @@ --iree-llvm-target-triple=aarch64-none-linux-android29 \ "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod" \ --iree-llvm-target-cpu-features=+dotprod \ + "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" \ + "--iree-llvmcpu-enable-pad-consumer-fusion" \ --iree-llvm-debug-symbols=false \ --iree-vm-bytecode-module-strip-source-map=true \ --iree-vm-emit-polyglot-zip=false \ @@ -114,6 +137,8 @@ --iree-llvm-target-triple=aarch64-none-linux-android29 \ "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod" \ --iree-llvm-target-cpu-features=+dotprod \ + "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" \ + "--iree-llvmcpu-enable-pad-consumer-fusion" \ --iree-flow-enable-conv-img2col-transform \ --iree-llvm-debug-symbols=false \ --iree-vm-bytecode-module-strip-source-map=true \ @@ -121,7 +146,6 @@ "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ --o "${IREE_MODEL_DIR}/llvm-cpu/${MODEL_NAME}_im2col_mmt4d.vmfb" || true - if [[ "${GPU_TYPE}" = "mali" ]]; then echo -e "\tCompiling ${MODEL_NAME}.vmfb for vulkan mali..." "${IREE_COMPILE_PATH}" \ @@ -133,7 +157,20 @@ --iree-vm-emit-polyglot-zip=false \ "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ --o "${IREE_MODEL_DIR}/vulkan/${MODEL_NAME}.vmfb" || true - echo -e "\tCompiling ${MODEL_NAME}_fp16.vmfb for vulkan mali..." + + echo -e "\tCompiling ${MODEL_NAME}_padfuse.vmfb for vulkan mali..." + "${IREE_COMPILE_PATH}" \ + --iree-input-type=tosa \ + --iree-hal-target-backends=vulkan-spirv \ + --iree-vulkan-target-triple=valhall-unknown-android31 \ + --iree-llvm-debug-symbols=false \ + --iree-vm-bytecode-module-strip-source-map=true \ + --iree-vm-emit-polyglot-zip=false \ + --iree-flow-enable-fuse-padding-into-linalg-consumer-ops \ + "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ + --o "${IREE_MODEL_DIR}/vulkan/${MODEL_NAME}_padfuse.vmfb" || true + + echo -e "\tCompiling ${MODEL_NAME}_fp16.vmfb for vulkan mali..." 
"${IREE_COMPILE_PATH}" \ --iree-input-type=tosa \ --iree-hal-target-backends=vulkan-spirv \ @@ -142,19 +179,9 @@ --iree-llvm-debug-symbols=false \ --iree-vm-bytecode-module-strip-source-map=true \ --iree-vm-emit-polyglot-zip=false \ + --iree-flow-enable-fuse-padding-into-linalg-consumer-ops \ "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ --o "${IREE_MODEL_DIR}/vulkan/${MODEL_NAME}_fp16.vmfb" || true - echo -e "\tCompiling ${MODEL_NAME}_padfuse.vmfb for vulkan mali..." - "${IREE_COMPILE_PATH}" \ - --iree-input-type=tosa \ - --iree-hal-target-backends=vulkan-spirv \ - --iree-vulkan-target-triple=valhall-unknown-android31 \ - --iree-flow-enable-fuse-padding-into-linalg-consumer-ops \ - --iree-llvm-debug-symbols=false \ - --iree-vm-bytecode-module-strip-source-map=true \ - --iree-vm-emit-polyglot-zip=false \ - "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ - --o "${IREE_MODEL_DIR}/vulkan/${MODEL_NAME}_padfuse.vmfb" || true else echo -e "\tCompiling ${MODEL_NAME}.vmfb for vulkan adreno..." "${IREE_COMPILE_PATH}" \ @@ -164,6 +191,7 @@ --iree-llvm-debug-symbols=false \ --iree-vm-bytecode-module-strip-source-map=true \ --iree-vm-emit-polyglot-zip=false \ + --iree-flow-enable-fuse-padding-into-linalg-consumer-ops \ "${IREE_MODEL_DIR}/${MODEL_NAME}.mlir" \ --o "${IREE_MODEL_DIR}/vulkan/${MODEL_NAME}.vmfb" || true fi @@ -183,7 +211,7 @@ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK?}/build/cmake/android.toolchain.cmake" \ -DIREE_HOST_BINARY_ROOT="${PWD}/../iree-build/install" \ -DANDROID_ABI="arm64-v8a" \ - -DANDROID_PLATFORM="android-29" \ + -DANDROID_PLATFORM="latest" \ -DIREE_BUILD_COMPILER=OFF \ . cmake --build ../iree-build-android/ @@ -211,7 +239,7 @@ cp "${SOURCE_DIR}/tensorflow/bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model" "${ROOT_DIR}/" -echo Pushing benchmarking artifacts to device. +echo "Pushing benchmarking artifacts to device." DEVICE_ROOT_DIR=/data/local/tmp/benchmarks adb shell rm -r "${DEVICE_ROOT_DIR}" || true adb push "${ROOT_DIR}" /data/local/tmp
diff --git a/build_tools/benchmarks/comparisons/simple_commands.py b/build_tools/benchmarks/comparisons/simple_commands.py index 6b5ef6e..6bfca23 100644 --- a/build_tools/benchmarks/comparisons/simple_commands.py +++ b/build_tools/benchmarks/comparisons/simple_commands.py
@@ -135,6 +135,17 @@ driver=driver) commands.append(iree) + model_padfuse_name = self._model_name + "_padfuse" + iree_padfuse_model_path = os.path.join(self._base_dir, "models", "iree", + backend, + model_padfuse_name + ".vmfb") + iree_padfuse = IreeWrapper(self._iree_benchmark_binary_path, + model_padfuse_name, + iree_padfuse_model_path, + self._function_input, + driver=driver) + commands.append(iree_padfuse) + # Test mmt4d only on mobile. if device == "mobile": model_mmt4d_name = self._model_name + "_mmt4d"
diff --git a/build_tools/benchmarks/reporting/parse_tflite_benchmarks.py b/build_tools/benchmarks/reporting/parse_tflite_benchmarks.py index 6396ba8..d08af97 100755 --- a/build_tools/benchmarks/reporting/parse_tflite_benchmarks.py +++ b/build_tools/benchmarks/reporting/parse_tflite_benchmarks.py
@@ -33,8 +33,16 @@ # A map of model name to data type. _MODEL_TO_DATA_TYPE = { + "albert_lite_base_squadv1_1": "fp32", + "albert_lite_base_squadv1_1_fp16": "fp16", "deeplabv3": "fp32", "deeplabv3_fp16": "fp16", + "efficientnet_lite0_fp32_2": "fp32", + "efficientnet_lite0_fp32_2_fp16": "fp16", + "efficientnet_lite0_int8_2": "int8", + "inception_v4_299_fp32": "fp32", + "inception_v4_299_fp32_fp16": "fp16", + "inception_v4_299_uint8": "uint8", "mobilebert-baseline-tf2-quant": "int8", "mobilebert_float_384_gpu": "fp32", "mobilebert_float_384_gpu_fp16": "fp16", @@ -45,6 +53,9 @@ "resnet_v2_101_1_default_1": "fp32", "resnet_v2_101_1_default_1_fp16": "fp16", "ssd_mobilenet_v2_static_1.0_int8": "int8", + "ssd_mobilenet_v2_fpnlite_fp32": "fp32", + "ssd_mobilenet_v2_fpnlite_fp32_fp16": "fp16", + "ssd_mobilenet_v2_fpnlite_uint8": "uint8", } # Column headers.