Enable LTO optimization by default for runtime releases. (#16811)
This is done by generalizing the primordial `IREE_SIZE_OPTIMIZED` flag
into a `IREE_RUNTIME_OPTIMIZATION_PROFILE` that:
* Can enable 'lto' or 'size'.
* Is scoped to just the runtime targets.
* Minimally does the right thing for 'size' on Linux too, rather than only
on Windows (not the goal of this patch, but it drops ~300KB from binary
sizes when enabled).
The compile time delta for a clean build of the runtime in full LTO vs
regular mode was not measured precisely but is in the noise (i.e. <1m).
As such, just enabling by default for Python release binaries.
Others can be enabled via: `-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto`,
which is recommended for benchmarking, etc.
Note that this removes the use of the CMake option
`IREE_SIZE_OPTIMIZED`. It was never even declared properly as an option
and didn't do the same class of thing across Windows/Linux. This has
been fixed and it can be enabled via
`-DIREE_RUNTIME_OPTIMIZATION_PROFILE=size`. Note that as on Windows,
this implies LTO. If old behavior without LTO is desired, we can add a
profile for that.
Progress on #898.
---------
Co-authored-by: Scott Todd <scott.todd0@gmail.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7603dd5..6bd7261 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -123,6 +123,11 @@
# CI coverage is established.
option(BUILD_SHARED_LIBS "Instructs CMake to build libraries as shared if possible" OFF)
+# Optimization profile and LTO mode applied to the runtime build.
+set(IREE_RUNTIME_OPTIMIZATION_PROFILE "" CACHE STRING
+ "Build optimization profile to apply. One of '', 'lto', 'size'.")
+set(IREE_LTO_MODE "full" CACHE STRING "LTO type, 'thin' or 'full'. Only consulted on clang-like compilers.")
+
#-------------------------------------------------------------------------------
# IREE command-line tooling configuration
#-------------------------------------------------------------------------------
diff --git a/build_tools/cmake/build_runtime.sh b/build_tools/cmake/build_runtime.sh
index 7db3542..4fb41ff 100755
--- a/build_tools/cmake/build_runtime.sh
+++ b/build_tools/cmake/build_runtime.sh
@@ -28,6 +28,8 @@
"-DPython3_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}"
"-DPYTHON_EXECUTABLE=${IREE_PYTHON3_EXECUTABLE}"
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
+ "-DIREE_RUNTIME_OPTIMIZATION_PROFILE=lto"
+ "-DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON"
"-DIREE_BUILD_COMPILER=OFF"
)
diff --git a/build_tools/cmake/build_runtime_small.sh b/build_tools/cmake/build_runtime_small.sh
index 88cb2c9..eb456e2 100755
--- a/build_tools/cmake/build_runtime_small.sh
+++ b/build_tools/cmake/build_runtime_small.sh
@@ -22,5 +22,6 @@
-DPYTHON_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE}" \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-  -DIREE_SIZE_OPTIMIZED=ON \
+  -DIREE_RUNTIME_OPTIMIZATION_PROFILE=size \
+  -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX=ON \
-DIREE_BUILD_COMPILER=OFF
"${CMAKE_BIN?}" --build "${BUILD_DIR}" -- -k 0
diff --git a/build_tools/cmake/external_cc_library.cmake b/build_tools/cmake/external_cc_library.cmake
index 02ae7ff..a49ca65 100644
--- a/build_tools/cmake/external_cc_library.cmake
+++ b/build_tools/cmake/external_cc_library.cmake
@@ -134,6 +134,8 @@
PRIVATE
${_RULE_COPTS}
${IREE_DEFAULT_COPTS}
+ INTERFACE
+ ${IREE_INTERFACE_COPTS}
)
target_link_options(${_NAME}
PRIVATE
diff --git a/build_tools/cmake/iree_cc_binary.cmake b/build_tools/cmake/iree_cc_binary.cmake
index 2e8af2f..c1b6025 100644
--- a/build_tools/cmake/iree_cc_binary.cmake
+++ b/build_tools/cmake/iree_cc_binary.cmake
@@ -124,6 +124,7 @@
target_compile_options(${_NAME}
PRIVATE
${IREE_DEFAULT_COPTS}
+ ${IREE_INTERFACE_COPTS}
${_RULE_COPTS}
)
target_link_options(${_NAME}
diff --git a/build_tools/cmake/iree_cc_library.cmake b/build_tools/cmake/iree_cc_library.cmake
index a9cc1e9..980f8ff 100644
--- a/build_tools/cmake/iree_cc_library.cmake
+++ b/build_tools/cmake/iree_cc_library.cmake
@@ -230,6 +230,8 @@
PRIVATE
${IREE_DEFAULT_COPTS}
${_RULE_COPTS}
+ INTERFACE
+ ${IREE_INTERFACE_COPTS}
)
target_link_options(${_NAME}
PRIVATE
diff --git a/build_tools/cmake/iree_copts.cmake b/build_tools/cmake/iree_copts.cmake
index 10342b4..0f792e1 100644
--- a/build_tools/cmake/iree_copts.cmake
+++ b/build_tools/cmake/iree_copts.cmake
@@ -415,48 +415,109 @@
endif()
#-------------------------------------------------------------------------------
-# Size-optimized build flags
+# Flag sets used by the different optimization profiles.
#-------------------------------------------------------------------------------
-# TODO(#898): add a dedicated size-constrained configuration.
-if(IREE_SIZE_OPTIMIZED)
- iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS
- MSVC_OR_CLANG_CL
- "/GS-"
- "/GL"
- "/Gw"
- "/Gy"
- "/DNDEBUG"
- "/Os"
- "/Oy"
- "/Zi"
- "/c"
+iree_select_compiler_opts(IREE_LTO_COPTS
+ CLANG
+ "-flto=${IREE_LTO_MODE}"
+ GCC
+ "-flto"
+ "-fuse-linker-plugin"
+ MSVC_OR_CLANG_CL
+ "/GL"
+)
+
+iree_select_compiler_opts(IREE_LTO_LINKOPTS
+ CLANG
+ "-flto=${IREE_LTO_MODE}"
+ GCC
+ "-flto"
+ MSVC_OR_CLANG_CL
+ "-LTCG"
+)
+
+iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_COPTS
+ MSVC_OR_CLANG_CL
+ "/GS-"
+ "/Gw"
+ "/Gy"
+ "/DNDEBUG"
+ "/Os"
+ "/Oy"
+ "/Zi"
+ "/c"
+)
+iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS
+ MSVC_OR_CLANG_CL
+ "-DEBUG:FULL"
+ "-opt:ref,icf"
+)
+
+# Enables an optimization profile for a sub-tree by appending to the
+# IREE_DEFAULT_COPTS, IREE_INTERFACE_COPTS and IREE_DEFAULT_LINKOPTS variables
+# (in the caller's scope) that targets created after this point use.
+#
+# Available profiles:
+#   "lto": Applies options to enable link time code generation.
+#   "size": Applies a variety of options to minimize the size of the runtime,
+#     generally at the expense of features but not performance. This implies
+#     LTO.
+#
+# Parameters:
+#   PROFILE_NAME: Name of a supported profile or falsey for none. Any other
+#     value stops configuration with a fatal error.
+#   SIZE_INTERFACE_COPTS: IREE_INTERFACE_COPTS added by the "size" profile.
+function(iree_enable_optimization_options)
+  cmake_parse_arguments(
+    _RULE
+    ""
+    "PROFILE_NAME"
+    "SIZE_INTERFACE_COPTS"
+    ${ARGN}
)
- iree_select_compiler_opts(IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS
- MSVC_OR_CLANG_CL
- "-DEBUG:FULL"
- "-LTCG"
- "-opt:ref,icf"
+
+  if(NOT _RULE_PROFILE_NAME)
+    # No profile requested: leave the caller's option variables untouched.
+    return()
+  endif()
+
+  set(_ADDL_COPTS)
+  set(_ADDL_INTERFACE_COPTS)
+  set(_ADDL_LINKOPTS)
+
+  if(_RULE_PROFILE_NAME STREQUAL "lto")
+    set(_ADDL_COPTS ${IREE_LTO_COPTS})
+    set(_ADDL_LINKOPTS ${IREE_LTO_LINKOPTS})
+  elseif(_RULE_PROFILE_NAME STREQUAL "size")
+    # Size optimized assumes LTO.
+    # Size optimized often also elides logging and various status reporting,
+    # which can result in unused-but-set-variable style warnings. Disable those.
+    iree_select_compiler_opts(_ADDL_COPTS
+      ALL
+        ${IREE_LTO_COPTS}
+        ${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS}
+      CLANG_OR_GCC
+        -Wno-unused-but-set-variable
+    )
+    set(_ADDL_INTERFACE_COPTS "${_RULE_SIZE_INTERFACE_COPTS}")
+    set(_ADDL_LINKOPTS
+      ${IREE_LTO_LINKOPTS}
+      ${IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS}
+    )
+  else()
+    message(FATAL_ERROR "Unrecognized optimization profile name '${_RULE_PROFILE_NAME}'. Expected one of 'lto', 'size'")
+  endif()
+
+  message(STATUS "Enabled optimization profile '${_RULE_PROFILE_NAME}' for targets under ${CMAKE_CURRENT_SOURCE_DIR}: \n"
+    " COPTS: ${_ADDL_COPTS}\n"
+    " INTERFACE COPTS: ${_ADDL_INTERFACE_COPTS}\n"
+    " LINKOPTS: ${_ADDL_LINKOPTS}"
)
- # TODO(#898): make this only impact the runtime (IREE_RUNTIME_DEFAULT_...).
- # These flags come from iree/base/config.h:
- set(IREE_DEFAULT_COPTS
- "${IREE_DEFAULT_COPTS}"
- "${IREE_SIZE_OPTIMIZED_DEFAULT_COPTS}"
- "-DIREE_STATUS_MODE=0"
- "-DIREE_STATISTICS_ENABLE=0"
- "-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0"
- "-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0"
- "-DIREE_VM_BACKTRACE_ENABLE=0"
- "-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0"
- "-DIREE_VM_EXT_F32_ENABLE=0"
- "-DIREE_VM_EXT_F64_ENABLE=0"
- )
- set(IREE_DEFAULT_LINKOPTS
- "${IREE_DEFAULT_LINKOPTS}"
- "${IREE_SIZE_OPTIMIZED_DEFAULT_LINKOPTS}"
- )
-endif()
+  set(IREE_DEFAULT_COPTS "${IREE_DEFAULT_COPTS};${_ADDL_COPTS}" PARENT_SCOPE)
+  set(IREE_INTERFACE_COPTS "${IREE_INTERFACE_COPTS};${_ADDL_INTERFACE_COPTS}" PARENT_SCOPE)
+  set(IREE_DEFAULT_LINKOPTS "${IREE_DEFAULT_LINKOPTS};${_ADDL_LINKOPTS}" PARENT_SCOPE)
+endfunction()
#-------------------------------------------------------------------------------
# Compiler: Clang/LLVM
diff --git a/build_tools/cmake/iree_setup_toolchain.cmake b/build_tools/cmake/iree_setup_toolchain.cmake
index d046ddd..a38aef0 100644
--- a/build_tools/cmake/iree_setup_toolchain.cmake
+++ b/build_tools/cmake/iree_setup_toolchain.cmake
@@ -22,6 +22,54 @@
# explicitly or through global properties. Please don't add to it without
# a very good reason.
macro(iree_setup_toolchain)
+  #-------------------------------------------------------------------------------
+  # Force LTO compatible tools.
+  #-------------------------------------------------------------------------------
+
+  # On older (i.e. gcc 9.x era) systems, the compiler and system toolchains
+  # were not compatible for general LTO use, and they were further not
+  # compatible amongst themselves.
+  # As an aid to CIs, we provide an option which will force toolchain specific
+  # binutils and linkers only if running on Linux. This lets us use the same
+  # runtime build scripts across platforms without further shenanigans.
+  # This is a hack and should be rolled back once 2020 era systems are not in
+  # use.
+  # Users should not use this. If they have such an old system, configure CMake
+  # to use toolchain specific tools.
+  option(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX "Forces use of toolchain specific LTO compatible binutils if on Linux" OFF)
+  mark_as_advanced(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX)
+  if(IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+      message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing llvm-ar, llvm-nm, llvm-ranlib, and ld.lld")
+      find_program(IREE_CMAKE_LTO_AR llvm-ar REQUIRED)
+      find_program(IREE_CMAKE_LTO_RANLIB llvm-ranlib REQUIRED)
+      find_program(IREE_CMAKE_LTO_NM llvm-nm REQUIRED)
+      set(IREE_USE_LINKER "lld")
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+      message(STATUS "Running on an old Linux with -DIREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX: Forcing gcc-ar, gcc-nm, gcc-ranlib, and ld.gold")
+      find_program(IREE_CMAKE_LTO_AR gcc-ar REQUIRED)
+      find_program(IREE_CMAKE_LTO_RANLIB gcc-ranlib REQUIRED)
+      find_program(IREE_CMAKE_LTO_NM gcc-nm REQUIRED)
+      set(IREE_USE_LINKER "gold")
+    else()
+      # No LTO-compatible tool set is selected for other compilers; stop here
+      # rather than continue with the tool variables below left unset.
+      message(FATAL_ERROR "IREE_FORCE_LTO_COMPAT_BINUTILS_ON_LINUX only supports the Clang and GNU compilers, but CMAKE_CXX_COMPILER_ID is '${CMAKE_CXX_COMPILER_ID}'. Disable the option or configure CMake to use toolchain specific tools.")
+    endif()
+
+    set(IREE_ENABLE_LLD OFF)
+    find_program(IREE_CMAKE_LTO_LD ld.${IREE_USE_LINKER} REQUIRED)
+    mark_as_advanced(IREE_CMAKE_LTO_AR IREE_CMAKE_LTO_RANLIB IREE_CMAKE_LTO_NM IREE_CMAKE_LTO_LD)
+
+    # Override both the cache entry and the normal variable of each tool.
+    set(CMAKE_AR ${IREE_CMAKE_LTO_AR} CACHE FILEPATH "Forcing LTO ar instead of ar" FORCE)
+    set(CMAKE_AR ${IREE_CMAKE_LTO_AR})
+    set(CMAKE_NM ${IREE_CMAKE_LTO_NM} CACHE FILEPATH "Forcing LTO nm instead of nm" FORCE)
+    set(CMAKE_NM ${IREE_CMAKE_LTO_NM})
+    set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB} CACHE FILEPATH "Forcing LTO ranlib instead of ranlib" FORCE)
+    set(CMAKE_RANLIB ${IREE_CMAKE_LTO_RANLIB})
+  endif()
+
#-----------------------------------------------------------------------------
# Supports dynamic library loading.
#-----------------------------------------------------------------------------
diff --git a/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json b/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json
index 946c152..d578270 100644
--- a/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json
+++ b/experimental/regression_suite/external_test_suite/config_cpu_llvm_sync.json
@@ -794,9 +794,11 @@
"test_clip_default_int8_min",
"test_clip_default_int8_min_expanded",
"test_constant_pad",
+ "test_constantofshape_float_ones",
"test_constantofshape_int_shape_zero",
"test_constantofshape_int_zeros",
"test_div_uint8",
+ "test_dropout_default_mask_ratio",
"test_elu_default",
"test_gather_0",
"test_gather_1",
@@ -834,13 +836,14 @@
"test_pow_types_float32_uint64",
"test_qlinearconv",
"test_qlinearmatmul_2D_int8_float16",
+ "test_qlinearmatmul_2D_int8_float32",
"test_qlinearmatmul_3D_int8_float16",
"test_qlinearmatmul_3D_int8_float32",
"test_qlinearmatmul_3D_uint8_float16",
- "test_qlinearmatmul_2D_int8_float32",
"test_qlinearmatmul_3D_uint8_float32",
"test_quantizelinear",
"test_range_int32_type_negative_delta",
+ "test_reduce_min_empty_set",
"test_scatter_elements_with_negative_indices",
"test_selu_default",
"test_shape",
diff --git a/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json b/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json
index e0ceda7..2b21bef 100644
--- a/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json
+++ b/experimental/regression_suite/external_test_suite/config_gpu_vulkan.json
@@ -795,15 +795,17 @@
"test_castlike_FLOAT_to_BFLOAT16_expanded",
"test_castlike_FLOAT_to_DOUBLE",
"test_castlike_FLOAT_to_DOUBLE_expanded",
- "test_clip_default_int8_min",
- "test_clip_default_int8_min_expanded",
"test_clip_default_int8_inbounds",
"test_clip_default_int8_max",
"test_clip_default_int8_max_expanded",
+ "test_clip_default_int8_min",
+ "test_clip_default_int8_min_expanded",
"test_constant_pad",
+ "test_constantofshape_float_ones",
"test_constantofshape_int_shape_zero",
"test_constantofshape_int_zeros",
"test_div_uint8",
+ "test_dropout_default_mask_ratio",
"test_elu_default",
"test_gather_0",
"test_gather_1",
@@ -840,12 +842,12 @@
"test_pow_types_float32_uint32",
"test_pow_types_float32_uint64",
"test_qlinearconv",
+ "test_qlinearmatmul_2D_int8_float16",
+ "test_qlinearmatmul_2D_int8_float32",
"test_qlinearmatmul_3D_int8_float16",
"test_qlinearmatmul_3D_int8_float32",
"test_qlinearmatmul_3D_uint8_float16",
"test_qlinearmatmul_3D_uint8_float32",
- "test_qlinearmatmul_2D_int8_float16",
- "test_qlinearmatmul_2D_int8_float32",
"test_quantizelinear",
"test_range_int32_type_negative_delta",
"test_scatter_elements_with_negative_indices",
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 8ee250b..4d48d72 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -4,6 +4,22 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+iree_enable_optimization_options(
+ PROFILE_NAME
+ "${IREE_RUNTIME_OPTIMIZATION_PROFILE}"
+ # TODO: These iree/base/config.h flags should be separated between those
+ # required as INTERFACE and those that can be private (i.e. to the runtime).
+ SIZE_INTERFACE_COPTS
+ "-DIREE_STATUS_MODE=0"
+ "-DIREE_STATISTICS_ENABLE=0"
+ "-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0"
+ "-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0"
+ "-DIREE_VM_BACKTRACE_ENABLE=0"
+ "-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0"
+ "-DIREE_VM_EXT_F32_ENABLE=0"
+ "-DIREE_VM_EXT_F64_ENABLE=0"
+)
+
# Must include runtime plugins before processing the runtime sources so that
# the static link list can be set.
iree_include_cmake_plugin_dirs(
diff --git a/runtime/setup.py b/runtime/setup.py
index cf77f43..cbaee8e 100644
--- a/runtime/setup.py
+++ b/runtime/setup.py
@@ -72,6 +72,10 @@
"*** Tracy tools not enabled (enable with IREE_RUNTIME_BUILD_TRACY_TOOLS=ON)",
file=sys.stderr,
)
+# Default to LTO builds for our python releases (the env var overrides it).
+IREE_RUNTIME_OPTIMIZATION_PROFILE = os.getenv(
+ "IREE_RUNTIME_OPTIMIZATION_PROFILE", "lto"
+)
def check_pip_version():
@@ -264,6 +268,7 @@
cmake_args = [
"-GNinja",
"--log-level=VERBOSE",
+ f"-DIREE_RUNTIME_OPTIMIZATION_PROFILE={IREE_RUNTIME_OPTIMIZATION_PROFILE}",
"-DIREE_BUILD_PYTHON_BINDINGS=ON",
"-DIREE_BUILD_COMPILER=OFF",
"-DIREE_BUILD_SAMPLES=OFF",