Merge branch 'main' into google-to-main
diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml
index 6f063ce..13581ea 100644
--- a/.github/workflows/publish_docs.yml
+++ b/.github/workflows/publish_docs.yml
@@ -17,7 +17,8 @@
 on:
   push:
     branches:
-      - master
+      # This will be switched to main after shifting to a GitHub-first workflow.
+      - google
 
 jobs:
   linux:
diff --git a/.github/workflows/synchronize_submodules.yml b/.github/workflows/synchronize_submodules.yml
index 20d458f..cd0aed6 100644
--- a/.github/workflows/synchronize_submodules.yml
+++ b/.github/workflows/synchronize_submodules.yml
@@ -41,7 +41,7 @@
       - name: Committing updates
         if: env.has_diff == 'true'
         run: |
-          git config --local user.email "noreply+action@github.com"
+          git config --local user.email "iree-github-actions-bot@google.com"
           git config --local user.name "Submodule Synchronize Action"
           git commit -am "Synchronize submodules"
       - name: Pushing changes
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 501cd0a..004bed5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,7 @@
 # Project component configuration
 #-------------------------------------------------------------------------------
 
+# LINT.IfChange(iree_options)
 option(IREE_ENABLE_RUNTIME_TRACING "Enables instrumented runtime tracing." OFF)
 option(IREE_ENABLE_MLIR "Enables MLIR/LLVM dependencies." ON)
 option(IREE_ENABLE_EMITC "Enables MLIR EmitC dependencies." OFF)
@@ -50,6 +51,10 @@
   CACHE STRING "Semicolon-separated list of HAL drivers to build, or \"all\"." FORCE)
 set(IREE_TARGET_BACKENDS_TO_BUILD "all"
   CACHE STRING "Semicolon-separated list of target backends to build, or \"all\"." FORCE)
+# LINT.ThenChange(
+#   https://github.com/google/iree/tree/master/build_tools/cmake/iree_cross_compile.cmake:iree_cross_compile_options,
+#   https://github.com/google/iree/tree/master/build_tools/cmake/iree_cross_compile.cmake:iree_cross_compile_invoke
+# )
 
 if(${IREE_BUILD_SAMPLES} OR ${IREE_BUILD_EXPERIMENTAL})
   set(IREE_BUILD_COMPILER ON CACHE BOOL "Build the IREE compiler for sample projects." FORCE)
@@ -136,6 +141,44 @@
 )
 
 #-------------------------------------------------------------------------------
+# Cross compiling configuration
+#-------------------------------------------------------------------------------
+
+if(CMAKE_CROSSCOMPILING)
+  if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows)
+    message(FATAL_ERROR "Cross compilation with a Windows host system is not supported yet")
+  endif()
+
+  message(STATUS "Detected cross compilation mode; configuring IREE on host...")
+
+  # C/C++ compilers for host compilation.
+  # Note: we need to set these explicitly because IREE does not work well with
+  # GCC at the moment: https://github.com/google/iree/issues/1269
+  set(IREE_HOST_C_COMPILER "$ENV{IREE_HOST_C_COMPILER}" CACHE FILEPATH "C compiler for host compilation")
+  set(IREE_HOST_CXX_COMPILER "$ENV{IREE_HOST_CXX_COMPILER}" CACHE FILEPATH "C++ compiler for host compilation")
+
+  # Master configuration for the binary directory containing all artifacts
+  # compiled for the host.
+  if(NOT IREE_HOST_BINARY_ROOT)
+    set(IREE_HOST_BINARY_ROOT "${CMAKE_CURRENT_BINARY_DIR}/host" CACHE FILEPATH "directory containing host artifacts")
+  endif()
+
+  set(IREE_HOST_BUILD_COMPILER ON) # For iree-translate
+  set(IREE_HOST_ENABLE_LLVM ON)    # For iree-tblgen
+
+  # Set the host build directory for LLVM to our directory. Otherwise it will
+  # follow its own convention.
+  set(LLVM_NATIVE_BUILD "${IREE_HOST_BINARY_ROOT}/third_party/llvm-project/llvm")
+
+  include(iree_cross_compile)
+
+  # Use another CMake invocation to configure a build for host.
+  iree_create_configuration(HOST)
+
+  message(STATUS "Done configuring IREE on host in ${IREE_HOST_BINARY_ROOT}")
+endif()
+
+#-------------------------------------------------------------------------------
 # IREE utility definitions
 #-------------------------------------------------------------------------------
 
@@ -291,6 +334,24 @@
 add_subdirectory(build_tools/third_party/renderdoc_api EXCLUDE_FROM_ALL)
 add_subdirectory(build_tools/third_party/vulkan_extensionlayer EXCLUDE_FROM_ALL)
 
+if(CMAKE_CROSSCOMPILING)
+  # We need flatc to generate some source code. When cross-compiling, we need
+  # to make sure the flatc binary is configured under the host environment.
+  iree_declare_host_excutable(flatc BUILDONLY)
+
+  # Set the FLATBUFFERS_FLATC_EXECUTABLE. It controls where to find the flatc
+  # binary in BuildFlatBuffers().
+  iree_get_executable_path(FLATBUFFERS_FLATC_EXECUTABLE flatc)
+
+  # Add a custom target to copy flatc to the host binary directory.
+  add_custom_target(iree_host_flatc
+    COMMAND "${CMAKE_COMMAND}" -E copy_if_different
+      "${IREE_HOST_BINARY_ROOT}/third_party/flatbuffers/flatc" "${IREE_HOST_BINARY_ROOT}/bin"
+    DEPENDS iree_host_build_flatc
+    COMMENT "Installing host flatc..."
+  )
+endif()
+
 if(${IREE_BUILD_COMPILER})
   add_subdirectory(build_tools/third_party/tensorflow/tensorflow/compiler/mlir/xla EXCLUDE_FROM_ALL)
 endif()
diff --git a/SUBMODULE_VERSIONS b/SUBMODULE_VERSIONS
index 82239be..b9f7575 100644
--- a/SUBMODULE_VERSIONS
+++ b/SUBMODULE_VERSIONS
@@ -3,7 +3,7 @@
 4c13807b7d43ff0946b7ffea0ae3aee9e611d778 third_party/dear_imgui
 a5d9d0f7d368054fd1691aedf1db4116efcc233e third_party/flatbuffers
 f2fb48c3b3d79a75a88a99fba6576b25d42ec528 third_party/googletest
-69d2fa9ed1c1aba6f473feb03cad257e69a0cf52 third_party/llvm-project
+f0bab7875e78e01c149d12302dcc4b6d4c43e25c third_party/llvm-project
 17b12a4481daa150e2d1ea3ada086b551b856707 third_party/marl
 67f3ccebee84f3488b46a8d3ac005178c52ff264 third_party/mlir-emitc
 80d452484c5409444b0ec19383faa84bb7a4d351 third_party/pybind11
@@ -11,7 +11,7 @@
 b73f111094da3e380a1774b56b15f16c90ae8e23 third_party/sdl2
 f8bf11a0253a32375c32cad92c841237b96696c0 third_party/spirv_headers
 57eb48aed36160c4876bc8310d9ca84d42ee9e2a third_party/swiftshader
-d04bf998887fffe640ec8cacc9094574cd596f99 third_party/tensorflow
+b3319125a036aea6b7bbc0d1c50753e7be73be27 third_party/tensorflow
 864d86e8b6d21449474db5e9313dbff90aa9c24f third_party/tracy
 8a457f8552d8d47ce3a96ed80a714ff6396f8ad8 third_party/vulkan_extensionlayer
 9bd3f561bcee3f01d22912de10bb07ce4e23d378 third_party/vulkan_headers
diff --git a/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel b/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel
index 50ff746..9a98f69 100644
--- a/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel
+++ b/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel
@@ -678,6 +678,40 @@
     ]),
 )
 
+# TODO(b/159809163): autogenerate this after enabling release-mode ML
+# InlineAdvisor
+cc_library(
+    name = "Analysis",
+    srcs = glob(
+        [
+            "lib/Analysis/*.c",
+            "lib/Analysis/*.cpp",
+            "lib/Analysis/*.inc",
+            "include/llvm/Transforms/Utils/Local.h",
+            "include/llvm/Transforms/Scalar.h",
+            "lib/Analysis/*.h",
+        ],
+        exclude = [
+            "lib/Analysis/MLInlineAdvisor.cpp",
+            "lib/Analysis/ReleaseModeModelRunner.cpp",
+        ],
+    ),
+    hdrs = glob([
+        "include/llvm/Analysis/*.h",
+        "include/llvm/Analysis/*.def",
+        "include/llvm/Analysis/*.inc",
+    ]),
+    copts = llvm_copts,
+    deps = [
+        ":BinaryFormat",
+        ":Core",
+        ":Object",
+        ":ProfileData",
+        ":Support",
+        ":config",
+    ],
+)
+
 ########################## Begin generated content ##########################
 cc_library(
     name = "AArch64AsmParser",
@@ -1394,32 +1428,6 @@
 )
 
 cc_library(
-    name = "Analysis",
-    srcs = glob([
-        "lib/Analysis/*.c",
-        "lib/Analysis/*.cpp",
-        "lib/Analysis/*.inc",
-        "include/llvm/Transforms/Utils/Local.h",
-        "include/llvm/Transforms/Scalar.h",
-        "lib/Analysis/*.h",
-    ]),
-    hdrs = glob([
-        "include/llvm/Analysis/*.h",
-        "include/llvm/Analysis/*.def",
-        "include/llvm/Analysis/*.inc",
-    ]),
-    copts = llvm_copts,
-    deps = [
-        ":BinaryFormat",
-        ":Core",
-        ":Object",
-        ":ProfileData",
-        ":Support",
-        ":config",
-    ],
-)
-
-cc_library(
     name = "AsmParser",
     srcs = glob([
         "lib/AsmParser/*.c",
diff --git a/build_tools/cmake/build_docs.sh b/build_tools/cmake/build_docs.sh
index d44ea58..3c5ce15 100755
--- a/build_tools/cmake/build_docs.sh
+++ b/build_tools/cmake/build_docs.sh
@@ -51,6 +51,7 @@
 
 # Update op_coverage.md
 scripts/update_op_coverage.py ${BUILD_DIR}
+scripts/update_e2e_coverage.py ${BUILD_DIR}
 
 # Copy a curated list of docs to publish. This is expected to cover all docs
 # under docs/ after they are refreshed.
@@ -73,6 +74,7 @@
 cp docs/GetStarted/getting_started_linux_vulkan.md ${BUILD_DIR}/doc/GetStarted/
 cp docs/GetStarted/getting_started_macos_bazel.md ${BUILD_DIR}/doc/GetStarted/
 cp docs/GetStarted/getting_started_macos_cmake.md ${BUILD_DIR}/doc/GetStarted/
+cp docs/GetStarted/getting_started_android_cmake.md ${BUILD_DIR}/doc/GetStarted/
 cp docs/GetStarted/getting_started_python.md ${BUILD_DIR}/doc/GetStarted/
 cp docs/GetStarted/generic_vulkan_env_setup.md ${BUILD_DIR}/doc/GetStarted/
 cp docs/GetStarted/cmake_options_and_variables.md ${BUILD_DIR}/doc/GetStarted/
diff --git a/build_tools/cmake/flatbuffer_cc_library.cmake b/build_tools/cmake/flatbuffer_cc_library.cmake
index 6ad2995..febf234 100644
--- a/build_tools/cmake/flatbuffer_cc_library.cmake
+++ b/build_tools/cmake/flatbuffer_cc_library.cmake
@@ -95,18 +95,24 @@
     set(FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS ${_RULE_FLATC_ARGS})
   endif()
 
+  set(_GEN_TARGET "${_NAME}_gen")
+
   build_flatbuffers(
     "${_RULE_SRCS}"
     "${IREE_ROOT_DIR}"
-    "${_NAME}_gen"  # custom_target_name
-    "${_RULE_DEPS}" # additional_dependencies
+    "${_GEN_TARGET}" # custom_target_name
+    "${_RULE_DEPS}"  # additional_dependencies
     "${CMAKE_CURRENT_BINARY_DIR}" # generated_include_dir
     "${CMAKE_CURRENT_BINARY_DIR}" # binary_schemas_dir
     "" # copy_text_schemas_dir
   )
 
+  # Add a dependency on flatc explicitly. This is needed when cross-compiling,
+  # where flatc comes from another CMake invocation for the host.
+  iree_add_executable_dependencies(${_GEN_TARGET} flatc)
+
   add_library(${_NAME} INTERFACE)
-  add_dependencies(${_NAME} ${_NAME}_gen)
+  add_dependencies(${_NAME} ${_GEN_TARGET})
   target_include_directories(${_NAME}
     INTERFACE
       "$<BUILD_INTERFACE:${IREE_COMMON_INCLUDE_DIRS}>"
diff --git a/build_tools/cmake/iree_bytecode_module.cmake b/build_tools/cmake/iree_bytecode_module.cmake
index f8002ec..64f8fd0 100644
--- a/build_tools/cmake/iree_bytecode_module.cmake
+++ b/build_tools/cmake/iree_bytecode_module.cmake
@@ -56,23 +56,24 @@
   if(DEFINED _RULE_TRANSLATE_TOOL)
     set(_TRANSLATE_TOOL ${_RULE_TRANSLATE_TOOL})
   else()
-    set(_TRANSLATE_TOOL "iree_tools_iree-translate")
+    set(_TRANSLATE_TOOL "iree-translate")
   endif()
 
-  # Resolve the executable binary path from the target name.
-  set(_TRANSLATE_TOOL_EXECUTABLE $<TARGET_FILE:${_TRANSLATE_TOOL}>)
+  iree_get_executable_path(_TRANSLATE_TOOL_EXECUTABLE ${_TRANSLATE_TOOL})
 
   set(_ARGS "${_FLAGS}")
   list(APPEND _ARGS "${CMAKE_CURRENT_SOURCE_DIR}/${_RULE_SRC}")
   list(APPEND _ARGS "-o")
   list(APPEND _ARGS "${_RULE_NAME}.module")
 
+  # Depend on the binary instead of the target here because we might not have
+  # a target in this CMake invocation when cross-compiling.
   add_custom_command(
     OUTPUT "${_RULE_NAME}.module"
     COMMAND ${_TRANSLATE_TOOL_EXECUTABLE} ${_ARGS}
     # Changes to either the translation tool or the input source should
     # trigger rebuilding.
-    DEPENDS ${_TRANSLATE_TOOL} ${_RULE_SRC}
+    DEPENDS ${_TRANSLATE_TOOL_EXECUTABLE} ${_RULE_SRC}
   )
 
   if(_RULE_TESTONLY)
diff --git a/build_tools/cmake/iree_cc_binary.cmake b/build_tools/cmake/iree_cc_binary.cmake
index b4d6eff..6b3653a 100644
--- a/build_tools/cmake/iree_cc_binary.cmake
+++ b/build_tools/cmake/iree_cc_binary.cmake
@@ -30,6 +30,8 @@
 # COPTS: List of private compile options
 # DEFINES: List of public defines
 # LINKOPTS: List of link options
+# TESTONLY: only built when tests are enabled
+# HOSTONLY: built under the host configuration when cross-compiling
 #
 # Note:
 # By default, iree_cc_binary will always create a binary named iree_${NAME}.
@@ -58,7 +60,7 @@
 function(iree_cc_binary)
   cmake_parse_arguments(
     _RULE
-    "TESTONLY"
+    "HOSTONLY;TESTONLY"
     "NAME;OUT"
     "SRCS;COPTS;DEFINES;LINKOPTS;DATA;DEPS"
     ${ARGN}
@@ -68,6 +70,14 @@
     return()
   endif()
 
+  if(_RULE_HOSTONLY AND CMAKE_CROSSCOMPILING)
+    # The binary is marked as host only. We need to declare the rules for
+    # generating it under the host configuration so that it is still available
+    # when cross-compiling towards the target.
+    iree_declare_host_excutable(${_RULE_NAME})
+    return()
+  endif()
+
   # Prefix the library with the package name, so we get: iree_package_name
   iree_package_name(_PACKAGE_NAME)
   set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}")
@@ -126,6 +136,11 @@
   # Track target and deps, use in iree_complete_binary_link_options() later.
   set_property(GLOBAL APPEND PROPERTY _IREE_CC_BINARY_NAMES "${_NAME}")
   set_property(TARGET ${_NAME} PROPERTY DIRECT_DEPS ${_RULE_DEPS})
+
+  install(TARGETS ${_NAME}
+          RENAME ${_RULE_NAME}
+          COMPONENT ${_RULE_NAME}
+          RUNTIME DESTINATION bin)
 endfunction()
 
 # Lists all transitive dependencies of DIRECT_DEPS in TRANSITIVE_DEPS.
diff --git a/build_tools/cmake/iree_cc_embed_data.cmake b/build_tools/cmake/iree_cc_embed_data.cmake
index d3644ed..7eeac23 100644
--- a/build_tools/cmake/iree_cc_embed_data.cmake
+++ b/build_tools/cmake/iree_cc_embed_data.cmake
@@ -79,10 +79,12 @@
     list(APPEND _ARGS "${SRC}")
   endforeach(SRC)
 
+  iree_get_executable_path(_EXE_PATH generate_cc_embed_data)
+
   add_custom_command(
     OUTPUT "${_RULE_H_FILE_OUTPUT}" "${_RULE_CC_FILE_OUTPUT}"
-    COMMAND generate_cc_embed_data ${_ARGS}
-    DEPENDS generate_cc_embed_data ${_RULE_SRCS} ${_RULE_GENERATED_SRCS}
+    COMMAND ${_EXE_PATH} ${_ARGS}
+    DEPENDS ${_EXE_PATH} ${_RULE_SRCS} ${_RULE_GENERATED_SRCS}
   )
 
   if(_RULE_TESTONLY)
diff --git a/build_tools/cmake/iree_copts.cmake b/build_tools/cmake/iree_copts.cmake
index 1bd8584..542536b 100644
--- a/build_tools/cmake/iree_copts.cmake
+++ b/build_tools/cmake/iree_copts.cmake
@@ -12,8 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+#-------------------------------------------------------------------------------
+# Abseil configuration
+#-------------------------------------------------------------------------------
+
 include(AbseilConfigureCopts)
 
+# By default Abseil strips string literals on mobile platforms, which means
+# we cannot run IREE binaries via the command line with proper options. Turn
+# off the stripping.
+# TODO: we might still want to strip when compiling IREE into Android Java apps.
+if(CMAKE_CROSSCOMPILING AND "${CMAKE_SYSTEM_NAME}" MATCHES "Android")
+  add_definitions(-DABSL_FLAGS_STRIP_NAMES=0)
+endif()
+
 #-------------------------------------------------------------------------------
 # C++ used within IREE
 #-------------------------------------------------------------------------------
@@ -92,13 +104,19 @@
 #-------------------------------------------------------------------------------
 
 set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "" FORCE)
-set(FLATBUFFERS_INSTALL OFF CACHE BOOL "" FORCE)
-set(FLATBUFFERS_BUILD_FLATC ON CACHE BOOL "" FORCE)
 set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "" FORCE)
 set(FLATBUFFERS_BUILD_GRPCTEST OFF CACHE BOOL "" FORCE)
+set(FLATBUFFERS_INSTALL OFF CACHE BOOL "" FORCE)
 set(FLATBUFFERS_INCLUDE_DIRS
   "${PROJECT_SOURCE_DIR}/third_party/flatbuffers/include/"
 )
+
+if(CMAKE_CROSSCOMPILING)
+  set(FLATBUFFERS_BUILD_FLATC OFF CACHE BOOL "" FORCE)
+else()
+  set(FLATBUFFERS_BUILD_FLATC ON CACHE BOOL "" FORCE)
+endif()
+
 iree_select_compiler_opts(FLATBUFFERS_COPTS
   CLANG
     # Flatbuffers has a bunch of incorrect documentation annotations.
@@ -151,7 +169,9 @@
 endif()
 
 set(MLIR_TABLEGEN_EXE mlir-tblgen)
-set(IREE_TABLEGEN_EXE iree-tblgen)
+# iree-tblgen is not defined using the add_tablegen mechanism like other
+# TableGen tools in LLVM are.
+iree_get_executable_path(IREE_TABLEGEN_EXE iree-tblgen)
 
 #-------------------------------------------------------------------------------
 # Third party: tensorflow
diff --git a/build_tools/cmake/iree_cross_compile.cmake b/build_tools/cmake/iree_cross_compile.cmake
new file mode 100644
index 0000000..2568abd
--- /dev/null
+++ b/build_tools/cmake/iree_cross_compile.cmake
@@ -0,0 +1,234 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+include(iree_macros)
+
+# iree_create_configuration
+#
+# Creates custom commands and targets for an IREE configuration. An IREE
+# configuration means a new IREE CMake invocation with its own set of
+# parameters.
+#
+# This function defines a custom target, `iree_configure_${CONFIG_NAME}`,
+# to drive the generation of a new IREE configuration's `CMakeCache.txt`
+# file. Callers can then depend on either the `CMakeCache.txt` file or the
+# `iree_configure_${CONFIG_NAME}` target to make sure the configuration
+# is invoked as a dependency.
+#
+# This function is typically useful when cross-compiling towards another
+# architecture. For example, when cross-compiling towards Android, we need
+# to have certain tools first compiled on the host so that we can use them
+# to programmatically generate some source code to be compiled together
+# with other checked-in source code. Those host tools will be generated
+# by another CMake invocation configured by this function.
+#
+# Supported CMake options:
+# - IREE_<CONFIG_NAME>_BINARY_ROOT: the root directory containing IREE build
+#   artifacts for the given `CONFIG_NAME`. If not specified by the caller, this
+#   is set to a directory named `CONFIG_NAME` under the current CMake binary
+#   directory.
+# - IREE_<CONFIG_NAME>_C_COMPILER: C compiler for the given `CONFIG_NAME`.
+#   This must be defined by the caller.
+# - IREE_<CONFIG_NAME>_CXX_COMPILER: C++ compiler for the given `CONFIG_NAME`.
+#   This must be defined by the caller.
+# - IREE_<CONFIG_NAME>_<option>: switch for the given `option` specifically for
+#   `CONFIG_NAME`. If missing, defaults to OFF for bool options and to
+#   IREE_<option> for non-bool variables.
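+#
+# Example (as used by the top-level CMakeLists.txt when cross-compiling):
+#   iree_create_configuration(HOST)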
+function(iree_create_configuration CONFIG_NAME)
+  # Set IREE_${CONFIG_NAME}_BINARY_ROOT if missing.
+  if(NOT DEFINED IREE_${CONFIG_NAME}_BINARY_ROOT)
+    set(IREE_${CONFIG_NAME}_BINARY_ROOT "${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}")
+    set(IREE_${CONFIG_NAME}_BINARY_ROOT ${IREE_${CONFIG_NAME}_BINARY_ROOT} PARENT_SCOPE)
+    message(STATUS "Setting ${CONFIG_NAME} build directory to ${IREE_${CONFIG_NAME}_BINARY_ROOT}")
+  endif()
+
+  set(_CONFIG_BINARY_ROOT ${IREE_${CONFIG_NAME}_BINARY_ROOT})
+
+  set(_CONFIG_C_COMPILER ${IREE_${CONFIG_NAME}_C_COMPILER})
+  set(_CONFIG_CXX_COMPILER ${IREE_${CONFIG_NAME}_CXX_COMPILER})
+
+  # Check that the compilers are specified by the caller.
+  if("${_CONFIG_C_COMPILER}" STREQUAL "")
+    message(FATAL_ERROR "Must define IREE_${CONFIG_NAME}_C_COMPILER for \"${CONFIG_NAME}\" configuration build")
+  endif()
+  if("${_CONFIG_CXX_COMPILER}" STREQUAL "")
+    message(FATAL_ERROR "Must define IREE_${CONFIG_NAME}_CXX_COMPILER for \"${CONFIG_NAME}\" configuration build")
+  endif()
+
+  add_custom_command(OUTPUT ${_CONFIG_BINARY_ROOT}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${_CONFIG_BINARY_ROOT}
+    COMMENT "Creating ${_CONFIG_BINARY_ROOT}...")
+
+  # Give it a custom target so we can drive the generation manually
+  # when useful.
+  add_custom_target(iree_prepare_${CONFIG_NAME}_dir DEPENDS ${_CONFIG_BINARY_ROOT})
+
+  # LINT.IfChange(iree_cross_compile_options)
+  iree_to_bool(_CONFIG_ENABLE_RUNTIME_TRACING "${IREE_${CONFIG_NAME}_ENABLE_RUNTIME_TRACING}")
+  iree_to_bool(_CONFIG_ENABLE_MLIR "${IREE_${CONFIG_NAME}_ENABLE_MLIR}")
+  iree_to_bool(_CONFIG_ENABLE_EMITC "${IREE_${CONFIG_NAME}_ENABLE_EMITC}")
+
+  iree_to_bool(_CONFIG_BUILD_COMPILER "${IREE_${CONFIG_NAME}_BUILD_COMPILER}")
+  iree_to_bool(_CONFIG_BUILD_TESTS "${IREE_${CONFIG_NAME}_BUILD_TESTS}")
+  iree_to_bool(_CONFIG_BUILD_DOCS "${IREE_${CONFIG_NAME}_BUILD_DOCS}")
+  iree_to_bool(_CONFIG_BUILD_SAMPLES "${IREE_${CONFIG_NAME}_BUILD_SAMPLES}")
+  iree_to_bool(_CONFIG_BUILD_DEBUGGER "${IREE_${CONFIG_NAME}_BUILD_DEBUGGER}")
+  iree_to_bool(_CONFIG_BUILD_PYTHON_BINDINGS "${IREE_${CONFIG_NAME}_BUILD_PYTHON_BINDINGS}")
+  iree_to_bool(_CONFIG_BUILD_EXPERIMENTAL "${IREE_${CONFIG_NAME}_BUILD_EXPERIMENTAL}")
+
+  # Escape semicolons in the targets list so that CMake doesn't expand them to
+  # spaces.
+  string(REPLACE ";" "$<SEMICOLON>" _CONFIG_HAL_DRIVERS_TO_BUILD "${IREE_HAL_DRIVERS_TO_BUILD}")
+  string(REPLACE ";" "$<SEMICOLON>" _CONFIG_TARGET_BACKENDS_TO_BUILD "${IREE_TARGET_BACKENDS_TO_BUILD}")
+  # LINT.ThenChange(
+  #   https://github.com/google/iree/tree/master/CMakeLists.txt:iree_options,
+  #   https://github.com/google/iree/tree/master/build_tools/cmake/iree_cross_compile.cmake:iree_cross_compile_invoke
+  # )
+
+  message(STATUS "C compiler for ${CONFIG_NAME} build: ${_CONFIG_C_COMPILER}")
+  message(STATUS "C++ compiler for ${CONFIG_NAME} build: ${_CONFIG_CXX_COMPILER}")
+
+  add_custom_command(OUTPUT ${IREE_${CONFIG_NAME}_BINARY_ROOT}/CMakeCache.txt
+    COMMAND "${CMAKE_COMMAND}" "${PROJECT_SOURCE_DIR}" -G "${CMAKE_GENERATOR}"
+        -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
+        -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+        -DCMAKE_C_COMPILER="${_CONFIG_C_COMPILER}"
+        -DCMAKE_CXX_COMPILER="${_CONFIG_CXX_COMPILER}"
+        # LINT.IfChange(iree_cross_compile_invoke)
+        -DIREE_ENABLE_RUNTIME_TRACING=${_CONFIG_ENABLE_RUNTIME_TRACING}
+        -DIREE_ENABLE_MLIR=${_CONFIG_ENABLE_MLIR}
+        -DIREE_ENABLE_EMITC=${_CONFIG_ENABLE_EMITC}
+        -DIREE_BUILD_COMPILER=${_CONFIG_BUILD_COMPILER}
+        -DIREE_BUILD_TESTS=${_CONFIG_BUILD_TESTS}
+        -DIREE_BUILD_DOCS=${_CONFIG_BUILD_DOCS}
+        -DIREE_BUILD_SAMPLES=${_CONFIG_BUILD_SAMPLES}
+        -DIREE_BUILD_DEBUGGER=${_CONFIG_BUILD_DEBUGGER}
+        -DIREE_BUILD_PYTHON_BINDINGS=${_CONFIG_BUILD_PYTHON_BINDINGS}
+        -DIREE_BUILD_EXPERIMENTAL=${_CONFIG_BUILD_EXPERIMENTAL}
+        # LINT.ThenChange(
+        #   https://github.com/google/iree/tree/master/CMakeLists.txt:iree_options,
+        #   https://github.com/google/iree/tree/master/build_tools/cmake/iree_cross_compile.cmake:iree_cross_compile_options,
+        # )
+        -DIREE_HAL_DRIVERS_TO_BUILD="${_CONFIG_HAL_DRIVERS_TO_BUILD}"
+        -DIREE_TARGET_BACKENDS_TO_BUILD="${_CONFIG_TARGET_BACKENDS_TO_BUILD}"
+    WORKING_DIRECTORY ${_CONFIG_BINARY_ROOT}
+    DEPENDS iree_prepare_${CONFIG_NAME}_dir
+    COMMENT "Configuring IREE for ${CONFIG_NAME} build...")
+
+  add_custom_target(iree_configure_${CONFIG_NAME} DEPENDS ${_CONFIG_BINARY_ROOT}/CMakeCache.txt)
+endfunction()
+
+# iree_get_build_command
+#
+# Gets the CMake build command for the given `TARGET`.
+#
+# Parameters:
+# TARGET: the target to build.
+# BINDIR: root binary directory containing CMakeCache.txt.
+# CMDVAR: variable name for receiving the build command.
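+# CONFIG: the CMake build configuration to use; defaults to the current
+#   $<CONFIG> if not given.
+#
+# Example (illustrative variable name; flatc is built this way on the host):
+#   iree_get_build_command(flatc BINDIR ${IREE_HOST_BINARY_ROOT} CMDVAR _build_cmd)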
+function(iree_get_build_command TARGET)
+  cmake_parse_arguments(_RULE "" "BINDIR;CMDVAR;CONFIG" "" ${ARGN})
+  if(NOT _RULE_CONFIG)
+    set(_RULE_CONFIG "$<CONFIG>")
+  endif()
+  if (CMAKE_GENERATOR MATCHES "Make")
+    # Use special command for Makefiles to support parallelism.
+    set(${_RULE_CMDVAR}
+        "$(MAKE)" "-C" "${_RULE_BINDIR}" "${TARGET}" PARENT_SCOPE)
+  else()
+    set(${_RULE_CMDVAR}
+        "${CMAKE_COMMAND}" --build ${_RULE_BINDIR}
+                           --target ${TARGET}
+                           --config ${_RULE_CONFIG} PARENT_SCOPE)
+  endif()
+endfunction()
+
+# iree_host_install
+#
+# Defines custom commands and targets for installing the given `TARGET` under
+# the host configuration. The custom install target will be named
+# `iree_host_install_${TARGET}`.
+#
+# Precondition:
+# iree_create_configuration(HOST) is invoked previously.
+#
+# Parameters:
+# COMPONENT: installation component; used for filtering installation targets.
+# PREFIX: the root installation path prefix.
+# DEPENDS: additional dependencies for the installation.
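+#
+# Example (mirroring how iree_declare_host_excutable installs a host tool):
+#   iree_host_install(flatc
+#                     COMPONENT flatc
+#                     PREFIX ${IREE_HOST_BINARY_ROOT}
+#                     DEPENDS iree_host_build_flatc)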
+function(iree_host_install TARGET)
+  cmake_parse_arguments(_RULE "" "TARGET;COMPONENT;PREFIX" "DEPENDS" ${ARGN})
+  if(_RULE_COMPONENT)
+    set(_COMPONENT_OPTION -DCMAKE_INSTALL_COMPONENT="${_RULE_COMPONENT}")
+  endif()
+  if(_RULE_PREFIX)
+    set(_PREFIX_OPTION -DCMAKE_INSTALL_PREFIX="${_RULE_PREFIX}")
+  endif()
+
+  iree_get_executable_path(_OUTPUT_PATH ${TARGET})
+
+  add_custom_command(
+    OUTPUT ${_OUTPUT_PATH}
+    DEPENDS ${_RULE_DEPENDS}
+    COMMAND "${CMAKE_COMMAND}" ${_COMPONENT_OPTION} ${_PREFIX_OPTION}
+            -P "${IREE_HOST_BINARY_ROOT}/cmake_install.cmake"
+    USES_TERMINAL)
+
+  # Give it a custom target so we can drive the generation manually
+  # when useful.
+  add_custom_target(iree_host_install_${TARGET} DEPENDS ${_OUTPUT_PATH})
+endfunction()
+
+# iree_declare_host_excutable
+#
+# Generates custom commands and targets for building and installing a tool on
+# the host for cross-compilation.
+#
+# Precondition:
+# iree_create_configuration(HOST) is invoked previously.
+#
+# Parameters:
+# TARGET: the target to build on the host.
+# BUILDONLY: only generates commands for building the target.
+# DEPENDS: any additional dependencies for the target.
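+#
+# Example (as used by the top-level CMakeLists.txt to build flatc on the host):
+#   iree_declare_host_excutable(flatc BUILDONLY)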
+function(iree_declare_host_excutable TARGET)
+  cmake_parse_arguments(_RULE "BUILDONLY" "" "DEPENDS" ${ARGN})
+
+  iree_get_executable_path(_OUTPUT_PATH ${TARGET})
+
+  iree_get_build_command(${TARGET}
+    BINDIR ${IREE_HOST_BINARY_ROOT}
+    CMDVAR build_cmd)
+
+  add_custom_target(iree_host_build_${TARGET}
+                    COMMAND ${build_cmd}
+                    DEPENDS iree_configure_HOST ${_RULE_DEPENDS}
+                    WORKING_DIRECTORY "${IREE_HOST_BINARY_ROOT}"
+                    COMMENT "Building host ${TARGET}..."
+                    USES_TERMINAL)
+
+  if(_RULE_BUILDONLY)
+    return()
+  endif()
+
+  iree_host_install(${TARGET}
+                    COMPONENT ${TARGET}
+                    PREFIX ${IREE_HOST_BINARY_ROOT}
+                    DEPENDS iree_host_build_${TARGET})
+
+  # Note that this target is not created when BUILDONLY is set, so that
+  # iree_host_${TARGET} can be defined elsewhere to point to another
+  # installation path, allowing flexibility.
+  add_custom_target(iree_host_${TARGET} DEPENDS "${_OUTPUT_PATH}")
+endfunction()
diff --git a/build_tools/cmake/iree_macros.cmake b/build_tools/cmake/iree_macros.cmake
index 4929146..7b27392 100644
--- a/build_tools/cmake/iree_macros.cmake
+++ b/build_tools/cmake/iree_macros.cmake
@@ -25,6 +25,22 @@
 endif()
 
 #-------------------------------------------------------------------------------
+# General utilities
+#-------------------------------------------------------------------------------
+
+# iree_to_bool
+#
+# Sets `variable` to `ON` if `value` is true and `OFF` otherwise.
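+#
+# Example (illustrative variable name):
+#   iree_to_bool(_MY_FLAG "${IREE_BUILD_TESTS}")  # _MY_FLAG becomes ON or OFF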
+function(iree_to_bool VARIABLE VALUE)
+  if(VALUE)
+    set(${VARIABLE} "ON" PARENT_SCOPE)
+  else()
+    set(${VARIABLE} "OFF" PARENT_SCOPE)
+  endif()
+endfunction()
+
+
+#-------------------------------------------------------------------------------
 # Packages and Paths
 #-------------------------------------------------------------------------------
 
@@ -72,6 +88,28 @@
   set(${PACKAGE_DIR} ${_PACKAGE_DIR} PARENT_SCOPE)
 endfunction()
 
+# iree_get_executable_path
+#
+# Gets the path to an executable in a cross-compilation-aware way. This
+# should be used when accessing binaries that are used as part of the build,
+# such as for generating files used for later build steps.
+#
+# Parameters:
+# - OUTPUT_PATH_VAR: variable name for receiving the path to the built target.
+# - TARGET: the target to build on the host.
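+#
+# Example (illustrative variable name):
+#   iree_get_executable_path(_FLATC_EXE flatc)
+#   # _FLATC_EXE is ${IREE_HOST_BINARY_ROOT}/bin/flatc when cross-compiling,
+#   # or $<TARGET_FILE:flatc> otherwise.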
+function(iree_get_executable_path OUTPUT_PATH_VAR TARGET)
+  if(CMAKE_CROSSCOMPILING)
+    # The target is defined in the CMake invocation for the host. We don't
+    # have access to the target here, so we rely on the path.
+    set(_OUTPUT_PATH "${IREE_HOST_BINARY_ROOT}/bin/${TARGET}")
+    set(${OUTPUT_PATH_VAR} "${_OUTPUT_PATH}" PARENT_SCOPE)
+  else()
+    # The target is defined in this CMake invocation. We can query the location
+    # directly from CMake.
+    set(${OUTPUT_PATH_VAR} "$<TARGET_FILE:${TARGET}>" PARENT_SCOPE)
+  endif()
+endfunction()
+
 #-------------------------------------------------------------------------------
 # select()-like Evaluation
 #-------------------------------------------------------------------------------
@@ -169,3 +207,20 @@
     endif()
   endforeach()
 endfunction()
+
+# iree_add_executable_dependencies
+#
+# Adds a dependency on a target in a cross-compilation-aware way. This should
+# be used for depending on targets that are used as part of the build, such
+# as for generating files used for later build steps.
+#
+# Parameters:
+# TARGET: the target to add dependencies to
+# DEPENDENCY: the additional dependency to append to the target
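+#
+# Example (as used in flatbuffer_cc_library.cmake for the flatc generator):
+#   iree_add_executable_dependencies(${_GEN_TARGET} flatc)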
+function(iree_add_executable_dependencies TARGET DEPENDENCY)
+  if(CMAKE_CROSSCOMPILING)
+    add_dependencies(${TARGET} iree_host_${DEPENDENCY})
+  else()
+    add_dependencies(${TARGET} ${DEPENDENCY})
+  endif()
+endfunction()
diff --git a/build_tools/embed_data/CMakeLists.txt b/build_tools/embed_data/CMakeLists.txt
index ec07934..4efad40 100644
--- a/build_tools/embed_data/CMakeLists.txt
+++ b/build_tools/embed_data/CMakeLists.txt
@@ -12,13 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-add_executable(generate_cc_embed_data)
-target_sources(generate_cc_embed_data PRIVATE generate_cc_embed_data.cc)
-set_target_properties(generate_cc_embed_data PROPERTIES OUTPUT_NAME generate_cc_embed_data)
+if(CMAKE_CROSSCOMPILING)
+  iree_declare_host_excutable(generate_cc_embed_data)
+else()
+  add_executable(generate_cc_embed_data)
+  target_sources(generate_cc_embed_data PRIVATE generate_cc_embed_data.cc)
+  set_target_properties(generate_cc_embed_data PROPERTIES OUTPUT_NAME generate_cc_embed_data)
 
-target_link_libraries(generate_cc_embed_data
-  absl::flags
-  absl::flags_parse
-  absl::strings
-  absl::time
-)
+  target_link_libraries(generate_cc_embed_data
+    absl::flags
+    absl::flags_parse
+    absl::strings
+    absl::time
+  )
+  install(TARGETS generate_cc_embed_data
+          COMPONENT generate_cc_embed_data
+          RUNTIME DESTINATION bin)
+endif()
diff --git a/docs/GetStarted/cmake_options_and_variables.md b/docs/GetStarted/cmake_options_and_variables.md
index 2676efe..33b8d04 100644
--- a/docs/GetStarted/cmake_options_and_variables.md
+++ b/docs/GetStarted/cmake_options_and_variables.md
@@ -94,3 +94,42 @@
 
 Specifies the path where to look for the installed MLIR/LLVM packages. Required
 if `IREE_MLIR_DEP_MODE` is set to `INSTALLED`.
+
+## Cross-compilation
+
+Cross-compilation involves both a *host* platform and a *target* platform. One
+invokes compiler toolchains on the host platform to generate libraries and
+executables that can be run on the target platform.
+
+IREE uses tools to programmatically generate C/C++ source code from some
+domain-specific descriptions. For example, `flatc` is used to generate C/C++
+code from FlatBuffer schemas. These tools should be compiled for the host
+platform so that we can invoke them during the build process. This means
+cross-compiling IREE (conceptually) happens in two stages: first compile the
+build tools for the host platform, and then use these host tools together with
+the cross-compilation toolchain to generate artifacts for the target platform.
+(The build system dependency graph may not have such a clear two-stage
+separation.)
+
+CMake cannot handle multiple compiler toolchains in one invocation, so the
+above conceptual two-stage compilation happens in two separate CMake
+invocations.
+
+#### `IREE_HOST_BINARY_ROOT`:FILEPATH
+
+Specifies the root directory containing all artifacts from the host CMake
+invocation. This defaults to `CMAKE_BINARY_DIR/host` if not set.
+
+#### `IREE_HOST_C_COMPILER`:STRING
+
+Specifies the C compiler for host compilation.
+
+#### `IREE_HOST_CXX_COMPILER`:STRING
+
+Specifies the C++ compiler for host compilation.
+
+#### `IREE_HOST_<option>`:BOOL
+
+For each option described in "IREE-specific CMake Options and Variables", you
+can use the `IREE_HOST_<option>` counterpart to control the feature when
+compiling under the host configuration. For example, `IREE_HOST_BUILD_TESTS`
+enables all tests for the host configuration.
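+
+As an illustrative sketch (toolchain path and ABI values are examples; see the
+Android getting started guide for a complete walkthrough), a cross-compiling
+configuration might combine these variables as follows:
+
+```shell
+$ cmake -G Ninja -B build-android/ \
+    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+    -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-29 \
+    -DIREE_HOST_C_COMPILER=`which clang` -DIREE_HOST_CXX_COMPILER=`which clang++` \
+    -DIREE_HOST_BUILD_TESTS=ON
+```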
diff --git a/docs/GetStarted/getting_started_android_cmake.md b/docs/GetStarted/getting_started_android_cmake.md
new file mode 100644
index 0000000..0de3e46
--- /dev/null
+++ b/docs/GetStarted/getting_started_android_cmake.md
@@ -0,0 +1,181 @@
+# Getting Started on Android with CMake
+
+<!--
+Notes to those updating this guide:
+
+    * This document should be __simple__ and cover essential items only.
+      Notes for optional components should go in separate files.
+-->
+
+This guide walks through cross-compiling IREE's core runtime towards the
+Android platform. Cross-compiling IREE's compilers towards Android is not
+supported at the moment.
+
+Cross-compilation involves both a *host* platform and a *target* platform. One
+invokes compiler toolchains on the host platform to generate libraries and
+executables that can be run on the target platform.
+
+## Prerequisites
+
+### Set up host development environment
+
+The host platform should have been set up for developing IREE. Right now only
+Linux is supported; Windows and macOS support is coming. Please make sure you
+have followed the steps in
+[Get Started on Linux with CMake](./getting_started_linux_cmake.md).
+
+### Install Android NDK
+
+Android NDK provides compiler toolchains for compiling C/C++ code to target
+Android. You can download it
+[here](https://developer.android.com/ndk/downloads). We recommend downloading
+the latest release; the steps in the following sections may assume it.
+
+Alternatively, if you have installed
+[Android Studio](https://developer.android.com/studio), you can follow
+[this guide](https://developer.android.com/studio/projects/install-ndk) to
+install Android NDK.
+
+After downloading, it is recommended to set the `ANDROID_NDK` environment
+variable to the NDK directory, for example via an `export` line in your
+shell's rc file.
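+
+A sketch of such an `export` line (the NDK path is illustrative; use wherever
+you extracted it):
+
+```shell
+$ export ANDROID_NDK=$HOME/android-ndk-r21
+```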
+
+### Install Android Debug Bridge (ADB)
+
+Install `adb` with your Linux distribution's package manager. For example, on
+Ubuntu:
+
+```shell
+$ sudo apt install adb
+```
+
+## Build
+
+Configure:
+
+```shell
+# Assuming in IREE source root
+
+$ cmake -G Ninja -B build-android/  \
+    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
+    -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-29 \
+    -DIREE_BUILD_COMPILER=OFF -DIREE_BUILD_TESTS=OFF -DIREE_BUILD_SAMPLES=OFF \
+    -DIREE_HOST_C_COMPILER=`which clang` -DIREE_HOST_CXX_COMPILER=`which clang++`
+```
+
+*   The above configures IREE to cross-compile towards 64-bit
+    (`-DANDROID_ABI="arm64-v8a"`) Android 10 (`-DANDROID_PLATFORM=android-29`).
+    This may require the latest Android NDK release. You can choose the suitable
+    [`ANDROID_ABI`](https://developer.android.com/ndk/guides/cmake#android_abi)
+    and
+    [`ANDROID_PLATFORM`](https://en.wikipedia.org/wiki/Android_version_history)
+    for your target device. You can also refer to Android NDK's
+    [CMake documentation](https://developer.android.com/ndk/guides/cmake) for
+    more toolchain arguments.
+*   Building IREE compilers, tests, and samples for Android is not supported at
+    the moment; they will be enabled soon.
+*   We need to set `IREE_HOST_{C|CXX}_COMPILER` to Clang here because IREE
+    does not [support](https://github.com/google/iree/issues/1269) GCC well at
+    the moment.
+
+Build all targets:
+
+```shell
+$ cmake --build build-android/
+```
+
+## Test on Android
+
+Make sure you
+[enable developer options and USB debugging](https://developer.android.com/studio/debug/dev-options#enable)
+for your Android device.
+
+Connect your Android device to the development machine and make sure you can
+see the device when running:
+
+```shell
+$ adb devices
+
+List of devices attached
+XXXXXXXXXXX     device
+```
+
+### VMLA HAL backend
+
+Translate a source MLIR file into an IREE module:
+
+```shell
+# Assuming in IREE source root
+
+$ build-android/host/bin/iree-translate -- \
+    -iree-mlir-to-vm-bytecode-module \
+    -iree-hal-target-backends=vmla \
+    iree/tools/test/simple.mlir \
+    -o /tmp/simple-vmla.vmfb
+```
+
+Then push the IREE runtime executable and module to the device:
+
+```shell
+$ adb push iree/tools/iree-run-module /data/local/tmp/
+$ adb push /tmp/simple-vmla.vmfb /data/local/tmp/
+```
+
+Log into Android:
+
+```shell
+$ adb shell
+
+android $ cd /data/local/tmp/
+android $ ./iree-run-module -driver=vmla -input_file=simple-vmla.vmfb -entry_function=abs -inputs="i32=-5"
+
+EXEC @abs
+i32=5
+```
+
+### Vulkan HAL backend
+
+Please make sure your Android device is Vulkan capable. Vulkan has been
+supported on Android since version 7, but Android 10 is our primary target at
+the moment.
+
+Translate a source MLIR file into an IREE module:
+
+```shell
+# Assuming in IREE source root
+
+$ build-android/host/bin/iree-translate -- \
+    -iree-mlir-to-vm-bytecode-module \
+    -iree-hal-target-backends=vulkan-spirv \
+    iree/tools/test/simple.mlir \
+    -o /tmp/simple-vulkan.vmfb
+```
+
+Then push the IREE runtime executable and module to the device:
+
+```shell
+$ adb push iree/tools/iree-run-module /data/local/tmp/
+$ adb push /tmp/simple-vulkan.vmfb /data/local/tmp/
+```
+
+Log into Android:
+
+```shell
+$ adb shell
+
+android $ cd /data/local/tmp/
+android $ ./iree-run-module -driver=vulkan -input_file=simple-vulkan.vmfb -entry_function=abs -inputs="i32=-5"
+
+EXEC @abs
+i32=5
+```
+
+#### Common issues
+
+##### Vulkan function `vkCreateInstance` not available
+
+This can happen on Android devices with ARM Mali GPUs, where there is only one
+monolithic driver (`/vendor/lib[64]/libGLES_mali.so`) and the Vulkan vendor
+driver (`/vendor/lib[64]/hw/vulkan.*.so`) is just a symlink to it. This causes
+problems for Vulkan device enumeration under `/data/local/tmp/`. A known
+workaround is to copy `libGLES_mali.so` to `/data/local/tmp/`, rename it to
+`libvulkan.so`, and prefix `LD_LIBRARY_PATH=/data/local/tmp` when invoking
+IREE executables, as sketched below.
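+
+A minimal sketch of that workaround (the exact library path may differ per
+device):
+
+```shell
+android $ cp /vendor/lib64/libGLES_mali.so /data/local/tmp/libvulkan.so
+android $ LD_LIBRARY_PATH=/data/local/tmp ./iree-run-module -driver=vulkan \
+    -input_file=simple-vulkan.vmfb -entry_function=abs -inputs="i32=-5"
+```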
diff --git a/docs/developer_overview.md b/docs/developer_overview.md
index 5898490..7ab969b 100644
--- a/docs/developer_overview.md
+++ b/docs/developer_overview.md
@@ -68,24 +68,35 @@
 [mlir-opt](https://github.com/llvm/llvm-project/tree/master/mlir/tools/mlir-opt)
 and runs sets of IREE's compiler passes on `.mlir` input files. See "conversion"
 in [MLIR's Glossary](https://mlir.llvm.org/getting_started/Glossary/#conversion)
-for more information.
+for more information. Transformations performed by `iree-opt` can range from
+individual passes performing isolated manipulations to broad pipelines that
+encompass a sequence of steps.
 
 Test `.mlir` files that are checked in typically include a `RUN` block at the
 top of the file that specifies which passes should be performed and if
 `FileCheck` should be used to test the generated output.
 
-For example, to run some passes on the
-[reshape.mlir](https://github.com/google/iree/blob/master/iree/compiler/Translation/SPIRV/XLAToSPIRV/test/reshape.mlir)
-test file:
+Here's an example of a small compiler pass running on a
+[test file](https://github.com/google/iree/blob/master/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir):
 
 ```shell
 $ bazel run iree/tools:iree-opt -- \
   -split-input-file \
-  -iree-index-computation \
-  -simplify-spirv-affine-exprs=false \
-  -convert-iree-to-spirv \
-  -verify-diagnostics \
-  $PWD/iree/compiler/Translation/SPIRV/XLAToSPIRV/test/reshape.mlir
+  -print-ir-before-all \
+  -iree-drop-compiler-hints \
+  $PWD/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir
+```
+
+For a more complex example, here's how to run IREE's complete transformation
+pipeline targeting the VMLA backend on the
+[fullyconnected.mlir](https://github.com/google/iree/blob/master/iree/test/e2e/models/fullyconnected.mlir)
+model file:
+
+```shell
+$ bazel run iree/tools:iree-opt -- \
+  -iree-transformation-pipeline \
+  -iree-hal-target-backends=vmla \
+  $PWD/iree/test/e2e/models/fullyconnected.mlir
 ```
 
 Custom passes may also be layered on top of `iree-opt`, see
diff --git a/experimental/ModelBuilder/test/TestVectorToGPU.cpp b/experimental/ModelBuilder/test/TestVectorToGPU.cpp
index 72902ef..9c11fae 100644
--- a/experimental/ModelBuilder/test/TestVectorToGPU.cpp
+++ b/experimental/ModelBuilder/test/TestVectorToGPU.cpp
@@ -19,6 +19,7 @@
 
 // clang-format on
 #include <string>
+#include "iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/ExecutionEngine/CRunnerUtils.h"
@@ -156,13 +157,11 @@
 
 void testCooperativeMatMul() {
   const int warpSize = 32;
-  // Simple test a single warp.
-  // Hardcode types and size to what is supported in Cooperative Matrix
-  // extension
-  // Matrix of size 8x8x32 with uint8xuint8xuint32 types.
-  const int resRows = 8;
-  const int resColumns = 8;
-  const int reductionSize = 32;
+  // Pick twice the size of the cooperative matrix to test that the matmul
+  // gets tiled correctly.
+  const int resRows = 8 * 2;
+  const int resColumns = 8 * 2;
+  const int reductionSize = 32 * 2;
   StringLiteral funcName = "kernel_matmul";
   MLIRContext context;
   ModelBuilder modelBuilder;
@@ -199,10 +198,23 @@
   CompilationOptions options;
   SmallVector<Type, 3> args = {typeA, typeB, typeC};
   options.loweringPasses = [&](mlir::PassManager &pm) {
-    mlir::OwningRewritePatternList patterns;
-    patterns.insert<linalg::LinalgVectorizationPattern<linalg::MatmulOp>>(
-        pm.getContext());
-    mlir::applyPatternsAndFoldGreedily(*modelBuilder.getModuleRef(), patterns);
+    MatmulCodegenStrategy strategy;
+    // Use hardcoded values for the cooperative matrix size. These will be
+    // pulled from device properties eventually.
+    const int cooperativeMatrixM = 8;
+    const int cooperativeMatrixK = 8;
+    const int cooperativeMatrixN = 32;
+    // TODO(thomasraoux): Use parallel loops for tiling to be able to partition
+    // the matmul across several workgroups. To support this case,
+    // AffineMinCanonicalizer needs to support parallel loops.
+    // TODO(thomasraoux): LICM is disabled due to a limitation in SPIR-V.
+    strategy
+        .tile<linalg::MatmulOp>(linalg::LinalgTilingOptions().setTileSizes(
+            {cooperativeMatrixM, cooperativeMatrixK, cooperativeMatrixN}))
+        .setHoistInvariantCode(false)
+        .vectorize<linalg::MatmulOp>();
+    modelBuilder.getModuleRef()->walk(
+        [&](FuncOp fn) { strategy.transform(fn); });
     // TODO(thomasraoux): Markers are used as a workaround, those will either
     // moved within the vectorToGPU pass only or will be replace by op
     // interface.
diff --git a/integrations/tensorflow/bindings/python/pyiree/tf/compiler/BUILD b/integrations/tensorflow/bindings/python/pyiree/tf/compiler/BUILD
index d23a095..1f980ae 100644
--- a/integrations/tensorflow/bindings/python/pyiree/tf/compiler/BUILD
+++ b/integrations/tensorflow/bindings/python/pyiree/tf/compiler/BUILD
@@ -108,6 +108,7 @@
         "@llvm-project//mlir:IR",
         "@org_tensorflow//tensorflow/cc/saved_model:loader_lite",
         "@org_tensorflow//tensorflow/compiler/mlir/tensorflow:convert_graphdef",
+        "@org_tensorflow//tensorflow/compiler/mlir/tensorflow:tf_saved_model_passes",
         "@org_tensorflow//tensorflow/core:core_cpu",
     ],
 )
diff --git a/integrations/tensorflow/e2e/BUILD b/integrations/tensorflow/e2e/BUILD
index f25c6b1..3500d99 100644
--- a/integrations/tensorflow/e2e/BUILD
+++ b/integrations/tensorflow/e2e/BUILD
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Test coverage across backends for e2e tests is defined directly in the BUILD
+# files. A coverage table generated from this file can be viewed here:
+#   https://google.github.io/iree/TensorFlowE2ECoverage
+# Updates made to test suite names should also be reflected here:
+#   https://github.com/google/iree/blob/master/scripts/update_e2e_coverage.py
+
 load(
     "//bindings/python:build_defs.oss.bzl",
     "INTREE_TENSORFLOW_PY_DEPS",
@@ -28,13 +34,29 @@
     licenses = ["notice"],  # Apache 2.0
 )
 
+# Create binaries for all test srcs to allow them to be run manually.
+[
+    py_binary(
+        name = src.replace(".py", "_manual"),
+        srcs = [src],
+        main = src,
+        python_version = "PY3",
+        deps = INTREE_TENSORFLOW_PY_DEPS + NUMPY_DEPS + [
+            "//integrations/tensorflow/bindings/python/pyiree/tf/support",
+        ],
+    )
+    for src in glob(["*_test.py"])
+]
+
 # Special cases to exclude from automatically expanding targets for all
 # backends.
+# keep sorted
 SPECIAL_CASES = [
     "explicit_backend_test.py",
     "linspace_test.py",
 ]
 
+# keep sorted
 VMLA_FAILING = [
     "fill_test.py",
     "mandelbrot_test.py",
@@ -42,6 +64,7 @@
     "strings_test.py",
 ]
 
+# keep sorted
 LLVM_FAILING = [
     "broadcasting_test.py",
     "depth_conv_test.py",
@@ -56,6 +79,7 @@
     "strings_test.py",
 ]
 
+# keep sorted
 VULKAN_FAILING = [
     "broadcasting_test.py",
     "depth_conv_test.py",
@@ -92,7 +116,7 @@
 )
 
 iree_e2e_test_suite(
-    name = "e2e",
+    name = "e2e_tests",
     backends_to_srcs = {
         "tf_also": TF_PASSING,
         "iree_vmla": VMLA_PASSING,
@@ -106,7 +130,7 @@
 )
 
 iree_e2e_test_suite(
-    name = "e2e_failing",
+    name = "e2e_tests_failing",
     backends_to_srcs = {
         "iree_vmla": VMLA_FAILING,
         "iree_llvmjit": LLVM_FAILING,
@@ -125,10 +149,11 @@
 
 # Special cases.
 
-# linspace_test passes internally, but fails in the OSS CI.
+# linspace_test passes internally, but fails in the OSS CI, so it needs
+# a "nokokoro" tag.
 iree_e2e_test_suite(
     # TODO(#2082): `linspace_test.py` fails in the `bazel-tensorflow` image.
-    name = "linspace",
+    name = "linspace_tests",
     backends_to_srcs = {
         "tf_also": ["linspace_test.py"],
         "iree_vmla": ["linspace_test.py"],
@@ -143,7 +168,7 @@
 )
 
 iree_e2e_test_suite(
-    name = "linspace_failing",
+    name = "linspace_tests_failing",
     backends_to_srcs = {
         "iree_llvmjit": ["linspace_test.py"],
         "iree_vulkan": ["linspace_test.py"],
@@ -159,7 +184,8 @@
     ],
 )
 
-# This tests explicitly writing which backends to use in Python.
+# This tests explicitly writing which backends to use in Python,
+# so overriding the backends can cause it to break.
 iree_py_test(
     name = "explicit_backend_test",
     srcs = ["explicit_backend_test.py"],
diff --git a/integrations/tensorflow/e2e/README.md b/integrations/tensorflow/e2e/README.md
index b7f21bf..6f3272f 100644
--- a/integrations/tensorflow/e2e/README.md
+++ b/integrations/tensorflow/e2e/README.md
@@ -22,38 +22,67 @@
 
 ## Running tests
 
-NOTE: We are in the process of reworking how backend specification functions, so
-you have to specify the target name including the name of the test suite and
-a specific backend pair even if you are overriding the backends. The override
-backends take precedence.
-
 ```shell
 # For locally running tests and iterating on backend development,
 # `bazel run` is preferred.
-bazel run :e2e_math_test_tf_tf_also -- --override_backends=iree_vulkan
+bazel run :math_test_manual -- --override_backends=iree_vmla
 
 # Same as above, but add `tf` backend to cross-check numerical correctness.
-bazel run :e2e_math_test_tf_tf_also -- --override_backends=tf,iree_vulkan
+bazel run :math_test_manual -- --override_backends=tf,iree_vmla
 
 # Run all tests with defaults and output on failure.
 bazel test ... --test_output=errors
 
 # Run an individual test interactively.
-bazel test simple_arithmetic_test --test_output=streamed
-
-# Run tests with an altered list of backends.
-bazel test ... --test_output=errors \
-    --test_arg=--override_backends=tf,iree_vmla,iree_vulkan
+bazel run :math_test_manual -- --test_output=streamed
 ```
 
 If you specify the same backend multiple times, for example
 `--override_backends=iree_vmla,iree_vmla`. The same backends are grouped and in
-this example `iree_vmla` will run once. If you specify `tf,iree_vmla` as backends,
-then we will test both backends and compare them with each other. If you specify
-the `tf` backend only, then we will also test tf vs tf to capture any model
-initialization/randomization issues (it is a special case for debug purpose).
-For reproducibility of the unit tests we set random seed of tf and numpy by
-calling `tf_test_utils.set_random_seed()` before model creation.
+this example `iree_vmla` will run once. If you specify `tf,iree_vmla` as
+backends, then we will test both backends and compare them with each other. If
+you specify only the `tf` backend, then we will also test `tf` vs `tf` to
+capture any model initialization/randomization issues (this is a special case
+for debugging purposes). For reproducibility of the unit tests we set the
+random seeds of `tf` and `numpy` by calling `tf_test_utils.set_random_seed()`
+before model creation.
+
+## Test Suites
+
+Test targets are automatically generated for each test file and for each
+backend to check numerical correctness against TensorFlow. Test targets that
+pass are placed into the `e2e_tests` test suite. Tests that fail on particular
+backends are recorded in lists in the `BUILD` files. For example, if
+`experimental_new_test.py` fails on the `iree_llvmjit` and `iree_vulkan`
+backends, then the following lines should be added to the `BUILD` file:
+
+```build
+LLVM_FAILING = [
+    ...
+    "experimental_new_test.py",
+    ...
+]
+
+VULKAN_FAILING = [
+    ...
+    "experimental_new_test.py",
+    ...
+]
+```
+
+Test targets for these backends are placed into the `e2e_tests_failing` test
+suite. Test targets in these test suites can be run as follows:
+
+```shell
+# Run all e2e tests that are expected to pass.
+bazel test :e2e_tests
+
+# Run all e2e tests that are expected to fail.
+bazel test :e2e_tests_failing
+
+# Run a specific failing e2e test target.
+# Note that generated test targets are prefixed with their test suite name.
+bazel test :e2e_tests_failing_broadcasting_test__tf__iree_vulkan
+```
 
 ## Debugging tests
 
@@ -73,15 +102,7 @@
 ### Limiting a test to only certain backends
 
 The BUILD file specifies which targets work on which backends and controls which
-backends tests are run on by using the `--override_backends` flag. If you add a
-new test that does not work on some backends, list it as failing on those
-backends in the BUILD file.
-
-```build
-VULKAN_FAILING = [
-    "my_experimental_new_test.py",
-]
-```
+backends tests are run on by using the `--override_backends` flag.
 
 The `@tf_test_utils.compile_modules` decorator on tests also takes a `backends=`
 keyword argument. Many tests still specify this, but it is ignored in the CI,
diff --git a/integrations/tensorflow/e2e/iree_e2e_test_suite.bzl b/integrations/tensorflow/e2e/iree_e2e_test_suite.bzl
index e9efa4c..7b19938 100644
--- a/integrations/tensorflow/e2e/iree_e2e_test_suite.bzl
+++ b/integrations/tensorflow/e2e/iree_e2e_test_suite.bzl
@@ -48,7 +48,7 @@
 
     for backend, srcs in backends_to_srcs.items():
         for src in srcs:
-            test_name = "{}_{}_{}_{}".format(
+            test_name = "{}_{}__{}__{}".format(
                 name,
                 src[:-3],
                 reference_backend,
diff --git a/integrations/tensorflow/e2e/keras/BUILD b/integrations/tensorflow/e2e/keras/BUILD
index 6bff6e4..7b21bf8 100644
--- a/integrations/tensorflow/e2e/keras/BUILD
+++ b/integrations/tensorflow/e2e/keras/BUILD
@@ -12,11 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Test coverage across backends for e2e tests is defined directly in the BUILD
+# files. A coverage table generated from this file can be viewed here:
+#   https://google.github.io/iree/TensorFlowE2ECoverage
+# Updates made to test suite names should also be reflected here:
+#   https://github.com/google/iree/blob/master/scripts/update_e2e_coverage.py
+
 load(
     "//bindings/python:build_defs.oss.bzl",
     "INTREE_TENSORFLOW_PY_DEPS",
     "NUMPY_DEPS",
-    "iree_py_test",
 )
 load(
     "//integrations/tensorflow/e2e/keras:iree_vision_test_suite.bzl",
@@ -32,6 +37,47 @@
     licenses = ["notice"],  # Apache 2.0
 )
 
+# @unused
+DOC = """
+vision_model_test_manual is for manual testing of all Keras vision models.
+The test only runs manually, with all parameters specified explicitly, for example:
+bazel run -c opt integrations/tensorflow/e2e/keras:vision_model_test_manual -- \
+--override_backends=tf,iree_vmla,iree_llvmjit \
+--data=imagenet \
+--include_top=1 \
+--url=https://storage.googleapis.com/iree_models/ \
+--model=ResNet50
+
+Command arguments description:
+--override_backends: can be combination of these: tf,iree_vmla,iree_llvmjit
+--data: can be 'imagenet' or 'cifar10'.
+    imagenet - input image size (1, 224, 224, 3)
+    cifar10 - input image size (1, 32, 32, 3) - used for quick tests; it needs
+            pretrained weights, which we provide for ResNet50, MobileNet, MobileNetV2
+--include_top: can be 1 or 0. 1 includes the top layer, 0 does not
+--url: only needed for cifar10 models, to load weights from https://storage.googleapis.com/iree_models/
+       the imagenet pretrained weights URL is specified by Keras
+--model: supports ResNet50, MobileNet, MobileNetV2, ResNet101, ResNet152,
+    ResNet50V2, ResNet101V2, ResNet152V2, VGG16, VGG19, Xception,
+    InceptionV3, InceptionResNetV2, DenseNet121, DenseNet169,
+    DenseNet201, NASNetMobile, NASNetLarge
+    All of the above models work with the 'imagenet' data set.
+    ResNet50, MobileNet, and MobileNetV2 work with both the 'imagenet' and 'cifar10' data sets.
+"""
+
+[
+    py_binary(
+        name = src.replace(".py", "_manual"),
+        srcs = [src],
+        main = src,
+        python_version = "PY3",
+        deps = INTREE_TENSORFLOW_PY_DEPS + NUMPY_DEPS + [
+            "//integrations/tensorflow/bindings/python/pyiree/tf/support",
+        ],
+    )
+    for src in glob(["*_test.py"])
+]
+
 SPECIAL_CASES = [
     "vision_model_test.py",
 ]
@@ -71,7 +117,7 @@
 )
 
 iree_e2e_test_suite(
-    name = "keras",
+    name = "keras_tests",
     backends_to_srcs = {
         "tf_also": TF_PASSING,
         "iree_vmla": VMLA_PASSING,
@@ -85,7 +131,7 @@
 )
 
 iree_e2e_test_suite(
-    name = "keras_failing",
+    name = "keras_tests_failing",
     backends_to_srcs = {
         "iree_vmla": VMLA_FAILING,
         "iree_llvmjit": LLVM_FAILING,
@@ -102,52 +148,8 @@
     ],
 )
 
-# @unused
-DOC = """
-vision_models_test is for manual testing of all keras vision models.
-Test will run only manually with all parameters specified manually, for example:
-bazel run -c opt integrations/tensorflow/e2e/keras/vision_models_test -- \
---override_backends=tf,iree_vmla,iree_llvmjit \
---data=imagenet \
---include_top=1 \
---url=https://storage.googleapis.com/iree_models/ \
---model=ResNet50
-
-Command arguments description:
---override_backends: can be combination of these: tf,iree_vmla,iree_llvmjit
---data: can be 'imagenet' or 'cifar10'.
-    imagenet - input image size (1, 224, 224, 3)
-    cifar10 - input image size (1, 32, 32, 3) - it is used for quick tests
-            and needs pretrained weights, we pretrained models: ResNet50, MobileNet, MobileNetV2
---include_top: can be 1 or 0. Include top layer 1, not include top layer 0
---url: we need it only for cifar10 models to load weights from https://storage.googleapis.com/iree_models/
-       imagenet pretrained weights url is specified by keras
---model: supports ResNet50, MobileNet, MobileNetV2, ResNet101, ResNet152,
-    ResNet50V2, ResNet101V2, ResNet152V2, VGG16, VGG19, Xception,
-    InceptionV3, InceptionResNetV2, DenseNet121, DenseNet169,
-    DenseNet201, NASNetMobile, NASNetLarge
-    All above models works with 'imagenet' data sets.
-    ResNet50, MobileNet, MobileNetV2 work with both 'imagenet' and 'cifar10' data sets.
-"""
-
-iree_py_test(
-    name = "vision_models_test",
-    srcs = ["vision_model_test.py"],
-    main = "vision_model_test.py",
-    python_version = "PY3",
-    tags = [
-        "external",
-        "manual",
-        "nokokoro",
-        "notap",
-    ],
-    deps = INTREE_TENSORFLOW_PY_DEPS + NUMPY_DEPS + [
-        "//integrations/tensorflow/bindings/python/pyiree/tf/support",
-    ],
-)
-
 iree_vision_test_suite(
-    name = "vision_models",
+    name = "vision_internal_tests",
     datasets = ["cifar10"],
     models_to_backends = {
         "ResNet50": [
@@ -165,7 +167,7 @@
 )
 
 iree_vision_test_suite(
-    name = "vision_models_external",
+    name = "vision_external_tests",
     datasets = [
         "cifar10",
         "imagenet",
@@ -197,9 +199,8 @@
 )
 
 iree_vision_test_suite(
-    # TODO: Combine this suite with keras_vision_models_external once these
-    # tests pass.
-    name = "vision_models_external_failing",
+    # TODO: Combine this suite with vision_external_tests once these tests pass.
+    name = "vision_external_tests_failing",
     datasets = [
         "cifar10",
         "imagenet",
diff --git a/integrations/tensorflow/e2e/keras/iree_vision_test_suite.bzl b/integrations/tensorflow/e2e/keras/iree_vision_test_suite.bzl
index b2609fb..bd6aae2 100644
--- a/integrations/tensorflow/e2e/keras/iree_vision_test_suite.bzl
+++ b/integrations/tensorflow/e2e/keras/iree_vision_test_suite.bzl
@@ -67,11 +67,11 @@
             for backend in backends:
                 for dataset in datasets:
                     test_backends = [reference_backend, backend]
-                    test_name = "{}_{}_{}_{}".format(
+                    test_name = "{}_{}_{}__{}".format(
                         name,
                         model,
                         dataset,
-                        "_".join(test_backends),
+                        "__".join(test_backends),
                     )
                     tests.append(test_name)
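With the double-underscore separator, a generated test target name takes the form <suite>_<model>_<dataset>__<reference_backend>__<backend>, which lets the backend pair be split back out unambiguously (scripts/update_e2e_coverage.py relies on this). A minimal sketch, assuming hypothetical values:

    name, model, dataset = "vision_external_tests", "ResNet50", "cifar10"
    test_backends = ["tf", "iree_vmla"]  # [reference_backend, backend]
    test_name = "{}_{}_{}__{}".format(name, model, dataset, "__".join(test_backends))
    # test_name == "vision_external_tests_ResNet50_cifar10__tf__iree_vmla"
    # The coverage script later recovers the pair by splitting on "__tf__":
    print(test_name.split("__tf__"))  # ['vision_external_tests_ResNet50_cifar10', 'iree_vmla']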
 
diff --git a/integrations/tensorflow/e2e/keras/train/BUILD b/integrations/tensorflow/e2e/keras/train/BUILD
index 534ac3e..1160ab2 100644
--- a/integrations/tensorflow/e2e/keras/train/BUILD
+++ b/integrations/tensorflow/e2e/keras/train/BUILD
@@ -27,8 +27,9 @@
     licenses = ["notice"],  # Apache 2.0
 )
 
+# TODO(meadowlark): Refactor this rule to match iree_vision_test_suite.bzl
 iree_train_test_suite(
-    name = "train",
+    name = "train_tests",
     configurations = [
         # tuples of (optimizer, backends)
         ("sgd", "tf"),
@@ -45,7 +46,7 @@
 )
 
 iree_train_test_suite(
-    name = "train_failing",
+    name = "train_tests_failing",
     configurations = [
         # tuples of (optimizer, backends)
         # TODO: Combine this suite with keras_model_train once these tests pass.
diff --git a/iree/compiler/Conversion/CodegenUtils/BUILD b/iree/compiler/Conversion/CodegenUtils/BUILD
index ff2ef35..a414146 100644
--- a/iree/compiler/Conversion/CodegenUtils/BUILD
+++ b/iree/compiler/Conversion/CodegenUtils/BUILD
@@ -24,15 +24,26 @@
     srcs = [
         "FunctionUtils.cpp",
         "MarkerUtils.cpp",
+        "MatmulCodegenStrategy.cpp",
     ],
     hdrs = [
         "FunctionUtils.h",
         "MarkerUtils.h",
+        "MatmulCodegenStrategy.h",
     ],
     deps = [
         "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:Affine",
+        "@llvm-project//mlir:Analysis",
         "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:LinalgOps",
         "@llvm-project//mlir:LinalgTransforms",
+        "@llvm-project//mlir:Pass",
+        "@llvm-project//mlir:SCFDialect",
+        "@llvm-project//mlir:StandardOps",
         "@llvm-project//mlir:Support",
+        "@llvm-project//mlir:Transforms",
+        "@llvm-project//mlir:VectorOps",
+        "@llvm-project//mlir:VectorToSCF",
     ],
 )
diff --git a/iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.cpp b/iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.cpp
new file mode 100644
index 0000000..31c7d29
--- /dev/null
+++ b/iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.cpp
@@ -0,0 +1,278 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// -----------------------------------------------------------------------------
+// This is a copy of the matmul strategy infrastructure existing in mlir_edge.
+// This version will be removed once this gets upstreamed to common mlir.
+// Please limit changes in this code to minor ones, or make sure the changes
+// are applied in mlir_edge as well.
+
+#include "iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/SCF/Utils.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Vector/VectorOps.h"
+#include "mlir/Dialect/Vector/VectorTransforms.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BlockAndValueMapping.h"
+#include "mlir/IR/Dominance.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/OperationSupport.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/StandardTypes.h"
+#include "mlir/IR/Value.h"
+#include "mlir/IR/Visitors.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;          // NOLINT
+using namespace mlir::linalg;  // NOLINT
+
+#define DEBUG_TYPE "matmul-codegen-strategy"
+
+//===----------------------------------------------------------------------===//
+// TODO: Clean up and upstream these into core. Please ignore for now!
+//===----------------------------------------------------------------------===//
+static void hoistRedundantCopies(FuncOp func) {
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    func.walk([&](linalg::FillOp op) {
+      auto loop = op.getParentOfType<scf::ForOp>();
+      if (!loop) return;
+
+      for (auto operand : op.getOperands())
+        if (!loop.isDefinedOutsideOfLoop(operand)) return;
+
+      // Hoist fill before.
+      op.getOperation()->moveBefore(loop);
+      changed = true;
+    });
+
+    func.walk([&](linalg::CopyOp op) {
+      auto loop = op.getParentOfType<scf::ForOp>();
+      if (!loop) return;
+
+      for (auto operand : op.getOperands())
+        if (!loop.isDefinedOutsideOfLoop(operand)) return;
+
+      Value sourceView = op.getInput(0);
+      while (auto subViewOp = sourceView.getDefiningOp<SubViewOp>())
+        sourceView = subViewOp.getViewSource();
+
+      // Source traces back to a block argument.
+      if (sourceView.isa<BlockArgument>()) {
+        op.getOperation()->moveBefore(loop);
+      } else {
+        assert(sourceView.getDefiningOp<ViewOp>() ||
+               sourceView.getDefiningOp<AllocOp>() ||
+               sourceView.getDefiningOp<AllocaOp>());
+        op.getOperation()->moveAfter(loop);
+      }
+      changed = true;
+    });
+  }
+}
+
+/// Substitute an scf.for loop (%lb to %ub step %step) by an AffineExpr
+/// expressing `%lb + %step * new_dim`, where:
+/// 1. the AffineExpr for %lb is either an AffineConstantExpr or an
+/// AffineDimExpr depending on whether the value is constant or not.
+/// 2. the AffineExpr for %step is either an AffineConstantExpr or an
+/// AffineSymbolExpr depending on whether the value is constant or not.
+///
+static void substitute(scf::ForOp forOp, SmallVectorImpl<AffineExpr> &exprs,
+                       SmallVectorImpl<Value> &dims,
+                       SmallVectorImpl<Value> &symbols) {
+  MLIRContext *ctx = forOp.getContext();
+  auto lbConstant = forOp.lowerBound().getDefiningOp<ConstantIndexOp>();
+  AffineExpr lb = lbConstant ? getAffineConstantExpr(lbConstant.getValue(), ctx)
+                             : getAffineDimExpr(dims.size(), ctx);
+
+  auto stepConstant = forOp.step().getDefiningOp<ConstantIndexOp>();
+  AffineExpr step = stepConstant
+                        ? getAffineConstantExpr(stepConstant.getValue(), ctx)
+                        : getAffineSymbolExpr(symbols.size(), ctx);
+
+  if (!lbConstant) dims.push_back(forOp.lowerBound());
+  if (!stepConstant) symbols.push_back(forOp.step());
+  exprs.push_back(lb + step * getAffineDimExpr(dims.size(), ctx));
+
+  auto ubConstant = forOp.upperBound().getDefiningOp<ConstantIndexOp>();
+  AffineExpr ub = ubConstant ? getAffineConstantExpr(ubConstant.getValue(), ctx)
+                             : getAffineDimExpr(dims.size(), ctx);
+  if (!ubConstant) dims.push_back(forOp.upperBound());
+  exprs.push_back(ub);
+
+  dims.push_back(forOp.getInductionVar());
+}
+
+/// Traverse the operands of `minOp`, substituting scf.for induction variables
+/// and nested AffineMinOp results, and collect the corresponding dim values.
+static void substitute(AffineMinOp minOp, SmallVectorImpl<AffineExpr> &exprs,
+                       SmallVectorImpl<Value> &dims,
+                       SmallVectorImpl<Value> &symbols) {
+  MLIRContext *ctx = minOp.getContext();
+  for (Value v : minOp.getDimOperands()) {
+    if (auto forOp = scf::getForInductionVarOwner(v)) {
+      substitute(forOp, exprs, dims, symbols);
+      continue;
+    }
+    if (auto parentMinOp = v.getDefiningOp<AffineMinOp>()) {
+      substitute(parentMinOp, exprs, dims, symbols);
+      continue;
+    }
+    exprs.push_back(getAffineDimExpr(dims.size(), ctx));
+    dims.push_back(v);
+  }
+}
+
+/// Perform folding of chains of AffineMinOp.
+struct AffineMinCanonicalizationPattern : public OpRewritePattern<AffineMinOp> {
+  using OpRewritePattern<AffineMinOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(AffineMinOp minOp,
+                                PatternRewriter &rewriter) const override;
+};
+
+LogicalResult AffineMinCanonicalizationPattern::matchAndRewrite(
+    AffineMinOp minOp, PatternRewriter &rewriter) const {
+  LLVM_DEBUG(llvm::dbgs() << "\nCanonicalize AffineMin: "
+                          << *minOp.getOperation() << "\n");
+
+  int64_t min = std::numeric_limits<int64_t>::max();
+  for (auto e : minOp.map().getResults())
+    if (auto cstExpr = e.dyn_cast<AffineConstantExpr>())
+      min = std::min(min, cstExpr.getValue());
+  if (min == std::numeric_limits<int64_t>::max()) return failure();
+
+  SmallVector<AffineExpr, 4> exprs;
+  SmallVector<Value, 4> dims, symbols;
+  substitute(minOp, exprs, dims, symbols);
+
+  SmallVector<Value, 4> operands = dims;
+  operands.append(symbols.begin(), symbols.end());
+
+  MLIRContext *ctx = minOp.getContext();
+  auto map = AffineMap::get(dims.size(), symbols.size(), exprs, ctx);
+  LLVM_DEBUG(llvm::dbgs() << "Substitution map: " << map << "\n");
+
+  SmallVector<AffineExpr, 4> modExprs;
+  for (unsigned idx = 0, e = map.getNumResults(); idx < e; ++idx)
+    modExprs.push_back(getAffineDimExpr(idx, ctx) % min);
+  map = AffineMap::get(map.getNumResults(), 0, modExprs, ctx).compose(map);
+  canonicalizeMapAndOperands(&map, &operands);
+  map = simplifyAffineMap(map);
+
+  LLVM_DEBUG(llvm::dbgs() << "Post mod: " << map << "\n";
+             llvm::interleaveComma(operands, llvm::dbgs()));
+
+  if (!llvm::all_of(map.getResults(), [](AffineExpr e) {
+        if (auto cst = e.dyn_cast<AffineConstantExpr>())
+          return cst.getValue() == 0;
+        return false;
+      }))
+    return failure();
+
+  rewriter.replaceOpWithNewOp<ConstantIndexOp>(minOp, min);
+  return success();
+}
+//===----------------------------------------------------------------------===//
+// END TODO
+//===----------------------------------------------------------------------===//
+
+void MatmulCodegenStrategy::transform(FuncOp func) const {
+  MLIRContext *context = func.getContext();
+  // Emplace patterns one at a time while also maintaining a simple chained
+  // state transition.
+  unsigned stepCount = 0;
+  SmallVector<OwningRewritePatternList, 4> stage1Patterns;
+  auto zeroState = Identifier::get(std::to_string(stepCount), context);
+  auto currentState = zeroState;
+  for (auto &t : transformationSequence) {
+    auto nextState = Identifier::get(std::to_string(++stepCount), context);
+    auto marker = (currentState == zeroState)
+                      ? linalg::LinalgMarker({}, nextState)
+                      : linalg::LinalgMarker(currentState, nextState);
+    stage1Patterns.emplace_back(t->buildRewritePatterns(context, marker));
+    currentState = nextState;
+  }
+
+  OwningRewritePatternList stage2Patterns =
+      linalg::getLinalgTilingCanonicalizationPatterns(context);
+  stage2Patterns.insert<AffineMinCanonicalizationPattern>(context);
+
+  auto stage3Transforms = [this](Operation *op) {
+    // Some of these may be too aggressive as stage-3 transforms applied after
+    // each stage-1 application, and may have to be split out into passes run
+    // after the staged pattern application (TBD).
+    if (hoistInvariantCode) {
+      PassManager pm(op->getContext());
+      pm.addPass(createLoopInvariantCodeMotionPass());
+      if (failed(pm.run(op->getParentOfType<ModuleOp>())))
+        llvm_unreachable("Unexpected failure in cleanup pass pipeline.");
+      hoistViewAllocOps(cast<FuncOp>(op));
+      hoistRedundantVectorTransfers(cast<FuncOp>(op));
+      hoistRedundantCopies(cast<FuncOp>(op));
+    }
+    promoteSingleIterationLoops(cast<FuncOp>(op));
+    return success();
+  };
+  linalg::applyStagedPatterns(func, stage1Patterns, stage2Patterns,
+                              stage3Transforms);
+  if (lowering != nullptr) lowering(func);
+}
+
+// Parametric lowering of vector contract for CPU target.
+static void cpuLowering(
+    FuncOp func, const vector::VectorTransformsOptions &vectorTransformsOptions,
+    const VectorTransferToSCFOptions &vectorToSCFOptions) {
+  // Programmatic controlled lowering of vector.contract only.
+  MLIRContext *context = func.getContext();
+  OwningRewritePatternList vectorContractLoweringPatterns;
+  vectorContractLoweringPatterns
+      .insert<ContractionOpToOuterProductOpLowering,
+              ContractionOpToMatmulOpLowering, ContractionOpLowering>(
+          vectorTransformsOptions, context);
+
+  applyPatternsAndFoldGreedily(func, vectorContractLoweringPatterns);
+
+  // Programmatic controlled lowering of vector.transfer only.
+  OwningRewritePatternList vectorToLoopsPatterns;
+  populateVectorToSCFConversionPatterns(vectorToLoopsPatterns, context,
+                                        vectorToSCFOptions);
+  applyPatternsAndFoldGreedily(func, vectorToLoopsPatterns);
+}
+
+MatmulCodegenStrategy &MatmulCodegenStrategy::setDefaultCPULowering() {
+  auto lowering = [this](FuncOp func) {
+    cpuLowering(func, vectorTransformsOptions, vectorToSCFOptions);
+  };
+  return setLoweringFunction(lowering);
+}
diff --git a/iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.h b/iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.h
new file mode 100644
index 0000000..9f4645c
--- /dev/null
+++ b/iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.h
@@ -0,0 +1,197 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MLIR_EDGE_BENCHMARKS_STRATEGIES_MATMULCODEGENSTRATEGIES_H_
+#define MLIR_EDGE_BENCHMARKS_STRATEGIES_MATMULCODEGENSTRATEGIES_H_
+
+#include <functional>
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
+#include "mlir/Dialect/Vector/VectorOps.h"
+#include "mlir/Dialect/Vector/VectorTransforms.h"
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+
+class FuncOp;
+
+/// Abstract Transformation class applied in a sequence that also handles state
+/// through markers.
+struct Transformation {
+  virtual ~Transformation() = default;
+  virtual OwningRewritePatternList buildRewritePatterns(
+      MLIRContext *context, linalg::LinalgMarker m) = 0;
+  linalg::LinalgMarker marker;
+};
+
+/// Tiling transformation enqueues a particular stage-1 pattern for
+/// `Tile<LinalgOpType>` with the appropriate `options`.
+// TODO: variadic LinalgOpTypes.
+template <typename LinalgOpType>
+struct Tile : public Transformation {
+  explicit Tile(linalg::LinalgTilingOptions options) : options(options) {}
+
+  OwningRewritePatternList buildRewritePatterns(
+      MLIRContext *context, linalg::LinalgMarker m) override {
+    OwningRewritePatternList tilingPatterns;
+    tilingPatterns.insert<linalg::LinalgTilingPattern<LinalgOpType>>(
+        context, options, m);
+    return tilingPatterns;
+  }
+
+ private:
+  linalg::LinalgTilingOptions options;
+};
+
+/// Promotion transformation enqueues a particular stage-1 pattern for
+/// `Promote<LinalgOpType>` with the appropriate `options`.
+// TODO: variadic LinalgOpTypes.
+template <typename LinalgOpType>
+struct Promote : public Transformation {
+  explicit Promote(linalg::LinalgPromotionOptions options) : options(options) {}
+
+  OwningRewritePatternList buildRewritePatterns(
+      MLIRContext *context, linalg::LinalgMarker m) override {
+    OwningRewritePatternList promotionPatterns;
+    promotionPatterns.insert<linalg::LinalgPromotionPattern<LinalgOpType>>(
+        context, options, m);
+    return promotionPatterns;
+  }
+
+ private:
+  linalg::LinalgPromotionOptions options;
+};
+
+/// Vectorization transformation enqueues a particular stage-1 pattern for
+/// `LinalgVectorizationPattern<LinalgOpType>` as well as copy to vector
+/// transfer rewrite forwarding patterns.
+// TODO: variadic LinalgOpTypes.
+template <typename LinalgOpType>
+struct Vectorize : public Transformation {
+  OwningRewritePatternList buildRewritePatterns(
+      MLIRContext *context, linalg::LinalgMarker m) override {
+    OwningRewritePatternList vectorizationPatterns;
+    // FillOp may interfere with forwarding patterns atm, so we bump up the
+    // priority of LinalgCopyVTRForwardingPattern /
+    // LinalgCopyVTWForwardingPattern.
+    vectorizationPatterns
+        .insert<linalg::LinalgVectorizationPattern<LinalgOpType>>(context, m);
+    vectorizationPatterns.insert<linalg::LinalgCopyVTRForwardingPattern,
+                                 linalg::LinalgCopyVTWForwardingPattern>(
+        context, /*benefit=*/2);
+    return vectorizationPatterns;
+  }
+};
+
+/// Matmul-specific strategy object controls how a linalg.matmul is
+/// progressively lowered.
+/// The strategy uses a 3-level staged patterns strategy which allows ordering
+/// transformations by using the Linalg `applyStagedPatterns` function, where:
+///   1. The first stage consists of the successive `tile`, `promote` and
+///   `vectorize` patterns, applied sequentially.
+///   2. The second stage consists of common local canonicalization patterns
+///   that are applied eagerly after each stage-1 pattern.
+///   3. The third stage consists of more global transformations, also applied
+///   eagerly after all stage-2 patterns (e.g. hoisting loop-invariant code).
+struct MatmulCodegenStrategy {
+  /// Append a pattern to add a level of tiling for `LinalgOpType` with tiling
+  /// `options`.
+  template <typename LinalgOpType>
+  MatmulCodegenStrategy &tile(linalg::LinalgTilingOptions options) {
+    transformationSequence.emplace_back(new Tile<LinalgOpType>(options));
+    return *this;
+  }
+  /// Conditionally append a pattern to add a level of tiling for `LinalgOpType`
+  /// with tiling `options`.
+  template <typename LinalgOpType>
+  MatmulCodegenStrategy &tileIf(bool b, linalg::LinalgTilingOptions options) {
+    return b ? tile<LinalgOpType>(options) : *this;
+  }
+  /// Append a pattern to add a level of promotion for `LinalgOpType` with
+  /// promotion `options`.
+  template <typename LinalgOpType>
+  MatmulCodegenStrategy &promote(linalg::LinalgPromotionOptions options) {
+    transformationSequence.emplace_back(new Promote<LinalgOpType>(options));
+    return *this;
+  }
+  /// Conditionally append a pattern to add a level of promotion for
+  /// `LinalgOpType` with promotion `options`.
+  template <typename LinalgOpType>
+  MatmulCodegenStrategy &promoteIf(bool b,
+                                   linalg::LinalgPromotionOptions options) {
+    return b ? promote<LinalgOpType>(options) : *this;
+  }
+  /// Append a pattern to rewrite `LinalgOpType` as a vector operation.
+  template <typename LinalgOpType>
+  MatmulCodegenStrategy &vectorize() {
+    transformationSequence.emplace_back(new Vectorize<LinalgOpType>());
+    return *this;
+  }
+  /// Conditionally append a pattern to rewrite `LinalgOpType` as a vector
+  /// operation.
+  template <typename LinalgOpType>
+  MatmulCodegenStrategy &vectorizeIf(bool b) {
+    return b ? vectorize<LinalgOpType>() : *this;
+  }
+  /// Configure the post staged-patterns late vector transformations.
+  MatmulCodegenStrategy &setVectorTransformsOptions(
+      vector::VectorTransformsOptions options) {
+    vectorTransformsOptions = options;
+    return *this;
+  }
+  /// Configure the post staged-patterns late vector.transfer to scf conversion.
+  MatmulCodegenStrategy &setVectorTransferToSCFOptions(
+      VectorTransferToSCFOptions options) {
+    vectorToSCFOptions = options;
+    return *this;
+  }
+  /// Configure whether invariant code and redundant copies are hoisted as part
+  /// of the post staged-patterns cleanups.
+  MatmulCodegenStrategy &setHoistInvariantCode(bool b) {
+    hoistInvariantCode = b;
+    return *this;
+  }
+
+  /// Apply the transformation patterns in sequence with cleanup transformations
+  /// interleaved.
+  void transform(FuncOp func) const;
+
+  /// Set a function applying the lowering strategy. Different targets need to
+  /// use different lowerings.
+  MatmulCodegenStrategy &setLoweringFunction(std::function<void(FuncOp)> f) {
+    lowering = f;
+    return *this;
+  }
+
+  // Enable default lowering strategy for CPU.
+  MatmulCodegenStrategy &setDefaultCPULowering();
+
+ private:
+  LogicalResult postPatternTransforms(Operation *func) const;
+
+  std::function<void(FuncOp)> lowering = nullptr;
+  bool hoistInvariantCode = false;
+  vector::VectorTransformsOptions vectorTransformsOptions;
+  VectorTransferToSCFOptions vectorToSCFOptions;
+  SmallVector<std::unique_ptr<Transformation>, 4> transformationSequence;
+};
+
+}  // namespace mlir
+
+#endif  // MLIR_EDGE_BENCHMARKS_STRATEGIES_MATMULCODEGENSTRATEGIES_H_
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
index 1df92f2..3af7f8c 100644
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
@@ -34,7 +34,9 @@
 bool isUnsupportedFusionOp(Operation *op) {
   return isa<xla_hlo::DotOp>(op) || isa<xla_hlo::ConvOp>(op) ||
          isa<xla_hlo::ReduceOp>(op) || isa<xla_hlo::PadOp>(op) ||
-         isa<xla_hlo::ReduceWindowOp>(op);
+         isa<xla_hlo::ReduceWindowOp>(op) ||
+         isa<xla_hlo::TorchIndexSelectOp>(op) || isa<xla_hlo::SliceOp>(op) ||
+         isa<xla_hlo::ConcatenateOp>(op);
 }
 
 # Allowlist of ops that materialize to an index-permuted copy of some kind
diff --git a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
index c83cb09..53878d6 100644
--- a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
@@ -199,7 +199,8 @@
       // TODO(b/144530470): replace with tablegen attributes/interfaces.
       if (isa<xla_hlo::ReduceOp>(op) || isa<xla_hlo::DotOp>(op) ||
           isa<xla_hlo::ConvOp>(op) || isa<xla_hlo::ReduceWindowOp>(op) ||
-          isa<xla_hlo::PadOp>(op)) {
+          isa<xla_hlo::PadOp>(op) || isa<xla_hlo::TorchIndexSelectOp>(op) ||
+          isa<xla_hlo::SliceOp>(op) || isa<xla_hlo::ConcatenateOp>(op)) {
         return false;
       }
     }
diff --git a/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir b/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir
index ef76d07..9486f96 100644
--- a/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir
+++ b/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir
@@ -1,5 +1,8 @@
 // RUN: iree-opt -split-input-file -iree-drop-compiler-hints %s | IreeFileCheck --implicit-check-not="iree.do_not_optimize" %s
 
+// This file is used as an example in docs/developer_overview.md.
+// If you move or delete it, please update the documentation accordingly.
+
 // CHECK-LABEL: @constant
 func @constant() -> i32 {
   // CHECK-NEXT: %[[C1:.+]] = constant 1
diff --git a/iree/hal/vmla/op_kernels.h b/iree/hal/vmla/op_kernels.h
index 093d52d..b7cb8b7 100644
--- a/iree/hal/vmla/op_kernels.h
+++ b/iree/hal/vmla/op_kernels.h
@@ -442,6 +442,6 @@
 }  // namespace iree
 
 #include "iree/hal/vmla/op_kernels_generic.h"  // IWYU pragma: export
-#include "iree/hal/vmla/op_kernels_ruy.h"      // IWYU pragma: export
+#include "iree/hal/vmla/op_kernels_ruy.h"  // IWYU pragma: export
 
 #endif  // IREE_HAL_VMLA_OP_KERNELS_H_
diff --git a/iree/hal/vulkan/CMakeLists.txt b/iree/hal/vulkan/CMakeLists.txt
index e534073..663437d 100644
--- a/iree/hal/vulkan/CMakeLists.txt
+++ b/iree/hal/vulkan/CMakeLists.txt
@@ -18,6 +18,11 @@
 option(IREE_HAL_VULKAN_EMULATE_TIMELINE_SEMAPHORE
        "Emulates timeline semaphore with binary semaphores and fences" OFF)
 
+# Unconditionally turn on the emulated timeline semaphore for Android.
+if(CMAKE_CROSSCOMPILING AND "${CMAKE_SYSTEM_NAME}" MATCHES "Android")
+  set(IREE_HAL_VULKAN_EMULATE_TIMELINE_SEMAPHORE ON CACHE BOOL "" FORCE)
+endif()
+# But force it off if the Vulkan HAL backend is not being compiled in.
 if(NOT IREE_HAL_DRIVER_VULKAN)
   set(IREE_HAL_VULKAN_EMULATE_TIMELINE_SEMAPHORE OFF CACHE BOOL "" FORCE)
 endif()
diff --git a/iree/tools/CMakeLists.txt b/iree/tools/CMakeLists.txt
index fce935b..5267dec 100644
--- a/iree/tools/CMakeLists.txt
+++ b/iree/tools/CMakeLists.txt
@@ -101,6 +101,7 @@
       iree::compiler::Dialect::VM::Tools
     LINKOPTS
       "-lpthread"
+    HOSTONLY
   )
 endif()
 
@@ -253,6 +254,31 @@
     PUBLIC
   )
 
+  iree_cc_library(
+    NAME
+      iree_translate_main
+    SRCS
+      "translate_main.cc"
+    DEPS
+      ::init_compiler_modules
+      ::init_iree_passes_and_dialects
+      ::init_mlir_passes_and_dialects
+      ::init_targets
+      ::init_translations
+      ::init_xla_dialects
+      LLVMSupport
+      MLIRIR
+      MLIRSCFTransforms
+      MLIRPass
+      MLIRSupport
+      MLIRTranslation
+      iree::compiler::Conversion::init_conversions
+      iree::compiler::Dialect::VM::Target::Bytecode
+      iree::compiler::Dialect::VM::Target::init_targets
+      iree::compiler::Translation::IREEVM
+    PUBLIC
+  )
+
   iree_cc_binary(
     NAME
       iree-opt
@@ -260,6 +286,7 @@
       iree-opt
     DEPS
       ::iree_opt_main
+    HOSTONLY
   )
 
   iree_cc_binary(
@@ -303,33 +330,14 @@
       iree::vm::bytecode_module
       iree::vm::value
       ${IREE_HAL_DRIVER_MODULES}
+    HOSTONLY
   )
+endif(${IREE_BUILD_COMPILER})
 
-  iree_cc_library(
-    NAME
-      iree_translate_main
-    SRCS
-      "translate_main.cc"
-    DEPS
-      ::init_compiler_modules
-      ::init_iree_passes_and_dialects
-      ::init_mlir_passes_and_dialects
-      ::init_targets
-      ::init_translations
-      ::init_xla_dialects
-      LLVMSupport
-      MLIRIR
-      MLIRSCFTransforms
-      MLIRPass
-      MLIRSupport
-      MLIRTranslation
-      iree::compiler::Conversion::init_conversions
-      iree::compiler::Dialect::VM::Target::Bytecode
-      iree::compiler::Dialect::VM::Target::init_targets
-      iree::compiler::Translation::IREEVM
-    PUBLIC
-  )
-
+# If cross-compiling, we need to declare iree-translate under host configuration
+# unconditionally because we need to run it on the host to generate VM modules
+# for tests.
+if(${IREE_BUILD_COMPILER} OR CMAKE_CROSSCOMPILING)
   iree_cc_binary(
     NAME
       iree-translate
@@ -337,6 +345,7 @@
       iree-translate
     DEPS
       ::iree_translate_main
+    HOSTONLY
   )
 endif()
 
diff --git a/iree/vm/test/BUILD b/iree/vm/test/BUILD
index 28312bc..fb9848d 100644
--- a/iree/vm/test/BUILD
+++ b/iree/vm/test/BUILD
@@ -35,13 +35,11 @@
 iree_bytecode_module(
     name = "arithmetic_ops",
     src = "arithmetic_ops.mlir",
-    cc_namespace = "iree::vm::test",
     flags = ["-iree-vm-ir-to-bytecode-module"],
 )
 
 iree_bytecode_module(
     name = "control_flow_ops",
     src = "control_flow_ops.mlir",
-    cc_namespace = "iree::vm::test",
     flags = ["-iree-vm-ir-to-bytecode-module"],
 )
diff --git a/iree/vm/test/CMakeLists.txt b/iree/vm/test/CMakeLists.txt
index 7f9a4ae..6c189b6 100644
--- a/iree/vm/test/CMakeLists.txt
+++ b/iree/vm/test/CMakeLists.txt
@@ -35,8 +35,6 @@
     arithmetic_ops
   SRC
     "arithmetic_ops.mlir"
-  CC_NAMESPACE
-    "iree::vm::test"
   FLAGS
     "-iree-vm-ir-to-bytecode-module"
   PUBLIC
@@ -47,8 +45,6 @@
     control_flow_ops
   SRC
     "control_flow_ops.mlir"
-  CC_NAMESPACE
-    "iree::vm::test"
   FLAGS
     "-iree-vm-ir-to-bytecode-module"
   PUBLIC
diff --git a/scripts/prepare_doc_publication.py b/scripts/prepare_doc_publication.py
index c18d79c..a4bbad9 100755
--- a/scripts/prepare_doc_publication.py
+++ b/scripts/prepare_doc_publication.py
@@ -56,10 +56,12 @@
     'getting_started_windows_vulkan.md': 'Windows with Vulkan',
     'getting_started_macos_bazel.md': 'macOS with Bazel',
     'getting_started_macos_cmake.md': 'macOS with CMake',
+    'getting_started_android_cmake.md': 'Android with CMake',
     'generic_vulkan_env_setup.md': 'Generic Vulkan Setup',
     'getting_started_python.md': 'Python',
     'cmake_options_and_variables.md': 'CMake Options and Variables',
     'op_coverage.md': 'XLA HLO Operation Coverage',
+    'e2e_coverage.md': 'TensorFlow E2E Coverage',
     'roadmap.md': 'Short-term Focus Areas',
     'roadmap_design.md': 'Long-term Design Roadmap',
     'iree_community.md': 'Community',
@@ -81,12 +83,14 @@
     'getting_started_windows_vulkan.md': 'GetStarted/WindowsVulkan',
     'getting_started_macos_cmake.md': 'GetStarted/macOSCMake',
     'getting_started_macos_vulkan.md': 'GetStarted/macOSVulkan',
+    'getting_started_android_cmake.md': 'GetStarted/AndroidCMake',
     'generic_vulkan_env_setup.md': 'GetStarted/GenericVulkanSetup',
     'getting_started_python.md': 'GetStarted/Python',
     'cmake_options_and_variables.md': 'GetStarted/CMakeOptionsVariables',
     'developer_overview.md': 'DeveloperOverview',
     'testing_guide.md': 'TestingGuide',
     'op_coverage.md': 'HLOOpCoverage',
+    'e2e_coverage.md': 'TensorFlowE2ECoverage',
     'roadmap.md': 'FocusAreas',
     'roadmap_design.md': 'DesignRoadmap',
     'iree_community.md': 'Community',
@@ -104,8 +108,9 @@
     'roadmap_design.md': 4,
     'roadmap.md': 5,
     'op_coverage.md': 6,
-    'testing_guide.md': 7,
-    'iree_community.md': 8,
+    'e2e_coverage.md': 7,
+    'testing_guide.md': 8,
+    'iree_community.md': 9,
 
     # Within 'Getting Started' use explicit ordering.
     # Alphabetical would put 'bazel' before 'cmake' and 'python' between 'linux'
@@ -118,9 +123,10 @@
     'getting_started_windows_vulkan.md': 6,
     'getting_started_macos_cmake.md': 7,
     'getting_started_macos_bazel.md': 8,
-    'getting_started_python.md': 9,
-    'generic_vulkan_env_setup.md': 10,
-    'cmake_options_and_variables.md': 11,
+    'getting_started_android_cmake.md': 9,
+    'getting_started_python.md': 10,
+    'generic_vulkan_env_setup.md': 11,
+    'cmake_options_and_variables.md': 12,
 }
 
 # A dictionary containing source directory to section tile mappings.
@@ -136,7 +142,10 @@
 }
 
 # A dictionary containing the supporting JavaScript files for each doc.
-JS_FILES_DICT = {'op_coverage.md': ['js/add_classes.js']}
+JS_FILES_DICT = {
+    'op_coverage.md': ['js/add_classes.js'],
+    'e2e_coverage.md': ['js/add_classes.js'],
+}
 
 
 def process_file(basedir, relpath, filename):
diff --git a/scripts/update_e2e_coverage.py b/scripts/update_e2e_coverage.py
new file mode 100755
index 0000000..32a3177
--- /dev/null
+++ b/scripts/update_e2e_coverage.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Updates coverage of TensorFlow e2e tests on all backends.
+
+Example usage: python3 update_e2e_coverage.py build-docs
+"""
+
+import argparse
+import collections
+import os
+import subprocess
+
+REFERENCE_BACKEND = 'tf'
+# Assumes that tests are expanded for the tf_also, iree_vmla, iree_llvmjit and
+# iree_vulkan backends.
+BACKENDS_TO_TITLES = collections.OrderedDict([
+    ('tf_also', 'tensorflow'),
+    ('iree_vmla', 'vmla'),
+    ('iree_llvmjit', 'llvm-ir'),
+    ('iree_vulkan', 'vulkan-spirv'),
+])
+
+TEST_SUITES_TO_HEADERS = {
+    '//integrations/tensorflow/e2e:e2e_tests':
+        'End to end TensorFlow tests',
+    '//integrations/tensorflow/e2e/keras:keras_tests':
+        'End to end tests written using tf.keras',
+    '//integrations/tensorflow/e2e/keras:vision_external_tests':
+        'End to end tests of tf.keras.applications vision models',
+}
+
+# Some test suites are generated from a single source. This allows us to point
+# to the right test file when generating test URLs.
+SINGLE_SOURCE_SUITES = {
+    '//integrations/tensorflow/e2e/keras:vision_external_tests':
+        'vision_model_test',
+}
+
+# The symbols to show in the table if the operation is supported or not.
+SUCCESS_ELEMENT = '<span class="success-table-element">✓</span>'
+FAILURE_ELEMENT = '<span class="failure-table-element">✗</span>'
+
+MAIN_URL = 'https://github.com/google/iree/tree/main'
+TARGETS_URL = os.path.join(MAIN_URL, 'iree/compiler/Dialect/HAL/Target')
+
+E2E_COVERAGE_DESCRIPTION = f"""# TensorFlow End to End Coverage
+There are three backend [targets]({TARGETS_URL}) in IREE:
+
+- vmla
+- llvm-ir
+- vulkan-spirv
+
+The table shows the supported TensorFlow functions and models on each backend.
+
+"""
+
+
+def parse_arguments():
+  """Parses command-line options."""
+  parser = argparse.ArgumentParser(
+      description='Generates a Markdown file for the TensorFlow e2e coverage table')
+  parser.add_argument(
+      'build_dir', metavar='BUILD_PATH', type=str, help='Base build directory.')
+
+  parsed_args = parser.parse_args()
+  if not os.path.isdir(parsed_args.build_dir):
+    parser.error('expected path to a directory')
+
+  return parsed_args
+
+
+def create_markdown_table(rows):
+  """Converts a 2D array to a Markdown table."""
+  return '\n'.join([' | '.join(row) for row in rows])
+
+
+def get_name_and_backend(test_string):
+  """Splits a pathless test target into its name and comparison backend."""
+  name, backend = test_string.split(f'__{REFERENCE_BACKEND}__')
+  return name, backend
+
+
+def get_test_targets(test_suite_path):
+  """Returns a list of test targets stripped of paths and suite names."""
+  # Check if the suite exists (which may not be true for failing suites)
+  target_dir = test_suite_path.split(':')[0]
+  query = ['bazel', 'query', f'{target_dir}/...']
+  targets = subprocess.check_output(query, stderr=subprocess.DEVNULL)
+  if test_suite_path not in targets.decode('ascii'):
+    return []
+
+  query = ['bazel', 'query', f'tests({test_suite_path})']
+  tests = subprocess.check_output(query, stderr=subprocess.DEVNULL)
+  tests = tests.decode('ascii').split('\n')
+  tests = list(filter(lambda s: s.startswith(f'{test_suite_path}_'), tests))
+  tests = [test.replace(f'{test_suite_path}_', '') for test in tests]
+  return tests
+
+
+def get_suite_metadata(test_suite):
+  """Gets all test names, and passing and failing test-backend pairs."""
+  passing = get_test_targets(test_suite)
+  failing = get_test_targets(f'{test_suite}_failing')
+  passing = [get_name_and_backend(test) for test in passing]
+  failing = [get_name_and_backend(test) for test in failing]
+  passing_names = [test[0] for test in passing]
+  failing_names = [test[0] for test in failing]
+  all_names = list(sorted(set(passing_names + failing_names)))
+  return all_names, passing, failing
+
+
+def get_name_element(test_suite, name):
+  """Returns a Markdown hyperlink pointing to the test source on GitHub."""
+  # Convert `//path/to/tests:test_suite` to `path/to/tests`
+  test_path = test_suite.split(':')[0]
+  test_path = test_path.replace('//', '')
+
+  if test_suite in SINGLE_SOURCE_SUITES:
+    test_name = SINGLE_SOURCE_SUITES[test_suite]
+  else:
+    test_name = name
+
+  test_url = os.path.join(MAIN_URL, test_path, f'{test_name}.py')
+  return f'[{name}]({test_url})'
+
+
+def generate_table(test_suite):
+  """Generates an e2e backend coverage Markdown table."""
+  all_names, passing, _ = get_suite_metadata(test_suite)
+
+  # Generate a dictionary mapping test names to their backend coverage.
+  table = collections.defaultdict(lambda: [False] * len(BACKENDS_TO_TITLES))
+  ordered_backends = list(BACKENDS_TO_TITLES.keys())
+  for name, backend in passing:
+    table[name][ordered_backends.index(backend)] = True
+
+  # Create a header for the coverage table.
+  ordered_backend_titles = list(BACKENDS_TO_TITLES.values())
+  first_row = ['target'] + ordered_backend_titles
+  second_row = [':-:' for _ in first_row]
+
+  # Generate the coverage table as a 2D array.
+  rows = [first_row, second_row]
+  for name, backends in sorted(table.items()):
+    row = [get_name_element(test_suite, name)]
+    row.extend([
+        SUCCESS_ELEMENT if backend else FAILURE_ELEMENT for backend in backends
+    ])
+    rows.append(row)
+  return create_markdown_table(rows)
+
+
+if __name__ == '__main__':
+  args = parse_arguments()
+
+  content = []
+  for test_suite, header in TEST_SUITES_TO_HEADERS.items():
+    content.append(f'## {header}')
+    content.append(generate_table(test_suite))
+  content = '\n\n'.join(content) + '\n'  # Trailing newline.
+
+  table_path = os.path.join(args.build_dir, 'doc', 'e2e_coverage.md')
+  with open(table_path, 'w', encoding='utf-8') as f:
+    f.write(E2E_COVERAGE_DESCRIPTION)
+    f.write(content)
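To make the flow above concrete, here is a minimal, hypothetical sketch (made-up target names; ✓/✗ stand in for SUCCESS_ELEMENT/FAILURE_ELEMENT): get_name_and_backend splits each passing target on the reference-backend marker, and create_markdown_table joins the rows with ' | ' and newlines.

    passing = ["conv_test__tf__iree_vmla", "conv_test__tf__iree_vulkan"]
    pairs = [t.split("__tf__") for t in passing]  # [['conv_test', 'iree_vmla'], ...]
    # The real script orders the ✓/✗ columns via BACKENDS_TO_TITLES; hard-coded here.
    rows = [["target", "tensorflow", "vmla", "llvm-ir", "vulkan-spirv"],
            [":-:"] * 5,
            ["conv_test", "✗", "✓", "✗", "✓"]]
    print("\n".join(" | ".join(row) for row in rows))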
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 69d2fa9..f0bab78 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 69d2fa9ed1c1aba6f473feb03cad257e69a0cf52
+Subproject commit f0bab7875e78e01c149d12302dcc4b6d4c43e25c
diff --git a/third_party/tensorflow b/third_party/tensorflow
index d04bf99..b331912 160000
--- a/third_party/tensorflow
+++ b/third_party/tensorflow
@@ -1 +1 @@
-Subproject commit d04bf998887fffe640ec8cacc9094574cd596f99
+Subproject commit b3319125a036aea6b7bbc0d1c50753e7be73be27